4 # Copyright (C) 2006, 2007 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Functions used by the node daemon"""
35 from ganeti import logger
36 from ganeti import errors
37 from ganeti import utils
38 from ganeti import ssh
39 from ganeti import hypervisor
40 from ganeti import constants
41 from ganeti import bdev
42 from ganeti import objects
43 from ganeti import ssconf
# Factory for a fresh SSH runner used by the verify/export paths below
# (the enclosing `def` line is not visible in this listing — confirm its
# name is _GetSshRunner against the full file; callers here use that name).
47 return ssh.SshRunner()
# Master-activation routine (its `def` line is missing from this listing).
51 """Activate local node as master node.
53 There are two needed steps for this:
54 - run the master script
55 - register the cron script
# Run the master start script; on failure only log — the return value
# handling (lines after 62) is not visible here.
58 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "start"])
61 logger.Error("could not activate cluster interface with command %s,"
62 " error: '%s'" % (result.cmd, result.output))
# Master-deactivation routine (its `def` line is missing from this listing);
# mirrors the activation routine above but runs the stop action.
69 """Deactivate this node as master.
71 This runs the master stop script.
74 result = utils.RunCmd([constants.MASTER_SCRIPT, "-d", "stop"])
77 logger.Error("could not deactivate cluster interface with command %s,"
78 " error: '%s'" % (result.cmd, result.output))
84 def AddNode(dsa, dsapub, rsa, rsapub, sshkey, sshpub):
85 """Joins this node to the cluster.
87 This does the following:
88 - updates the hostkeys of the machine (rsa and dsa)
89 - adds the ssh private key to the user
90 - adds the ssh public key to the users' authorized_keys file
# Overwrite the host keys; private keys are written 0600, public keys 0644.
93 sshd_keys = [(constants.SSH_HOST_RSA_PRIV, rsa, 0600),
94 (constants.SSH_HOST_RSA_PUB, rsapub, 0644),
95 (constants.SSH_HOST_DSA_PRIV, dsa, 0600),
96 (constants.SSH_HOST_DSA_PUB, dsapub, 0644)]
97 for name, content, mode in sshd_keys:
98 utils.WriteFile(name, data=content, mode=mode)
# NOTE(review): the `try:` opener (lines 99-100) and the second argument of
# GetUserFiles (line 102) are missing from this listing.
101 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS,
103 except errors.OpExecError, err:
104 logger.Error("Error while processing user ssh files: %s" % err)
# Install the cluster-wide user SSH key pair (always 0600) and authorize it.
107 for name, content in [(priv_key, sshkey), (pub_key, sshpub)]:
108 utils.WriteFile(name, data=content, mode=0600)
110 utils.AddAuthorizedKey(auth_keys, sshpub)
# Restart sshd so the new host keys take effect.
112 utils.RunCmd([constants.SSH_INITD_SCRIPT, "restart"])
# Cluster-leave routine (its `def` line is missing from this listing).
118 """Cleans up the current node and prepares it to be removed from the cluster.
# Remove all regular (non-symlink) files from the data directory.
121 if os.path.isdir(constants.DATA_DIR):
122 for rel_name in utils.ListVisibleFiles(constants.DATA_DIR):
123 full_name = os.path.join(constants.DATA_DIR, rel_name)
124 if os.path.isfile(full_name) and not os.path.islink(full_name):
125 utils.RemoveFile(full_name)
# NOTE(review): the `try:` opener before line 128 is missing from this listing.
128 priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
129 except errors.OpExecError, err:
130 logger.Error("Error while processing ssh files: %s" % err)
# Revoke our public key from authorized_keys, then delete the key pair.
# NOTE(review): the close of `f` (lines 136-138) is not visible here —
# confirm the full file closes it in a try/finally.
133 f = open(pub_key, 'r')
135 utils.RemoveAuthorizedKey(auth_keys, f.read(8192))
139 utils.RemoveFile(priv_key)
140 utils.RemoveFile(pub_key)
142 # Return a reassuring string to the caller, and quit
143 raise errors.QuitGanetiException(False, 'Shutdown scheduled')
146 def GetNodeInfo(vgname):
147 """Gives back a hash with different information about the node.
150 { 'vg_size' : xxx, 'vg_free' : xxx, 'memory_domain0': xxx,
151 'memory_free' : xxx, 'memory_total' : xxx }
153 vg_size is the size of the configured volume group in MiB
154 vg_free is the free size of the volume group in MiB
155 memory_dom0 is the memory allocated for domain0 in MiB
156 memory_free is the currently available (free) ram in MiB
157 memory_total is the total number of ram in MiB
# Volume-group data (size/free are None on error, per _GetVGInfo below).
161 vginfo = _GetVGInfo(vgname)
162 outputarray['vg_size'] = vginfo['vg_size']
163 outputarray['vg_free'] = vginfo['vg_free']
# Merge in hypervisor-reported node data (memory figures etc.).
165 hyper = hypervisor.GetHypervisor()
166 hyp_info = hyper.GetNodeInfo()
167 if hyp_info is not None:
168 outputarray.update(hyp_info)
# Kernel boot id lets callers detect node reboots between queries.
# NOTE(review): the close of `f` (line 173+) is not visible in this listing.
170 f = open("/proc/sys/kernel/random/boot_id", 'r')
172 outputarray["bootid"] = f.read(128).rstrip("\n")
179 def VerifyNode(what):
180 """Verify the status of the local node.
183 what - a dictionary of things to check:
184 'filelist' : list of files for which to compute checksums
185 'nodelist' : list of nodes we should check communication with
186 'hypervisor': run the hypervisor-specific verify
188 Requested files on local node are checksummed and the result returned.
190 The nodelist is traversed, with the following checks being made
192 - known_hosts key correct
193 - correct resolving of node name (target node returns its own hostname
194 by ssh-execution of 'hostname', result compared against name in list.
199 if 'hypervisor' in what:
200 result['hypervisor'] = hypervisor.GetHypervisor().Verify()
202 if 'filelist' in what:
203 result['filelist'] = utils.FingerprintFiles(what['filelist'])
205 if 'nodelist' in what:
206 result['nodelist'] = {}
# Shuffle so all nodes don't hammer the same peers in the same order.
207 random.shuffle(what['nodelist'])
208 for node in what['nodelist']:
209 success, message = _GetSshRunner().VerifyNodeHostname(node)
211 result['nodelist'][node] = message
# Network test: find our own primary/secondary IPs in the list, then
# TCP-ping every node's primary/secondary IP from the matching local IP.
212 if 'node-net-test' in what:
213 result['node-net-test'] = {}
214 my_name = utils.HostInfo().name
215 my_pip = my_sip = None
216 for name, pip, sip in what['node-net-test']:
# NOTE(review): lines 217-221 (matching `name == my_name` and assigning
# my_pip/my_sip) are missing from this listing.
222 result['node-net-test'][my_name] = ("Can't find my own"
223 " primary/secondary IP"
226 port = ssconf.SimpleStore().GetNodeDaemonPort()
227 for name, pip, sip in what['node-net-test']:
229 if not utils.TcpPing(pip, port, source=my_pip):
230 fail.append("primary")
232 if not utils.TcpPing(sip, port, source=my_sip):
233 fail.append("secondary")
235 result['node-net-test'][name] = ("failure using the %s"
242 def GetVolumeList(vg_name):
243 """Compute list of logical volumes and their size.
246 dictionary of all partitions (key) with their size (in MiB), inactive
248 {'test1': ('20.06', True, True)}
# Query LVM directly; megabyte units without suffix keep the sizes parseable.
253 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
254 "--separator=%s" % sep,
255 "-olv_name,lv_size,lv_attr", vg_name])
257 logger.Error("Failed to list logical volumes, lvs output: %s" %
# Expect "name|size|attrs" per line; attrs is the 6-char lv_attr field.
261 valid_line_re = re.compile("^ *([^|]+)\|([0-9.]+)\|([^|]{6})\|?$")
262 for line in result.stdout.splitlines():
264 match = valid_line_re.match(line)
266 logger.Error("Invalid line returned from lvs output: '%s'" % line)
268 name, size, attr = match.groups()
# lv_attr position 5 is the state flag ('-' = inactive), position 6 is
# the device-open flag ('o' = open).
269 inactive = attr[4] == '-'
270 online = attr[5] == 'o'
271 lvs[name] = (size, inactive, online)
276 def ListVolumeGroups():
277 """List the volume groups and their size.
280 Dictionary with keys volume name and values the size of the volume
# Thin wrapper — the real work is in utils.
283 return utils.ListVolumeGroups()
# Node-wide volume listing (its `def` line is missing from this listing).
287 """List all volumes on this node.
290 result = utils.RunCmd(["lvs", "--noheadings", "--units=m", "--nosuffix",
292 "--options=lv_name,lv_size,devices,vg_name"])
294 logger.Error("Failed to list logical volumes, lvs output: %s" %
# Strip the "(extent)" suffix lvs appends to the backing device name.
300 return dev.split('(')[0]
306 'name': line[0].strip(),
307 'size': line[1].strip(),
308 'dev': parse_dev(line[2].strip()),
309 'vg': line[3].strip(),
# One dict per lvs output line; lines with fewer than 4 fields are skipped.
312 return [map_line(line.split('|')) for line in result.stdout.splitlines()
313 if line.count('|') >= 3]
316 def BridgesExist(bridges_list):
317 """Check if a list of bridges exist on the current node.
320 True if all of them exist, false otherwise
323 for bridge in bridges_list:
# NOTE(review): the early `return False` and final `return True`
# (lines 325-327) are missing from this listing.
324 if not utils.BridgeExists(bridge):
330 def GetInstanceList():
331 """Provides a list of instances.
334 A list of all running instances on the current node
335 - instance1.example.com
336 - instance2.example.com
# NOTE(review): the `try:` opener before line 340 is missing from this listing.
340 names = hypervisor.GetHypervisor().ListInstances()
341 except errors.HypervisorError, err:
342 logger.Error("error enumerating instances: %s" % str(err))
348 def GetInstanceInfo(instance):
349 """Gives back the information about an instance as a dictionary.
352 instance: name of the instance (ex. instance1.example.com)
355 { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
357 memory: memory size of instance (int)
358 state: xen state of instance (string)
359 time: cpu time of instance (float)
# iinfo is a positional tuple from the hypervisor; indices 2/4/5 are
# memory/state/cpu-time respectively (see GetAllInstancesInfo below).
364 iinfo = hypervisor.GetHypervisor().GetInstanceInfo(instance)
365 if iinfo is not None:
366 output['memory'] = iinfo[2]
367 output['state'] = iinfo[4]
368 output['time'] = iinfo[5]
373 def GetAllInstancesInfo():
374 """Gather data about all instances.
376 This is the equivalent of `GetInstanceInfo()`, except that it
377 computes data for all instances at once, thus being faster if one
378 needs data about more than one instance.
380 Returns: a dictionary of dictionaries, keys being the instance name,
382 { 'memory' : 511, 'state' : '-b---', 'time' : 3188.8, }
384 memory: memory size of instance (int)
385 state: xen state of instance (string)
386 time: cpu time of instance (float)
387 vcpus: the number of cpus
392 iinfo = hypervisor.GetHypervisor().GetAllInstancesInfo()
# NOTE(review): the per-instance dict construction (lines 395+) is missing
# from this listing.
394 for name, inst_id, memory, vcpus, state, times in iinfo:
405 def AddOSToInstance(instance, os_disk, swap_disk):
406 """Add an OS to an instance.
409 instance: the instance object
410 os_disk: the instance-visible name of the os device
411 swap_disk: the instance-visible name of the swap device
# Resolve the OS definition and its create script from disk.
414 inst_os = OSFromDisk(instance.os)
416 create_script = inst_os.create_script
# Map the instance-visible disk names to config disk objects...
418 os_device = instance.FindDisk(os_disk)
419 if os_device is None:
420 logger.Error("Can't find this device-visible name '%s'" % os_disk)
423 swap_device = instance.FindDisk(swap_disk)
424 if swap_device is None:
425 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
# ...and then to the activated block devices on this node.
428 real_os_dev = _RecursiveFindBD(os_device)
429 if real_os_dev is None:
430 raise errors.BlockDeviceError("Block device '%s' is not set up" %
434 real_swap_dev = _RecursiveFindBD(swap_device)
435 if real_swap_dev is None:
436 raise errors.BlockDeviceError("Block device '%s' is not set up" %
# Per-run log file so failed OS creations can be debugged after the fact.
440 logfile = "%s/add-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
441 instance.name, int(time.time()))
442 if not os.path.exists(constants.LOG_OS_DIR):
443 os.mkdir(constants.LOG_OS_DIR, 0750)
# Shell command (needs &> redirection); all args are shell-quoted by
# BuildShellCmd.
445 command = utils.BuildShellCmd("cd %s && %s -i %s -b %s -s %s &>%s",
446 inst_os.path, create_script, instance.name,
447 real_os_dev.dev_path, real_swap_dev.dev_path,
450 result = utils.RunCmd(command)
452 logger.Error("os create command '%s' returned error: %s, logfile: %s,"
454 (command, result.fail_reason, logfile, result.output))
460 def RunRenameInstance(instance, old_name, os_disk, swap_disk):
461 """Run the OS rename script for an instance.
464 instance: the instance object
465 old_name: the old name of the instance
466 os_disk: the instance-visible name of the os device
467 swap_disk: the instance-visible name of the swap device
# Same resolution dance as AddOSToInstance, but using the rename script.
470 inst_os = OSFromDisk(instance.os)
472 script = inst_os.rename_script
474 os_device = instance.FindDisk(os_disk)
475 if os_device is None:
476 logger.Error("Can't find this device-visible name '%s'" % os_disk)
479 swap_device = instance.FindDisk(swap_disk)
480 if swap_device is None:
481 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
484 real_os_dev = _RecursiveFindBD(os_device)
485 if real_os_dev is None:
486 raise errors.BlockDeviceError("Block device '%s' is not set up" %
490 real_swap_dev = _RecursiveFindBD(swap_device)
491 if real_swap_dev is None:
492 raise errors.BlockDeviceError("Block device '%s' is not set up" %
496 logfile = "%s/rename-%s-%s-%s-%d.log" % (constants.LOG_OS_DIR, instance.os,
498 instance.name, int(time.time()))
499 if not os.path.exists(constants.LOG_OS_DIR):
500 os.mkdir(constants.LOG_OS_DIR, 0750)
# -o/-n pass the old and new instance names to the OS rename script.
502 command = utils.BuildShellCmd("cd %s && %s -o %s -n %s -b %s -s %s &>%s",
503 inst_os.path, script, old_name, instance.name,
504 real_os_dev.dev_path, real_swap_dev.dev_path,
507 result = utils.RunCmd(command)
# NOTE(review): message says "os create" but this is the rename path —
# looks copy-pasted from AddOSToInstance; confirm and fix in the full file.
510 logger.Error("os create command '%s' returned error: %s"
512 (command, result.fail_reason, result.output))
518 def _GetVGInfo(vg_name):
519 """Get information about the volume group.
522 vg_name: the volume group
525 { 'vg_size' : xxx, 'vg_free' : xxx, 'pv_count' : xxx }
527 vg_size is the total size of the volume group in MiB
528 vg_free is the free size of the volume group in MiB
529 pv_count are the number of physical disks in that vg
531 If an error occurs during gathering of data, we return the same dict
532 with keys all set to None.
# Start with all-None values so every error path returns the same shape.
535 retdic = dict.fromkeys(["vg_size", "vg_free", "pv_count"])
537 retval = utils.RunCmd(["vgs", "-ovg_size,vg_free,pv_count", "--noheadings",
538 "--nosuffix", "--units=m", "--separator=:", vg_name])
541 errmsg = "volume group %s not present" % vg_name
# vgs output is colon-separated; strip a possible trailing separator.
544 valarr = retval.stdout.strip().rstrip(':').split(':')
# Sizes are reported as floats of MiB; round to whole MiB for the caller.
548 "vg_size": int(round(float(valarr[0]), 0)),
549 "vg_free": int(round(float(valarr[1]), 0)),
550 "pv_count": int(valarr[2]),
552 except ValueError, err:
553 logger.Error("Fail to parse vgs output: %s" % str(err))
555 logger.Error("vgs output has the wrong number of fields (expected"
556 " three): %s" % str(valarr))
560 def _GatherBlockDevs(instance):
561 """Set up an instance's block device(s).
563 This is run on the primary node at instance startup. The block
564 devices must be already assembled.
# Pair each config disk with its already-activated device; a disk whose
# device cannot be found is a hard error (line 570, not visible here).
568 for disk in instance.disks:
569 device = _RecursiveFindBD(disk)
571 raise errors.BlockDeviceError("Block device '%s' is not set up." %
574 block_devices.append((disk, device))
578 def StartInstance(instance, extra_args):
579 """Start an instance.
582 instance - name of instance to start.
# Starting an already-running instance is a no-op (early return on
# line 588, not visible here).
585 running_instances = GetInstanceList()
587 if instance.name in running_instances:
590 block_devices = _GatherBlockDevs(instance)
591 hyper = hypervisor.GetHypervisor()
# NOTE(review): the `try:` opener before line 594 is missing from this listing.
594 hyper.StartInstance(instance, block_devices, extra_args)
595 except errors.HypervisorError, err:
596 logger.Error("Failed to start instance: %s" % err)
602 def ShutdownInstance(instance):
603 """Shut an instance down.
606 instance - name of instance to shutdown.
# Shutting down a non-running instance is a no-op.
609 running_instances = GetInstanceList()
611 if instance.name not in running_instances:
614 hyper = hypervisor.GetHypervisor()
616 hyper.StopInstance(instance)
617 except errors.HypervisorError, err:
618 logger.Error("Failed to stop instance: %s" % err)
621 # test every 10secs for 2min
# Poll until the hypervisor no longer reports the instance.
625 for dummy in range(11):
626 if instance.name not in GetInstanceList():
630 # the shutdown did not succeed
631 logger.Error("shutdown of '%s' unsuccessful, using destroy" % instance)
# Graceful stop failed; escalate to a forced destroy.
634 hyper.StopInstance(instance, force=True)
635 except errors.HypervisorError, err:
636 logger.Error("Failed to stop instance: %s" % err)
640 if instance.name in GetInstanceList():
# NOTE(review): the format string below has a %s but no argument —
# `% instance.name` appears to be missing; fix in the full file.
641 logger.Error("could not shutdown instance '%s' even by destroy")
647 def RebootInstance(instance, reboot_type, extra_args):
648 """Reboot an instance.
651 instance - name of instance to reboot
652 reboot_type - how to reboot [soft,hard,full]
655 running_instances = GetInstanceList()
657 if instance.name not in running_instances:
658 logger.Error("Cannot reboot instance that is not running")
661 hyper = hypervisor.GetHypervisor()
# Soft reboot: delegate to the hypervisor's own reboot mechanism.
662 if reboot_type == constants.INSTANCE_REBOOT_SOFT:
664 hyper.RebootInstance(instance)
665 except errors.HypervisorError, err:
666 logger.Error("Failed to soft reboot instance: %s" % err)
# Hard reboot: full stop + start cycle through our own helpers.
668 elif reboot_type == constants.INSTANCE_REBOOT_HARD:
670 ShutdownInstance(instance)
671 StartInstance(instance, extra_args)
672 except errors.HypervisorError, err:
673 logger.Error("Failed to hard reboot instance: %s" % err)
# Any other value (including INSTANCE_REBOOT_FULL) is rejected here.
676 raise errors.ParameterError("reboot_type invalid")
682 def MigrateInstance(instance, target, live):
683 """Migrates an instance to another node.
686 hyper = hypervisor.GetHypervisor()
689 hyper.MigrateInstance(instance, target, live)
690 except errors.HypervisorError, err:
691 msg = "Failed to migrate instance: %s" % str(err)
# NOTE(review): "successfull" is a typo, but this is a runtime string
# returned to callers — left untouched here; fix with callers in mind.
694 return (True, "Migration successfull")
697 def CreateBlockDevice(disk, size, owner, on_primary, info):
698 """Creates a block device for an instance.
701 disk: a ganeti.objects.Disk object
702 size: the size of the physical underlying device
703 owner: a string with the name of the instance
704 on_primary: a boolean indicating if it is the primary node or not
705 info: string that will be sent to the physical device creation
708 the new unique_id of the device (this can sometime be
709 computed only after creation), or None. On secondary nodes,
710 it's not required to return anything.
# Recursively create/assemble children first; they must be available
# before the parent device can be created on top of them.
715 for child in disk.children:
716 crdev = _RecursiveAssembleBD(child, owner, on_primary)
717 if on_primary or disk.AssembleOnSecondary():
718 # we need the children open in case the device itself has to
# If a stale device with the same id already exists, remove it first.
723 device = bdev.FindDevice(disk.dev_type, disk.physical_id, clist)
724 if device is not None:
725 logger.Info("removing existing device %s" % disk)
727 except errors.BlockDeviceError, err:
730 device = bdev.Create(disk.dev_type, disk.physical_id,
733 raise ValueError("Can't create child device for %s, %s" %
# A freshly-created device must assemble cleanly; anything else points
# at a node-level problem worth escalating.
735 if on_primary or disk.AssembleOnSecondary():
736 if not device.Assemble():
737 errorstring = "Can't assemble device after creation"
738 logger.Error(errorstring)
739 raise errors.BlockDeviceError("%s, very unusual event - check the node"
740 " daemon logs" % errorstring)
741 device.SetSyncSpeed(constants.SYNC_SPEED)
742 if on_primary or disk.OpenOnSecondary():
743 device.Open(force=True)
# Record the device in the node-local cache for faster later lookups.
744 DevCacheManager.UpdateCache(device.dev_path, owner,
745 on_primary, disk.iv_name)
749 physical_id = device.unique_id
753 def RemoveBlockDevice(disk):
754 """Remove a block device.
756 This is intended to be called recursively.
760 # since we are removing the device, allow a partial match
761 # this allows removal of broken mirrors
762 rdev = _RecursiveFindBD(disk, allow_partial=True)
763 except errors.BlockDeviceError, err:
764 # probably can't attach
765 logger.Info("Can't attach to device %s in remove" % disk)
# Remove the device itself, then drop it from the node-local cache.
768 r_path = rdev.dev_path
769 result = rdev.Remove()
771 DevCacheManager.RemoveCache(r_path)
# Recurse into children; overall result is the AND of all removals.
775 for child in disk.children:
776 result = result and RemoveBlockDevice(child)
780 def _RecursiveAssembleBD(disk, owner, as_primary):
781 """Activate a block device for an instance.
783 This is run on the primary and secondary nodes for an instance.
785 This function is called recursively.
788 disk: a objects.Disk object
789 as_primary: if we should make the block device read/write
792 the assembled device or None (in case no device was assembled)
794 If the assembly is not successful, an exception is raised.
# mcn below is the maximum number of children that may fail (None)
# before the whole assembly is considered failed.
799 mcn = disk.ChildrenNeeded()
801 mcn = 0 # max number of Nones allowed
803 mcn = len(disk.children) - mcn # max number of Nones
804 for chld_disk in disk.children:
806 cdev = _RecursiveAssembleBD(chld_disk, owner, as_primary)
807 except errors.BlockDeviceError, err:
# Too many failed children: give up (re-raise path on lines 809-810,
# not visible here); otherwise tolerate the failure and record None.
808 if children.count(None) >= mcn:
811 logger.Debug("Error in child activation: %s" % str(err))
812 children.append(cdev)
814 if as_primary or disk.AssembleOnSecondary():
815 r_dev = bdev.AttachOrAssemble(disk.dev_type, disk.physical_id, children)
816 r_dev.SetSyncSpeed(constants.SYNC_SPEED)
818 if as_primary or disk.OpenOnSecondary():
820 DevCacheManager.UpdateCache(r_dev.dev_path, owner,
821 as_primary, disk.iv_name)
828 def AssembleBlockDevice(disk, owner, as_primary):
829 """Activate a block device for an instance.
831 This is a wrapper over _RecursiveAssembleBD.
834 a /dev path for primary nodes
835 True for secondary nodes
838 result = _RecursiveAssembleBD(disk, owner, as_primary)
# On the primary the caller needs the /dev path, not the device object.
839 if isinstance(result, bdev.BlockDev):
840 result = result.dev_path
844 def ShutdownBlockDevice(disk):
845 """Shut down a block device.
847 First, if the device is assembled (can `Attach()`), then the device
848 is shutdown. Then the children of the device are shutdown.
850 This function is called recursively. Note that we don't cache the
851 children or such, as opposed to assemble, shutdown of different
852 devices doesn't require that the upper device was active.
# Shut the top device first (if it is even findable), then its children.
855 r_dev = _RecursiveFindBD(disk)
856 if r_dev is not None:
857 r_path = r_dev.dev_path
858 result = r_dev.Shutdown()
860 DevCacheManager.RemoveCache(r_path)
# Overall result is the AND of this device's and all children's shutdowns.
864 for child in disk.children:
865 result = result and ShutdownBlockDevice(child)
869 def MirrorAddChildren(parent_cdev, new_cdevs):
870 """Extend a mirrored block device.
# Partial match allowed: the parent may be a degraded mirror that we are
# in the middle of repairing.
873 parent_bdev = _RecursiveFindBD(parent_cdev, allow_partial=True)
874 if parent_bdev is None:
875 logger.Error("Can't find parent device")
877 new_bdevs = [_RecursiveFindBD(disk) for disk in new_cdevs]
878 if new_bdevs.count(None) > 0:
879 logger.Error("Can't find new device(s) to add: %s:%s" %
880 (new_bdevs, new_cdevs))
882 parent_bdev.AddChildren(new_bdevs)
886 def MirrorRemoveChildren(parent_cdev, new_cdevs):
887 """Shrink a mirrored block device.
890 parent_bdev = _RecursiveFindBD(parent_cdev)
891 if parent_bdev is None:
892 logger.Error("Can't find parent in remove children: %s" % parent_cdev)
# Build the list of /dev paths to detach: prefer a disk's static path,
# otherwise resolve the live device dynamically.
895 for disk in new_cdevs:
896 rpath = disk.StaticDevPath()
898 bd = _RecursiveFindBD(disk)
900 logger.Error("Can't find dynamic device %s while removing children" %
904 devs.append(bd.dev_path)
907 parent_bdev.RemoveChildren(devs)
911 def GetMirrorStatus(disks):
912 """Get the mirroring status of a list of devices.
915 disks: list of `objects.Disk`
918 list of (mirror_done, estimated_time) tuples, which
919 are the result of bdev.BlockDevice.CombinedSyncStatus()
# A missing device is a hard error — callers rely on one status per disk.
924 rbd = _RecursiveFindBD(dsk)
926 raise errors.BlockDeviceError("Can't find device %s" % str(dsk))
927 stats.append(rbd.CombinedSyncStatus())
931 def _RecursiveFindBD(disk, allow_partial=False):
932 """Check if a device is activated.
934 If so, return information about the real device.
937 disk: the objects.Disk instance
938 allow_partial: don't abort the find if a child of the
939 device can't be found; this is intended to be
940 used when repairing mirrors
943 None if the device can't be found
944 otherwise the device instance
# Resolve children first, then attach to the device built on top of them.
949 for chdisk in disk.children:
950 children.append(_RecursiveFindBD(chdisk))
952 return bdev.FindDevice(disk.dev_type, disk.physical_id, children)
955 def FindBlockDevice(disk):
956 """Check if a device is activated.
958 If so, return information about the real device.
961 disk: the objects.Disk instance
963 None if the device can't be found
964 (device_path, major, minor, sync_percent, estimated_time, is_degraded)
967 rbd = _RecursiveFindBD(disk)
# Flatten path/major/minor plus the sync-status tuple into one tuple.
970 return (rbd.dev_path, rbd.major, rbd.minor) + rbd.GetSyncStatus()
973 def UploadFile(file_name, data, mode, uid, gid, atime, mtime):
974 """Write a file to the filesystem.
976 This allows the master to overwrite(!) a file. It will only perform
977 the operation if the file belongs to a list of configuration files.
# Security gate: only absolute paths from a fixed allow-list may be
# overwritten by the master.
980 if not os.path.isabs(file_name):
981 logger.Error("Filename passed to UploadFile is not absolute: '%s'" %
986 constants.CLUSTER_CONF_FILE,
988 constants.SSH_KNOWN_HOSTS_FILE,
989 constants.VNC_PASSWORD_FILE,
# The ssconf file list is part of the allow-list as well.
991 allowed_files.extend(ssconf.SimpleStore().GetFileList())
992 if file_name not in allowed_files:
993 logger.Error("Filename passed to UploadFile not in allowed"
994 " upload targets: '%s'" % file_name)
997 utils.WriteFile(file_name, data=data, mode=mode, uid=uid, gid=gid,
998 atime=atime, mtime=mtime)
1002 def _ErrnoOrStr(err):
1003 """Format an EnvironmentError exception.
1005 If the `err` argument has an errno attribute, it will be looked up
1006 and converted into a textual EXXXX description. Otherwise the string
1007 representation of the error will be returned.
# NOTE(review): the else branch / return (lines 1012+) are missing from
# this listing.
1010 if hasattr(err, 'errno'):
1011 detail = errno.errorcode[err.errno]
1017 def _OSOndiskVersion(name, os_dir):
1018 """Compute and return the API version of a given OS.
1020 This function will try to read the API version of the os given by
1021 the 'name' parameter and residing in the 'os_dir' directory.
1023 Return value will be either an integer denoting the version or None in the
1024 case when this is not a valid OS name.
1027 api_file = os.path.sep.join([os_dir, "ganeti_api_version"])
# The version file must exist and be a regular file before we read it.
1030 st = os.stat(api_file)
1031 except EnvironmentError, err:
1032 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file not"
1033 " found (%s)" % _ErrnoOrStr(err))
1035 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1036 raise errors.InvalidOS(name, os_dir, "'ganeti_api_version' file is not"
# Bounded read: 256 bytes is more than enough for a version number.
1042 api_version = f.read(256)
1045 except EnvironmentError, err:
1046 raise errors.InvalidOS(name, os_dir, "error while reading the"
1047 " API version (%s)" % _ErrnoOrStr(err))
1049 api_version = api_version.strip()
1051 api_version = int(api_version)
1052 except (TypeError, ValueError), err:
1053 raise errors.InvalidOS(name, os_dir,
1054 "API version is not integer (%s)" % str(err))
1059 def DiagnoseOS(top_dirs=None):
1060 """Compute the validity for all OSes.
1062 Returns an OS object for each name in all the given top directories
1063 (if not given defaults to constants.OS_SEARCH_PATH)
1069 if top_dirs is None:
1070 top_dirs = constants.OS_SEARCH_PATH
# Walk every search directory; unreadable directories are logged and
# skipped rather than failing the whole diagnosis.
1073 for dir_name in top_dirs:
1074 if os.path.isdir(dir_name):
1076 f_names = utils.ListVisibleFiles(dir_name)
1077 except EnvironmentError, err:
1078 logger.Error("Can't list the OS directory %s: %s" %
1079 (dir_name, str(err)))
# Invalid OS definitions are reported as objects too, not as errors.
1081 for name in f_names:
1083 os_inst = OSFromDisk(name, base_dir=dir_name)
1084 result.append(os_inst)
1085 except errors.InvalidOS, err:
1086 result.append(objects.OS.FromInvalidOS(err))
1091 def OSFromDisk(name, base_dir=None):
1092 """Create an OS instance from disk.
1094 This function will return an OS instance if the given name is a
1095 valid OS name. Otherwise, it will raise an appropriate
1096 `errors.InvalidOS` exception, detailing why this is not a valid
1100 os_dir: Directory containing the OS scripts. Defaults to a search
1101 in all the OS_SEARCH_PATH directories.
# Locate the OS directory: either searched in OS_SEARCH_PATH or built
# directly under the given base_dir.
1105 if base_dir is None:
1106 os_dir = utils.FindFile(name, constants.OS_SEARCH_PATH, os.path.isdir)
1108 raise errors.InvalidOS(name, None, "OS dir not found in search path")
1110 os_dir = os.path.sep.join([base_dir, name])
# Only the exact API version this node speaks is accepted.
1112 api_version = _OSOndiskVersion(name, os_dir)
1114 if api_version != constants.OS_API_VERSION:
1115 raise errors.InvalidOS(name, os_dir, "API version mismatch"
1116 " (found %s want %s)"
1117 % (api_version, constants.OS_API_VERSION))
1119 # OS Scripts dictionary, we will populate it with the actual script names
1120 os_scripts = {'create': '', 'export': '', 'import': '', 'rename': ''}
# Every script must exist, be a regular file, and be owner-executable.
1122 for script in os_scripts:
1123 os_scripts[script] = os.path.sep.join([os_dir, script])
1126 st = os.stat(os_scripts[script])
1127 except EnvironmentError, err:
1128 raise errors.InvalidOS(name, os_dir, "'%s' script missing (%s)" %
1129 (script, _ErrnoOrStr(err)))
1131 if stat.S_IMODE(st.st_mode) & stat.S_IXUSR != stat.S_IXUSR:
1132 raise errors.InvalidOS(name, os_dir, "'%s' script not executable" %
1135 if not stat.S_ISREG(stat.S_IFMT(st.st_mode)):
1136 raise errors.InvalidOS(name, os_dir, "'%s' is not a regular file" %
1140 return objects.OS(name=name, path=os_dir, status=constants.OS_VALID_STATUS,
1141 create_script=os_scripts['create'],
1142 export_script=os_scripts['export'],
1143 import_script=os_scripts['import'],
1144 rename_script=os_scripts['rename'],
1145 api_version=api_version)
1148 def GrowBlockDevice(disk, amount):
1149 """Grow a stack of block devices.
1151 This function is called recursively, with the childrens being the
1155 disk: the disk to be grown
1157 Returns: a tuple of (status, result), with:
1158 status: the result (true/false) of the operation
1159 result: the error message if the operation failed, otherwise not used
1162 r_dev = _RecursiveFindBD(disk)
1164 return False, "Cannot find block device %s" % (disk,)
# Growth errors are converted to a (False, message) result rather than
# propagating to the caller.
1168 except errors.BlockDeviceError, err:
1169 return False, str(err)
1174 def SnapshotBlockDevice(disk):
1175 """Create a snapshot copy of a block device.
1177 This function is called recursively, and the snapshot is actually created
1178 just for the leaf lvm backend device.
1181 disk: the disk to be snapshotted
1184 a config entry for the actual lvm device snapshotted.
# Recurse down through composite devices until we reach an LV leaf.
1188 if len(disk.children) == 1:
1189 # only one child, let's recurse on it
1190 return SnapshotBlockDevice(disk.children[0])
1192 # more than one child, choose one that matches
1193 for child in disk.children:
1194 if child.size == disk.size:
1195 # return implies breaking the loop
1196 return SnapshotBlockDevice(child)
1197 elif disk.dev_type == constants.LD_LV:
1198 r_dev = _RecursiveFindBD(disk)
1199 if r_dev is not None:
1200 # let's stay on the safe side and ask for the full size, for now
1201 return r_dev.Snapshot(disk.size)
# Anything else is a programming error — only LV leaves can be snapshotted.
1205 raise errors.ProgrammerError("Cannot snapshot non-lvm block device"
1206 " '%s' of type '%s'" %
1207 (disk.unique_id, disk.dev_type))
1210 def ExportSnapshot(disk, dest_node, instance):
1211 """Export a block device snapshot to a remote node.
1214 disk: the snapshot block device
1215 dest_node: the node to send the image to
1216 instance: instance being exported
1219 True if successful, False otherwise.
1222 inst_os = OSFromDisk(instance.os)
1223 export_script = inst_os.export_script
1225 logfile = "%s/exp-%s-%s-%s.log" % (constants.LOG_OS_DIR, inst_os.name,
1226 instance.name, int(time.time()))
1227 if not os.path.exists(constants.LOG_OS_DIR):
1228 os.mkdir(constants.LOG_OS_DIR, 0750)
1230 real_os_dev = _RecursiveFindBD(disk)
1231 if real_os_dev is None:
1232 raise errors.BlockDeviceError("Block device '%s' is not set up" %
# Export lands in a ".new" staging dir; FinalizeExport moves it in place.
1236 destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1237 destfile = disk.physical_id[1]
1239 # the target command is built out of three individual commands,
1240 # which are joined by pipes; we check each individual command for
# Local half: OS export script reading the snapshot, stderr to the log.
1243 expcmd = utils.BuildShellCmd("cd %s; %s -i %s -b %s 2>%s", inst_os.path,
1244 export_script, instance.name,
1245 real_os_dev.dev_path, logfile)
# Remote half: create the staging dir and stream the dump into it via ssh.
1249 destcmd = utils.BuildShellCmd("mkdir -p %s && cat > %s/%s",
1250 destdir, destdir, destfile)
1251 remotecmd = _GetSshRunner().BuildCmd(dest_node, constants.GANETI_RUNAS,
1254 # all commands have been checked, so we're safe to combine them
1255 command = '|'.join([expcmd, comprcmd, utils.ShellQuoteArgs(remotecmd)])
1257 result = utils.RunCmd(command)
1260 logger.Error("os snapshot export command '%s' returned error: %s"
1262 (command, result.fail_reason, result.output))
1268 def FinalizeExport(instance, snap_disks):
1269 """Write out the export configuration information.
1272 instance: instance configuration
1273 snap_disks: snapshot block devices
1276 False in case of error, True otherwise.
# Write into the ".new" staging dir, then atomically swap it in below.
1279 destdir = os.path.join(constants.EXPORT_DIR, instance.name + ".new")
1280 finaldestdir = os.path.join(constants.EXPORT_DIR, instance.name)
1282 config = objects.SerializableConfigParser()
# Export-level metadata section.
1284 config.add_section(constants.INISECT_EXP)
1285 config.set(constants.INISECT_EXP, 'version', '0')
1286 config.set(constants.INISECT_EXP, 'timestamp', '%d' % int(time.time()))
1287 config.set(constants.INISECT_EXP, 'source', instance.primary_node)
1288 config.set(constants.INISECT_EXP, 'os', instance.os)
1289 config.set(constants.INISECT_EXP, 'compression', 'gzip')
# Instance-level section: basic sizing plus per-NIC and per-disk entries.
1291 config.add_section(constants.INISECT_INS)
1292 config.set(constants.INISECT_INS, 'name', instance.name)
1293 config.set(constants.INISECT_INS, 'memory', '%d' % instance.memory)
1294 config.set(constants.INISECT_INS, 'vcpus', '%d' % instance.vcpus)
1295 config.set(constants.INISECT_INS, 'disk_template', instance.disk_template)
1298 for nic_count, nic in enumerate(instance.nics):
1299 config.set(constants.INISECT_INS, 'nic%d_mac' %
1300 nic_count, '%s' % nic.mac)
1301 config.set(constants.INISECT_INS, 'nic%d_ip' % nic_count, '%s' % nic.ip)
1302 config.set(constants.INISECT_INS, 'nic%d_bridge' % nic_count, '%s' % nic.bridge)
1303 # TODO: redundant: on load can read nics until it doesn't exist
# NOTE(review): nic_count/disk_count hold the LAST enumerate index, not the
# count — the importer must agree on this off-by-one convention; confirm.
1304 config.set(constants.INISECT_INS, 'nic_count' , '%d' % nic_count)
1307 for disk_count, disk in enumerate(snap_disks):
1308 config.set(constants.INISECT_INS, 'disk%d_ivname' % disk_count,
1309 ('%s' % disk.iv_name))
1310 config.set(constants.INISECT_INS, 'disk%d_dump' % disk_count,
1311 ('%s' % disk.physical_id[1]))
1312 config.set(constants.INISECT_INS, 'disk%d_size' % disk_count,
1314 config.set(constants.INISECT_INS, 'disk_count' , '%d' % disk_count)
1316 cff = os.path.join(destdir, constants.EXPORT_CONF_FILE)
1317 cfo = open(cff, 'w')
# Replace any previous export for this instance with the staged one.
1323 shutil.rmtree(finaldestdir, True)
1324 shutil.move(destdir, finaldestdir)
def ExportInfo(dest):
  """Get export configuration information.

  Args:
    dest: directory containing the export

  Returns:
    A serializable config file containing the export info, or None if
    the export is invalid (mandatory sections missing).

  """
  cff = os.path.join(dest, constants.EXPORT_CONF_FILE)

  config = objects.SerializableConfigParser()
  config.read(cff)

  # both the export and the instance sections are mandatory; a missing
  # or unreadable file yields an empty parser and fails this check too
  if (not config.has_section(constants.INISECT_EXP) or
      not config.has_section(constants.INISECT_INS)):
    return None

  return config
1351 def ImportOSIntoInstance(instance, os_disk, swap_disk, src_node, src_image):
1352 """Import an os image into an instance.
1355 instance: the instance object
1356 os_disk: the instance-visible name of the os device
1357 swap_disk: the instance-visible name of the swap device
1358 src_node: node holding the source image
1359 src_image: path to the source image on src_node
1362 False in case of error, True otherwise.
1365 inst_os = OSFromDisk(instance.os)
1366 import_script = inst_os.import_script
1368 os_device = instance.FindDisk(os_disk)
1369 if os_device is None:
1370 logger.Error("Can't find this device-visible name '%s'" % os_disk)
1373 swap_device = instance.FindDisk(swap_disk)
1374 if swap_device is None:
1375 logger.Error("Can't find this device-visible name '%s'" % swap_disk)
1378 real_os_dev = _RecursiveFindBD(os_device)
1379 if real_os_dev is None:
1380 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1384 real_swap_dev = _RecursiveFindBD(swap_device)
1385 if real_swap_dev is None:
1386 raise errors.BlockDeviceError("Block device '%s' is not set up" %
1388 real_swap_dev.Open()
1390 logfile = "%s/import-%s-%s-%s.log" % (constants.LOG_OS_DIR, instance.os,
1391 instance.name, int(time.time()))
1392 if not os.path.exists(constants.LOG_OS_DIR):
1393 os.mkdir(constants.LOG_OS_DIR, 0750)
1395 destcmd = utils.BuildShellCmd('cat %s', src_image)
1396 remotecmd = _GetSshRunner().BuildCmd(src_node, constants.GANETI_RUNAS,
1400 impcmd = utils.BuildShellCmd("(cd %s; %s -i %s -b %s -s %s &>%s)",
1401 inst_os.path, import_script, instance.name,
1402 real_os_dev.dev_path, real_swap_dev.dev_path,
1405 command = '|'.join([utils.ShellQuoteArgs(remotecmd), comprcmd, impcmd])
1407 result = utils.RunCmd(command)
1410 logger.Error("os import command '%s' returned error: %s"
1412 (command, result.fail_reason, result.output))
def ListExports():
  """Return a list of exports currently available on this machine.

  Returns:
    list of visible entries under EXPORT_DIR, or an empty list when the
    directory does not exist (node has no exports yet).

  """
  if os.path.isdir(constants.EXPORT_DIR):
    return utils.ListVisibleFiles(constants.EXPORT_DIR)
  else:
    return []
1428 def RemoveExport(export):
1429 """Remove an existing export from the node.
1432 export: the name of the export to remove
1435 False in case of error, True otherwise.
1438 target = os.path.join(constants.EXPORT_DIR, export)
1440 shutil.rmtree(target)
1441 # TODO: catch some of the relevant exceptions and provide a pretty
1442 # error message if rmtree fails.
1447 def RenameBlockDevices(devlist):
1448 """Rename a list of block devices.
1450 The devlist argument is a list of tuples (disk, new_logical,
1451 new_physical). The return value will be a combined boolean result
1452 (True only if all renames succeeded).
1456 for disk, unique_id in devlist:
1457 dev = _RecursiveFindBD(disk)
1462 old_rpath = dev.dev_path
1463 dev.Rename(unique_id)
1464 new_rpath = dev.dev_path
1465 if old_rpath != new_rpath:
1466 DevCacheManager.RemoveCache(old_rpath)
1467 # FIXME: we should add the new cache information here, like:
1468 # DevCacheManager.UpdateCache(new_rpath, owner, ...)
1469 # but we don't have the owner here - maybe parse from existing
1470 # cache? for now, we only lose lvm data when we rename, which
1471 # is less critical than DRBD or MD
1472 except errors.BlockDeviceError, err:
1473 logger.Error("Can't rename device '%s' to '%s': %s" %
1474 (dev, unique_id, err))
def _TransformFileStorageDir(file_storage_dir):
  """Checks whether given file_storage_dir is valid.

  Checks whether the given file_storage_dir is within the cluster-wide
  default file_storage_dir stored in SimpleStore. Only paths under that
  directory are allowed.

  Args:
    file_storage_dir: string with path

  Returns:
    normalized file_storage_dir (string) if valid, None otherwise

  """
  file_storage_dir = os.path.normpath(file_storage_dir)
  base_file_storage_dir = ssconf.SimpleStore().GetFileStorageDir()
  # os.path.commonprefix works character-wise, so it would accept e.g.
  # '/srv/storage-evil' for base '/srv/storage'; require either an exact
  # match or a path-component prefix instead
  if (file_storage_dir != base_file_storage_dir and
      not file_storage_dir.startswith(base_file_storage_dir + os.sep)):
    logger.Error("file storage directory '%s' is not under base file"
                 " storage directory '%s'" %
                 (file_storage_dir, base_file_storage_dir))
    return None
  return file_storage_dir
1504 def CreateFileStorageDir(file_storage_dir):
1505 """Create file storage directory.
1508 file_storage_dir: string containing the path
1511 tuple with first element a boolean indicating wheter dir
1512 creation was successful or not
1515 file_storage_dir = _TransformFileStorageDir(file_storage_dir)
1517 if not file_storage_dir:
1520 if os.path.exists(file_storage_dir):
1521 if not os.path.isdir(file_storage_dir):
1522 logger.Error("'%s' is not a directory" % file_storage_dir)
1526 os.makedirs(file_storage_dir, 0750)
1527 except OSError, err:
1528 logger.Error("Cannot create file storage directory '%s': %s" %
1529 (file_storage_dir, err))
1534 def RemoveFileStorageDir(file_storage_dir):
1535 """Remove file storage directory.
1537 Remove it only if it's empty. If not log an error and return.
1540 file_storage_dir: string containing the path
1543 tuple with first element a boolean indicating wheter dir
1544 removal was successful or not
1547 file_storage_dir = _TransformFileStorageDir(file_storage_dir)
1549 if not file_storage_dir:
1552 if os.path.exists(file_storage_dir):
1553 if not os.path.isdir(file_storage_dir):
1554 logger.Error("'%s' is not a directory" % file_storage_dir)
1556 # deletes dir only if empty, otherwise we want to return False
1558 os.rmdir(file_storage_dir)
1559 except OSError, err:
1560 logger.Error("Cannot remove file storage directory '%s': %s" %
1561 (file_storage_dir, err))
1566 def RenameFileStorageDir(old_file_storage_dir, new_file_storage_dir):
1567 """Rename the file storage directory.
1570 old_file_storage_dir: string containing the old path
1571 new_file_storage_dir: string containing the new path
1574 tuple with first element a boolean indicating wheter dir
1575 rename was successful or not
1578 old_file_storage_dir = _TransformFileStorageDir(old_file_storage_dir)
1579 new_file_storage_dir = _TransformFileStorageDir(new_file_storage_dir)
1581 if not old_file_storage_dir or not new_file_storage_dir:
1584 if not os.path.exists(new_file_storage_dir):
1585 if os.path.isdir(old_file_storage_dir):
1587 os.rename(old_file_storage_dir, new_file_storage_dir)
1588 except OSError, err:
1589 logger.Error("Cannot rename '%s' to '%s': %s"
1590 % (old_file_storage_dir, new_file_storage_dir, err))
1593 logger.Error("'%s' is not a directory" % old_file_storage_dir)
1596 if os.path.exists(old_file_storage_dir):
1597 logger.Error("Cannot rename '%s' to '%s'. Both locations exist." %
1598 old_file_storage_dir, new_file_storage_dir)
1603 def CloseBlockDevices(disks):
1604 """Closes the given block devices.
1606 This means they will be switched to secondary mode (in case of DRBD).
1611 rd = _RecursiveFindBD(cf)
1613 return (False, "Can't find device %s" % cf)
1620 except errors.BlockDeviceError, err:
1621 msg.append(str(err))
1623 return (False, "Can't make devices secondary: %s" % ",".join(msg))
1625 return (True, "All devices secondary")
1628 class HooksRunner(object):
1631 This class is instantiated on the node side (ganeti-noded) and not on
1635 RE_MASK = re.compile("^[a-zA-Z0-9_-]+$")
1637 def __init__(self, hooks_base_dir=None):
1638 """Constructor for hooks runner.
1641 - hooks_base_dir: if not None, this overrides the
1642 constants.HOOKS_BASE_DIR (useful for unittests)
1645 if hooks_base_dir is None:
1646 hooks_base_dir = constants.HOOKS_BASE_DIR
1647 self._BASE_DIR = hooks_base_dir
1650 def ExecHook(script, env):
1651 """Exec one hook script.
1654 - script: the full path to the script
1655 - env: the environment with which to exec the script
1658 # exec the process using subprocess and log the output
1661 fdstdin = open("/dev/null", "r")
1662 child = subprocess.Popen([script], stdin=fdstdin, stdout=subprocess.PIPE,
1663 stderr=subprocess.STDOUT, close_fds=True,
1664 shell=False, cwd="/", env=env)
1667 output = child.stdout.read(4096)
1668 child.stdout.close()
1669 except EnvironmentError, err:
1670 output += "Hook script error: %s" % str(err)
1674 result = child.wait()
1676 except EnvironmentError, err:
1677 if err.errno == errno.EINTR:
1681 # try not to leak fds
1682 for fd in (fdstdin, ):
1686 except EnvironmentError, err:
1687 # just log the error
1688 #logger.Error("While closing fd %s: %s" % (fd, err))
1691 return result == 0, output
1693 def RunHooks(self, hpath, phase, env):
1694 """Run the scripts in the hooks directory.
1696 This method will not be usually overriden by child opcodes.
1699 if phase == constants.HOOKS_PHASE_PRE:
1701 elif phase == constants.HOOKS_PHASE_POST:
1704 raise errors.ProgrammerError("Unknown hooks phase: '%s'" % phase)
1707 subdir = "%s-%s.d" % (hpath, suffix)
1708 dir_name = "%s/%s" % (self._BASE_DIR, subdir)
1710 dir_contents = utils.ListVisibleFiles(dir_name)
1711 except OSError, err:
1715 # we use the standard python sort order,
1716 # so 00name is the recommended naming scheme
1718 for relname in dir_contents:
1719 fname = os.path.join(dir_name, relname)
1720 if not (os.path.isfile(fname) and os.access(fname, os.X_OK) and
1721 self.RE_MASK.match(relname) is not None):
1722 rrval = constants.HKR_SKIP
1725 result, output = self.ExecHook(fname, env)
1727 rrval = constants.HKR_FAIL
1729 rrval = constants.HKR_SUCCESS
1730 rr.append(("%s/%s" % (subdir, relname), rrval, output))
class IAllocatorRunner(object):
  """IAllocator runner.

  This class is instantiated on the node side (ganeti-noded) and not on
  the master side: it runs the iallocator scripts locally.

  """
  def Run(self, name, idata):
    """Run an iallocator script.

    Args:
      - name: the name of the allocator script to look up
      - idata: the allocator input (string) written to a temp file

    Return value: tuple of:
      - run status (one of the IARUN_ constants)
      - stdout
      - stderr
      - fail reason (as from utils.RunResult)

    """
    alloc_script = utils.FindFile(name, constants.IALLOCATOR_SEARCH_PATH,
                                  os.path.isfile)
    if alloc_script is None:
      return (constants.IARUN_NOTFOUND, None, None, None)

    fd, fin_name = tempfile.mkstemp(prefix="ganeti-iallocator.")
    try:
      # the allocator reads its input from a file passed as argv[1]
      os.write(fd, idata)
      os.close(fd)
      result = utils.RunCmd([alloc_script, fin_name])
      if result.failed:
        return (constants.IARUN_FAILURE, result.stdout, result.stderr,
                result.fail_reason)
    finally:
      # always clean up the temporary input file
      os.unlink(fin_name)

    return (constants.IARUN_SUCCESS, result.stdout, result.stderr, None)
1771 class DevCacheManager(object):
1772 """Simple class for managing a cache of block device information.
1775 _DEV_PREFIX = "/dev/"
1776 _ROOT_DIR = constants.BDEV_CACHE_DIR
1779 def _ConvertPath(cls, dev_path):
1780 """Converts a /dev/name path to the cache file name.
1782 This replaces slashes with underscores and strips the /dev
1783 prefix. It then returns the full path to the cache file
1786 if dev_path.startswith(cls._DEV_PREFIX):
1787 dev_path = dev_path[len(cls._DEV_PREFIX):]
1788 dev_path = dev_path.replace("/", "_")
1789 fpath = "%s/bdev_%s" % (cls._ROOT_DIR, dev_path)
1793 def UpdateCache(cls, dev_path, owner, on_primary, iv_name):
1794 """Updates the cache information for a given device.
1797 if dev_path is None:
1798 logger.Error("DevCacheManager.UpdateCache got a None dev_path")
1800 fpath = cls._ConvertPath(dev_path)
1806 iv_name = "not_visible"
1807 fdata = "%s %s %s\n" % (str(owner), state, iv_name)
1809 utils.WriteFile(fpath, data=fdata)
1810 except EnvironmentError, err:
1811 logger.Error("Can't update bdev cache for %s, error %s" %
1812 (dev_path, str(err)))
1815 def RemoveCache(cls, dev_path):
1816 """Remove data for a dev_path.
1819 if dev_path is None:
1820 logger.Error("DevCacheManager.RemoveCache got a None dev_path")
1822 fpath = cls._ConvertPath(dev_path)
1824 utils.RemoveFile(fpath)
1825 except EnvironmentError, err:
1826 logger.Error("Can't update bdev cache for %s, error %s" %
1827 (dev_path, str(err)))