# Note: these are automake-specific variables, and must be named after
# the directory + 'dir' suffix
clientdir = $(pkgpythondir)/client
+ cmdlibdir = $(pkgpythondir)/cmdlib
hypervisordir = $(pkgpythondir)/hypervisor
+storagedir = $(pkgpythondir)/storage
httpdir = $(pkgpythondir)/http
masterddir = $(pkgpythondir)/masterd
confddir = $(pkgpythondir)/confd
lib/__init__.py \
lib/asyncnotifier.py \
lib/backend.py \
- lib/bdev.py \
lib/bootstrap.py \
lib/cli.py \
- lib/cmdlib.py \
lib/compat.py \
lib/config.py \
lib/constants.py \
$(python_tests) \
$(pkgpython_PYTHON) \
$(client_PYTHON) \
+ $(cmdlib_PYTHON) \
$(hypervisor_PYTHON) \
+ $(storage_PYTHON) \
$(rapi_PYTHON) \
$(server_PYTHON) \
$(pytools_PYTHON) \
creation.
- ``cfgupgrade`` now supports a ``--downgrade`` option to bring the
configuration back to the previous stable version.
+- The cluster option '--no-lvm-storage' was removed in favor of the new option
+ '--enabled-disk-templates'.
- Version 2.7.0 rc1
+ Version 2.7.0 rc2
-----------------
*(unreleased)*
in_chroot -- \
cabal install --global \
- QuickCheck==2.5.1.1 \
- network==2.3 hslogger Crypto text regex-pcre \
- attoparsec vector \
- json==0.4.4 \
- MonadCatchIO-transformers==0.2.2.0 mtl==2.0.1.0 \
- hashable==1.1.2.0 case-insensitive==0.3 parsec==3.0.1 \
- network==2.3 snap-server==0.8.1 \
+ network==2.3 \
+ regex-pcre==0.94.2 \
hinotify==0.3.2 \
+ hslogger==1.1.4 \
+ attoparsec==0.10.1.1\
+ quickcheck==2.5.1.1 \
+ crypto==4.2.4 \
+ monadcatchio-transformers==0.2.2.0 \
+ mtl==2.0.1.0 \
+ hashable==1.1.2.0 \
+ case-insensitive==0.3 \
+ parsec==3.0.1 \
+ network==2.3 \
+ snap-server==0.8.1 \
+ text==0.11.3.0 \
+ vector==0.9.1 \
- json==0.4.4
++ json==0.4.4 \
+ process==1.0.1.2
-
#Python development tools
in_chroot -- \
$APT_INSTALL pandoc python-epydoc graphviz
--- /dev/null
+ #
+ #
+
+ # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
+ #
+ # This program is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program; if not, write to the Free Software
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ # 02110-1301, USA.
+
+
+ """Logical units dealing with the cluster."""
+
+ import OpenSSL
+
+ import copy
+ import itertools
+ import logging
+ import operator
+ import os
+ import re
+ import time
+
+ from ganeti import compat
+ from ganeti import constants
+ from ganeti import errors
+ from ganeti import hypervisor
+ from ganeti import locking
+ from ganeti import masterd
+ from ganeti import netutils
+ from ganeti import objects
+ from ganeti import opcodes
+ from ganeti import pathutils
+ from ganeti import query
+ from ganeti import rpc
+ from ganeti import runtime
+ from ganeti import ssh
+ from ganeti import uidpool
+ from ganeti import utils
+ from ganeti import vcluster
+
+ from ganeti.cmdlib.base import NoHooksLU, QueryBase, LogicalUnit, \
+ ResultWithJobs
+ from ganeti.cmdlib.common import ShareAll, RunPostHook, \
+ ComputeAncillaryFiles, RedistributeAncillaryFiles, UploadHelper, \
+ GetWantedInstances, MergeAndVerifyHvState, MergeAndVerifyDiskState, \
+ GetUpdatedIPolicy, ComputeNewInstanceViolations, GetUpdatedParams, \
+ CheckOSParams, CheckHVParams, AdjustCandidatePool, CheckNodePVs, \
+ ComputeIPolicyInstanceViolation, AnnotateDiskParams, SupportsOob
+
+ import ganeti.masterd.instance
+
+
+ class LUClusterActivateMasterIp(NoHooksLU):
+ """Activate the master IP on the master node.
+
+ """
+ def Exec(self, feedback_fn):
+ """Activate the master IP.
+
+ Sends the C{node_activate_master_ip} RPC to the node named in the
+ master network parameters and hands the result to C{result.Raise}
+ together with the failure message.
+
+ @param feedback_fn: feedback callback, not used by this method
+
+ """
+ master_params = self.cfg.GetMasterNetworkParameters()
+ ems = self.cfg.GetUseExternalMipScript()
+ result = self.rpc.call_node_activate_master_ip(master_params.name,
+ master_params, ems)
+ result.Raise("Could not activate the master IP")
+
+
+ class LUClusterDeactivateMasterIp(NoHooksLU):
+ """Deactivate the master IP on the master node.
+
+ """
+ def Exec(self, feedback_fn):
+ """Deactivate the master IP.
+
+ Sends the C{node_deactivate_master_ip} RPC to the node named in the
+ master network parameters and hands the result to C{result.Raise}
+ together with the failure message.
+
+ @param feedback_fn: feedback callback, not used by this method
+
+ """
+ master_params = self.cfg.GetMasterNetworkParameters()
+ ems = self.cfg.GetUseExternalMipScript()
+ result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+ master_params, ems)
+ result.Raise("Could not deactivate the master IP")
+
+
+ class LUClusterConfigQuery(NoHooksLU):
+ """Return configuration values.
+
+ All the work is delegated to a L{ClusterQuery} helper object.
+
+ """
+ REQ_BGL = False
+
+ def CheckArguments(self):
+ # Build the query helper for the requested output fields.
+ # NOTE(review): the trailing False is presumably the use_locking flag
+ # (ClusterQuery.ExpandNames rejects use_locking) -- confirm against
+ # the QueryBase constructor.
+ self.cq = ClusterQuery(None, self.op.output_fields, False)
+
+ def ExpandNames(self):
+ self.cq.ExpandNames(self)
+
+ def DeclareLocks(self, level):
+ self.cq.DeclareLocks(self, level)
+
+ def Exec(self, feedback_fn):
+ """Run the query and return its only row.
+
+ @param feedback_fn: feedback callback, not used by this method
+ @return: the first (and, as asserted, only) element of the old-style
+ query result
+
+ """
+ result = self.cq.OldStyleQuery(self)
+
+ # There is a single cluster, hence exactly one result row is expected
+ assert len(result) == 1
+
+ return result[0]
+
+
+ class LUClusterDestroy(LogicalUnit):
+ """Logical unit for destroying the cluster.
+
+ """
+ HPATH = "cluster-destroy"
+ HTYPE = constants.HTYPE_CLUSTER
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ @return: dict with the cluster name as C{OP_TARGET}
+
+ """
+ return {
+ "OP_TARGET": self.cfg.GetClusterName(),
+ }
+
+ def BuildHooksNodes(self):
+ """Build hooks nodes.
+
+ @return: a pair of empty lists; the post hook is run explicitly on
+ the master from L{Exec}
+
+ """
+ return ([], [])
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks whether the cluster is empty.
+
+ Any errors are signaled by raising errors.OpPrereqError.
+
+ """
+ master = self.cfg.GetMasterNode()
+
+ nodelist = self.cfg.GetNodeList()
+ # The master must be the only node left; the count reported below
+ # excludes one node (the master)
+ if len(nodelist) != 1 or nodelist[0] != master:
+ raise errors.OpPrereqError("There are still %d node(s) in"
+ " this cluster." % (len(nodelist) - 1),
+ errors.ECODE_INVAL)
+ instancelist = self.cfg.GetInstanceList()
+ if instancelist:
+ raise errors.OpPrereqError("There are still %d instance(s) in"
+ " this cluster." % len(instancelist),
+ errors.ECODE_INVAL)
+
+ def Exec(self, feedback_fn):
+ """Destroys the cluster.
+
+ Runs the post hook on the master and then asks the master to
+ deactivate the master IP. A failure of the deactivation is only
+ logged as a warning.
+
+ @param feedback_fn: feedback callback, not used by this method
+ @return: the C{name} attribute of the master network parameters
+
+ """
+ master_params = self.cfg.GetMasterNetworkParameters()
+
+ # Run post hooks on master node before it's removed
+ RunPostHook(self, master_params.name)
+
+ ems = self.cfg.GetUseExternalMipScript()
+ result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+ master_params, ems)
+ if result.fail_msg:
+ self.LogWarning("Error disabling the master IP address: %s",
+ result.fail_msg)
+
+ return master_params.name
+
+
+ class LUClusterPostInit(LogicalUnit):
+ """Logical unit for running hooks after cluster initialization.
+
+ """
+ HPATH = "cluster-init"
+ HTYPE = constants.HTYPE_CLUSTER
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ @return: dict with the cluster name as C{OP_TARGET}
+
+ """
+ return {
+ "OP_TARGET": self.cfg.GetClusterName(),
+ }
+
+ def BuildHooksNodes(self):
+ """Build hooks nodes.
+
+ @return: pair of node lists; the first is empty, the second holds
+ only the master node
+
+ """
+ return ([], [self.cfg.GetMasterNode()])
+
+ def Exec(self, feedback_fn):
+ """Nothing to do.
+
+ The LU exists only for its hooks.
+
+ @param feedback_fn: feedback callback, not used by this method
+ @return: always C{True}
+
+ """
+ return True
+
+
+ class ClusterQuery(QueryBase):
+ """Query helper returning cluster-level data.
+
+ """
+ FIELDS = query.CLUSTER_FIELDS
+
+ #: Do not sort (there is only one item)
+ SORT_FIELD = None
+
+ def ExpandNames(self, lu):
+ """Declare that no locks are needed and reject locking requests.
+
+ @param lu: the logical unit running this query
+ @raise errors.OpPrereqError: if locking was requested
+
+ """
+ lu.needed_locks = {}
+
+ # The following variables interact with the _GetNames method of the
+ # base class (imported in this module as QueryBase)
+ self.wanted = locking.ALL_SET
+ self.do_locking = self.use_locking
+
+ if self.do_locking:
+ raise errors.OpPrereqError("Can not use locking for cluster queries",
+ errors.ECODE_INVAL)
+
+ def DeclareLocks(self, lu, level):
+ # Nothing to declare, cluster queries never lock
+ pass
+
+ def _GetQueryData(self, lu):
+ """Collects the cluster-level data requested by the query.
+
+ Only the pieces listed in C{self.requested_data} are gathered; every
+ other piece is passed on as C{NotImplemented}.
+
+ @param lu: the logical unit running this query
+ @return: a L{query.ClusterQueryData} built from the cluster object,
+ the queue drain flag and the watcher pause value
+
+ """
+ # Locking is not used
+ assert not (compat.any(lu.glm.is_owned(level)
+ for level in locking.LEVELS
+ if level != locking.LEVEL_CLUSTER) or
+ self.do_locking or self.use_locking)
+
+ if query.CQ_CONFIG in self.requested_data:
+ cluster = lu.cfg.GetClusterInfo()
+ else:
+ cluster = NotImplemented
+
+ if query.CQ_QUEUE_DRAINED in self.requested_data:
+ # The queue counts as drained when the drain file exists
+ drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
+ else:
+ drain_flag = NotImplemented
+
+ if query.CQ_WATCHER_PAUSE in self.requested_data:
+ master_name = lu.cfg.GetMasterNode()
+
+ result = lu.rpc.call_get_watcher_pause(master_name)
+ result.Raise("Can't retrieve watcher pause from master node '%s'" %
+ master_name)
+
+ watcher_pause = result.payload
+ else:
+ watcher_pause = NotImplemented
+
+ return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
+
+
+ class LUClusterQuery(NoHooksLU):
+ """Query cluster configuration.
+
+ """
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ # No locks are acquired by this LU
+ self.needed_locks = {}
+
+ def Exec(self, feedback_fn):
+ """Return cluster config.
+
+ @param feedback_fn: feedback callback, not used by this method
+ @rtype: dict
+ @return: version constants plus attributes copied from the cluster
+ object; C{hvparams} and C{os_hvp} are restricted to the enabled
+ hypervisors
+
+ """
+ cluster = self.cfg.GetClusterInfo()
+ os_hvp = {}
+
+ # Filter just for enabled hypervisors
+ for os_name, hv_dict in cluster.os_hvp.items():
+ os_hvp[os_name] = {}
+ for hv_name, hv_params in hv_dict.items():
+ if hv_name in cluster.enabled_hypervisors:
+ os_hvp[os_name][hv_name] = hv_params
+
+ # Convert ip_family to ip_version
+ primary_ip_version = constants.IP4_VERSION
+ if cluster.primary_ip_family == netutils.IP6Address.family:
+ primary_ip_version = constants.IP6_VERSION
+
+ result = {
+ "software_version": constants.RELEASE_VERSION,
+ "protocol_version": constants.PROTOCOL_VERSION,
+ "config_version": constants.CONFIG_VERSION,
+ "os_api_version": max(constants.OS_API_VERSIONS),
+ "export_version": constants.EXPORT_VERSION,
+ "architecture": runtime.GetArchInfo(),
+ "name": cluster.cluster_name,
+ "master": cluster.master_node,
+ "default_hypervisor": cluster.primary_hypervisor,
+ "enabled_hypervisors": cluster.enabled_hypervisors,
+ "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
+ for hypervisor_name in cluster.enabled_hypervisors]),
+ "os_hvp": os_hvp,
+ "beparams": cluster.beparams,
+ "osparams": cluster.osparams,
+ "ipolicy": cluster.ipolicy,
+ "nicparams": cluster.nicparams,
+ "ndparams": cluster.ndparams,
+ "diskparams": cluster.diskparams,
+ "candidate_pool_size": cluster.candidate_pool_size,
+ "master_netdev": cluster.master_netdev,
+ "master_netmask": cluster.master_netmask,
+ "use_external_mip_script": cluster.use_external_mip_script,
+ "volume_group_name": cluster.volume_group_name,
+ "drbd_usermode_helper": cluster.drbd_usermode_helper,
+ "file_storage_dir": cluster.file_storage_dir,
+ "shared_file_storage_dir": cluster.shared_file_storage_dir,
+ "maintain_node_health": cluster.maintain_node_health,
+ "ctime": cluster.ctime,
+ "mtime": cluster.mtime,
+ "uuid": cluster.uuid,
+ "tags": list(cluster.GetTags()),
+ "uid_pool": cluster.uid_pool,
+ "default_iallocator": cluster.default_iallocator,
+ "reserved_lvs": cluster.reserved_lvs,
+ "primary_ip_version": primary_ip_version,
+ "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
+ "hidden_os": cluster.hidden_os,
+ "blacklisted_os": cluster.blacklisted_os,
+ }
+
+ return result
+
+
+ class LUClusterRedistConf(NoHooksLU):
+ """Force the redistribution of cluster configuration.
+
+ This is a very simple LU.
+
+ """
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ # All node and node-allocation locks are requested; the sharing mode
+ # for every level comes from ShareAll()
+ self.needed_locks = {
+ locking.LEVEL_NODE: locking.ALL_SET,
+ locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+ }
+ self.share_locks = ShareAll()
+
+ def Exec(self, feedback_fn):
+ """Redistribute the configuration.
+
+ Calls C{cfg.Update} on the unchanged cluster object and then
+ redistributes the ancillary files.
+
+ @param feedback_fn: feedback callback, passed on to C{cfg.Update}
+
+ """
+ self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
+ RedistributeAncillaryFiles(self)
+
+
+ class LUClusterRename(LogicalUnit):
+ """Rename the cluster.
+
+ """
+ HPATH = "cluster-rename"
+ HTYPE = constants.HTYPE_CLUSTER
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ @return: dict with the current cluster name as C{OP_TARGET} and the
+ requested name as C{NEW_NAME}
+
+ """
+ return {
+ "OP_TARGET": self.cfg.GetClusterName(),
+ "NEW_NAME": self.op.name,
+ }
+
+ def BuildHooksNodes(self):
+ """Build hooks nodes.
+
+ @return: pair of node lists; the first holds only the master node,
+ the second all nodes
+
+ """
+ return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
+
+ def CheckPrereq(self):
+ """Verify that the passed name is a valid one.
+
+ Resolves the requested name, stores the resulting IP in C{self.ip}
+ and replaces C{self.op.name} with the resolved name.
+
+ @raise errors.OpPrereqError: if neither name nor IP change, or if
+ the new IP already answers on the node daemon port
+
+ """
+ hostname = netutils.GetHostname(name=self.op.name,
+ family=self.cfg.GetPrimaryIPFamily())
+
+ new_name = hostname.name
+ self.ip = new_ip = hostname.ip
+ old_name = self.cfg.GetClusterName()
+ old_ip = self.cfg.GetMasterIP()
+ if new_name == old_name and new_ip == old_ip:
+ raise errors.OpPrereqError("Neither the name nor the IP address of the"
+ " cluster has changed",
+ errors.ECODE_INVAL)
+ if new_ip != old_ip:
+ # A reachable address is already in use by something else
+ if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
+ raise errors.OpPrereqError("The given cluster IP address (%s) is"
+ " reachable on the network" %
+ new_ip, errors.ECODE_NOTUNIQUE)
+
+ self.op.name = new_name
+
+ def Exec(self, feedback_fn):
+ """Rename the cluster.
+
+ The master IP is deactivated, the configuration and the known hosts
+ file are updated, and the master IP is activated again in a
+ C{finally} clause.
+
+ @param feedback_fn: feedback callback, passed on to C{cfg.Update}
+ @return: the new cluster name
+
+ """
+ clustername = self.op.name
+ new_ip = self.ip
+
+ # shutdown the master IP
+ master_params = self.cfg.GetMasterNetworkParameters()
+ ems = self.cfg.GetUseExternalMipScript()
+ result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+ master_params, ems)
+ result.Raise("Could not disable the master role")
+
+ try:
+ cluster = self.cfg.GetClusterInfo()
+ cluster.cluster_name = clustername
+ cluster.master_ip = new_ip
+ self.cfg.Update(cluster, feedback_fn)
+
+ # update the known hosts file
+ ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
+ node_list = self.cfg.GetOnlineNodeList()
+ # The master is dropped from the upload targets, if it is listed
+ try:
+ node_list.remove(master_params.name)
+ except ValueError:
+ pass
+ UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
+ finally:
+ # NOTE(review): the new IP is activated even when the update above
+ # raised, i.e. possibly without the configuration having been
+ # changed -- confirm this is intended
+ master_params.ip = new_ip
+ result = self.rpc.call_node_activate_master_ip(master_params.name,
+ master_params, ems)
+ msg = result.fail_msg
+ if msg:
+ self.LogWarning("Could not re-enable the master role on"
+ " the master, please restart manually: %s", msg)
+
+ return clustername
+
+
+ class LUClusterRepairDiskSizes(NoHooksLU):
+ """Verifies the cluster disks sizes.
+
+ """
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ if self.op.instances:
+ self.wanted_names = GetWantedInstances(self, self.op.instances)
+ # Not getting the node allocation lock as only a specific set of
+ # instances (and their nodes) is going to be acquired
+ self.needed_locks = {
+ locking.LEVEL_NODE_RES: [],
+ locking.LEVEL_INSTANCE: self.wanted_names,
+ }
+ self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
+ else:
+ self.wanted_names = None
+ self.needed_locks = {
+ locking.LEVEL_NODE_RES: locking.ALL_SET,
+ locking.LEVEL_INSTANCE: locking.ALL_SET,
+
+ # This opcode acquires the node locks for all instances
+ locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+ }
+
+ self.share_locks = {
+ locking.LEVEL_NODE_RES: 1,
+ locking.LEVEL_INSTANCE: 0,
+ locking.LEVEL_NODE_ALLOC: 1,
+ }
+
+ def DeclareLocks(self, level):
+ # Node resource locks are computed from the instances only when an
+ # explicit instance list was given; otherwise all are already requested
+ if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
+ self._LockInstancesNodes(primary_only=True, level=level)
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This only checks the optional instance list against the existing names.
+
+ Without an instance list, the owned instance locks are used as the
+ list of names. The instance objects are stored in
+ C{self.wanted_instances}.
+
+ """
+ if self.wanted_names is None:
+ self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
+
+ self.wanted_instances = \
+ map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
+
+ def _EnsureChildSizes(self, disk):
+ """Ensure children of the disk have the needed disk size.
+
+ This is valid mainly for DRBD8 and fixes an issue where the
+ children have smaller disk size.
+
+ @param disk: an L{ganeti.objects.Disk} object
+ @rtype: boolean
+ @return: whether the size of the first child, at any level, had to be
+ raised
+
+ """
+ if disk.dev_type == constants.LD_DRBD8:
+ assert disk.children, "Empty children for DRBD8?"
+ fchild = disk.children[0]
+ mismatch = fchild.size < disk.size
+ if mismatch:
+ self.LogInfo("Child disk has size %d, parent %d, fixing",
+ fchild.size, disk.size)
+ fchild.size = disk.size
+
+ # and we recurse on this child only, not on the metadev
+ return self._EnsureChildSizes(fchild) or mismatch
+ else:
+ return False
+
+ def Exec(self, feedback_fn):
+ """Verify the size of cluster disks.
+
+ @param feedback_fn: feedback callback, passed on to C{cfg.Update}
+ @rtype: list
+ @return: C{(instance name, disk index, size)} tuples, one for each
+ correction that was written to the configuration
+
+ """
+ # TODO: check child disks too
+ # TODO: check differences in size between primary/secondary nodes
+ # Group the disks by the primary node of their instance, so that each
+ # node is queried only once
+ per_node_disks = {}
+ for instance in self.wanted_instances:
+ pnode = instance.primary_node
+ if pnode not in per_node_disks:
+ per_node_disks[pnode] = []
+ for idx, disk in enumerate(instance.disks):
+ per_node_disks[pnode].append((instance, idx, disk))
+
+ assert not (frozenset(per_node_disks.keys()) -
+ self.owned_locks(locking.LEVEL_NODE_RES)), \
+ "Not owning correct locks"
+ assert not self.owned_locks(locking.LEVEL_NODE)
+
+ changed = []
+ for node, dskl in per_node_disks.items():
+ # The RPC is given copies, the configuration objects are only
+ # modified further below
+ newl = [v[2].Copy() for v in dskl]
+ for dsk in newl:
+ self.cfg.SetDiskID(dsk, node)
+ result = self.rpc.call_blockdev_getsize(node, newl)
+ if result.fail_msg:
+ self.LogWarning("Failure in blockdev_getsize call to node"
+ " %s, ignoring", node)
+ continue
+ if len(result.payload) != len(dskl):
+ logging.warning("Invalid result from node %s: len(dksl)=%d,"
+ " result.payload=%s", node, len(dskl), result.payload)
+ self.LogWarning("Invalid result from node %s, ignoring node results",
+ node)
+ continue
+ for ((instance, idx, disk), size) in zip(dskl, result.payload):
+ if size is None:
+ self.LogWarning("Disk %d of instance %s did not return size"
+ " information, ignoring", idx, instance.name)
+ continue
+ if not isinstance(size, (int, long)):
+ self.LogWarning("Disk %d of instance %s did not return valid"
+ " size information, ignoring", idx, instance.name)
+ continue
+ # Divide by 2**20 before comparing with the recorded size
+ # NOTE(review): presumably bytes to mebibytes -- confirm the units
+ # of the RPC payload and of disk.size
+ size = size >> 20
+ if size != disk.size:
+ self.LogInfo("Disk %d of instance %s has mismatched size,"
+ " correcting: recorded %d, actual %d", idx,
+ instance.name, disk.size, size)
+ disk.size = size
+ self.cfg.Update(instance, feedback_fn)
+ changed.append((instance.name, idx, size))
+ if self._EnsureChildSizes(disk):
+ self.cfg.Update(instance, feedback_fn)
+ changed.append((instance.name, idx, disk.size))
+ return changed
+
+
+ def _ValidateNetmask(cfg, netmask):
+ """Checks if a netmask is valid.
+
+ The netmask is validated by the address class that matches the
+ primary IP family of the cluster.
+
+ @type cfg: L{config.ConfigWriter}
+ @param cfg: The cluster configuration
+ @type netmask: int
+ @param netmask: the netmask to be verified
+ @raise errors.OpPrereqError: if the validation fails
+
+ """
+ ip_family = cfg.GetPrimaryIPFamily()
+ try:
+ ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
+ except errors.ProgrammerError:
+ # Reported to the caller as a prerequisite failure instead
+ raise errors.OpPrereqError("Invalid primary ip family: %s." %
+ ip_family, errors.ECODE_INVAL)
+ if not ipcls.ValidateNetmask(netmask):
+ raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
+ (netmask), errors.ECODE_INVAL)
+
+
+ class LUClusterSetParams(LogicalUnit):
+ """Change the parameters of the cluster.
+
+ """
+ HPATH = "cluster-modify"
+ HTYPE = constants.HTYPE_CLUSTER
+ REQ_BGL = False
+
+ def CheckArguments(self):
+ """Check parameters
+
+ Checks the uid pool arguments, the master netmask and the disk
+ parameters given in the opcode; arguments that are not set are
+ skipped.
+
+ @raise errors.OpPrereqError: if the netmask is not valid or the
+ diskparams options fail verification
+
+ """
+ if self.op.uid_pool:
+ uidpool.CheckUidPool(self.op.uid_pool)
+
+ if self.op.add_uids:
+ uidpool.CheckUidPool(self.op.add_uids)
+
+ if self.op.remove_uids:
+ uidpool.CheckUidPool(self.op.remove_uids)
+
+ if self.op.master_netmask is not None:
+ _ValidateNetmask(self.cfg, self.op.master_netmask)
+
+ if self.op.diskparams:
+ for dt_params in self.op.diskparams.values():
+ utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
+ try:
+ utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
+ except errors.OpPrereqError, err:
+ # Re-raised with a prefix naming the failing argument
+ raise errors.OpPrereqError("While verify diskparams options: %s" % err,
+ errors.ECODE_INVAL)
+
+ def ExpandNames(self):
+ """Request all node, instance, node group and node allocation locks.
+
+ The sharing mode for every level comes from C{ShareAll()}.
+
+ """
+ # FIXME: in the future maybe other cluster params won't require checking on
+ # all nodes to be modified.
+ # FIXME: This opcode changes cluster-wide settings. Is acquiring all
+ # resource locks the right thing, shouldn't it be the BGL instead?
+ self.needed_locks = {
+ locking.LEVEL_NODE: locking.ALL_SET,
+ locking.LEVEL_INSTANCE: locking.ALL_SET,
+ locking.LEVEL_NODEGROUP: locking.ALL_SET,
+ locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+ }
+ self.share_locks = ShareAll()
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ @return: dict with the cluster name as C{OP_TARGET} and the volume
+ group name from the opcode as C{NEW_VG_NAME}
+
+ """
+ return {
+ "OP_TARGET": self.cfg.GetClusterName(),
+ "NEW_VG_NAME": self.op.vg_name,
+ }
+
+ def BuildHooksNodes(self):
+ """Build hooks nodes.
+
+ @return: pair of node lists, each holding only the master node
+
+ """
+ mn = self.cfg.GetMasterNode()
+ return ([mn], [mn])
+
- def CheckPrereq(self):
- """Check prerequisites.
-
- This checks whether the given params don't conflict and
- if the given volume group is valid.
++ def _CheckVgName(self, node_list, enabled_disk_templates,
++ new_enabled_disk_templates):
++ """Check the consistency of the vg name on all nodes and in case it gets
++ unset whether there are instances still using it.
++
++ @param node_list: names of the nodes to check the volume group on
++ @param enabled_disk_templates: disk templates enabled after this
++ operation
++ @param new_enabled_disk_templates: disk templates newly enabled by
++ this operation
++ @raise errors.OpPrereqError: if the volume group is being unset while
++ the configuration still has disks of type C{LD_LV}
+
+ """
+ # An empty (but not None) vg_name is the request to unset the name
+ if self.op.vg_name is not None and not self.op.vg_name:
+ if self.cfg.HasAnyDiskOfType(constants.LD_LV):
+ raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
+ " instances exist", errors.ECODE_INVAL)
+
++ # The nodes are checked either when a name is given while lvm is
++ # enabled, or when lvm gets enabled and a name is already configured
++ if (self.op.vg_name is not None and
++ utils.IsLvmEnabled(enabled_disk_templates)) or \
++ (self.cfg.GetVGName() is not None and
++ utils.LvmGetsEnabled(enabled_disk_templates,
++ new_enabled_disk_templates)):
++ self._CheckVgNameOnNodes(node_list)
++
++ def _CheckVgNameOnNodes(self, node_list):
++ """Check the status of the volume group on each node.
++
++ Nodes whose RPC call failed are skipped with a warning.
++
++ @param node_list: names of the nodes to query
++ @raise errors.OpPrereqError: if the volume group size check reports
++ a problem on a node
++
++ """
++ vglist = self.rpc.call_vg_list(node_list)
++ for node in node_list:
++ msg = vglist[node].fail_msg
++ if msg:
++ # ignoring down node
++ self.LogWarning("Error while gathering data on node %s"
++ " (ignoring node): %s", node, msg)
++ continue
++ # NOTE(review): _CheckVgName also calls this method when only the
++ # configured name is set and lvm gets enabled; self.op.vg_name may be
++ # None on that path -- confirm whether the configured name should be
++ # checked instead
++ vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
++ self.op.vg_name,
++ constants.MIN_VG_SIZE)
++ if vgstatus:
++ raise errors.OpPrereqError("Error on node '%s': %s" %
++ (node, vgstatus), errors.ECODE_ENVIRON)
++
++ def _GetEnabledDiskTemplates(self, cluster):
++ """Determines the enabled disk templates and the subset of disk templates
++ that are newly enabled by this operation.
++
++ @param cluster: the cluster object, read for its current
++ C{enabled_disk_templates}
++ @rtype: tuple
++ @return: C{(enabled_disk_templates, new_enabled_disk_templates)}; when
++ the opcode does not set the templates, the cluster's current list
++ and an empty list are returned
++
++ """
++ enabled_disk_templates = None
++ new_enabled_disk_templates = []
++ if self.op.enabled_disk_templates:
++ enabled_disk_templates = self.op.enabled_disk_templates
++ # Newly enabled: requested now, but not enabled in the cluster yet
++ new_enabled_disk_templates = \
++ list(set(enabled_disk_templates)
++ - set(cluster.enabled_disk_templates))
++ else:
++ enabled_disk_templates = cluster.enabled_disk_templates
++ return (enabled_disk_templates, new_enabled_disk_templates)
++
++ def CheckPrereq(self):
++ """Check prerequisites.
++
++ This checks whether the given params don't conflict and
++ if the given volume group is valid.
++
++ The merged parameter sets are stored on the LU (C{new_beparams},
++ C{new_ndparams}, C{new_hv_state}, C{new_disk_state}, C{new_ipolicy},
++ C{new_nicparams}, C{new_hvparams}, C{new_diskparams}, C{new_os_hvp},
++ C{new_osp}, C{hv_list}).
++
++ @raise errors.OpPrereqError: if one of the checks fails
++
++ """
+ if self.op.drbd_helper is not None and not self.op.drbd_helper:
+ if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
+ raise errors.OpPrereqError("Cannot disable drbd helper while"
+ " drbd-based instances exist",
+ errors.ECODE_INVAL)
+
+ node_list = self.owned_locks(locking.LEVEL_NODE)
++ self.cluster = cluster = self.cfg.GetClusterInfo()
+
+ vm_capable_nodes = [node.name
+ for node in self.cfg.GetAllNodesInfo().values()
+ if node.name in node_list and node.vm_capable]
+
- # if vg_name not None, checks given volume group on all nodes
- if self.op.vg_name:
- vglist = self.rpc.call_vg_list(vm_capable_nodes)
- for node in vm_capable_nodes:
- msg = vglist[node].fail_msg
- if msg:
- # ignoring down node
- self.LogWarning("Error while gathering data on node %s"
- " (ignoring node): %s", node, msg)
- continue
- vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
- self.op.vg_name,
- constants.MIN_VG_SIZE)
- if vgstatus:
- raise errors.OpPrereqError("Error on node '%s': %s" %
- (node, vgstatus), errors.ECODE_ENVIRON)
++ (enabled_disk_templates, new_enabled_disk_templates) = \
++ self._GetEnabledDiskTemplates(cluster)
++
++ self._CheckVgName(vm_capable_nodes, enabled_disk_templates,
++ new_enabled_disk_templates)
+
+ if self.op.drbd_helper:
+ # checks given drbd helper on all nodes
+ helpers = self.rpc.call_drbd_helper(node_list)
+ for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
+ if ninfo.offline:
+ self.LogInfo("Not checking drbd helper on offline node %s", node)
+ continue
+ msg = helpers[node].fail_msg
+ if msg:
+ raise errors.OpPrereqError("Error checking drbd helper on node"
+ " '%s': %s" % (node, msg),
+ errors.ECODE_ENVIRON)
+ node_helper = helpers[node].payload
+ if node_helper != self.op.drbd_helper:
+ raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
+ (node, node_helper), errors.ECODE_ENVIRON)
+
- self.cluster = cluster = self.cfg.GetClusterInfo()
+ # validate params changes
+ if self.op.beparams:
+ objects.UpgradeBeParams(self.op.beparams)
+ utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
+ self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
+
+ if self.op.ndparams:
+ utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
+ self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
+
+ # TODO: we need a more general way to handle resetting
+ # cluster-level parameters to default values
+ if self.new_ndparams["oob_program"] == "":
+ self.new_ndparams["oob_program"] = \
+ constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
+
+ if self.op.hv_state:
+ new_hv_state = MergeAndVerifyHvState(self.op.hv_state,
+ self.cluster.hv_state_static)
+ self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
+ for hv, values in new_hv_state.items())
+
+ if self.op.disk_state:
+ new_disk_state = MergeAndVerifyDiskState(self.op.disk_state,
+ self.cluster.disk_state_static)
+ self.new_disk_state = \
+ dict((storage, dict((name, cluster.SimpleFillDiskState(values))
+ for name, values in svalues.items()))
+ for storage, svalues in new_disk_state.items())
+
+ if self.op.ipolicy:
+ self.new_ipolicy = GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
+ group_policy=False)
+
+ # Instances that would newly violate the policy are only reported in a
+ # warning, the change itself is not refused
+ all_instances = self.cfg.GetAllInstancesInfo().values()
+ violations = set()
+ for group in self.cfg.GetAllNodeGroupsInfo().values():
+ instances = frozenset([inst for inst in all_instances
+ if compat.any(node in group.members
+ for node in inst.all_nodes)])
+ new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
+ ipol = masterd.instance.CalculateGroupIPolicy(cluster, group)
+ new = ComputeNewInstanceViolations(ipol,
+ new_ipolicy, instances, self.cfg)
+ if new:
+ violations.update(new)
+
+ if violations:
+ self.LogWarning("After the ipolicy change the following instances"
+ " violate them: %s",
+ utils.CommaJoin(utils.NiceSort(violations)))
+
+ if self.op.nicparams:
+ utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
+ self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
+ objects.NIC.CheckParameterSyntax(self.new_nicparams)
+ nic_errors = []
+
+ # check all instances for consistency
+ for instance in self.cfg.GetAllInstancesInfo().values():
+ for nic_idx, nic in enumerate(instance.nics):
+ params_copy = copy.deepcopy(nic.nicparams)
+ params_filled = objects.FillDict(self.new_nicparams, params_copy)
+
+ # check parameter syntax
+ try:
+ objects.NIC.CheckParameterSyntax(params_filled)
+ except errors.ConfigurationError, err:
+ nic_errors.append("Instance %s, nic/%d: %s" %
+ (instance.name, nic_idx, err))
+
+ # if we're moving instances to routed, check that they have an ip
+ target_mode = params_filled[constants.NIC_MODE]
+ if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
+ nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
+ " address" % (instance.name, nic_idx))
+ if nic_errors:
+ raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
+ "\n".join(nic_errors), errors.ECODE_INVAL)
+
+ # hypervisor list/parameters
+ self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
+ if self.op.hvparams:
+ for hv_name, hv_dict in self.op.hvparams.items():
+ if hv_name not in self.new_hvparams:
+ self.new_hvparams[hv_name] = hv_dict
+ else:
+ self.new_hvparams[hv_name].update(hv_dict)
+
+ # disk template parameters
+ self.new_diskparams = objects.FillDict(cluster.diskparams, {})
+ if self.op.diskparams:
+ for dt_name, dt_params in self.op.diskparams.items():
+ # The membership test has to look at the merged dict: dt_name comes
+ # from self.op.diskparams, so testing against that dict could never
+ # select the branch that adds a missing disk template
+ if dt_name not in self.new_diskparams:
+ self.new_diskparams[dt_name] = dt_params
+ else:
+ self.new_diskparams[dt_name].update(dt_params)
+
+ # os hypervisor parameters
+ self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
+ if self.op.os_hvp:
+ for os_name, hvs in self.op.os_hvp.items():
+ if os_name not in self.new_os_hvp:
+ self.new_os_hvp[os_name] = hvs
+ else:
+ for hv_name, hv_dict in hvs.items():
+ if hv_dict is None:
+ # Delete if it exists
+ self.new_os_hvp[os_name].pop(hv_name, None)
+ elif hv_name not in self.new_os_hvp[os_name]:
+ self.new_os_hvp[os_name][hv_name] = hv_dict
+ else:
+ self.new_os_hvp[os_name][hv_name].update(hv_dict)
+
+ # os parameters
+ self.new_osp = objects.FillDict(cluster.osparams, {})
+ if self.op.osparams:
+ for os_name, osp in self.op.osparams.items():
+ if os_name not in self.new_osp:
+ self.new_osp[os_name] = {}
+
+ self.new_osp[os_name] = GetUpdatedParams(self.new_osp[os_name], osp,
+ use_none=True)
+
+ if not self.new_osp[os_name]:
+ # we removed all parameters
+ del self.new_osp[os_name]
+ else:
+ # check the parameter validity (remote check)
+ CheckOSParams(self, False, [self.cfg.GetMasterNode()],
+ os_name, self.new_osp[os_name])
+
+ # changes to the hypervisor list
+ if self.op.enabled_hypervisors is not None:
+ self.hv_list = self.op.enabled_hypervisors
+ for hv in self.hv_list:
+ # if the hypervisor doesn't already exist in the cluster
+ # hvparams, we initialize it to empty, and then (in both
+ # cases) we make sure to fill the defaults, as we might not
+ # have a complete defaults list if the hypervisor wasn't
+ # enabled before
+ if hv not in new_hvp:
+ new_hvp[hv] = {}
+ new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
+ utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
+ else:
+ self.hv_list = cluster.enabled_hypervisors
+
+ if self.op.hvparams or self.op.enabled_hypervisors is not None:
+ # either the enabled list has changed, or the parameters have, validate
+ for hv_name, hv_params in self.new_hvparams.items():
+ if ((self.op.hvparams and hv_name in self.op.hvparams) or
+ (self.op.enabled_hypervisors and
+ hv_name in self.op.enabled_hypervisors)):
+ # either this is a new hypervisor, or its parameters have changed
+ hv_class = hypervisor.GetHypervisorClass(hv_name)
+ utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
+ hv_class.CheckParameterSyntax(hv_params)
+ CheckHVParams(self, node_list, hv_name, hv_params)
+
+ self._CheckDiskTemplateConsistency()
+
+ if self.op.os_hvp:
+ # no need to check any newly-enabled hypervisors, since the
+ # defaults have already been checked in the above code-block
+ for os_name, os_hvp in self.new_os_hvp.items():
+ for hv_name, hv_params in os_hvp.items():
+ utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
+ # we need to fill in the new os_hvp on top of the actual hv_p
+ cluster_defaults = self.new_hvparams.get(hv_name, {})
+ new_osp = objects.FillDict(cluster_defaults, hv_params)
+ hv_class = hypervisor.GetHypervisorClass(hv_name)
+ hv_class.CheckParameterSyntax(new_osp)
+ CheckHVParams(self, node_list, hv_name, new_osp)
+
+ if self.op.default_iallocator:
+ alloc_script = utils.FindFile(self.op.default_iallocator,
+ constants.IALLOCATOR_SEARCH_PATH,
+ os.path.isfile)
+ if alloc_script is None:
+ raise errors.OpPrereqError("Invalid default iallocator script '%s'"
+ " specified" % self.op.default_iallocator,
+ errors.ECODE_INVAL)
+
+ def _CheckDiskTemplateConsistency(self):
+ """Check whether the disk templates that are going to be disabled
+ are still in use by some instances.
+
+ Nothing is checked unless the opcode sets the enabled disk templates.
+
+ @raise errors.OpPrereqError: if an instance uses a disk template that
+ is enabled in the cluster but missing from the requested list
+
+ """
+ if self.op.enabled_disk_templates:
+ cluster = self.cfg.GetClusterInfo()
+ instances = self.cfg.GetAllInstancesInfo()
+
+ disk_templates_to_remove = set(cluster.enabled_disk_templates) \
+ - set(self.op.enabled_disk_templates)
+ for instance in instances.itervalues():
+ if instance.disk_template in disk_templates_to_remove:
+ # The error code is passed like in the other "still in use" checks
+ # of this LU
+ raise errors.OpPrereqError("Cannot disable disk template '%s',"
+ " because instance '%s' is using it." %
+ (instance.disk_template, instance.name),
+ errors.ECODE_INVAL)
+
- def Exec(self, feedback_fn):
- """Change the parameters of the cluster.
++ def _SetVgName(self, feedback_fn):
++ """Determines and sets the new volume group name.
++
++ An empty C{vg_name} in the opcode unsets the volume group, C{None}
++ leaves it unchanged.
++
++ @param feedback_fn: callback used to send notes to the user
++ @raise errors.OpPrereqError: if the volume group would be unset, or
++ is not set at all, while lvm-based disk templates are enabled
+
+ """
+ if self.op.vg_name is not None:
++ if self.op.vg_name and not \
++ utils.IsLvmEnabled(self.cluster.enabled_disk_templates):
++ feedback_fn("Note that you specified a volume group, but did not"
++ " enable any lvm disk template.")
+ new_volume = self.op.vg_name
+ if not new_volume:
++ if utils.IsLvmEnabled(self.cluster.enabled_disk_templates):
++ raise errors.OpPrereqError("Cannot unset volume group if lvm-based"
++ " disk templates are enabled.",
++ errors.ECODE_INVAL)
+ new_volume = None
+ if new_volume != self.cfg.GetVGName():
+ self.cfg.SetVGName(new_volume)
+ else:
+ feedback_fn("Cluster LVM configuration already in desired"
+ " state, not changing")
++ else:
++ if utils.IsLvmEnabled(self.cluster.enabled_disk_templates) and \
++ not self.cfg.GetVGName():
++ raise errors.OpPrereqError("Please specify a volume group when"
++ " enabling lvm-based disk-templates.",
++ errors.ECODE_INVAL)
++
++ def Exec(self, feedback_fn):
++ """Change the parameters of the cluster.
++
++ """
++ if self.op.enabled_disk_templates:
++ self.cluster.enabled_disk_templates = \
++ list(set(self.op.enabled_disk_templates))
++
++ self._SetVgName(feedback_fn)
++
+ if self.op.drbd_helper is not None:
++ if not constants.DT_DRBD8 in self.cluster.enabled_disk_templates:
++ feedback_fn("Note that you specified a drbd user helper, but did"
++ " enabled the drbd disk template.")
+ new_helper = self.op.drbd_helper
+ if not new_helper:
+ new_helper = None
+ if new_helper != self.cfg.GetDRBDHelper():
+ self.cfg.SetDRBDHelper(new_helper)
+ else:
+ feedback_fn("Cluster DRBD helper already in desired state,"
+ " not changing")
+ if self.op.hvparams:
+ self.cluster.hvparams = self.new_hvparams
+ if self.op.os_hvp:
+ self.cluster.os_hvp = self.new_os_hvp
+ if self.op.enabled_hypervisors is not None:
+ self.cluster.hvparams = self.new_hvparams
+ self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
- if self.op.enabled_disk_templates:
- self.cluster.enabled_disk_templates = \
- list(set(self.op.enabled_disk_templates))
+ if self.op.beparams:
+ self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
+ if self.op.nicparams:
+ self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
+ if self.op.ipolicy:
+ self.cluster.ipolicy = self.new_ipolicy
+ if self.op.osparams:
+ self.cluster.osparams = self.new_osp
+ if self.op.ndparams:
+ self.cluster.ndparams = self.new_ndparams
+ if self.op.diskparams:
+ self.cluster.diskparams = self.new_diskparams
+ if self.op.hv_state:
+ self.cluster.hv_state_static = self.new_hv_state
+ if self.op.disk_state:
+ self.cluster.disk_state_static = self.new_disk_state
+
+ if self.op.candidate_pool_size is not None:
+ self.cluster.candidate_pool_size = self.op.candidate_pool_size
+ # we need to update the pool size here, otherwise the save will fail
+ AdjustCandidatePool(self, [])
+
+ if self.op.maintain_node_health is not None:
+ if self.op.maintain_node_health and not constants.ENABLE_CONFD:
+ feedback_fn("Note: CONFD was disabled at build time, node health"
+ " maintenance is not useful (still enabling it)")
+ self.cluster.maintain_node_health = self.op.maintain_node_health
+
+ if self.op.prealloc_wipe_disks is not None:
+ self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
+
+ if self.op.add_uids is not None:
+ uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
+
+ if self.op.remove_uids is not None:
+ uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
+
+ if self.op.uid_pool is not None:
+ self.cluster.uid_pool = self.op.uid_pool
+
+ if self.op.default_iallocator is not None:
+ self.cluster.default_iallocator = self.op.default_iallocator
+
+ if self.op.reserved_lvs is not None:
+ self.cluster.reserved_lvs = self.op.reserved_lvs
+
+ if self.op.use_external_mip_script is not None:
+ self.cluster.use_external_mip_script = self.op.use_external_mip_script
+
+ def helper_os(aname, mods, desc):
+ desc += " OS list"
+ lst = getattr(self.cluster, aname)
+ for key, val in mods:
+ if key == constants.DDM_ADD:
+ if val in lst:
+ feedback_fn("OS %s already in %s, ignoring" % (val, desc))
+ else:
+ lst.append(val)
+ elif key == constants.DDM_REMOVE:
+ if val in lst:
+ lst.remove(val)
+ else:
+ feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
+ else:
+ raise errors.ProgrammerError("Invalid modification '%s'" % key)
+
+ if self.op.hidden_os:
+ helper_os("hidden_os", self.op.hidden_os, "hidden")
+
+ if self.op.blacklisted_os:
+ helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
+
+ if self.op.master_netdev:
+ master_params = self.cfg.GetMasterNetworkParameters()
+ ems = self.cfg.GetUseExternalMipScript()
+ feedback_fn("Shutting down master ip on the current netdev (%s)" %
+ self.cluster.master_netdev)
+ result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+ master_params, ems)
+ result.Raise("Could not disable the master ip")
+ feedback_fn("Changing master_netdev from %s to %s" %
+ (master_params.netdev, self.op.master_netdev))
+ self.cluster.master_netdev = self.op.master_netdev
+
+ if self.op.master_netmask:
+ master_params = self.cfg.GetMasterNetworkParameters()
+ feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
+ result = self.rpc.call_node_change_master_netmask(master_params.name,
+ master_params.netmask,
+ self.op.master_netmask,
+ master_params.ip,
+ master_params.netdev)
+ if result.fail_msg:
+ msg = "Could not change the master IP netmask: %s" % result.fail_msg
+ feedback_fn(msg)
+
+ self.cluster.master_netmask = self.op.master_netmask
+
+ self.cfg.Update(self.cluster, feedback_fn)
+
+ if self.op.master_netdev:
+ master_params = self.cfg.GetMasterNetworkParameters()
+ feedback_fn("Starting the master ip on the new master netdev (%s)" %
+ self.op.master_netdev)
+ ems = self.cfg.GetUseExternalMipScript()
+ result = self.rpc.call_node_activate_master_ip(master_params.name,
+ master_params, ems)
+ if result.fail_msg:
+ self.LogWarning("Could not re-enable the master ip on"
+ " the master, please restart manually: %s",
+ result.fail_msg)
+
+
+ class LUClusterVerify(NoHooksLU):
+ """Submits all jobs necessary to verify the cluster.
+
+ """
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self.needed_locks = {}
+
+ def Exec(self, feedback_fn):
+ jobs = []
+
+ if self.op.group_name:
+ groups = [self.op.group_name]
+ depends_fn = lambda: None
+ else:
+ groups = self.cfg.GetNodeGroupList()
+
+ # Verify global configuration
+ jobs.append([
+ opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
+ ])
+
+ # Always depend on global verification
+ depends_fn = lambda: [(-len(jobs), [])]
+
+ jobs.extend(
+ [opcodes.OpClusterVerifyGroup(group_name=group,
+ ignore_errors=self.op.ignore_errors,
+ depends=depends_fn())]
+ for group in groups)
+
+ # Fix up all parameters
+ for op in itertools.chain(*jobs): # pylint: disable=W0142
+ op.debug_simulate_errors = self.op.debug_simulate_errors
+ op.verbose = self.op.verbose
+ op.error_codes = self.op.error_codes
+ try:
+ op.skip_checks = self.op.skip_checks
+ except AttributeError:
+ assert not isinstance(op, opcodes.OpClusterVerifyGroup)
+
+ return ResultWithJobs(jobs)
+
+
+ class _VerifyErrors(object):
+ """Mix-in for cluster/group verify LUs.
+
+ It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
+ self.op and self._feedback_fn to be available.)
+
+ """
+
+ ETYPE_FIELD = "code"
+ ETYPE_ERROR = "ERROR"
+ ETYPE_WARNING = "WARNING"
+
+ def _Error(self, ecode, item, msg, *args, **kwargs):
+ """Format an error message.
+
+ Based on the opcode's error_codes parameter, either format a
+ parseable error code, or a simpler error string.
+
+ This must be called only from Exec and functions called from Exec.
+
+ """
+ ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
+ itype, etxt, _ = ecode
+ # If the error code is in the list of ignored errors, demote the error to a
+ # warning
+ if etxt in self.op.ignore_errors: # pylint: disable=E1101
+ ltype = self.ETYPE_WARNING
+ # first complete the msg
+ if args:
+ msg = msg % args
+ # then format the whole message
+ if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
+ msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
+ else:
+ if item:
+ item = " " + item
+ else:
+ item = ""
+ msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
+ # and finally report it via the feedback_fn
+ self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
+ # do not mark the operation as failed for WARN cases only
+ if ltype == self.ETYPE_ERROR:
+ self.bad = True
+
+ def _ErrorIf(self, cond, *args, **kwargs):
+ """Log an error message if the passed condition is True.
+
+ """
+ if (bool(cond)
+ or self.op.debug_simulate_errors): # pylint: disable=E1101
+ self._Error(*args, **kwargs)
+
+
+ def _VerifyCertificate(filename):
+ """Verifies a certificate for L{LUClusterVerifyConfig}.
+
+ @type filename: string
+ @param filename: Path to PEM file
+
+ """
+ try:
+ cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
+ utils.ReadFile(filename))
+ except Exception, err: # pylint: disable=W0703
+ return (LUClusterVerifyConfig.ETYPE_ERROR,
+ "Failed to load X509 certificate %s: %s" % (filename, err))
+
+ (errcode, msg) = \
+ utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
+ constants.SSL_CERT_EXPIRATION_ERROR)
+
+ if msg:
+ fnamemsg = "While verifying %s: %s" % (filename, msg)
+ else:
+ fnamemsg = None
+
+ if errcode is None:
+ return (None, fnamemsg)
+ elif errcode == utils.CERT_WARNING:
+ return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
+ elif errcode == utils.CERT_ERROR:
+ return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
+
+ raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
+
+
+ def _GetAllHypervisorParameters(cluster, instances):
+ """Compute the set of all hypervisor parameters.
+
+ @type cluster: L{objects.Cluster}
+ @param cluster: the cluster object
+ @type instances: list of L{objects.Instance}
+ @param instances: additional instances from which to obtain parameters
+ @rtype: list of (origin, hypervisor, parameters)
+ @return: a list with all parameters found, indicating the hypervisor they
+ apply to, and the origin (can be "cluster", "os X", or "instance Y")
+
+ """
+ hvp_data = []
+
+ for hv_name in cluster.enabled_hypervisors:
+ hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
+
+ for os_name, os_hvp in cluster.os_hvp.items():
+ for hv_name, hv_params in os_hvp.items():
+ if hv_params:
+ full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
+ hvp_data.append(("os %s" % os_name, hv_name, full_params))
+
+ # TODO: collapse identical parameter values in a single one
+ for instance in instances:
+ if instance.hvparams:
+ hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
+ cluster.FillHV(instance)))
+
+ return hvp_data
+
+
+ class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
+ """Verifies the cluster config.
+
+ """
+ REQ_BGL = False
+
+ def _VerifyHVP(self, hvp_data):
+ """Verifies locally the syntax of the hypervisor parameters.
+
+ """
+ for item, hv_name, hv_params in hvp_data:
+ msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
+ (item, hv_name))
+ try:
+ hv_class = hypervisor.GetHypervisorClass(hv_name)
+ utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
+ hv_class.CheckParameterSyntax(hv_params)
+ except errors.GenericError, err:
+ self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
+
+ def ExpandNames(self):
+ self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
+ self.share_locks = ShareAll()
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ """
+ # Retrieve all information
+ self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
+ self.all_node_info = self.cfg.GetAllNodesInfo()
+ self.all_inst_info = self.cfg.GetAllInstancesInfo()
+
+ def Exec(self, feedback_fn):
+ """Verify integrity of cluster, performing various tests on nodes.
+
+ """
+ self.bad = False
+ self._feedback_fn = feedback_fn
+
+ feedback_fn("* Verifying cluster config")
+
+ for msg in self.cfg.VerifyConfig():
+ self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
+
+ feedback_fn("* Verifying cluster certificate files")
+
+ for cert_filename in pathutils.ALL_CERT_FILES:
+ (errcode, msg) = _VerifyCertificate(cert_filename)
+ self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
+
+ feedback_fn("* Verifying hypervisor parameters")
+
+ self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
+ self.all_inst_info.values()))
+
+ feedback_fn("* Verifying all nodes belong to an existing group")
+
+ # We do this verification here because, should this bogus circumstance
+ # occur, it would never be caught by VerifyGroup, which only acts on
+ # nodes/instances reachable from existing node groups.
+
+ dangling_nodes = set(node.name for node in self.all_node_info.values()
+ if node.group not in self.all_group_info)
+
+ dangling_instances = {}
+ no_node_instances = []
+
+ for inst in self.all_inst_info.values():
+ if inst.primary_node in dangling_nodes:
+ dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
+ elif inst.primary_node not in self.all_node_info:
+ no_node_instances.append(inst.name)
+
+ pretty_dangling = [
+ "%s (%s)" %
+ (node.name,
+ utils.CommaJoin(dangling_instances.get(node.name,
+ ["no instances"])))
+ for node in dangling_nodes]
+
+ self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
+ None,
+ "the following nodes (and their instances) belong to a non"
+ " existing group: %s", utils.CommaJoin(pretty_dangling))
+
+ self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
+ None,
+ "the following instances have a non-existing primary-node:"
+ " %s", utils.CommaJoin(no_node_instances))
+
+ return not self.bad
+
+
+ class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
+ """Verifies the status of a node group.
+
+ """
+ HPATH = "cluster-verify"
+ HTYPE = constants.HTYPE_CLUSTER
+ REQ_BGL = False
+
+ _HOOKS_INDENT_RE = re.compile("^", re.M)
+
+ class NodeImage(object):
+ """A class representing the logical and physical status of a node.
+
+ @type name: string
+ @ivar name: the node name to which this object refers
+ @ivar volumes: a structure as returned from
+ L{ganeti.backend.GetVolumeList} (runtime)
+ @ivar instances: a list of running instances (runtime)
+ @ivar pinst: list of configured primary instances (config)
+ @ivar sinst: list of configured secondary instances (config)
+ @ivar sbp: dictionary of {primary-node: list of instances} for all
+ instances for which this node is secondary (config)
+ @ivar mfree: free memory, as reported by hypervisor (runtime)
+ @ivar dfree: free disk, as reported by the node (runtime)
+ @ivar offline: the offline status (config)
+ @type rpc_fail: boolean
+ @ivar rpc_fail: whether the RPC verify call was successful (overall,
+ not whether the individual keys were correct) (runtime)
+ @type lvm_fail: boolean
+ @ivar lvm_fail: whether the RPC call didn't return valid LVM data
+ @type hyp_fail: boolean
+ @ivar hyp_fail: whether the RPC call didn't return the instance list
+ @type ghost: boolean
+ @ivar ghost: whether this is a known node or not (config)
+ @type os_fail: boolean
+ @ivar os_fail: whether the RPC call didn't return valid OS data
+ @type oslist: list
+ @ivar oslist: list of OSes as diagnosed by DiagnoseOS
+ @type vm_capable: boolean
+ @ivar vm_capable: whether the node can host instances
+ @type pv_min: float
+ @ivar pv_min: size in MiB of the smallest PVs
+ @type pv_max: float
+ @ivar pv_max: size in MiB of the biggest PVs
+
+ """
+ def __init__(self, offline=False, name=None, vm_capable=True):
+ self.name = name
+ self.volumes = {}
+ self.instances = []
+ self.pinst = []
+ self.sinst = []
+ self.sbp = {}
+ self.mfree = 0
+ self.dfree = 0
+ self.offline = offline
+ self.vm_capable = vm_capable
+ self.rpc_fail = False
+ self.lvm_fail = False
+ self.hyp_fail = False
+ self.ghost = False
+ self.os_fail = False
+ self.oslist = {}
+ self.pv_min = None
+ self.pv_max = None
+
+ def ExpandNames(self):
+ # This raises errors.OpPrereqError on its own:
+ self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
+
+ # Get instances in node group; this is unsafe and needs verification later
+ inst_names = \
+ self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
+
+ self.needed_locks = {
+ locking.LEVEL_INSTANCE: inst_names,
+ locking.LEVEL_NODEGROUP: [self.group_uuid],
+ locking.LEVEL_NODE: [],
+
+ # This opcode is run by watcher every five minutes and acquires all nodes
+ # for a group. It doesn't run for a long time, so it's better to acquire
+ # the node allocation lock as well.
+ locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+ }
+
+ self.share_locks = ShareAll()
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ # Get members of node group; this is unsafe and needs verification later
+ nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
+
+ all_inst_info = self.cfg.GetAllInstancesInfo()
+
+ # In Exec(), we warn about mirrored instances that have primary and
+ # secondary living in separate node groups. To fully verify that
+ # volumes for these instances are healthy, we will need to do an
+ # extra call to their secondaries. We ensure here those nodes will
+ # be locked.
+ for inst in self.owned_locks(locking.LEVEL_INSTANCE):
+ # Important: access only the instances whose lock is owned
+ if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
+ nodes.update(all_inst_info[inst].secondary_nodes)
+
+ self.needed_locks[locking.LEVEL_NODE] = nodes
+
+ def CheckPrereq(self):
+ assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
+ self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
+
+ group_nodes = set(self.group_info.members)
+ group_instances = \
+ self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
+
+ unlocked_nodes = \
+ group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
+
+ unlocked_instances = \
+ group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
+
+ if unlocked_nodes:
+ raise errors.OpPrereqError("Missing lock for nodes: %s" %
+ utils.CommaJoin(unlocked_nodes),
+ errors.ECODE_STATE)
+
+ if unlocked_instances:
+ raise errors.OpPrereqError("Missing lock for instances: %s" %
+ utils.CommaJoin(unlocked_instances),
+ errors.ECODE_STATE)
+
+ self.all_node_info = self.cfg.GetAllNodesInfo()
+ self.all_inst_info = self.cfg.GetAllInstancesInfo()
+
+ self.my_node_names = utils.NiceSort(group_nodes)
+ self.my_inst_names = utils.NiceSort(group_instances)
+
+ self.my_node_info = dict((name, self.all_node_info[name])
+ for name in self.my_node_names)
+
+ self.my_inst_info = dict((name, self.all_inst_info[name])
+ for name in self.my_inst_names)
+
+ # We detect here the nodes that will need the extra RPC calls for verifying
+ # split LV volumes; they should be locked.
+ extra_lv_nodes = set()
+
+ for inst in self.my_inst_info.values():
+ if inst.disk_template in constants.DTS_INT_MIRROR:
+ for nname in inst.all_nodes:
+ if self.all_node_info[nname].group != self.group_uuid:
+ extra_lv_nodes.add(nname)
+
+ unlocked_lv_nodes = \
+ extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
+
+ if unlocked_lv_nodes:
+ raise errors.OpPrereqError("Missing node locks for LV check: %s" %
+ utils.CommaJoin(unlocked_lv_nodes),
+ errors.ECODE_STATE)
+ self.extra_lv_nodes = list(extra_lv_nodes)
+
+ def _VerifyNode(self, ninfo, nresult):
+ """Perform some basic validation on data returned from a node.
+
+ - check the result data structure is well formed and has all the
+ mandatory fields
+ - check ganeti version
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the results from the node
+ @rtype: boolean
+ @return: whether overall this call was successful (and we can expect
+ reasonable values in the response)
+
+ """
+ node = ninfo.name
+ _ErrorIf = self._ErrorIf # pylint: disable=C0103
+
+ # main result, nresult should be a non-empty dict
+ test = not nresult or not isinstance(nresult, dict)
+ _ErrorIf(test, constants.CV_ENODERPC, node,
+ "unable to verify node: no data returned")
+ if test:
+ return False
+
+ # compares ganeti version
+ local_version = constants.PROTOCOL_VERSION
+ remote_version = nresult.get("version", None)
+ test = not (remote_version and
+ isinstance(remote_version, (list, tuple)) and
+ len(remote_version) == 2)
+ _ErrorIf(test, constants.CV_ENODERPC, node,
+ "connection to node returned invalid data")
+ if test:
+ return False
+
+ test = local_version != remote_version[0]
+ _ErrorIf(test, constants.CV_ENODEVERSION, node,
+ "incompatible protocol versions: master %s,"
+ " node %s", local_version, remote_version[0])
+ if test:
+ return False
+
+ # node seems compatible, we can actually try to look into its results
+
+ # full package version
+ self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
+ constants.CV_ENODEVERSION, node,
+ "software version mismatch: master %s, node %s",
+ constants.RELEASE_VERSION, remote_version[1],
+ code=self.ETYPE_WARNING)
+
+ hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
+ if ninfo.vm_capable and isinstance(hyp_result, dict):
+ for hv_name, hv_result in hyp_result.iteritems():
+ test = hv_result is not None
+ _ErrorIf(test, constants.CV_ENODEHV, node,
+ "hypervisor %s verify failure: '%s'", hv_name, hv_result)
+
+ hvp_result = nresult.get(constants.NV_HVPARAMS, None)
+ if ninfo.vm_capable and isinstance(hvp_result, list):
+ for item, hv_name, hv_result in hvp_result:
+ _ErrorIf(True, constants.CV_ENODEHV, node,
+ "hypervisor %s parameter verify failure (source %s): %s",
+ hv_name, item, hv_result)
+
+ test = nresult.get(constants.NV_NODESETUP,
+ ["Missing NODESETUP results"])
+ _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
+ "; ".join(test))
+
+ return True
+
+ def _VerifyNodeTime(self, ninfo, nresult,
+ nvinfo_starttime, nvinfo_endtime):
+ """Check the node time.
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+ @param nvinfo_starttime: the start time of the RPC call
+ @param nvinfo_endtime: the end time of the RPC call
+
+ """
+ node = ninfo.name
+ _ErrorIf = self._ErrorIf # pylint: disable=C0103
+
+ ntime = nresult.get(constants.NV_TIME, None)
+ try:
+ ntime_merged = utils.MergeTime(ntime)
+ except (ValueError, TypeError):
+ _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
+ return
+
+ if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
+ ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
+ elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
+ ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
+ else:
+ ntime_diff = None
+
+ _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
+ "Node time diverges by at least %s from master node time",
+ ntime_diff)
+
+ def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
+ """Check the node LVM results and update info for cross-node checks.
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+ @param vg_name: the configured VG name
+ @type nimg: L{NodeImage}
+ @param nimg: node image
+
+ """
+ if vg_name is None:
+ return
+
+ node = ninfo.name
+ _ErrorIf = self._ErrorIf # pylint: disable=C0103
+
+ # checks vg existence and size > 20G
+ vglist = nresult.get(constants.NV_VGLIST, None)
+ test = not vglist
+ _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
+ if not test:
+ vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
+ constants.MIN_VG_SIZE)
+ _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
+
+ # Check PVs
+ (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage)
+ for em in errmsgs:
+ self._Error(constants.CV_ENODELVM, node, em)
+ if pvminmax is not None:
+ (nimg.pv_min, nimg.pv_max) = pvminmax
+
++ def _VerifyGroupDRBDVersion(self, node_verify_infos):
++ """Check cross-node DRBD version consistency.
++
++ @type node_verify_infos: dict
++ @param node_verify_infos: infos about nodes as returned from the
++ node_verify call.
++
++ """
++ node_versions = {}
++ for node, ndata in node_verify_infos.items():
++ nresult = ndata.payload
++ version = nresult.get(constants.NV_DRBDVERSION, "Missing DRBD version")
++ node_versions[node] = version
++
++ if len(set(node_versions.values())) > 1:
++ for node, version in sorted(node_versions.items()):
++ msg = "DRBD version mismatch: %s" % version
++ self._Error(constants.CV_ENODEDRBDHELPER, node, msg,
++ code=self.ETYPE_WARNING)
++
+ def _VerifyGroupLVM(self, node_image, vg_name):
+ """Check cross-node consistency in LVM.
+
+ @type node_image: dict
+ @param node_image: info about nodes, mapping from node names to
+ L{NodeImage} objects
+ @param vg_name: the configured VG name
+
+ """
+ if vg_name is None:
+ return
+
+ # Only exclusive storage needs this kind of check
+ if not self._exclusive_storage:
+ return
+
+ # exclusive_storage wants all PVs to have the same size (approximately),
+ # if the smallest and the biggest ones are okay, everything is fine.
+ # pv_min is None iff pv_max is None
+ vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
+ if not vals:
+ return
+ (pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
+ (pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
+ bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
+ self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
+ "PV sizes differ too much in the group; smallest (%s MB) is"
+ " on %s, biggest (%s MB) is on %s",
+ pvmin, minnode, pvmax, maxnode)
+
+ def _VerifyNodeBridges(self, ninfo, nresult, bridges):
+ """Check the node bridges.
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+ @param bridges: the expected list of bridges
+
+ """
+ if not bridges:
+ return
+
+ node = ninfo.name
+ _ErrorIf = self._ErrorIf # pylint: disable=C0103
+
+ missing = nresult.get(constants.NV_BRIDGES, None)
+ test = not isinstance(missing, list)
+ _ErrorIf(test, constants.CV_ENODENET, node,
+ "did not return valid bridge information")
+ if not test:
+ _ErrorIf(bool(missing), constants.CV_ENODENET, node,
+ "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
+
+ def _VerifyNodeUserScripts(self, ninfo, nresult):
+ """Check the results of user scripts presence and executability on the node
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+
+ """
+ node = ninfo.name
+
+ test = not constants.NV_USERSCRIPTS in nresult
+ self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
+ "did not return user scripts information")
+
+ broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
+ if not test:
+ self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
+ "user scripts not present or not executable: %s" %
+ utils.CommaJoin(sorted(broken_scripts)))
+
+ def _VerifyNodeNetwork(self, ninfo, nresult):
+ """Check the node network connectivity results.
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+
+ """
+ node = ninfo.name
+ _ErrorIf = self._ErrorIf # pylint: disable=C0103
+
+ test = constants.NV_NODELIST not in nresult
+ _ErrorIf(test, constants.CV_ENODESSH, node,
+ "node hasn't returned node ssh connectivity data")
+ if not test:
+ if nresult[constants.NV_NODELIST]:
+ for a_node, a_msg in nresult[constants.NV_NODELIST].items():
+ _ErrorIf(True, constants.CV_ENODESSH, node,
+ "ssh communication with node '%s': %s", a_node, a_msg)
+
+ test = constants.NV_NODENETTEST not in nresult
+ _ErrorIf(test, constants.CV_ENODENET, node,
+ "node hasn't returned node tcp connectivity data")
+ if not test:
+ if nresult[constants.NV_NODENETTEST]:
+ nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
+ for anode in nlist:
+ _ErrorIf(True, constants.CV_ENODENET, node,
+ "tcp communication with node '%s': %s",
+ anode, nresult[constants.NV_NODENETTEST][anode])
+
+ test = constants.NV_MASTERIP not in nresult
+ _ErrorIf(test, constants.CV_ENODENET, node,
+ "node hasn't returned node master IP reachability data")
+ if not test:
+ if not nresult[constants.NV_MASTERIP]:
+ if node == self.master_node:
+ msg = "the master node cannot reach the master IP (not configured?)"
+ else:
+ msg = "cannot reach the master IP"
+ _ErrorIf(True, constants.CV_ENODENET, node, msg)
+
+ def _VerifyInstance(self, instance, inst_config, node_image,
+ diskstatus):
+ """Verify an instance.
+
+ This function checks to see if the required block devices are
+ available on the instance's node, and that the nodes are in the correct
+ state.
+
+ """
+ _ErrorIf = self._ErrorIf # pylint: disable=C0103
+ pnode = inst_config.primary_node
+ pnode_img = node_image[pnode]
+ groupinfo = self.cfg.GetAllNodeGroupsInfo()
+
+ node_vol_should = {}
+ inst_config.MapLVsByNode(node_vol_should)
+
+ cluster = self.cfg.GetClusterInfo()
+ ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
+ self.group_info)
+ err = ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg)
+ _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
+ code=self.ETYPE_WARNING)
+
+ for node in node_vol_should:
+ n_img = node_image[node]
+ if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
+ # ignore missing volumes on offline or broken nodes
+ continue
+ for volume in node_vol_should[node]:
+ test = volume not in n_img.volumes
+ _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
+ "volume %s missing on node %s", volume, node)
+
+ if inst_config.admin_state == constants.ADMINST_UP:
+ test = instance not in pnode_img.instances and not pnode_img.offline
+ _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
+ "instance not running on its primary node %s",
+ pnode)
+ _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
+ "instance is marked as running and lives on offline node %s",
+ pnode)
+
+ diskdata = [(nname, success, status, idx)
+ for (nname, disks) in diskstatus.items()
+ for idx, (success, status) in enumerate(disks)]
+
+ for nname, success, bdev_status, idx in diskdata:
+ # the 'ghost node' construction in Exec() ensures that we have a
+ # node here
+ snode = node_image[nname]
+ bad_snode = snode.ghost or snode.offline
+ _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
+ not success and not bad_snode,
+ constants.CV_EINSTANCEFAULTYDISK, instance,
+ "couldn't retrieve status for disk/%s on %s: %s",
+ idx, nname, bdev_status)
+ _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
+ success and bdev_status.ldisk_status == constants.LDS_FAULTY),
+ constants.CV_EINSTANCEFAULTYDISK, instance,
+ "disk/%s on %s is faulty", idx, nname)
+
+ _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
+ constants.CV_ENODERPC, pnode, "instance %s, connection to"
+ " primary node failed", instance)
+
+ _ErrorIf(len(inst_config.secondary_nodes) > 1,
+ constants.CV_EINSTANCELAYOUT,
+ instance, "instance has multiple secondary nodes: %s",
+ utils.CommaJoin(inst_config.secondary_nodes),
+ code=self.ETYPE_WARNING)
+
+ if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
+ # Disk template not compatible with exclusive_storage: no instance
+ # node should have the flag set
+ es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
+ inst_config.all_nodes)
+ es_nodes = [n for (n, es) in es_flags.items()
+ if es]
+ _ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
+ "instance has template %s, which is not supported on nodes"
+ " that have exclusive storage set: %s",
+ inst_config.disk_template, utils.CommaJoin(es_nodes))
+
+ if inst_config.disk_template in constants.DTS_INT_MIRROR:
+ instance_nodes = utils.NiceSort(inst_config.all_nodes)
+ instance_groups = {}
+
+ for node in instance_nodes:
+ instance_groups.setdefault(self.all_node_info[node].group,
+ []).append(node)
+
+ pretty_list = [
+ "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
+ # Sort so that we always list the primary node first.
+ for group, nodes in sorted(instance_groups.items(),
+ key=lambda (_, nodes): pnode in nodes,
+ reverse=True)]
+
+ self._ErrorIf(len(instance_groups) > 1,
+ constants.CV_EINSTANCESPLITGROUPS,
+ instance, "instance has primary and secondary nodes in"
+ " different groups: %s", utils.CommaJoin(pretty_list),
+ code=self.ETYPE_WARNING)
+
+ inst_nodes_offline = []
+ for snode in inst_config.secondary_nodes:
+ s_img = node_image[snode]
+ _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
+ snode, "instance %s, connection to secondary node failed",
+ instance)
+
+ if s_img.offline:
+ inst_nodes_offline.append(snode)
+
+ # warn that the instance lives on offline nodes
+ _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
+ "instance has offline secondary node(s) %s",
+ utils.CommaJoin(inst_nodes_offline))
+ # ... or ghost/non-vm_capable nodes
+ for node in inst_config.all_nodes:
+ _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
+ instance, "instance lives on ghost node %s", node)
+ _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
+ instance, "instance lives on non-vm_capable node %s", node)
+
+ def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
+ """Report volumes found on nodes that nothing accounts for.
+
+ A volume found on a node is reported as unknown unless it is listed for
+ that node in C{node_vol_should} or its name is matched by C{reserved}.
+ Offline nodes, nodes whose RPC or LVM query failed and nodes that belong
+ to another node group are not examined.
+
+ @param node_vol_should: mapping of node name to the volumes expected on
+ that node
+ @param node_image: mapping of node name to the node image; its
+ C{volumes} attribute holds the volumes actually found
+ @type reserved: L{ganeti.utils.FieldSet}
+ @param reserved: a FieldSet of reserved volume names
+
+ """
+ for node, n_img in node_image.items():
+ if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
+ self.all_node_info[node].group != self.group_uuid):
+ # skip non-healthy nodes and nodes outside the verified group
+ continue
+ for volume in n_img.volumes:
+ # orphan: not expected on this node and not a reserved name
+ test = ((node not in node_vol_should or
+ volume not in node_vol_should[node]) and
+ not reserved.Matches(volume))
+ self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
+ "volume %s is unknown", volume)
+
+ def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
+ """Verify N+1 Memory Resilience.
+
+ Check that if one single node dies we can still start all the
+ instances it was primary for.
+
+ @param node_image: mapping of node name to node image; the C{offline},
+ C{sbp} and C{mfree} attributes of each image are read
+ @param instance_cfg: mapping of instance name to the instance object
+ that is handed to C{FillBE}
+
+ """
+ cluster_info = self.cfg.GetClusterInfo()
+ for node, n_img in node_image.items():
+ # This code checks that every node which is now listed as
+ # secondary has enough memory to host all instances it is
+ # supposed to should a single other node in the cluster fail.
+ # FIXME: not ready for failover to an arbitrary node
+ # FIXME: does not support file-backed instances
+ # WARNING: we currently take into account down instances as well
+ # as up ones, considering that even if they're down someone
+ # might want to start them even in the event of a node failure.
+ if n_img.offline or self.all_node_info[node].group != self.group_uuid:
+ # we're skipping nodes marked offline and nodes in other groups from
+ # the N+1 warning, since most likely we don't have good memory
+ # information from them; we already list instances living on such
+ # nodes, and that's enough warning
+ continue
+ #TODO(dynmem): also consider ballooning out other instances
+ # n_img.sbp maps a primary node to the instances this node is
+ # secondary for
+ for prinode, instances in n_img.sbp.items():
+ needed_mem = 0
+ for instance in instances:
+ bep = cluster_info.FillBE(instance_cfg[instance])
+ # only auto-balanced instances count, with their minimum memory
+ if bep[constants.BE_AUTO_BALANCE]:
+ needed_mem += bep[constants.BE_MINMEM]
+ test = n_img.mfree < needed_mem
+ self._ErrorIf(test, constants.CV_ENODEN1, node,
+ "not enough memory to accomodate instance failovers"
+ " should node %s fail (%dMiB needed, %dMiB available)",
+ prinode, needed_mem, n_img.mfree)
+
+ @classmethod
+ def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
+ (files_all, files_opt, files_mc, files_vm)):
+ """Verifies file checksums collected from all nodes.
+
+ @param errorif: Callback for reporting errors
+ @param nodeinfo: List of L{objects.Node} objects
+ @param master_node: Name of master node
+ @param all_nvinfo: RPC results
+
+ """
+ # Define functions determining which nodes to consider for a file
+ files2nodefn = [
+ (files_all, None),
+ (files_mc, lambda node: (node.master_candidate or
+ node.name == master_node)),
+ (files_vm, lambda node: node.vm_capable),
+ ]
+
+ # Build mapping from filename to list of nodes which should have the file
+ nodefiles = {}
+ for (files, fn) in files2nodefn:
+ if fn is None:
+ filenodes = nodeinfo
+ else:
+ filenodes = filter(fn, nodeinfo)
+ nodefiles.update((filename,
+ frozenset(map(operator.attrgetter("name"), filenodes)))
+ for filename in files)
+
+ assert set(nodefiles) == (files_all | files_mc | files_vm)
+
+ fileinfo = dict((filename, {}) for filename in nodefiles)
+ ignore_nodes = set()
+
+ for node in nodeinfo:
+ if node.offline:
+ ignore_nodes.add(node.name)
+ continue
+
+ nresult = all_nvinfo[node.name]
+
+ if nresult.fail_msg or not nresult.payload:
+ node_files = None
+ else:
+ fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
+ node_files = dict((vcluster.LocalizeVirtualPath(key), value)
+ for (key, value) in fingerprints.items())
+ del fingerprints
+
+ test = not (node_files and isinstance(node_files, dict))
+ errorif(test, constants.CV_ENODEFILECHECK, node.name,
+ "Node did not return file checksum data")
+ if test:
+ ignore_nodes.add(node.name)
+ continue
+
+ # Build per-checksum mapping from filename to nodes having it
+ for (filename, checksum) in node_files.items():
+ assert filename in nodefiles
+ fileinfo[filename].setdefault(checksum, set()).add(node.name)
+
+ for (filename, checksums) in fileinfo.items():
+ assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
+
+ # Nodes having the file
+ with_file = frozenset(node_name
+ for nodes in fileinfo[filename].values()
+ for node_name in nodes) - ignore_nodes
+
+ expected_nodes = nodefiles[filename] - ignore_nodes
+
+ # Nodes missing file
+ missing_file = expected_nodes - with_file
+
+ if filename in files_opt:
+ # All or no nodes
+ errorif(missing_file and missing_file != expected_nodes,
+ constants.CV_ECLUSTERFILECHECK, None,
+ "File %s is optional, but it must exist on all or no"
+ " nodes (not found on %s)",
+ filename, utils.CommaJoin(utils.NiceSort(missing_file)))
+ else:
+ errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
+ "File %s is missing from node(s) %s", filename,
+ utils.CommaJoin(utils.NiceSort(missing_file)))
+
+ # Warn if a node has a file it shouldn't
+ unexpected = with_file - expected_nodes
+ errorif(unexpected,
+ constants.CV_ECLUSTERFILECHECK, None,
+ "File %s should not exist on node(s) %s",
+ filename, utils.CommaJoin(utils.NiceSort(unexpected)))
+
+ # See if there are multiple versions of the file
+ test = len(checksums) > 1
+ if test:
+ variants = ["variant %s on %s" %
+ (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
+ for (idx, (checksum, nodes)) in
+ enumerate(sorted(checksums.items()))]
+ else:
+ variants = []
+
+ errorif(test, constants.CV_ECLUSTERFILECHECK, None,
+ "File %s found with %s different checksums (%s)",
+ filename, len(checksums), "; ".join(variants))
+
+ def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
+ drbd_map):
+ """Verifies the node DRBD status.
+
+ Checks the usermode helper reported by the node against the configured
+ one (only when a helper is configured) and compares the DRBD minors the
+ configuration assigns to this node with the minors the node reports as
+ in use.
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+ @param instanceinfo: the dict of instances
+ @param drbd_helper: the configured DRBD usermode helper
+ @param drbd_map: the DRBD map as returned by
+ L{ganeti.config.ConfigWriter.ComputeDRBDMap}
+
+ """
+ node = ninfo.name
+ _ErrorIf = self._ErrorIf # pylint: disable=C0103
+
+ if drbd_helper:
+ helper_result = nresult.get(constants.NV_DRBDHELPER, None)
+ test = (helper_result is None)
+ _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
+ "no drbd usermode helper returned")
+ if helper_result:
+ # the node answers with a (status, payload) pair
+ status, payload = helper_result
+ test = not status
+ _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
+ "drbd usermode helper check unsuccessful: %s", payload)
+ test = status and (payload != drbd_helper)
+ _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
+ "wrong drbd usermode helper: %s", payload)
+
+ # compute the DRBD minors
+ # node_drbd: minor -> (instance name, whether the minor must be active)
+ node_drbd = {}
+ for minor, instance in drbd_map[node].items():
+ test = instance not in instanceinfo
+ _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
+ "ghost instance '%s' in temporary DRBD map", instance)
+ # ghost instance should not be running, but otherwise we
+ # don't give double warnings (both ghost instance and
+ # unallocated minor in use)
+ if test:
+ node_drbd[minor] = (instance, False)
+ else:
+ instance = instanceinfo[instance]
+ node_drbd[minor] = (instance.name,
+ instance.admin_state == constants.ADMINST_UP)
+
+ # and now check them
+ used_minors = nresult.get(constants.NV_DRBDLIST, [])
+ test = not isinstance(used_minors, (tuple, list))
+ _ErrorIf(test, constants.CV_ENODEDRBD, node,
+ "cannot parse drbd status file: %s", str(used_minors))
+ if test:
+ # we cannot check drbd status
+ return
+
+ # minors of instances that should be up must be in use on the node ...
+ for minor, (iname, must_exist) in node_drbd.items():
+ test = minor not in used_minors and must_exist
+ _ErrorIf(test, constants.CV_ENODEDRBD, node,
+ "drbd minor %d of instance %s is not active", minor, iname)
+ # ... and every minor in use must be known to the configuration
+ for minor in used_minors:
+ test = minor not in node_drbd
+ _ErrorIf(test, constants.CV_ENODEDRBD, node,
+ "unallocated drbd minor %d is in use", minor)
+
+ def _UpdateNodeOS(self, ninfo, nresult, nimg):
+ """Builds the node OS structures.
+
+ Validates the OS list returned by the node, records the outcome in
+ C{nimg.os_fail} and, when the data is valid, stores in C{nimg.oslist} a
+ dict mapping each OS name to a list of
+ C{(path, status, diagnose, variants, parameters, api_versions)} tuples
+ whose last three members are sets.
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+ @param nimg: the node image object
+
+ """
+ node = ninfo.name
+ _ErrorIf = self._ErrorIf # pylint: disable=C0103
+
+ remote_os = nresult.get(constants.NV_OSLIST, None)
+ # valid data is a list made only of 7-element lists
+ test = (not isinstance(remote_os, list) or
+ not compat.all(isinstance(v, list) and len(v) == 7
+ for v in remote_os))
+
+ _ErrorIf(test, constants.CV_ENODEOS, node,
+ "node hasn't returned valid OS data")
+
+ nimg.os_fail = test
+
+ if test:
+ return
+
+ os_dict = {}
+
+ for (name, os_path, status, diagnose,
+ variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
+
+ if name not in os_dict:
+ os_dict[name] = []
+
+ # parameters is a list of lists instead of list of tuples due to
+ # JSON lacking a real tuple type, fix it:
+ parameters = [tuple(v) for v in parameters]
+ os_dict[name].append((os_path, status, diagnose,
+ set(variants), set(parameters), set(api_ver)))
+
+ nimg.oslist = os_dict
+
+ def _VerifyNodeOS(self, ninfo, nimg, base):
+ """Verifies the node OS list.
+
+ Reports invalid OSes, OSes with more than one entry, OSes that exist on
+ only one of the two nodes, and differences in API versions, variants and
+ parameters between this node and the reference node. Only the first
+ entry of each OS is used for the comparisons.
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nimg: the node image object
+ @param base: the 'template' node we match against (e.g. from the master)
+
+ """
+ node = ninfo.name
+ _ErrorIf = self._ErrorIf # pylint: disable=C0103
+
+ assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
+
+ # render (key, value) parameter pairs as "key: value" strings
+ beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
+ for os_name, os_data in nimg.oslist.items():
+ assert os_data, "Empty OS status for OS %s?!" % os_name
+ f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
+ _ErrorIf(not f_status, constants.CV_ENODEOS, node,
+ "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
+ _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
+ "OS '%s' has multiple entries (first one shadows the rest): %s",
+ os_name, utils.CommaJoin([v[0] for v in os_data]))
+ # comparisons with the 'base' image
+ test = os_name not in base.oslist
+ _ErrorIf(test, constants.CV_ENODEOS, node,
+ "Extra OS %s not present on reference node (%s)",
+ os_name, base.name)
+ if test:
+ continue
+ assert base.oslist[os_name], "Base node has empty OS status?"
+ _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
+ if not b_status:
+ # base OS is invalid, skipping
+ continue
+ for kind, a, b in [("API version", f_api, b_api),
+ ("variants list", f_var, b_var),
+ ("parameters", beautify_params(f_param),
+ beautify_params(b_param))]:
+ _ErrorIf(a != b, constants.CV_ENODEOS, node,
+ "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
+ kind, os_name, base.name,
+ utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
+
+ # check any missing OSes
+ missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
+ _ErrorIf(missing, constants.CV_ENODEOS, node,
+ "OSes present on reference node %s but missing on this node: %s",
+ base.name, utils.CommaJoin(missing))
+
+ def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
+ """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
+
+ On the master node, when file or shared-file storage is enabled, the
+ node must return a list of forbidden paths and that list must be empty.
+ In every other case the node is expected not to return the key at all.
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+ @type is_master: bool
+ @param is_master: Whether node is the master node
+
+ """
+ node = ninfo.name
+
+ if (is_master and
+ (constants.ENABLE_FILE_STORAGE or
+ constants.ENABLE_SHARED_FILE_STORAGE)):
+ try:
+ fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
+ except KeyError:
+ # This should never happen
+ self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
+ "Node did not return forbidden file storage paths")
+ else:
+ # any returned path is a forbidden one
+ self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
+ "Found forbidden file storage paths: %s",
+ utils.CommaJoin(fspaths))
+ else:
+ self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
+ constants.CV_ENODEFILESTORAGEPATHS, node,
+ "Node should not have returned forbidden file storage"
+ " paths")
+
+ def _VerifyOob(self, ninfo, nresult):
+ """Verifies out of band functionality of a node.
+
+ Every non-empty entry the node returned for the OOB paths check is
+ reported as an error, using the entry itself as the message.
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+
+ """
+ node = ninfo.name
+ # We just have to verify the paths on master and/or master candidates
+ # as the oob helper is invoked on the master
+ # NOTE(review): the condition below also accepts master_capable nodes,
+ # not only current master candidates
+ if ((ninfo.master_candidate or ninfo.master_capable) and
+ constants.NV_OOB_PATHS in nresult):
+ for path_result in nresult[constants.NV_OOB_PATHS]:
+ self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
+
+ def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
+ """Verifies and updates the node volume data.
+
+ This function will update a L{NodeImage}'s internal structures
+ with data from the remote call.
+
+ C{nimg.lvm_fail} is set to True up front and only reset to False when
+ the node returned a dict of volumes, which is then stored in
+ C{nimg.volumes}. With C{vg_name} set to None nothing is checked and the
+ flag stays True.
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+ @param nimg: the node image object
+ @param vg_name: the configured VG name
+
+ """
+ node = ninfo.name
+ _ErrorIf = self._ErrorIf # pylint: disable=C0103
+
+ nimg.lvm_fail = True
+ # a missing key is turned into a string and thus reported as LVM problem
+ lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
+ if vg_name is None:
+ pass
+ elif isinstance(lvdata, basestring):
+ # a string result carries the error text
+ _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
+ utils.SafeEncode(lvdata))
+ elif not isinstance(lvdata, dict):
+ _ErrorIf(True, constants.CV_ENODELVM, node,
+ "rpc call to node failed (lvlist)")
+ else:
+ nimg.volumes = lvdata
+ nimg.lvm_fail = False
+
+ def _UpdateNodeInstances(self, ninfo, nresult, nimg):
+ """Verifies and updates the node instance list.
+
+ If the listing was successful, then updates this node's instance
+ list. Otherwise, it marks the RPC call as failed for the instance
+ list key.
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+ @param nimg: the node image object
+
+ """
+ idata = nresult.get(constants.NV_INSTANCELIST, None)
+ # anything that is not a list counts as a failed call
+ test = not isinstance(idata, list)
+ self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
+ "rpc call to node failed (instancelist): %s",
+ utils.SafeEncode(str(idata)))
+ if test:
+ nimg.hyp_fail = True
+ else:
+ nimg.instances = idata
+
+ def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
+ """Verifies and computes a node information map
+
+ Stores the free memory reported by the hypervisor in C{nimg.mfree} and,
+ when a volume group is configured, the value the node reported for that
+ volume group in C{nimg.dfree}. Missing or non-integer data is reported
+ as an error and the corresponding attribute is left untouched.
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+ @param nimg: the node image object
+ @param vg_name: the configured VG name
+
+ """
+ node = ninfo.name
+ _ErrorIf = self._ErrorIf # pylint: disable=C0103
+
+ # try to read free memory (from the hypervisor)
+ hv_info = nresult.get(constants.NV_HVINFO, None)
+ test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
+ _ErrorIf(test, constants.CV_ENODEHV, node,
+ "rpc call to node failed (hvinfo)")
+ if not test:
+ try:
+ nimg.mfree = int(hv_info["memory_free"])
+ except (ValueError, TypeError):
+ _ErrorIf(True, constants.CV_ENODERPC, node,
+ "node returned invalid nodeinfo, check hypervisor")
+
+ # FIXME: devise a free space model for file based instances as well
+ if vg_name is not None:
+ test = (constants.NV_VGLIST not in nresult or
+ vg_name not in nresult[constants.NV_VGLIST])
+ _ErrorIf(test, constants.CV_ENODELVM, node,
+ "node didn't return data for the volume group '%s'"
+ " - it is either missing or broken", vg_name)
+ if not test:
+ try:
+ nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
+ except (ValueError, TypeError):
+ _ErrorIf(True, constants.CV_ENODERPC, node,
+ "node returned invalid LVM info, check LVM status")
+
+ def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
+ """Gets per-disk status information for all instances.
+
+ @type nodelist: list of strings
+ @param nodelist: Node names
+ @type node_image: dict of (name, node image)
+ @param node_image: Node images; their C{pinst} and C{sinst} attributes
+ name the instances whose disks are queried on each node
+ @type instanceinfo: dict of (name, L{objects.Instance})
+ @param instanceinfo: Instance objects
+ @rtype: {instance: {node: [(success, payload)]}}
+ @return: a dictionary of per-instance dictionaries with nodes as
+ keys and disk information as values; the disk information is a
+ list of tuples (success, payload)
+
+ """
+ _ErrorIf = self._ErrorIf # pylint: disable=C0103
+
+ # node_disks: node name -> [(instance name, disk)]
+ # node_disks_devonly: node name -> annotated disks, in the same order
+ node_disks = {}
+ node_disks_devonly = {}
+ diskless_instances = set()
+ diskless = constants.DT_DISKLESS
+
+ for nname in nodelist:
+ node_instances = list(itertools.chain(node_image[nname].pinst,
+ node_image[nname].sinst))
+ diskless_instances.update(inst for inst in node_instances
+ if instanceinfo[inst].disk_template == diskless)
+ disks = [(inst, disk)
+ for inst in node_instances
+ for disk in instanceinfo[inst].disks]
+
+ if not disks:
+ # No need to collect data
+ continue
+
+ node_disks[nname] = disks
+
+ # AnnotateDiskParams already makes copies of the disks
+ devonly = []
+ for (inst, dev) in disks:
+ (anno_disk,) = AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
+ self.cfg.SetDiskID(anno_disk, nname)
+ devonly.append(anno_disk)
+
+ node_disks_devonly[nname] = devonly
+
+ assert len(node_disks) == len(node_disks_devonly)
+
+ # Collect data from all nodes with disks
+ result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
+ node_disks_devonly)
+
+ assert len(result) == len(node_disks)
+
+ instdisk = {}
+
+ for (nname, nres) in result.items():
+ disks = node_disks[nname]
+
+ if nres.offline:
+ # No data from this node
+ data = len(disks) * [(False, "node offline")]
+ else:
+ msg = nres.fail_msg
+ _ErrorIf(msg, constants.CV_ENODERPC, nname,
+ "while getting disk information: %s", msg)
+ if msg:
+ # No data from this node
+ data = len(disks) * [(False, msg)]
+ else:
+ data = []
+ for idx, i in enumerate(nres.payload):
+ # each entry must be a two-element sequence; anything else is
+ # replaced by a failure status
+ if isinstance(i, (tuple, list)) and len(i) == 2:
+ data.append(i)
+ else:
+ logging.warning("Invalid result from node %s, entry %d: %s",
+ nname, idx, i)
+ data.append((False, "Invalid result from the remote node"))
+
+ # statuses are matched to disks by position
+ for ((inst, _), status) in zip(disks, data):
+ instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
+
+ # Add empty entries for diskless instances.
+ for inst in diskless_instances:
+ assert inst not in instdisk
+ instdisk[inst] = {}
+
+ assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
+ len(nnames) <= len(instanceinfo[inst].all_nodes) and
+ compat.all(isinstance(s, (tuple, list)) and
+ len(s) == 2 for s in statuses)
+ for inst, nnames in instdisk.items()
+ for nname, statuses in nnames.items())
+ if __debug__:
+ instdisk_keys = set(instdisk)
+ instanceinfo_keys = set(instanceinfo)
+ assert instdisk_keys == instanceinfo_keys, \
+ ("instdisk keys (%s) do not match instanceinfo keys (%s)" %
+ (instdisk_keys, instanceinfo_keys))
+
+ return instdisk
+
+ @staticmethod
+ def _SshNodeSelector(group_uuid, all_nodes):
+ """Create endless iterators for all potential SSH check hosts.
+
+ Nodes that are offline or belong to C{group_uuid} are left out. The
+ remaining nodes are grouped by their C{group} attribute and each group
+ yields one C{itertools.cycle} iterator over its sorted node names.
+
+ @param group_uuid: the group whose own nodes are excluded
+ @param all_nodes: node objects providing C{group}, C{offline} and
+ C{name}
+ @return: the result of mapping C{itertools.cycle} over the per-group
+ name lists
+
+ """
+ nodes = [node for node in all_nodes
+ if (node.group != group_uuid and
+ not node.offline)]
+ keyfunc = operator.attrgetter("group")
+
+ # groupby only merges adjacent items, hence the sort by the same key
+ return map(itertools.cycle,
+ [sorted(map(operator.attrgetter("name"), names))
+ for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
+ keyfunc)])
+
+ @classmethod
+ def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
+ """Choose which nodes should talk to which other nodes.
+
+ We will make nodes contact all nodes in their group, and one node from
+ every other group.
+
+ @warning: This algorithm has a known issue if one node group is much
+ smaller than others (e.g. just one node). In such a case all other
+ nodes will talk to the single node.
+
+ @param group_nodes: node objects of the group being verified
+ @param group_uuid: identifier of the group being verified
+ @param all_nodes: node objects of the whole cluster
+ @return: a tuple of the sorted names of the online group nodes and a
+ dict mapping each of these names to a sorted list holding one node
+ name from every other group
+
+ """
+ online_nodes = sorted(node.name for node in group_nodes if not node.offline)
+ sel = cls._SshNodeSelector(group_uuid, all_nodes)
+
+ # NOTE(review): sel is iterated once per online node, which relies on
+ # map() returning a list (Python 2 behaviour)
+ return (online_nodes,
+ dict((name, sorted([i.next() for i in sel]))
+ for name in online_nodes))
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ Cluster-Verify hooks are only run in the post phase; their failure
+ causes the output to be logged in the verify output and the
+ verification to fail.
+
+ @return: a dict with the cluster tags under C{CLUSTER_TAGS} and one
+ C{NODE_TAGS_<name>} entry per node of this group, each value being a
+ space-separated string of tags
+
+ """
+ env = {
+ "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
+ }
+
+ env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
+ for node in self.my_node_info.values())
+
+ return env
+
+ def BuildHooksNodes(self):
+ """Build hooks nodes.
+
+ @return: a tuple of an empty list and the names of this group's nodes
+
+ """
+ return ([], self.my_node_names)
+
+ def Exec(self, feedback_fn):
+ """Verify integrity of the node group, performing various test on nodes.
+
+ Builds the verification request, runs it on all nodes of the group,
+ feeds the answers to the per-node and per-instance checks and finally
+ prints summary notices.
+
+ @param feedback_fn: function used to send feedback back to the caller
+ @return: True for an empty node group, otherwise the negation of
+ C{self.bad} once all checks have run
+
+ """
+ # This method has too many local variables. pylint: disable=R0914
+ feedback_fn("* Verifying group '%s'" % self.group_info.name)
+
+ if not self.my_node_names:
+ # empty node group
+ feedback_fn("* Empty node group, skipping verification")
+ return True
+
+ self.bad = False
+ _ErrorIf = self._ErrorIf # pylint: disable=C0103
+ verbose = self.op.verbose
+ self._feedback_fn = feedback_fn
+
+ vg_name = self.cfg.GetVGName()
+ drbd_helper = self.cfg.GetDRBDHelper()
+ cluster = self.cfg.GetClusterInfo()
+ hypervisors = cluster.enabled_hypervisors
+ node_data_list = [self.my_node_info[name] for name in self.my_node_names]
+
+ i_non_redundant = [] # Non redundant instances
+ i_non_a_balanced = [] # Non auto-balanced instances
+ i_offline = 0 # Count of offline instances
+ n_offline = 0 # Count of offline nodes
+ n_drained = 0 # Count of nodes being drained
+ node_vol_should = {}
+
+ # FIXME: verify OS list
+
+ # File verification
+ filemap = ComputeAncillaryFiles(cluster, False)
+
+ # do local checksums
+ master_node = self.master_node = self.cfg.GetMasterNode()
+ master_ip = self.cfg.GetMasterIP()
+
+ feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
+
+ user_scripts = []
+ if self.cfg.GetUseExternalMipScript():
+ user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
+
+ # the checks every node of the group is asked to perform
+ node_verify_param = {
+ constants.NV_FILELIST:
+ map(vcluster.MakeVirtualPath,
+ utils.UniqueSequence(filename
+ for files in filemap
+ for filename in files)),
+ constants.NV_NODELIST:
+ self._SelectSshCheckNodes(node_data_list, self.group_uuid,
+ self.all_node_info.values()),
+ constants.NV_HYPERVISOR: hypervisors,
+ constants.NV_HVPARAMS:
+ _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
+ constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
+ for node in node_data_list
+ if not node.offline],
+ constants.NV_INSTANCELIST: hypervisors,
+ constants.NV_VERSION: None,
+ constants.NV_HVINFO: self.cfg.GetHypervisorType(),
+ constants.NV_NODESETUP: None,
+ constants.NV_TIME: None,
+ constants.NV_MASTERIP: (master_node, master_ip),
+ constants.NV_OSLIST: None,
+ constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
+ constants.NV_USERSCRIPTS: user_scripts,
+ }
+
+ # LVM checks are only requested when a volume group is configured
+ if vg_name is not None:
+ node_verify_param[constants.NV_VGLIST] = None
+ node_verify_param[constants.NV_LVLIST] = vg_name
+ node_verify_param[constants.NV_PVLIST] = [vg_name]
+
+ # DRBD checks are only requested when a usermode helper is configured
+ if drbd_helper:
++ node_verify_param[constants.NV_DRBDVERSION] = None
+ node_verify_param[constants.NV_DRBDLIST] = None
+ node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
+
+ if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
+ # Load file storage paths only from master node
+ node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
+
+ # bridge checks
+ # FIXME: this needs to be changed per node-group, not cluster-wide
+ bridges = set()
+ default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
+ if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
+ bridges.add(default_nicpp[constants.NIC_LINK])
+ for instance in self.my_inst_info.values():
+ for nic in instance.nics:
+ full_nic = cluster.SimpleFillNIC(nic.nicparams)
+ if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
+ bridges.add(full_nic[constants.NIC_LINK])
+
+ if bridges:
+ node_verify_param[constants.NV_BRIDGES] = list(bridges)
+
+ # Build our expected cluster state
+ node_image = dict((node.name, self.NodeImage(offline=node.offline,
+ name=node.name,
+ vm_capable=node.vm_capable))
+ for node in node_data_list)
+
+ # Gather OOB paths
+ oob_paths = []
+ for node in self.all_node_info.values():
+ path = SupportsOob(self.cfg, node)
+ if path and path not in oob_paths:
+ oob_paths.append(path)
+
+ if oob_paths:
+ node_verify_param[constants.NV_OOB_PATHS] = oob_paths
+
+ for instance in self.my_inst_names:
+ inst_config = self.my_inst_info[instance]
+ if inst_config.admin_state == constants.ADMINST_OFFLINE:
+ i_offline += 1
+
+ # nodes used by the instance but not part of this group get an image
+ # too; it is flagged as ghost when the node is unknown to the cluster
+ for nname in inst_config.all_nodes:
+ if nname not in node_image:
+ gnode = self.NodeImage(name=nname)
+ gnode.ghost = (nname not in self.all_node_info)
+ node_image[nname] = gnode
+
+ inst_config.MapLVsByNode(node_vol_should)
+
+ pnode = inst_config.primary_node
+ node_image[pnode].pinst.append(instance)
+
+ # record, per secondary node, which instances it backs for which primary
+ for snode in inst_config.secondary_nodes:
+ nimg = node_image[snode]
+ nimg.sinst.append(instance)
+ if pnode not in nimg.sbp:
+ nimg.sbp[pnode] = []
+ nimg.sbp[pnode].append(instance)
+
+ es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
+ # The value of exclusive_storage should be the same across the group, so if
+ # it's True for at least a node, we act as if it were set for all the nodes
+ self._exclusive_storage = compat.any(es_flags.values())
+ if self._exclusive_storage:
+ node_verify_param[constants.NV_EXCLUSIVEPVS] = True
+
+ # At this point, we have the in-memory data structures complete,
+ # except for the runtime information, which we'll gather next
+
+ # Due to the way our RPC system works, exact response times cannot be
+ # guaranteed (e.g. a broken node could run into a timeout). By keeping the
+ # time before and after executing the request, we can at least have a time
+ # window.
+ nvinfo_starttime = time.time()
+ all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
+ node_verify_param,
+ self.cfg.GetClusterName())
+ nvinfo_endtime = time.time()
+
+ # nodes in self.extra_lv_nodes are only asked for their volume list
+ if self.extra_lv_nodes and vg_name is not None:
+ extra_lv_nvinfo = \
+ self.rpc.call_node_verify(self.extra_lv_nodes,
+ {constants.NV_LVLIST: vg_name},
+ self.cfg.GetClusterName())
+ else:
+ extra_lv_nvinfo = {}
+
+ all_drbd_map = self.cfg.ComputeDRBDMap()
+
+ feedback_fn("* Gathering disk information (%s nodes)" %
+ len(self.my_node_names))
+ instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
+ self.my_inst_info)
+
+ feedback_fn("* Verifying configuration file consistency")
+
+ # If not all nodes are being checked, we need to make sure the master node
+ # and a non-checked vm_capable node are in the list.
+ absent_nodes = set(self.all_node_info).difference(self.my_node_info)
+ if absent_nodes:
+ vf_nvinfo = all_nvinfo.copy()
+ vf_node_info = list(self.my_node_info.values())
+ additional_nodes = []
+ if master_node not in self.my_node_info:
+ additional_nodes.append(master_node)
+ vf_node_info.append(self.all_node_info[master_node])
+ # Add the first vm_capable node we find which is not included,
+ # excluding the master node (which we already have)
+ for node in absent_nodes:
+ nodeinfo = self.all_node_info[node]
+ if (nodeinfo.vm_capable and not nodeinfo.offline and
+ node != master_node):
+ additional_nodes.append(node)
+ vf_node_info.append(self.all_node_info[node])
+ break
+ # the additional nodes are only asked for the file checksums
+ key = constants.NV_FILELIST
+ vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
+ {key: node_verify_param[key]},
+ self.cfg.GetClusterName()))
+ else:
+ vf_nvinfo = all_nvinfo
+ vf_node_info = self.my_node_info.values()
+
+ self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
+
+ feedback_fn("* Verifying node status")
+
+ # image of the first node with valid OS data; the reference for OS checks
+ refos_img = None
+
+ for node_i in node_data_list:
+ node = node_i.name
+ nimg = node_image[node]
+
+ if node_i.offline:
+ if verbose:
+ feedback_fn("* Skipping offline node %s" % (node,))
+ n_offline += 1
+ continue
+
+ if node == master_node:
+ ntype = "master"
+ elif node_i.master_candidate:
+ ntype = "master candidate"
+ elif node_i.drained:
+ ntype = "drained"
+ n_drained += 1
+ else:
+ ntype = "regular"
+ if verbose:
+ feedback_fn("* Verifying node %s (%s)" % (node, ntype))
+
+ msg = all_nvinfo[node].fail_msg
+ _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
+ msg)
+ if msg:
+ nimg.rpc_fail = True
+ continue
+
+ nresult = all_nvinfo[node].payload
+
+ nimg.call_ok = self._VerifyNode(node_i, nresult)
+ self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
+ self._VerifyNodeNetwork(node_i, nresult)
+ self._VerifyNodeUserScripts(node_i, nresult)
+ self._VerifyOob(node_i, nresult)
+ self._VerifyFileStoragePaths(node_i, nresult,
+ node == master_node)
+
+ if nimg.vm_capable:
+ self._UpdateVerifyNodeLVM(node_i, nresult, vg_name, nimg)
+ self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
+ all_drbd_map)
+
+ self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
+ self._UpdateNodeInstances(node_i, nresult, nimg)
+ self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
+ self._UpdateNodeOS(node_i, nresult, nimg)
+
+ if not nimg.os_fail:
+ if refos_img is None:
+ refos_img = nimg
+ self._VerifyNodeOS(node_i, nimg, refos_img)
+ self._VerifyNodeBridges(node_i, nresult, bridges)
+
+ # Check whether all running instances are primary for the node. (This
+ # can no longer be done from _VerifyInstance below, since some of the
+ # wrong instances could be from other node groups.)
+ non_primary_inst = set(nimg.instances).difference(nimg.pinst)
+
+ for inst in non_primary_inst:
+ # known instance on the wrong node vs. instance unknown to the cluster
+ test = inst in self.all_inst_info
+ _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
+ "instance should not run on node %s", node_i.name)
+ _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
+ "node is running unknown instance %s", inst)
+
++ self._VerifyGroupDRBDVersion(all_nvinfo)
+ self._VerifyGroupLVM(node_image, vg_name)
+
+ for node, result in extra_lv_nvinfo.items():
+ self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
+ node_image[node], vg_name)
+
+ feedback_fn("* Verifying instance status")
+ for instance in self.my_inst_names:
+ if verbose:
+ feedback_fn("* Verifying instance %s" % instance)
+ inst_config = self.my_inst_info[instance]
+ self._VerifyInstance(instance, inst_config, node_image,
+ instdisk[instance])
+
+ # If the instance is non-redundant we cannot survive losing its primary
+ # node, so we are not N+1 compliant.
+ if inst_config.disk_template not in constants.DTS_MIRRORED:
+ i_non_redundant.append(instance)
+
+ if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
+ i_non_a_balanced.append(instance)
+
+ feedback_fn("* Verifying orphan volumes")
+ reserved = utils.FieldSet(*cluster.reserved_lvs)
+
+ # We will get spurious "unknown volume" warnings if any node of this group
+ # is secondary for an instance whose primary is in another group. To avoid
+ # them, we find these instances and add their volumes to node_vol_should.
+ for inst in self.all_inst_info.values():
+ for secondary in inst.secondary_nodes:
+ if (secondary in self.my_node_info
+ and inst.name not in self.my_inst_info):
+ inst.MapLVsByNode(node_vol_should)
+ break
+
+ self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
+
+ if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
+ feedback_fn("* Verifying N+1 Memory redundancy")
+ self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
+
+ feedback_fn("* Other Notes")
+ if i_non_redundant:
+ feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
+ % len(i_non_redundant))
+
+ if i_non_a_balanced:
+ feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
+ % len(i_non_a_balanced))
+
+ if i_offline:
+ feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
+
+ if n_offline:
+ feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
+
+ if n_drained:
+ feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
+
+ return not self.bad
+
def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
  """Analyze the post-hooks' result

  Walks over the per-node hook results, reports communication and script
  failures through C{self._ErrorIf} and sends the output of failed scripts
  back to the user.

  @param phase: one of L{constants.HOOKS_PHASE_POST} or
      L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
  @param hooks_results: the results of the multi-node hooks rpc call
  @param feedback_fn: function used to send feedback back to the caller
  @param lu_result: previous Exec result
  @return: the new Exec result, based on the previous result
      and hook results

  """
  # Only POST phase hooks of non-empty groups are analysed; in every other
  # case the previous result is handed back untouched
  if not self.my_node_names or phase != constants.HOOKS_PHASE_POST:
    return lu_result

  feedback_fn("* Hooks Results")
  assert hooks_results, "invalid result from hooks"

  for node_name in hooks_results:
    node_res = hooks_results[node_name]
    fail_msg = node_res.fail_msg
    comm_failed = fail_msg and not node_res.offline
    self._ErrorIf(comm_failed, constants.CV_ENODEHOOKS, node_name,
                  "Communication failure in hooks execution: %s", fail_msg)
    if node_res.offline or fail_msg:
      # The payload of an offline or failed node is not worth looking at
      continue

    for (script, status, output) in node_res.payload:
      script_failed = (status == constants.HKR_FAIL)
      self._ErrorIf(script_failed, constants.CV_ENODEHOOKS, node_name,
                    "Script %s failed, output:", script)
      if not script_failed:
        continue
      # Re-indent the hook's output before showing it; a failed script
      # turns the overall result into a failure
      output = self._HOOKS_INDENT_RE.sub(" ", output)
      feedback_fn("%s" % output)
      lu_result = False

  return lu_result
+
+
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  The work itself is handed over to one L{opcodes.OpGroupVerifyDisks} job
  per owned node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks: all node groups, shared as per L{ShareAll}.

    """
    self.share_locks = ShareAll()
    group_locks = {}
    group_locks[locking.LEVEL_NODEGROUP] = locking.ALL_SET
    self.needed_locks = group_locks

  def Exec(self, feedback_fn):
    """Submit one L{opcodes.OpGroupVerifyDisks} job per node group.

    @param feedback_fn: feedback function (not used here)
    @return: a L{ResultWithJobs} holding one single-opcode job per group

    """
    jobs = []
    for group in self.owned_locks(locking.LEVEL_NODEGROUP):
      jobs.append([opcodes.OpGroupVerifyDisks(group_name=group)])
    return ResultWithJobs(jobs)
--- /dev/null
+ #
+ #
+
+ # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
+ #
+ # This program is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program; if not, write to the Free Software
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ # 02110-1301, USA.
+
+
+ """Logical units dealing with instances."""
+
+ import OpenSSL
+ import copy
+ import logging
+ import os
+
+ from ganeti import compat
+ from ganeti import constants
+ from ganeti import errors
+ from ganeti import ht
+ from ganeti import hypervisor
+ from ganeti import locking
+ from ganeti.masterd import iallocator
+ from ganeti import masterd
+ from ganeti import netutils
+ from ganeti import objects
+ from ganeti import opcodes
+ from ganeti import pathutils
+ from ganeti import rpc
+ from ganeti import utils
+
+ from ganeti.cmdlib.base import NoHooksLU, LogicalUnit, ResultWithJobs
+
+ from ganeti.cmdlib.common import INSTANCE_DOWN, \
+ INSTANCE_NOT_RUNNING, CAN_CHANGE_INSTANCE_OFFLINE, CheckNodeOnline, \
+ ShareAll, GetDefaultIAllocator, CheckInstanceNodeGroups, \
+ LoadNodeEvacResult, CheckIAllocatorOrNode, CheckParamsNotGlobal, \
+ IsExclusiveStorageEnabledNode, CheckHVParams, CheckOSParams, \
+ AnnotateDiskParams, GetUpdatedParams, ExpandInstanceName, \
+ ComputeIPolicySpecViolation, CheckInstanceState, ExpandNodeName
+ from ganeti.cmdlib.instance_storage import CreateDisks, \
+ CheckNodesFreeDiskPerVG, WipeDisks, WipeOrCleanupDisks, WaitForSync, \
+ IsExclusiveStorageEnabledNodeName, CreateSingleBlockDev, ComputeDisks, \
+ CheckRADOSFreeSpace, ComputeDiskSizePerVG, GenerateDiskTemplate, \
+ StartInstanceDisks, ShutdownInstanceDisks, AssembleInstanceDisks
+ from ganeti.cmdlib.instance_utils import BuildInstanceHookEnvByObject, \
+ GetClusterDomainSecret, BuildInstanceHookEnv, NICListToTuple, \
+ NICToTuple, CheckNodeNotDrained, RemoveInstance, CopyLockList, \
+ ReleaseLocks, CheckNodeVmCapable, CheckTargetNodeIPolicy, \
+ GetInstanceInfoText, RemoveDisks, CheckNodeFreeMemory, \
+ CheckInstanceBridgesExist, CheckNicsBridgesExist, CheckNodeHasOS
+
+ import ganeti.masterd.instance
+
+
#: Type description for changes as returned by L{_ApplyContainerMods}'s
#: callbacks (going by the C{ht} combinator names: an optional list of
#: two-item entries, a non-empty string followed by an arbitrary value)
_TApplyContModsCbChanges = ht.TMaybeListOf(
  ht.TAnd(
    ht.TIsLength(2),
    ht.TItems([ht.TNonEmptyString, ht.TAny]),
    ))
+
+
def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The resolved name has to match the given name according to
  L{utils.MatchNameComponent}, to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object
  @raise errors.OpPrereqError: if the resolved name does not match the
      given one

  """
  resolved = netutils.GetHostname(name=name)
  resolved_name = resolved.name

  if resolved_name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, resolved_name)

  if utils.MatchNameComponent(name, [resolved_name]):
    return resolved

  raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                              " same as given hostname '%s'") %
                             (resolved_name, name), errors.ECODE_INVAL)
+
+
+ def _CheckOpportunisticLocking(op):
+ """Generate error if opportunistic locking is not possible.
+
+ """
+ if op.opportunistic_locking and not op.iallocator:
+ raise errors.OpPrereqError("Opportunistic locking is only available in"
+ " combination with an instance allocator",
+ errors.ECODE_INVAL)
+
+
def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
  """Wrapper around IAReqInstanceAlloc.

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics; each one is serialized via its C{ToDict}
  @param beparams: The full filled beparams
  @param node_whitelist: List of nodes which should appear as online to the
    allocator (unless the node is already marked offline)

  @returns: A filled L{iallocator.IAReqInstanceAlloc}

  """
  request_args = {
    "spindle_use": beparams[constants.BE_SPINDLE_USE],
    "name": op.instance_name,
    "disk_template": op.disk_template,
    "tags": op.tags,
    "os": op.os_type,
    "vcpus": beparams[constants.BE_VCPUS],
    "memory": beparams[constants.BE_MAXMEM],
    "disks": disks,
    "nics": [nic.ToDict() for nic in nics],
    "hypervisor": op.hypervisor,
    "node_whitelist": node_whitelist,
    }

  # pylint: disable=W0142
  return iallocator.IAReqInstanceAlloc(**request_args)
+
+
def _ComputeFullBeParams(op, cluster):
  """Computes the full beparams.

  Values given as L{constants.VALUE_AUTO} are replaced, in C{op.beparams}
  itself, by the cluster's default before the dict is upgraded, type-checked
  and filled.

  @param op: The instance opcode
  @param cluster: The cluster config object

  @return: The fully filled beparams

  """
  beparams = op.beparams
  cluster_defaults = cluster.beparams[constants.PP_DEFAULT]

  auto_names = [name for (name, value) in beparams.iteritems()
                if value == constants.VALUE_AUTO]
  for name in auto_names:
    beparams[name] = cluster_defaults[name]

  objects.UpgradeBeParams(beparams)
  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)
  return cluster.SimpleFillBE(beparams)
+
+
def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
  """Computes the nics.

  Every NIC specification in C{op.nics} is validated (mode, network, IP
  and MAC address) and turned into an L{objects.NIC} with a freshly
  generated UUID.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The default ip to assign
  @param cfg: An instance of the configuration object
  @param ec_id: Execution context ID

  @returns: The build up nics
  @raise errors.OpPrereqError: if a NIC specification is invalid or its
      MAC address is already reserved

  """
  nics = []
  for nic in op.nics:
    # Mode: fall back to the cluster default if unset or "auto"
    requested_mode = nic.get(constants.INIC_MODE, None)
    if requested_mode is None or requested_mode == constants.VALUE_AUTO:
      mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
    else:
      mode = requested_mode

    network = nic.get(constants.INIC_NETWORK, None)
    link = nic.get(constants.NIC_LINK, None)
    ip = nic.get(constants.INIC_IP, None)

    if network is not None and network.lower() == constants.VALUE_NONE:
      network = None
    if network is not None and (requested_mode is not None or
                                link is not None):
      raise errors.OpPrereqError("If network is given, no mode or link"
                                 " is allowed to be passed",
                                 errors.ECODE_INVAL)

    # ip validity checks
    if ip is None:
      ip_keyword = None
    else:
      ip_keyword = ip.lower()

    if ip_keyword is None or ip_keyword == constants.VALUE_NONE:
      nic_ip = None
    elif ip_keyword == constants.VALUE_AUTO:
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
                                   errors.ECODE_INVAL)
      nic_ip = default_ip
    else:
      # We defer pool operations until later, so that the iallocator has
      # filled in the instance's node(s)
      if ip_keyword == constants.NIC_IP_POOL:
        if network is None:
          raise errors.OpPrereqError("if ip=pool, parameter network"
                                     " must be passed too",
                                     errors.ECODE_INVAL)
      elif not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)
      nic_ip = ip

    # TODO: check the ip address for uniqueness
    if mode == constants.NIC_MODE_ROUTED and not nic_ip:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                 errors.ECODE_INVAL)

    # MAC address verification; "auto"/"generate" are passed on as they are
    mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)
      try:
        # TODO: We need to factor this out
        cfg.ReserveMAC(mac, ec_id)
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address %s already in use"
                                   " in cluster" % mac,
                                   errors.ECODE_NOTUNIQUE)

    # Build nic parameters; only explicitly requested values are stored
    nicparams = {}
    if requested_mode:
      nicparams[constants.NIC_MODE] = mode
    if link:
      nicparams[constants.NIC_LINK] = link

    # The syntax check runs on the parameters filled with cluster defaults
    objects.NIC.CheckParameterSyntax(cluster.SimpleFillNIC(nicparams))

    net_uuid = cfg.LookupNetwork(network)

    nic_name = nic.get(constants.INIC_NAME, None)
    if nic_name is not None and nic_name.lower() == constants.VALUE_NONE:
      nic_name = None

    new_nic = objects.NIC(mac=mac, ip=nic_ip, name=nic_name,
                          network=net_uuid, nicparams=nicparams)
    new_nic.uuid = cfg.GenerateUniqueID(ec_id)
    nics.append(new_nic)

  return nics
+
+
def _CheckForConflictingIp(lu, ip, node):
  """In case of conflicting IP address raise error.

  @param lu: the logical unit whose configuration (C{lu.cfg}) is queried
  @type ip: string
  @param ip: IP address
  @type node: string
  @param node: node name
  @return: C{(None, None)} if no conflicting network was found
  @raise errors.OpPrereqError: if C{lu.cfg.CheckIPInNodeGroup} reports a
      network for the address

  """
  (conflicting_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
  if conflicting_net is None:
    return (None, None)

  raise errors.OpPrereqError(("The requested IP address (%s) belongs to"
                              " network %s, but the target NIC does not." %
                              (ip, conflicting_net)),
                             errors.ECODE_STATE)
+
+
def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, disk_template,
  _compute_fn=ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  Missing spec entries default to C{None} (memory, CPU count, spindle use),
  C{0} (disk and NIC count) or an empty list (disk sizes).

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @return: whatever C{_compute_fn} returns
  @see: L{ComputeIPolicySpecViolation}

  """
  spec_get = instance_spec.get

  return _compute_fn(ipolicy,
                     spec_get(constants.ISPEC_MEM_SIZE, None),
                     spec_get(constants.ISPEC_CPU_COUNT, None),
                     spec_get(constants.ISPEC_DISK_COUNT, 0),
                     spec_get(constants.ISPEC_NIC_COUNT, 0),
                     spec_get(constants.ISPEC_DISK_SIZE, []),
                     spec_get(constants.ISPEC_SPINDLE_USE, None),
                     disk_template)
+
+
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if a variant is given for an OS without
      variants, or if it is missing or unsupported for an OS with variants

  """
  variant = objects.OS.GetVariant(name)
  supported = os_obj.supported_variants

  if supported:
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)
    if variant not in supported:
      raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
  elif variant:
    raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                               " passed)" % (os_obj.name, variant),
                               errors.ECODE_INVAL)
+
+
+ class LUInstanceCreate(LogicalUnit):
+ """Create an instance.
+
+ """
+ HPATH = "instance-add"
+ HTYPE = constants.HTYPE_INSTANCE
+ REQ_BGL = False
+
+ def CheckArguments(self):
+ """Check arguments.
+
+ """
+ # do not require name_check to ease forward/backward compatibility
+ # for tools
+ if self.op.no_install and self.op.start:
+ self.LogInfo("No-installation mode selected, disabling startup")
+ self.op.start = False
+ # validate/normalize the instance name
+ self.op.instance_name = \
+ netutils.Hostname.GetNormalizedName(self.op.instance_name)
+
+ if self.op.ip_check and not self.op.name_check:
+ # TODO: make the ip check more flexible and not depend on the name check
+ raise errors.OpPrereqError("Cannot do IP address check without a name"
+ " check", errors.ECODE_INVAL)
+
+ # check nics' parameter names
+ for nic in self.op.nics:
+ utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
+ # check that NIC's parameters names are unique and valid
+ utils.ValidateDeviceNames("NIC", self.op.nics)
+
+ # check that disk's names are unique and valid
+ utils.ValidateDeviceNames("disk", self.op.disks)
+
+ cluster = self.cfg.GetClusterInfo()
+ if not self.op.disk_template in cluster.enabled_disk_templates:
+ raise errors.OpPrereqError("Cannot create an instance with disk template"
+ " '%s', because it is not enabled in the"
+ " cluster. Enabled disk templates are: %s." %
+ (self.op.disk_template,
+ ",".join(cluster.enabled_disk_templates)))
+
+ # check disks. parameter names and consistent adopt/no-adopt strategy
+ has_adopt = has_no_adopt = False
+ for disk in self.op.disks:
+ if self.op.disk_template != constants.DT_EXT:
+ utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
+ if constants.IDISK_ADOPT in disk:
+ has_adopt = True
+ else:
+ has_no_adopt = True
+ if has_adopt and has_no_adopt:
+ raise errors.OpPrereqError("Either all disks are adopted or none is",
+ errors.ECODE_INVAL)
+ if has_adopt:
+ if self.op.disk_template not in constants.DTS_MAY_ADOPT:
+ raise errors.OpPrereqError("Disk adoption is not supported for the"
+ " '%s' disk template" %
+ self.op.disk_template,
+ errors.ECODE_INVAL)
+ if self.op.iallocator is not None:
+ raise errors.OpPrereqError("Disk adoption not allowed with an"
+ " iallocator script", errors.ECODE_INVAL)
+ if self.op.mode == constants.INSTANCE_IMPORT:
+ raise errors.OpPrereqError("Disk adoption not allowed for"
+ " instance import", errors.ECODE_INVAL)
+ else:
+ if self.op.disk_template in constants.DTS_MUST_ADOPT:
+ raise errors.OpPrereqError("Disk template %s requires disk adoption,"
+ " but no 'adopt' parameter given" %
+ self.op.disk_template,
+ errors.ECODE_INVAL)
+
+ self.adopt_disks = has_adopt
+
+ # instance name verification
+ if self.op.name_check:
+ self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
+ self.op.instance_name = self.hostname1.name
+ # used in CheckPrereq for ip ping check
+ self.check_ip = self.hostname1.ip
+ else:
+ self.check_ip = None
+
+ # file storage checks
+ if (self.op.file_driver and
+ not self.op.file_driver in constants.FILE_DRIVER):
+ raise errors.OpPrereqError("Invalid file driver name '%s'" %
+ self.op.file_driver, errors.ECODE_INVAL)
+
+ if self.op.disk_template == constants.DT_FILE:
+ opcodes.RequireFileStorage()
+ elif self.op.disk_template == constants.DT_SHARED_FILE:
+ opcodes.RequireSharedFileStorage()
+
+ ### Node/iallocator related checks
+ CheckIAllocatorOrNode(self, "iallocator", "pnode")
+
+ if self.op.pnode is not None:
+ if self.op.disk_template in constants.DTS_INT_MIRROR:
+ if self.op.snode is None:
+ raise errors.OpPrereqError("The networked disk templates need"
+ " a mirror node", errors.ECODE_INVAL)
+ elif self.op.snode:
+ self.LogWarning("Secondary node will be ignored on non-mirrored disk"
+ " template")
+ self.op.snode = None
+
+ _CheckOpportunisticLocking(self.op)
+
+ self._cds = GetClusterDomainSecret()
+
+ if self.op.mode == constants.INSTANCE_IMPORT:
+ # On import force_variant must be True, because if we forced it at
+ # initial install, our only chance when importing it back is that it
+ # works again!
+ self.op.force_variant = True
+
+ if self.op.no_install:
+ self.LogInfo("No-installation mode has no effect during import")
+
+ elif self.op.mode == constants.INSTANCE_CREATE:
+ if self.op.os_type is None:
+ raise errors.OpPrereqError("No guest OS specified",
+ errors.ECODE_INVAL)
+ if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
+ raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
+ " installation" % self.op.os_type,
+ errors.ECODE_STATE)
+ if self.op.disk_template is None:
+ raise errors.OpPrereqError("No disk template specified",
+ errors.ECODE_INVAL)
+
+ elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
+ # Check handshake to ensure both clusters have the same domain secret
+ src_handshake = self.op.source_handshake
+ if not src_handshake:
+ raise errors.OpPrereqError("Missing source handshake",
+ errors.ECODE_INVAL)
+
+ errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
+ src_handshake)
+ if errmsg:
+ raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
+ errors.ECODE_INVAL)
+
+ # Load and check source CA
+ self.source_x509_ca_pem = self.op.source_x509_ca
+ if not self.source_x509_ca_pem:
+ raise errors.OpPrereqError("Missing source X509 CA",
+ errors.ECODE_INVAL)
+
+ try:
+ (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
+ self._cds)
+ except OpenSSL.crypto.Error, err:
+ raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
+ (err, ), errors.ECODE_INVAL)
+
+ (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
+ if errcode is not None:
+ raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
+ errors.ECODE_INVAL)
+
+ self.source_x509_ca = cert
+
+ src_instance_name = self.op.source_instance_name
+ if not src_instance_name:
+ raise errors.OpPrereqError("Missing source instance name",
+ errors.ECODE_INVAL)
+
+ self.source_instance_name = \
+ netutils.GetHostname(name=src_instance_name).name
+
+ else:
+ raise errors.OpPrereqError("Invalid instance creation mode %r" %
+ self.op.mode, errors.ECODE_INVAL)
+
def ExpandNames(self):
  """ExpandNames for CreateInstance.

  Figure out the right locks for instance creation: the new instance's
  name is added as a lock, node locks are either all nodes (iallocator, or
  import without a source node) or the explicitly named nodes, and the
  node resource locks are a copy of the node locks.

  @raise errors.OpPrereqError: if the instance already exists or an import
      from an absolute path lacks a source node

  """
  op = self.op
  self.needed_locks = {}

  new_name = op.instance_name
  # this is just a preventive check, but someone might still add this
  # instance in the meantime, and creation will fail at lock-add time
  if new_name in self.cfg.GetInstanceList():
    raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                               new_name, errors.ECODE_EXISTS)

  self.add_locks[locking.LEVEL_INSTANCE] = new_name

  if not op.iallocator:
    # Explicit placement: lock just the primary (and secondary) node
    op.pnode = ExpandNodeName(self.cfg, op.pnode)
    node_locks = [op.pnode]
    if op.snode is not None:
      op.snode = ExpandNodeName(self.cfg, op.snode)
      node_locks.append(op.snode)
    self.needed_locks[locking.LEVEL_NODE] = node_locks
  else:
    # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
    # specifying a group on instance creation and then selecting nodes from
    # that group
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    if op.opportunistic_locking:
      self.opportunistic_locks[locking.LEVEL_NODE] = True
      self.opportunistic_locks[locking.LEVEL_NODE_RES] = True

  # in case of import lock the source node too
  if op.mode == constants.INSTANCE_IMPORT:
    import_node = op.src_node
    import_path = op.src_path

    if import_path is None:
      # Without an explicit path the export is looked up by instance name
      op.src_path = import_path = op.instance_name

    if import_node is not None:
      op.src_node = import_node = ExpandNodeName(self.cfg, import_node)
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        self.needed_locks[locking.LEVEL_NODE].append(import_node)
      if not os.path.isabs(import_path):
        op.src_path = import_path = \
          utils.PathJoin(pathutils.EXPORT_DIR, import_path)
    else:
      # Unknown source node: all nodes have to be locked
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
      op.src_node = None
      if os.path.isabs(import_path):
        raise errors.OpPrereqError("Importing an instance from a path"
                                   " requires a source node option",
                                   errors.ECODE_INVAL)

  self.needed_locks[locking.LEVEL_NODE_RES] = \
    CopyLockList(self.needed_locks[locking.LEVEL_NODE])
+
def _RunAllocator(self):
  """Run the allocator based on input opcode.

  On success C{self.op.pnode} (and C{self.op.snode}, if the request needs
  two nodes) are set from the allocator's result.

  @raise errors.OpPrereqError: if the allocator did not succeed

  """
  opportunistic = self.op.opportunistic_locking

  node_whitelist = None
  if opportunistic:
    # Only consider nodes for which a lock is held
    node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))

  #TODO Export network to iallocator so that it chooses a pnode
  # in a nodegroup that has the desired network connected to
  req = _CreateInstanceAllocRequest(self.op, self.disks,
                                    self.nics, self.be_full,
                                    node_whitelist)
  ial = iallocator.IAllocator(self.cfg, self.rpc, req)

  ial.Run(self.op.iallocator)

  if not ial.success:
    # When opportunistic locks are used only a temporary failure is generated
    ecode = errors.ECODE_NORES
    if self.op.opportunistic_locking:
      ecode = errors.ECODE_TEMP_NORES

    raise errors.OpPrereqError("Can't compute nodes using"
                               " iallocator '%s': %s" %
                               (self.op.iallocator, ial.info),
                               ecode)

  chosen = ial.result
  self.op.pnode = chosen[0]
  self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
               self.op.instance_name, self.op.iallocator,
               utils.CommaJoin(chosen))

  assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"

  if req.RequiredNodes() == 2:
    self.op.snode = chosen[1]
+
def BuildHooksEnv(self):
  """Build hooks env.

  This runs on master, primary and secondary nodes of the instance.

  @return: dict with C{ADD_MODE}, the source node/path/images for imports,
      and everything returned by L{BuildInstanceHookEnv}

  """
  op = self.op
  be_full = self.be_full

  env = {}
  env["ADD_MODE"] = op.mode
  if op.mode == constants.INSTANCE_IMPORT:
    env["SRC_NODE"] = op.src_node
    env["SRC_PATH"] = op.src_path
    env["SRC_IMAGES"] = self.src_images

  instance_env = BuildInstanceHookEnv(
    name=op.instance_name,
    primary_node=op.pnode,
    secondary_nodes=self.secondaries,
    status=op.start,
    os_type=op.os_type,
    minmem=be_full[constants.BE_MINMEM],
    maxmem=be_full[constants.BE_MAXMEM],
    vcpus=be_full[constants.BE_VCPUS],
    nics=NICListToTuple(self, self.nics),
    disk_template=op.disk_template,
    disks=[(disk[constants.IDISK_NAME], disk[constants.IDISK_SIZE],
            disk[constants.IDISK_MODE]) for disk in self.disks],
    bep=be_full,
    hvp=self.hv_full,
    hypervisor_name=op.hypervisor,
    tags=op.tags,
    )
  env.update(instance_env)

  return env
+
def BuildHooksNodes(self):
  """Build hooks nodes.

  @return: tuple of twice the same list: the master node, the primary node
      and the secondary nodes

  """
  master = self.cfg.GetMasterNode()
  hook_nodes = [master, self.op.pnode] + self.secondaries
  return (hook_nodes, hook_nodes)
+
def _ReadExportInfo(self):
  """Reads the export information from disk.

  It will override the opcode source node and path with the actual
  information, if these two were not specified before: without a source
  node, the export lists of all locked nodes are searched for the
  (relative) source path.

  @return: the export information
  @raise errors.OpPrereqError: if no export is found or its version does
      not match L{constants.EXPORT_VERSION}
  @raise errors.ProgrammerError: if the export lacks its main section

  """
  assert self.op.mode == constants.INSTANCE_IMPORT

  src_node = self.op.src_node
  src_path = self.op.src_path

  if src_node is None:
    exports = self.rpc.call_export_list(self.owned_locks(locking.LEVEL_NODE))
    for node in exports:
      node_exports = exports[node]
      if node_exports.fail_msg:
        # Nodes that failed to answer are simply not considered
        continue
      if src_path in node_exports.payload:
        self.op.src_node = src_node = node
        self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
                                                     src_path)
        break
    else:
      # The loop ran to completion, hence no node has the export
      raise errors.OpPrereqError("No export found for relative path %s" %
                                 src_path, errors.ECODE_INVAL)

  CheckNodeOnline(self, src_node)
  result = self.rpc.call_export_info(src_node, src_path)
  result.Raise("No export or invalid export found in dir %s" % src_path)

  export_info = objects.SerializableConfigParser.Loads(str(result.payload))
  if not export_info.has_section(constants.INISECT_EXP):
    raise errors.ProgrammerError("Corrupted export config",
                                 errors.ECODE_ENVIRON)

  ei_version = export_info.get(constants.INISECT_EXP, "version")
  if int(ei_version) == constants.EXPORT_VERSION:
    return export_info

  raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                             (ei_version, constants.EXPORT_VERSION),
                             errors.ECODE_ENVIRON)
+
def _ReadExportParams(self, einfo):
  """Use export parameters as defaults.

  In case the opcode doesn't specify (as in override) some instance
  parameters, then try to use them from the export information, if
  that declares them. The OS type is always taken from the export.

  @param einfo: the export information (config parser like object)
  @raise errors.OpPrereqError: if the disk template or the disks are
      neither given in the opcode nor usable from the export

  """
  op = self.op
  ins_section = constants.INISECT_INS

  def _FillMissing(section, params):
    """Copies the options of a section, keeping the user's own values.

    """
    for (key, val) in einfo.items(section):
      if key not in params:
        params[key] = val

  op.os_type = einfo.get(constants.INISECT_EXP, "os")

  if op.disk_template is None:
    if not einfo.has_option(ins_section, "disk_template"):
      raise errors.OpPrereqError("No disk template specified and the export"
                                 " is missing the disk_template information",
                                 errors.ECODE_INVAL)
    op.disk_template = einfo.get(ins_section, "disk_template")
    if op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Disk template specified in configuration"
                                 " file is not one of the allowed values:"
                                 " %s" %
                                 " ".join(constants.DISK_TEMPLATES),
                                 errors.ECODE_INVAL)

  if not op.disks:
    disks = []
    # TODO: import the disk iv_name too
    for idx in range(constants.MAX_DISKS):
      size_option = "disk%d_size" % idx
      if einfo.has_option(ins_section, size_option):
        disks.append({
          constants.IDISK_SIZE: einfo.getint(ins_section, size_option),
          })
    op.disks = disks
    if not disks and op.disk_template != constants.DT_DISKLESS:
      raise errors.OpPrereqError("No disk info specified and the export"
                                 " is missing the disk information",
                                 errors.ECODE_INVAL)

  if not op.nics:
    nic_keys = list(constants.NICS_PARAMETERS) + ["ip", "mac"]
    nics = []
    for idx in range(constants.MAX_NICS):
      # NICs are read until the first index without a MAC address
      if not einfo.has_option(ins_section, "nic%d_mac" % idx):
        break
      nics.append(dict([(key, einfo.get(ins_section,
                                        "nic%d_%s" % (idx, key)))
                        for key in nic_keys]))
    op.nics = nics

  if not op.tags and einfo.has_option(ins_section, "tags"):
    op.tags = einfo.get(ins_section, "tags").split()

  if op.hypervisor is None and einfo.has_option(ins_section, "hypervisor"):
    op.hypervisor = einfo.get(ins_section, "hypervisor")

  if einfo.has_section(constants.INISECT_HYP):
    # use the export parameters but do not override the ones
    # specified by the user
    _FillMissing(constants.INISECT_HYP, op.hvparams)

  if einfo.has_section(constants.INISECT_BEP):
    # use the parameters, without overriding
    for (key, val) in einfo.items(constants.INISECT_BEP):
      if key not in op.beparams:
        op.beparams[key] = val
      if key != constants.BE_MEMORY:
        continue
      # Compatibility for the old "memory" be param
      for mem_key in (constants.BE_MAXMEM, constants.BE_MINMEM):
        if mem_key not in op.beparams:
          op.beparams[mem_key] = val
  else:
    # try to read the parameters old style, from the main section
    for key in constants.BES_PARAMETERS:
      if key not in op.beparams and einfo.has_option(ins_section, key):
        op.beparams[key] = einfo.get(ins_section, key)

  if einfo.has_section(constants.INISECT_OSP):
    # use the parameters, without overriding
    _FillMissing(constants.INISECT_OSP, op.osparams)
+
+ def _RevertToDefaults(self, cluster):
+ """Revert the instance parameters to the default values.
+
+ """
+ # hvparams
+ hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
+ for name in self.op.hvparams.keys():
+ if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
+ del self.op.hvparams[name]
+ # beparams
+ be_defs = cluster.SimpleFillBE({})
+ for name in self.op.beparams.keys():
+ if name in be_defs and be_defs[name] == self.op.beparams[name]:
+ del self.op.beparams[name]
+ # nic params
+ nic_defs = cluster.SimpleFillNIC({})
+ for nic in self.op.nics:
+ for name in constants.NICS_PARAMETERS:
+ if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
+ del nic[name]
+ # osparams
+ os_defs = cluster.SimpleFillOS(self.op.os_type, {})
+ for name in self.op.osparams.keys():
+ if name in os_defs and os_defs[name] == self.op.osparams[name]:
+ del self.op.osparams[name]
+
def _CalculateFileStorageDir(self):
  """Calculate final instance file storage dir.

  Sets C{self.instance_file_storage_dir} to C{None} for disk templates
  outside L{constants.DTS_FILEBASED}; otherwise to the cluster's (shared)
  file storage directory joined with the optional opcode-given directory
  and the instance name.

  @raise errors.OpPrereqError: if the cluster has no such storage directory

  """
  self.instance_file_storage_dir = None

  template = self.op.disk_template
  if template not in constants.DTS_FILEBASED:
    return

  # Shared file storage has its own cluster-level base directory
  if template == constants.DT_SHARED_FILE:
    cfg_storagedir = self.cfg.GetSharedFileStorageDir()
  else:
    cfg_storagedir = self.cfg.GetFileStorageDir()

  if not cfg_storagedir:
    raise errors.OpPrereqError("Cluster file storage dir not defined",
                               errors.ECODE_STATE)

  # build the full file storage dir path
  path_parts = [cfg_storagedir]
  if self.op.file_storage_dir is not None:
    path_parts.append(self.op.file_storage_dir)
  path_parts.append(self.op.instance_name)

  # pylint: disable=W0142
  self.instance_file_storage_dir = utils.PathJoin(*path_parts)
+
+ def CheckPrereq(self): # pylint: disable=R0914
+ """Check prerequisites.
+
+ """
+ self._CalculateFileStorageDir()  # sets self.instance_file_storage_dir
+
+ if self.op.mode == constants.INSTANCE_IMPORT:
+ export_info = self._ReadExportInfo()
+ self._ReadExportParams(export_info)
+ self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
+ else:
+ self._old_instance_name = None  # only local imports record a previous name
+
+ if (not self.cfg.GetVGName() and
+ self.op.disk_template not in constants.DTS_NOT_LVM):
+ raise errors.OpPrereqError("Cluster does not support lvm-based"
+ " instances", errors.ECODE_STATE)
+
+ if (self.op.hypervisor is None or
+ self.op.hypervisor == constants.VALUE_AUTO):
+ self.op.hypervisor = self.cfg.GetHypervisorType()
+
+ cluster = self.cfg.GetClusterInfo()
+ enabled_hvs = cluster.enabled_hypervisors
+ if self.op.hypervisor not in enabled_hvs:
+ raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
+ " cluster (%s)" %
+ (self.op.hypervisor, ",".join(enabled_hvs)),
+ errors.ECODE_STATE)
+
+ # Check tag validity
+ for tag in self.op.tags:
+ objects.TaggableObject.ValidateTag(tag)
+
+ # check hypervisor parameter syntax (locally)
+ utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
+ filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
+ self.op.hvparams)
+ hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
+ hv_type.CheckParameterSyntax(filled_hvp)
+ self.hv_full = filled_hvp
+ # check that we don't specify global parameters on an instance
+ CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
+ "instance", "cluster")
+
+ # fill and remember the beparams dict
+ self.be_full = _ComputeFullBeParams(self.op, cluster)
+
+ # build os parameters
+ self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
+
+ # now that hvp/bep are in final format, let's reset to defaults,
+ # if told to do so
+ if self.op.identify_defaults:
+ self._RevertToDefaults(cluster)
+
+ # NIC buildup
+ self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
+ self.proc.GetECId())
+
+ # disk checks/pre-build
+ default_vg = self.cfg.GetVGName()
+ self.disks = ComputeDisks(self.op, default_vg)
+
+ if self.op.mode == constants.INSTANCE_IMPORT:
+ disk_images = []
+ for idx in range(len(self.disks)):
+ option = "disk%d_dump" % idx
+ if export_info.has_option(constants.INISECT_INS, option):
+ # FIXME: are the old os-es, disk sizes, etc. useful?
+ export_name = export_info.get(constants.INISECT_INS, option)
+ image = utils.PathJoin(self.op.src_path, export_name)
+ disk_images.append(image)
+ else:
+ disk_images.append(False)  # placeholder keeps list positions aligned with disk indices
+
+ self.src_images = disk_images
+
+ if self.op.instance_name == self._old_instance_name:
+ for idx, nic in enumerate(self.nics):
+ if nic.mac == constants.VALUE_AUTO:
+ nic_mac_ini = "nic%d_mac" % idx
+ nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)  # reuse the exported MAC when the name is unchanged
+
+ # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
+
+ # ip ping checks (we use the same ip that was resolved in ExpandNames)
+ if self.op.ip_check:
+ if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
+ raise errors.OpPrereqError("IP %s of instance %s already in use" %
+ (self.check_ip, self.op.instance_name),
+ errors.ECODE_NOTUNIQUE)
+
+ #### mac address generation
+ # By generating here the mac address both the allocator and the hooks get
+ # the real final mac address rather than the 'auto' or 'generate' value.
+ # There is a race condition between the generation and the instance object
+ # creation, which means that we know the mac is valid now, but we're not
+ # sure it will be when we actually add the instance. If things go bad
+ # adding the instance will abort because of a duplicate mac, and the
+ # creation job will fail.
+ for nic in self.nics:
+ if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+ nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
+
+ #### allocator run
+
+ if self.op.iallocator is not None:
+ self._RunAllocator()
+
+ # Release all unneeded node locks
+ keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])  # filter(None, ...) drops unset (falsy) node names
+ ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
+ ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
+ ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
+
+ assert (self.owned_locks(locking.LEVEL_NODE) ==
+ self.owned_locks(locking.LEVEL_NODE_RES)), \
+ "Node locks differ from node resource locks"
+
+ #### node related checks
+
+ # check primary node
+ self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
+ assert self.pnode is not None, \
+ "Cannot retrieve locked node %s" % self.op.pnode
+ if pnode.offline:
+ raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
+ pnode.name, errors.ECODE_STATE)
+ if pnode.drained:
+ raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
+ pnode.name, errors.ECODE_STATE)
+ if not pnode.vm_capable:
+ raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
+ " '%s'" % pnode.name, errors.ECODE_STATE)
+
+ self.secondaries = []  # only filled for internally mirrored templates (below)
+
+ # Fill in any IPs from IP pools. This must happen here, because we need to
+ # know the nic's primary node, as specified by the iallocator
+ for idx, nic in enumerate(self.nics):
+ net_uuid = nic.network
+ if net_uuid is not None:
+ nobj = self.cfg.GetNetwork(net_uuid)
+ netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
+ if netparams is None:
+ raise errors.OpPrereqError("No netparams found for network"
+ " %s. Propably not connected to"  # NOTE(review): "Propably" is a typo for "Probably" in this user-visible message
+ " node's %s nodegroup" %
+ (nobj.name, self.pnode.name),
+ errors.ECODE_INVAL)
+ self.LogInfo("NIC/%d inherits netparams %s" %
+ (idx, netparams.values()))
+ nic.nicparams = dict(netparams)  # copy, so the NIC does not share the dict returned by the config
+ if nic.ip is not None:
+ if nic.ip.lower() == constants.NIC_IP_POOL:
+ try:
+ nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
+ except errors.ReservationError:
+ raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
+ " from the address pool" % idx,
+ errors.ECODE_STATE)
+ self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
+ else:
+ try:
+ self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
+ except errors.ReservationError:
+ raise errors.OpPrereqError("IP address %s already in use"
+ " or does not belong to network %s" %
+ (nic.ip, nobj.name),
+ errors.ECODE_NOTUNIQUE)
+
+ # net is None, ip None or given
+ elif self.op.conflicts_check:
+ _CheckForConflictingIp(self, nic.ip, self.pnode.name)
+
+ # mirror node verification
+ if self.op.disk_template in constants.DTS_INT_MIRROR:
+ if self.op.snode == pnode.name:
+ raise errors.OpPrereqError("The secondary node cannot be the"
+ " primary node", errors.ECODE_INVAL)
+ CheckNodeOnline(self, self.op.snode)
+ CheckNodeNotDrained(self, self.op.snode)
+ CheckNodeVmCapable(self, self.op.snode)
+ self.secondaries.append(self.op.snode)
+
+ snode = self.cfg.GetNodeInfo(self.op.snode)
+ if pnode.group != snode.group:
+ self.LogWarning("The primary and secondary nodes are in two"
+ " different node groups; the disk parameters"
+ " from the first disk's node group will be"
+ " used")
+
- if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
- nodes = [pnode]
- if self.op.disk_template in constants.DTS_INT_MIRROR:
- nodes.append(snode)
- has_es = lambda n: IsExclusiveStorageEnabledNode(self.cfg, n)
- if compat.any(map(has_es, nodes)):
++ nodes = [pnode]
++ if self.op.disk_template in constants.DTS_INT_MIRROR:
++ nodes.append(snode)
++ has_es = lambda n: IsExclusiveStorageEnabledNode(self.cfg, n)
++ if compat.any(map(has_es, nodes)):
++ if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
+ raise errors.OpPrereqError("Disk template %s not supported with"
+ " exclusive storage" % self.op.disk_template,
+ errors.ECODE_STATE)
+
+ nodenames = [pnode.name] + self.secondaries
+
+ if not self.adopt_disks:
+ if self.op.disk_template == constants.DT_RBD:
+ # CheckRADOSFreeSpace() is just a placeholder.
+ # Any function that checks prerequisites can be placed here.
+ # Check if there is enough space on the RADOS cluster.
+ CheckRADOSFreeSpace()
+ elif self.op.disk_template == constants.DT_EXT:
+ # FIXME: Function that checks prereqs if needed
+ pass
+ else:
+ # Check lv size requirements, if not adopting
+ req_sizes = ComputeDiskSizePerVG(self.op.disk_template, self.disks)
+ CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
+
+ elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
+ all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
+ disk[constants.IDISK_ADOPT])
+ for disk in self.disks])
+ if len(all_lvs) != len(self.disks):  # the set collapsed duplicates, so a shorter set means repeated names
+ raise errors.OpPrereqError("Duplicate volume names given for adoption",
+ errors.ECODE_INVAL)
+ for lv_name in all_lvs:
+ try:
+ # FIXME: lv_name here is "vg/lv" need to ensure that other calls
+ # to ReserveLV uses the same syntax
+ self.cfg.ReserveLV(lv_name, self.proc.GetECId())
+ except errors.ReservationError:
+ raise errors.OpPrereqError("LV named %s used by another instance" %
+ lv_name, errors.ECODE_NOTUNIQUE)
+
+ vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
+ vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
+
+ node_lvs = self.rpc.call_lv_list([pnode.name],
+ vg_names.payload.keys())[pnode.name]
+ node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
+ node_lvs = node_lvs.payload
+
+ delta = all_lvs.difference(node_lvs.keys())
+ if delta:
+ raise errors.OpPrereqError("Missing logical volume(s): %s" %
+ utils.CommaJoin(delta),
+ errors.ECODE_INVAL)
+ online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]  # presumably index 2 is the LV "online" flag - TODO confirm against the lv_list RPC
+ if online_lvs:
+ raise errors.OpPrereqError("Online logical volumes found, cannot"
+ " adopt: %s" % utils.CommaJoin(online_lvs),
+ errors.ECODE_STATE)
+ # update the size of disk based on what is found
+ for dsk in self.disks:
+ dsk[constants.IDISK_SIZE] = \
+ int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
+ dsk[constants.IDISK_ADOPT])][0]))  # presumably index 0 is the LV size - TODO confirm
+
+ elif self.op.disk_template == constants.DT_BLOCK:
+ # Normalize and de-duplicate device paths
+ all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
+ for disk in self.disks])
+ if len(all_disks) != len(self.disks):
+ raise errors.OpPrereqError("Duplicate disk names given for adoption",
+ errors.ECODE_INVAL)
+ baddisks = [d for d in all_disks
+ if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
+ if baddisks:
+ raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
+ " cannot be adopted" %
+ (utils.CommaJoin(baddisks),
+ constants.ADOPTABLE_BLOCKDEV_ROOT),
+ errors.ECODE_INVAL)
+
+ node_disks = self.rpc.call_bdev_sizes([pnode.name],
+ list(all_disks))[pnode.name]
+ node_disks.Raise("Cannot get block device information from node %s" %
+ pnode.name)
+ node_disks = node_disks.payload
+ delta = all_disks.difference(node_disks.keys())
+ if delta:
+ raise errors.OpPrereqError("Missing block device(s): %s" %
+ utils.CommaJoin(delta),
+ errors.ECODE_INVAL)
+ for dsk in self.disks:
+ dsk[constants.IDISK_SIZE] = \
+ int(float(node_disks[dsk[constants.IDISK_ADOPT]]))  # NOTE(review): keyed by the raw adopt path, while node_disks was queried with abspath()-normalized paths - confirm they always match
+
+ # Verify instance specs
+ spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
+ ispec = {
+ constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
+ constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
+ constants.ISPEC_DISK_COUNT: len(self.disks),
+ constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
+ for disk in self.disks],
+ constants.ISPEC_NIC_COUNT: len(self.nics),
+ constants.ISPEC_SPINDLE_USE: spindle_use,
+ }
+
+ group_info = self.cfg.GetNodeGroup(pnode.group)
+ ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
+ res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec,
+ self.op.disk_template)
+ if not self.op.ignore_ipolicy and res:  # violations are tolerated when ignore_ipolicy is set
+ msg = ("Instance allocation to group %s (%s) violates policy: %s" %
+ (pnode.group, group_info.name, utils.CommaJoin(res)))
+ raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
+
+ CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
+
+ CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
+ # check OS parameters (remotely)
+ CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
+
+ CheckNicsBridgesExist(self, self.nics, self.pnode.name)
+
+ #TODO: _CheckExtParams (remotely)
+ # Check parameters for extstorage
+
+ # memory check on primary node
+ #TODO(dynmem): use MINMEM for checking
+ if self.op.start:  # free memory is only checked when the instance will be started
+ CheckNodeFreeMemory(self, self.pnode.name,
+ "creating instance %s" % self.op.instance_name,
+ self.be_full[constants.BE_MAXMEM],
+ self.op.hypervisor)
+
+ self.dry_run_result = list(nodenames)  # primary node name followed by the secondaries
+
+ def Exec(self, feedback_fn):
+ """Create and add the instance to the cluster.
+
+ """
+ instance = self.op.instance_name  # a name string here, not an instance object
+ pnode_name = self.pnode.name
+
+ assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
+ self.owned_locks(locking.LEVEL_NODE)), \
+ "Node locks differ from node resource locks"
+ assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
+
+ ht_kind = self.op.hypervisor
+ if ht_kind in constants.HTS_REQ_PORT:
+ network_port = self.cfg.AllocatePort()
+ else:
+ network_port = None
+
+ # This is ugly but we got a chicken-egg problem here
+ # We can only take the group disk parameters, as the instance
+ # has no disks yet (we are generating them right here).
+ node = self.cfg.GetNodeInfo(pnode_name)
+ nodegroup = self.cfg.GetNodeGroup(node.group)
+ disks = GenerateDiskTemplate(self,
+ self.op.disk_template,
+ instance, pnode_name,
+ self.secondaries,
+ self.disks,
+ self.instance_file_storage_dir,
+ self.op.file_driver,
+ 0,  # presumably the base disk index - TODO confirm against GenerateDiskTemplate
+ feedback_fn,
+ self.cfg.GetGroupDiskParams(nodegroup))
+
+ iobj = objects.Instance(name=instance, os=self.op.os_type,
+ primary_node=pnode_name,
+ nics=self.nics, disks=disks,
+ disk_template=self.op.disk_template,
+ admin_state=constants.ADMINST_DOWN,  # switched to ADMINST_UP at the end when self.op.start is set
+ network_port=network_port,
+ beparams=self.op.beparams,
+ hvparams=self.op.hvparams,
+ hypervisor=self.op.hypervisor,
+ osparams=self.op.osparams,
+ )
+
+ if self.op.tags:
+ for tag in self.op.tags:
+ iobj.AddTag(tag)
+
+ if self.adopt_disks:
+ if self.op.disk_template == constants.DT_PLAIN:
+ # rename LVs to the newly-generated names; we need to construct
+ # 'fake' LV disks with the old data, plus the new unique_id
+ tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]  # dict round-trip yields independent copies
+ rename_to = []
+ for t_dsk, a_dsk in zip(tmp_disks, self.disks):
+ rename_to.append(t_dsk.logical_id)
+ t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
+ self.cfg.SetDiskID(t_dsk, pnode_name)
+ result = self.rpc.call_blockdev_rename(pnode_name,
+ zip(tmp_disks, rename_to))
+ result.Raise("Failed to rename adoped LVs")  # NOTE(review): "adoped" is a typo for "adopted" in this message
+ else:
+ feedback_fn("* creating instance disks...")
+ try:
+ CreateDisks(self, iobj)
+ except errors.OpExecError:
+ self.LogWarning("Device creation failed")
+ self.cfg.ReleaseDRBDMinors(instance)
+ raise
+
+ feedback_fn("adding instance %s to cluster config" % instance)
+
+ self.cfg.AddInstance(iobj, self.proc.GetECId())
+
+ # Declare that we don't want to remove the instance lock anymore, as we've
+ # added the instance to the config
+ del self.remove_locks[locking.LEVEL_INSTANCE]
+
+ if self.op.mode == constants.INSTANCE_IMPORT:
+ # Release unused nodes
+ ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
+ else:
+ # Release all nodes
+ ReleaseLocks(self, locking.LEVEL_NODE)
+
+ disk_abort = False
+ if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
+ feedback_fn("* wiping instance disks...")
+ try:
+ WipeDisks(self, iobj)
+ except errors.OpExecError, err:
+ logging.exception("Wiping disks failed")
+ self.LogWarning("Wiping instance disks failed (%s)", err)
+ disk_abort = True
+
+ if disk_abort:
+ # Something is already wrong with the disks, don't do anything else
+ pass
+ elif self.op.wait_for_sync:
+ disk_abort = not WaitForSync(self, iobj)
+ elif iobj.disk_template in constants.DTS_INT_MIRROR:
+ # make sure the disks are not degraded (still sync-ing is ok)
+ feedback_fn("* checking mirrors status")
+ disk_abort = not WaitForSync(self, iobj, oneshot=True)
+ else:
+ disk_abort = False
+
+ if disk_abort:
+ RemoveDisks(self, iobj)  # roll back: drop the disks, then the config entry
+ self.cfg.RemoveInstance(iobj.name)
+ # Make sure the instance lock gets removed
+ self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
+ raise errors.OpExecError("There are some degraded disks for"
+ " this instance")
+
+ # Release all node resource locks
+ ReleaseLocks(self, locking.LEVEL_NODE_RES)
+
+ if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
+ # we need to set the disks ID to the primary node, since the
+ # preceding code might or might have not done it, depending on
+ # disk template and other options
+ for disk in iobj.disks:
+ self.cfg.SetDiskID(disk, pnode_name)
+ if self.op.mode == constants.INSTANCE_CREATE:
+ if not self.op.no_install:
+ pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
+ not self.op.wait_for_sync)
+ if pause_sync:
+ feedback_fn("* pausing disk sync to install instance OS")
+ result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
+ (iobj.disks,
+ iobj), True)
+ for idx, success in enumerate(result.payload):
+ if not success:
+ logging.warn("pause-sync of instance %s for disk %d failed",
+ instance, idx)
+
+ feedback_fn("* running the instance OS create scripts...")
+ # FIXME: pass debug option from opcode to backend
+ os_add_result = \
+ self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
+ self.op.debug_level)
+ if pause_sync:
+ feedback_fn("* resuming disk sync")
+ result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
+ (iobj.disks,
+ iobj), False)
+ for idx, success in enumerate(result.payload):
+ if not success:
+ logging.warn("resume-sync of instance %s for disk %d failed",
+ instance, idx)
+
+ os_add_result.Raise("Could not add os for instance %s"  # checked only after the sync-resume attempt above
+ " on node %s" % (instance, pnode_name))
+
+ else:
+ if self.op.mode == constants.INSTANCE_IMPORT:
+ feedback_fn("* running the instance OS import scripts...")
+
+ transfers = []
+
+ for idx, image in enumerate(self.src_images):
+ if not image:  # False marks a disk that has no dump in the export
+ continue
+
+ # FIXME: pass debug option from opcode to backend
+ dt = masterd.instance.DiskTransfer("disk/%s" % idx,
+ constants.IEIO_FILE, (image, ),
+ constants.IEIO_SCRIPT,
+ (iobj.disks[idx], idx),
+ None)
+ transfers.append(dt)
+
+ import_result = \
+ masterd.instance.TransferInstanceData(self, feedback_fn,
+ self.op.src_node, pnode_name,
+ self.pnode.secondary_ip,
+ iobj, transfers)
+ if not compat.all(import_result):
+ self.LogWarning("Some disks for instance %s on node %s were not"
+ " imported successfully" % (instance, pnode_name))
+
+ rename_from = self._old_instance_name  # name read from the export info in CheckPrereq
+
+ elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
+ feedback_fn("* preparing remote import...")
+ # The source cluster will stop the instance before attempting to make
+ # a connection. In some cases stopping an instance can take a long
+ # time, hence the shutdown timeout is added to the connection
+ # timeout.
+ connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
+ self.op.source_shutdown_timeout)
+ timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
+
+ assert iobj.primary_node == self.pnode.name
+ disk_results = \
+ masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
+ self.source_x509_ca,
+ self._cds, timeouts)
+ if not compat.all(disk_results):
+ # TODO: Should the instance still be started, even if some disks
+ # failed to import (valid for local imports, too)?
+ self.LogWarning("Some disks for instance %s on node %s were not"
+ " imported successfully" % (instance, pnode_name))
+
+ rename_from = self.source_instance_name
+
+ else:
+ # also checked in the prereq part
+ raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
+ % self.op.mode)
+
+ # Run rename script on newly imported instance
+ assert iobj.name == instance
+ feedback_fn("Running rename script for %s" % instance)
+ result = self.rpc.call_instance_run_rename(pnode_name, iobj,
+ rename_from,
+ self.op.debug_level)
+ if result.fail_msg:  # a failed rename script is only warned about, not fatal
+ self.LogWarning("Failed to run rename script for %s on node"
+ " %s: %s" % (instance, pnode_name, result.fail_msg))
+
+ assert not self.owned_locks(locking.LEVEL_NODE_RES)
+
+ if self.op.start:
+ iobj.admin_state = constants.ADMINST_UP
+ self.cfg.Update(iobj, feedback_fn)  # persist the new admin state before starting
+ logging.info("Starting instance %s on node %s", instance, pnode_name)
+ feedback_fn("* starting instance...")
+ result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
+ False, self.op.reason)
+ result.Raise("Could not start instance")
+
+ return list(iobj.all_nodes)
+
+
+ class LUInstanceRename(LogicalUnit):
+ """Rename an instance.
+
+ """
+ HPATH = "instance-rename"
+ HTYPE = constants.HTYPE_INSTANCE
+
+ def CheckArguments(self):
+ """Check arguments.
+
+ """
+ if self.op.ip_check and not self.op.name_check:  # the IP to ping comes from the name check (see CheckPrereq)
+ # TODO: make the ip check more flexible and not depend on the name check
+ raise errors.OpPrereqError("IP address check requires a name check",
+ errors.ECODE_INVAL)
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on master, primary and secondary nodes of the instance.
+
+ """
+ env = BuildInstanceHookEnvByObject(self, self.instance)
+ env["INSTANCE_NEW_NAME"] = self.op.new_name
+ return env
+
+ def BuildHooksNodes(self):
+ """Build hooks nodes.
+
+ """
+ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
+ return (nl, nl)  # same node list for pre and post hooks
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster and is not running.
+
+ """
+ self.op.instance_name = ExpandInstanceName(self.cfg,
+ self.op.instance_name)
+ instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert instance is not None
+ CheckNodeOnline(self, instance.primary_node)
+ CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
+ msg="cannot rename")
+ self.instance = instance
+
+ new_name = self.op.new_name
+ if self.op.name_check:
+ hostname = _CheckHostnameSane(self, new_name)
+ new_name = self.op.new_name = hostname.name  # continue with the name returned by the check
+ if (self.op.ip_check and
+ netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
+ raise errors.OpPrereqError("IP %s of instance %s already in use" %
+ (hostname.ip, new_name),
+ errors.ECODE_NOTUNIQUE)
+
+ instance_list = self.cfg.GetInstanceList()
+ if new_name in instance_list and new_name != instance.name:  # renaming to the current name is not treated as a clash
+ raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
+ new_name, errors.ECODE_EXISTS)
+
+ def Exec(self, feedback_fn):
+ """Rename the instance.
+
+ """
+ inst = self.instance
+ old_name = inst.name
+
+ rename_file_storage = False
+ if (inst.disk_template in constants.DTS_FILEBASED and
+ self.op.new_name != inst.name):
+ old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])  # presumably logical_id[1] is the disk file path - TODO confirm
+ rename_file_storage = True
+
+ self.cfg.RenameInstance(inst.name, self.op.new_name)
+ # Change the instance lock. This is definitely safe while we hold the BGL.
+ # Otherwise the new lock would have to be added in acquired mode.
+ assert self.REQ_BGL  # this class does not override REQ_BGL, so the inherited value is what is checked
+ assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
+ self.glm.remove(locking.LEVEL_INSTANCE, old_name)
+ self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
+
+ # re-read the instance from the configuration after rename
+ inst = self.cfg.GetInstanceInfo(self.op.new_name)
+
+ if rename_file_storage:
+ new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
+ result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
+ old_file_storage_dir,
+ new_file_storage_dir)
+ result.Raise("Could not rename on node %s directory '%s' to '%s'"
+ " (but the instance has been renamed in Ganeti)" %
+ (inst.primary_node, old_file_storage_dir,
+ new_file_storage_dir))
+
+ StartInstanceDisks(self, inst, None)  # disks are shut down again in the finally clause below
+ # update info on disks
+ info = GetInstanceInfoText(inst)
+ for (idx, disk) in enumerate(inst.disks):
+ for node in inst.all_nodes:
+ self.cfg.SetDiskID(disk, node)
+ result = self.rpc.call_blockdev_setinfo(node, disk, info)
+ if result.fail_msg:  # per-node failures are logged and the loop continues
+ self.LogWarning("Error setting info on node %s for disk %s: %s",
+ node, idx, result.fail_msg)
+ try:
+ result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
+ old_name, self.op.debug_level)
+ msg = result.fail_msg
+ if msg:  # a failing OS rename script is only warned about
+ msg = ("Could not run OS rename script for instance %s on node %s"
+ " (but the instance has been renamed in Ganeti): %s" %
+ (inst.name, inst.primary_node, msg))
+ self.LogWarning(msg)
+ finally:
+ ShutdownInstanceDisks(self, inst)
+
+ return inst.name  # the instance was re-read above, so this is the new name
+
+
+ class LUInstanceRemove(LogicalUnit):
+ """Remove an instance.
+
+ """
+ HPATH = "instance-remove"
+ HTYPE = constants.HTYPE_INSTANCE
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.needed_locks[locking.LEVEL_NODE_RES] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
+ elif level == locking.LEVEL_NODE_RES:
+ # Copy node locks
+ self.needed_locks[locking.LEVEL_NODE_RES] = \
+ CopyLockList(self.needed_locks[locking.LEVEL_NODE])
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on master, primary and secondary nodes of the instance.
+
+ """
+ env = BuildInstanceHookEnvByObject(self, self.instance)
+ env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
+ return env
+
+ def BuildHooksNodes(self):
+ """Build hooks nodes.
+
+ """
+ nl = [self.cfg.GetMasterNode()]
+ nl_post = list(self.instance.all_nodes) + nl
+ return (nl, nl_post)
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster.
+
+ """
+ self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
+
+ def Exec(self, feedback_fn):
+ """Remove the instance.
+
+ """
+ instance = self.instance
+ logging.info("Shutting down instance %s on node %s",
+ instance.name, instance.primary_node)
+
+ result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
+ self.op.shutdown_timeout,
+ self.op.reason)
+ msg = result.fail_msg
+ if msg:
+ if self.op.ignore_failures:
+ feedback_fn("Warning: can't shutdown instance: %s" % msg)
+ else:
+ raise errors.OpExecError("Could not shutdown instance %s on"
+ " node %s: %s" %
+ (instance.name, instance.primary_node, msg))
+
+ assert (self.owned_locks(locking.LEVEL_NODE) ==
+ self.owned_locks(locking.LEVEL_NODE_RES))
+ assert not (set(instance.all_nodes) -
+ self.owned_locks(locking.LEVEL_NODE)), \
+ "Not owning correct locks"
+
+ RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
+
+
+ class LUInstanceMove(LogicalUnit):
+ """Move an instance by data-copying.
+
+ """
+ HPATH = "instance-move"
+ HTYPE = constants.HTYPE_INSTANCE
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ target_node = ExpandNodeName(self.cfg, self.op.target_node)
+ self.op.target_node = target_node  # keep the expanded name on the opcode
+ self.needed_locks[locking.LEVEL_NODE] = [target_node]
+ self.needed_locks[locking.LEVEL_NODE_RES] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes(primary_only=True)
+ elif level == locking.LEVEL_NODE_RES:
+ # Copy node locks
+ self.needed_locks[locking.LEVEL_NODE_RES] = \
+ CopyLockList(self.needed_locks[locking.LEVEL_NODE])
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on master, primary and secondary nodes of the instance.
+
+ """
+ env = {
+ "TARGET_NODE": self.op.target_node,
+ "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
+ }
+ env.update(BuildInstanceHookEnvByObject(self, self.instance))  # instance-derived keys override the two set above on a clash
+ return env
+
+ def BuildHooksNodes(self):
+ """Build hooks nodes.
+
+ """
+ nl = [
+ self.cfg.GetMasterNode(),
+ self.instance.primary_node,
+ self.op.target_node,
+ ]
+ return (nl, nl)  # same node list for pre and post hooks
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster.
+
+ """
+ self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
+
+ if instance.disk_template not in constants.DTS_COPYABLE:
+ raise errors.OpPrereqError("Disk template %s not suitable for copying" %
+ instance.disk_template, errors.ECODE_STATE)
+
+ node = self.cfg.GetNodeInfo(self.op.target_node)
+ assert node is not None, \
+ "Cannot retrieve locked node %s" % self.op.target_node
+
+ self.target_node = target_node = node.name
+
+ if target_node == instance.primary_node:
+ raise errors.OpPrereqError("Instance %s is already on the node %s" %
+ (instance.name, target_node),
+ errors.ECODE_STATE)
+
+ bep = self.cfg.GetClusterInfo().FillBE(instance)
+
+ for idx, dsk in enumerate(instance.disks):
+ if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):  # only LD_LV and LD_FILE disks pass this check
+ raise errors.OpPrereqError("Instance disk %d has a complex layout,"
+ " cannot copy" % idx, errors.ECODE_STATE)
+
+ CheckNodeOnline(self, target_node)
+ CheckNodeNotDrained(self, target_node)
+ CheckNodeVmCapable(self, target_node)
+ cluster = self.cfg.GetClusterInfo()
+ group_info = self.cfg.GetNodeGroup(node.group)
+ ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
+ CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg,
+ ignore=self.op.ignore_ipolicy)
+
+ if instance.admin_state == constants.ADMINST_UP:
+ # check memory requirements on the target node
+ CheckNodeFreeMemory(self, target_node,
+ "failing over instance %s" %
+ instance.name, bep[constants.BE_MAXMEM],
+ instance.hypervisor)
+ else:
+ self.LogInfo("Not checking memory on the secondary node as"
+ " instance will not be started")
+
+ # check bridge existence
+ CheckInstanceBridgesExist(self, instance, node=target_node)
+
+ def Exec(self, feedback_fn):
+ """Move an instance.
+
+ The move is done by shutting it down on its present node, copying
+ the data over (slow) and starting it on the new node.
+
+ """
+ instance = self.instance
+
+ source_node = instance.primary_node
+ target_node = self.target_node
+
+ self.LogInfo("Shutting down instance %s on source node %s",
+ instance.name, source_node)
+
+ assert (self.owned_locks(locking.LEVEL_NODE) ==
+ self.owned_locks(locking.LEVEL_NODE_RES))
+
+ result = self.rpc.call_instance_shutdown(source_node, instance,
+ self.op.shutdown_timeout,
+ self.op.reason)
+ msg = result.fail_msg
+ if msg:
+ if self.op.ignore_consistency:
+ self.LogWarning("Could not shutdown instance %s on node %s."
+ " Proceeding anyway. Please make sure node"
+ " %s is down. Error details: %s",
+ instance.name, source_node, source_node, msg)
+ else:
+ raise errors.OpExecError("Could not shutdown instance %s on"
+ " node %s: %s" %
+ (instance.name, source_node, msg))
+
+ # create the target disks
+ try:
+ CreateDisks(self, instance, target_node=target_node)
+ except errors.OpExecError:
+ self.LogWarning("Device creation failed")
+ self.cfg.ReleaseDRBDMinors(instance.name)
+ raise
+
+ cluster_name = self.cfg.GetClusterInfo().cluster_name
+
+ errs = []
+ # activate, get path, copy the data over
+ for idx, disk in enumerate(instance.disks):
+ self.LogInfo("Copying data for disk %d", idx)
+ result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
+ instance.name, True, idx)
+ if result.fail_msg:
+ self.LogWarning("Can't assemble newly created disk %d: %s",
+ idx, result.fail_msg)
+ errs.append(result.fail_msg)
+ break  # stop at the first failure; cleanup happens after the loop
+ dev_path = result.payload
+ result = self.rpc.call_blockdev_export(source_node, (disk, instance),
+ target_node, dev_path,
+ cluster_name)
+ if result.fail_msg:
+ self.LogWarning("Can't copy data over for disk %d: %s",
+ idx, result.fail_msg)
+ errs.append(result.fail_msg)
+ break  # stop at the first failure; cleanup happens after the loop
+
+ if errs:
+ self.LogWarning("Some disks failed to copy, aborting")
+ try:
+ RemoveDisks(self, instance, target_node=target_node)  # drop the partially populated copies on the target
+ finally:
+ self.cfg.ReleaseDRBDMinors(instance.name)
+ raise errors.OpExecError("Errors during disk copy: %s" %
+ (",".join(errs),))
+
+ instance.primary_node = target_node
+ self.cfg.Update(instance, feedback_fn)  # from here on the configuration points at the target node
+
+ self.LogInfo("Removing the disks on the original node")
+ RemoveDisks(self, instance, target_node=source_node)
+
+ # Only start the instance if it's marked as up
+ if instance.admin_state == constants.ADMINST_UP:
+ self.LogInfo("Starting instance %s on node %s",
+ instance.name, target_node)
+
+ disks_ok, _ = AssembleInstanceDisks(self, instance,
+ ignore_secondaries=True)
+ if not disks_ok:
+ ShutdownInstanceDisks(self, instance)
+ raise errors.OpExecError("Can't activate the instance's disks")
+
+ result = self.rpc.call_instance_start(target_node,
+ (instance, None, None), False,
+ self.op.reason)
+ msg = result.fail_msg
+ if msg:
+ ShutdownInstanceDisks(self, instance)  # do not leave disks active for an instance that failed to start
+ raise errors.OpExecError("Could not start instance %s on node %s: %s" %
+ (instance.name, target_node, msg))
+
+
+ class LUInstanceMultiAlloc(NoHooksLU):
+   """Allocates multiple instances at the same time.
+
+   Placement comes either from an iallocator (named on the opcode or taken
+   from the cluster-wide default) or from the nodes given on every instance
+   object; requests where only some node fields are filled in are rejected.
+
+   """
+   REQ_BGL = False
+
+   def CheckArguments(self):
+     """Check arguments.
+
+     @raise errors.OpPrereqError: if an instance object carries its own
+       iallocator, if only some node fields are filled in, if neither nodes,
+       an iallocator nor a cluster-wide default iallocator is available, or
+       if instance names are duplicated
+
+     """
+     nodes = []
+     for inst in self.op.instances:
+       if inst.iallocator is not None:
+         raise errors.OpPrereqError("iallocator are not allowed to be set on"
+                                    " instance objects", errors.ECODE_INVAL)
+       nodes.append(bool(inst.pnode))
+       if inst.disk_template in constants.DTS_INT_MIRROR:
+         nodes.append(bool(inst.snode))
+
+     has_nodes = compat.any(nodes)
+     # "all" and "any" disagree when only some of the node fields are set
+     if compat.all(nodes) ^ has_nodes:
+       raise errors.OpPrereqError("There are instance objects providing"
+                                  " pnode/snode while others do not",
+                                  errors.ECODE_INVAL)
+
+     # Fall back to the cluster default only when nothing else decides the
+     # placement; explicitly given nodes must neither be overridden by the
+     # default iallocator nor be rejected because no default is configured
+     if not has_nodes and self.op.iallocator is None:
+       default_iallocator = self.cfg.GetDefaultIAllocator()
+       if default_iallocator:
+         self.op.iallocator = default_iallocator
+       else:
+         raise errors.OpPrereqError("No iallocator or nodes on the instances"
+                                    " given and no cluster-wide default"
+                                    " iallocator found; please specify either"
+                                    " an iallocator or nodes on the instances"
+                                    " or set a cluster-wide default iallocator",
+                                    errors.ECODE_INVAL)
+
+     _CheckOpportunisticLocking(self.op)
+
+     dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
+     if dups:
+       raise errors.OpPrereqError("There are duplicate instance names: %s" %
+                                  utils.CommaJoin(dups), errors.ECODE_INVAL)
+
+   def ExpandNames(self):
+     """Calculate the locks.
+
+     With an iallocator all nodes are requested (optionally
+     opportunistically); otherwise only the nodes named on the instance
+     objects are requested.
+
+     """
+     self.share_locks = ShareAll()
+     self.needed_locks = {
+       # iallocator will select nodes and even if no iallocator is used,
+       # collisions with LUInstanceCreate should be avoided
+       locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+       }
+
+     if self.op.iallocator:
+       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
+
+       if self.op.opportunistic_locking:
+         self.opportunistic_locks[locking.LEVEL_NODE] = True
+         self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
+     else:
+       nodeslist = []
+       for inst in self.op.instances:
+         inst.pnode = ExpandNodeName(self.cfg, inst.pnode)
+         nodeslist.append(inst.pnode)
+         if inst.snode is not None:
+           inst.snode = ExpandNodeName(self.cfg, inst.snode)
+           nodeslist.append(inst.snode)
+
+       self.needed_locks[locking.LEVEL_NODE] = nodeslist
+       # Lock resources of instance's primary and secondary nodes (copy to
+       # prevent accidental modification)
+       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
+
+   def CheckPrereq(self):
+     """Check prerequisite.
+
+     When an iallocator is in use it is run here and its result is stored in
+     C{self.ia_result}; with explicitly given nodes there is nothing to
+     compute.
+
+     @raise errors.OpPrereqError: if the iallocator run was not successful
+
+     """
+     if self.op.iallocator:
+       cluster = self.cfg.GetClusterInfo()
+       default_vg = self.cfg.GetVGName()
+       ec_id = self.proc.GetECId()
+
+       if self.op.opportunistic_locking:
+         # Only consider nodes for which a lock is held
+         node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
+       else:
+         node_whitelist = None
+
+       insts = [_CreateInstanceAllocRequest(op, ComputeDisks(op, default_vg),
+                                            _ComputeNics(op, cluster, None,
+                                                         self.cfg, ec_id),
+                                            _ComputeFullBeParams(op, cluster),
+                                            node_whitelist)
+                for op in self.op.instances]
+
+       req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
+       ial = iallocator.IAllocator(self.cfg, self.rpc, req)
+
+       ial.Run(self.op.iallocator)
+
+       if not ial.success:
+         raise errors.OpPrereqError("Can't compute nodes using"
+                                    " iallocator '%s': %s" %
+                                    (self.op.iallocator, ial.info),
+                                    errors.ECODE_NORES)
+
+       self.ia_result = ial.result
+
+     if self.op.dry_run:
+       self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
+         constants.JOB_IDS_KEY: [],
+         })
+
+   def _ConstructPartialResult(self):
+     """Constructs the partial result.
+
+     @rtype: dict
+     @return: names of the allocatable and of the failed instances; without
+       an iallocator every requested instance is reported as allocatable
+
+     """
+     if self.op.iallocator:
+       (allocatable, failed_insts) = self.ia_result
+       allocatable_insts = map(compat.fst, allocatable)
+     else:
+       allocatable_insts = [op.instance_name for op in self.op.instances]
+       failed_insts = []
+
+     return {
+       opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY: allocatable_insts,
+       opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed_insts,
+       }
+
+   def Exec(self, feedback_fn):
+     """Executes the opcode.
+
+     Returns one single-opcode job per instance to create; when an
+     iallocator was used, the nodes it chose are written into the instance
+     opcodes first.
+
+     """
+     jobs = []
+     if self.op.iallocator:
+       op2inst = dict((op.instance_name, op) for op in self.op.instances)
+       (allocatable, failed) = self.ia_result
+
+       for (name, nodes) in allocatable:
+         op = op2inst.pop(name)
+
+         if len(nodes) > 1:
+           (op.pnode, op.snode) = nodes
+         else:
+           (op.pnode,) = nodes
+
+         jobs.append([op])
+
+       # Whatever was not allocated has to be listed as failed
+       missing = set(op2inst.keys()) - set(failed)
+       assert not missing, \
+         "Iallocator did return incomplete result: %s" % \
+         utils.CommaJoin(missing)
+     else:
+       # Nodes were given by the caller, submit the opcodes as they are
+       jobs.extend([op] for op in self.op.instances)
+
+     return ResultWithJobs(jobs, **self._ConstructPartialResult())
+
+
+ class _InstNicModPrivate:
+   """Private state attached to a single network interface modification.
+
+   Used by L{LUInstanceSetParams}.
+
+   """
+   def __init__(self):
+     # Both fields start out unset
+     self.params = self.filled = None
+
+
+ def _PrepareContainerMods(mods, private_fn):
+   """Attaches a private data field to every container modification.
+
+   @type mods: list of tuples; (operation, index, parameters)
+   @param mods: List of modifications
+   @type private_fn: callable or None
+   @param private_fn: Callable for constructing a private data field for a
+     modification
+   @rtype: list
+   @return: one (operation, index, parameters, private) tuple per entry of
+     C{mods}; the private field is None if no C{private_fn} was given
+
+   """
+   result = []
+   for (op, idx, params) in mods:
+     if private_fn is None:
+       private = None
+     else:
+       private = private_fn()
+     result.append((op, idx, params, private))
+
+   return result
+
+
+ def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
+   """Verifies that every given node has enough physical CPUs.
+
+   The node information is fetched with a single RPC call; an
+   OpPrereqError is raised as soon as one node cannot be queried, reports
+   something other than an integer CPU count, or has fewer CPUs than
+   requested.
+
+   @type lu: C{LogicalUnit}
+   @param lu: a logical unit from which we get configuration data
+   @type nodenames: C{list}
+   @param nodenames: the list of node names to check
+   @type requested: C{int}
+   @param requested: the minimum acceptable number of physical CPUs
+   @param hypervisor_name: hypervisor name handed to the node info RPC
+   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
+     or we cannot check the node
+
+   """
+   all_results = lu.rpc.call_node_info(nodenames, None, [hypervisor_name],
+                                       None)
+   for name in nodenames:
+     node_result = all_results[name]
+     node_result.Raise("Cannot get current information from node %s" % name,
+                       prereq=True, ecode=errors.ECODE_ENVIRON)
+     (_, _, (hv_info, )) = node_result.payload
+     cpu_total = hv_info.get("cpu_total", None)
+
+     if isinstance(cpu_total, int):
+       if requested > cpu_total:
+         raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
+                                    "required" % (name, cpu_total, requested),
+                                    errors.ECODE_NORES)
+     else:
+       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
+                                  " on node %s, result was '%s'" %
+                                  (name, cpu_total), errors.ECODE_ENVIRON)
+
+
+ def GetItemFromContainer(identifier, kind, container):
+   """Return the item referred to by the identifier.
+
+   An identifier that converts to an integer is used as index, with -1
+   standing for the last item; anything else is compared against the
+   C{uuid} and C{name} attributes of the items.
+
+   @type identifier: string
+   @param identifier: Item index or name or UUID
+   @type kind: string
+   @param kind: One-word item description
+   @type container: list
+   @param container: Container to get the item from
+   @rtype: tuple
+   @return: (absolute index, item)
+   @raise IndexError: if the index is negative (other than -1) or does not
+     address an existing item
+   @raise errors.OpPrereqError: if no item has the given name or UUID
+
+   """
+   # Index
+   try:
+     idx = int(identifier)
+   except ValueError:
+     # Not a number, look the item up by UUID/name below
+     pass
+   else:
+     size = len(container)
+     if idx < -1:
+       raise IndexError("Not accepting negative indices other than -1")
+     elif idx >= size or (idx == -1 and size == 0):
+       # Valid indices end at size - 1; only additions may use "size" itself
+       raise IndexError("Got %s index %s, but there are only %s" %
+                        (kind, idx, size))
+     elif idx == -1:
+       # Last item
+       absidx = size - 1
+     else:
+       absidx = idx
+     return (absidx, container[absidx])
+
+   for idx, item in enumerate(container):
+     if item.uuid == identifier or item.name == identifier:
+       return (idx, item)
+
+   raise errors.OpPrereqError("Cannot find %s with identifier %s" %
+                              (kind, identifier), errors.ECODE_NOENT)
+
+
+ def _ApplyContainerMods(kind, container, chgdesc, mods,
+ create_fn, modify_fn, remove_fn):
+ """Applies descriptions in C{mods} to C{container}.
+
+ @type kind: string
+ @param kind: One-word item description
+ @type container: list
+ @param container: Container to modify
+ @type chgdesc: None or list
+ @param chgdesc: List of applied changes
+ @type mods: list
+ @param mods: Modifications as returned by L{_PrepareContainerMods}
+ @type create_fn: callable
+ @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
+ receives absolute item index, parameters and private data object as added
+ by L{_PrepareContainerMods}, returns tuple containing new item and changes
+ as list
+ @type modify_fn: callable
+ @param modify_fn: Callback for modifying an existing item
+ (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
+ and private data object as added by L{_PrepareContainerMods}, returns
+ changes as list
+ @type remove_fn: callable
+ @param remove_fn: Callback on removing item; receives absolute item index,
+ item and private data object as added by L{_PrepareContainerMods}
+ @raise IndexError: if the index of an addition is negative (other than -1)
+ or larger than the container
+ @raise errors.OpPrereqError: if the identifier of an addition is not an
+ integer
+ @raise errors.ProgrammerError: if an operation is neither an addition, a
+ removal nor a modification
+
+ """
+ for (op, identifier, params, private) in mods:
+ changes = None
+
+ if op == constants.DDM_ADD:
+ # Calculate where item will be added
+ # When adding an item, identifier can only be an index
+ try:
+ idx = int(identifier)
+ except ValueError:
+ raise errors.OpPrereqError("Only possitive integer or -1 is accepted as"
+ " identifier for %s" % constants.DDM_ADD,
+ errors.ECODE_INVAL)
+ if idx == -1:
+ addidx = len(container)
+ else:
+ if idx < 0:
+ raise IndexError("Not accepting negative indices other than -1")
+ elif idx > len(container):
+ raise IndexError("Got %s index %s, but there are only %s" %
+ (kind, idx, len(container)))
+ addidx = idx
+
+ # Without a creation callback the parameters themselves become the item
+ if create_fn is None:
+ item = params
+ else:
+ (item, changes) = create_fn(addidx, params, private)
+
+ if idx == -1:
+ container.append(item)
+ else:
+ assert idx >= 0
+ assert idx <= len(container)
+ # list.insert does so before the specified index
+ container.insert(idx, item)
+ else:
+ # Retrieve existing item
+ (absidx, item) = GetItemFromContainer(identifier, kind, container)
+
+ if op == constants.DDM_REMOVE:
+ assert not params
+
+ # The callback runs while the item is still part of the container
+ if remove_fn is not None:
+ remove_fn(absidx, item, private)
+
+ changes = [("%s/%s" % (kind, absidx), "remove")]
+
+ assert container[absidx] == item
+ del container[absidx]
+ elif op == constants.DDM_MODIFY:
+ if modify_fn is not None:
+ changes = modify_fn(absidx, item, params, private)
+ else:
+ raise errors.ProgrammerError("Unhandled operation '%s'" % op)
+
+ assert _TApplyContModsCbChanges(changes)
+
+ # Changes are only recorded if the caller passed a list and there are any
+ if not (chgdesc is None or changes is None):
+ chgdesc.extend(changes)
+
+
+ def _UpdateIvNames(base_index, disks):
+   """Renumbers the C{iv_name} attribute of disks.
+
+   Each disk is named "disk/N", N being C{base_index} plus the position of
+   the disk within C{disks}.
+
+   @param base_index: number assigned to the first disk
+   @type disks: list of L{objects.Disk}
+
+   """
+   number = base_index
+   for disk in disks:
+     disk.iv_name = "disk/%s" % (number, )
+     number += 1
+
+
+ class LUInstanceSetParams(LogicalUnit):
+ """Modifies an instances's parameters.
+
+ """
+ HPATH = "instance-modify"
+ HTYPE = constants.HTYPE_INSTANCE
+ REQ_BGL = False
+
+ @staticmethod
+ def _UpgradeDiskNicMods(kind, mods, verify_fn):
+ """Converts two-element disk/NIC modifications to three-element ones.
+
+ If the first entry of C{mods} has two elements, every entry is rewritten:
+ additions and removals become (operation, -1, parameters) and all other
+ entries become (L{constants.DDM_MODIFY}, first element, parameters). Any
+ other list is returned as it is.
+
+ @param kind: item description used in the error message
+ @param mods: list of modifications
+ @param verify_fn: callable asserted to accept the converted list
+ @raise errors.OpPrereqError: if more than one addition or removal is
+ contained in a two-element list
+
+ """
+ assert ht.TList(mods)
+ assert not mods or len(mods[0]) in (2, 3)
+
+ # Only the first entry is inspected to tell the two formats apart
+ if mods and len(mods[0]) == 2:
+ result = []
+
+ addremove = 0
+ for op, params in mods:
+ if op in (constants.DDM_ADD, constants.DDM_REMOVE):
+ result.append((op, -1, params))
+ addremove += 1
+
+ if addremove > 1:
+ raise errors.OpPrereqError("Only one %s add or remove operation is"
+ " supported at a time" % kind,
+ errors.ECODE_INVAL)
+ else:
+ # Anything else in first position is the identifier of the item to modify
+ result.append((constants.DDM_MODIFY, op, params))
+
+ assert verify_fn(result)
+ else:
+ result = mods
+
+ return result
+
+ @staticmethod
+ def _CheckMods(kind, mods, key_types, item_fn):
+ """Ensures requested disk/NIC modifications are valid.
+
+ @param kind: item description used in the error message
+ @param mods: list of (operation, identifier, parameters) tuples
+ @param key_types: parameter types enforced via L{utils.ForceDictType};
+ skipped if empty
+ @param item_fn: callable receiving operation and parameters of every
+ addition and modification
+ @raise errors.OpPrereqError: if parameters are passed with a removal
+ @raise errors.ProgrammerError: if an operation is neither an addition, a
+ removal nor a modification
+
+ """
+ for (op, _, params) in mods:
+ assert ht.TDict(params)
+
+ # If 'key_types' is an empty dict, we assume we have an
+ # 'ext' template and thus do not ForceDictType
+ if key_types:
+ utils.ForceDictType(params, key_types)
+
+ if op == constants.DDM_REMOVE:
+ if params:
+ raise errors.OpPrereqError("No settings should be passed when"
+ " removing a %s" % kind,
+ errors.ECODE_INVAL)
+ elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
+ item_fn(op, params)
+ else:
+ raise errors.ProgrammerError("Unhandled operation '%s'" % op)
+
+ @staticmethod
+ def _VerifyDiskModification(op, params):
+ """Verifies a disk modification.
+
+ C{params} is updated in place: additions get a default access mode and an
+ integer size, and a name equal to L{constants.VALUE_NONE} (ignoring case)
+ is replaced by None.
+
+ @param op: the operation, only additions and modifications are checked
+ @type params: dict
+ @param params: the disk parameters
+ @raise errors.OpPrereqError: if the parameters are not acceptable for the
+ operation
+
+ """
+ if op == constants.DDM_ADD:
+ # The access mode defaults to read-write
+ mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
+ if mode not in constants.DISK_ACCESS_SET:
+ raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
+ errors.ECODE_INVAL)
+
+ size = params.get(constants.IDISK_SIZE, None)
+ if size is None:
+ raise errors.OpPrereqError("Required disk parameter '%s' missing" %
+ constants.IDISK_SIZE, errors.ECODE_INVAL)
+
+ try:
+ size = int(size)
+ except (TypeError, ValueError), err:
+ raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
+ errors.ECODE_INVAL)
+
+ # Store the size back as integer
+ params[constants.IDISK_SIZE] = size
+ name = params.get(constants.IDISK_NAME, None)
+ if name is not None and name.lower() == constants.VALUE_NONE:
+ params[constants.IDISK_NAME] = None
+
+ elif op == constants.DDM_MODIFY:
+ if constants.IDISK_SIZE in params:
+ raise errors.OpPrereqError("Disk size change not possible, use"
+ " grow-disk", errors.ECODE_INVAL)
+ # At most two parameters are accepted for a modification
+ if len(params) > 2:
+ raise errors.OpPrereqError("Disk modification doesn't support"
+ " additional arbitrary parameters",
+ errors.ECODE_INVAL)
+ name = params.get(constants.IDISK_NAME, None)
+ if name is not None and name.lower() == constants.VALUE_NONE:
+ params[constants.IDISK_NAME] = None
+
+ @staticmethod
+ def _VerifyNicModification(op, params):
+ """Verifies a network interface modification.
+
+ C{params} is updated in place: name, network and IP values equal to
+ L{constants.VALUE_NONE} (ignoring case) are replaced by None and
+ additions without MAC address get L{constants.VALUE_AUTO}.
+
+ @param op: the operation, only additions and modifications are checked
+ @type params: dict
+ @param params: the NIC parameters
+ @raise errors.OpPrereqError: if the parameters are not acceptable for the
+ operation
+
+ """
+ if op in (constants.DDM_ADD, constants.DDM_MODIFY):
+ ip = params.get(constants.INIC_IP, None)
+ name = params.get(constants.INIC_NAME, None)
+ req_net = params.get(constants.INIC_NETWORK, None)
+ link = params.get(constants.NIC_LINK, None)
+ mode = params.get(constants.NIC_MODE, None)
+ if name is not None and name.lower() == constants.VALUE_NONE:
+ params[constants.INIC_NAME] = None
+ if req_net is not None:
+ if req_net.lower() == constants.VALUE_NONE:
+ params[constants.INIC_NETWORK] = None
+ req_net = None
+ elif link is not None or mode is not None:
+ raise errors.OpPrereqError("If network is given"
+ " mode or link should not",
+ errors.ECODE_INVAL)
+
+ if op == constants.DDM_ADD:
+ macaddr = params.get(constants.INIC_MAC, None)
+ if macaddr is None:
+ params[constants.INIC_MAC] = constants.VALUE_AUTO
+
+ if ip is not None:
+ if ip.lower() == constants.VALUE_NONE:
+ params[constants.INIC_IP] = None
+ else:
+ if ip.lower() == constants.NIC_IP_POOL:
+ if op == constants.DDM_ADD and req_net is None:
+ raise errors.OpPrereqError("If ip=pool, parameter network"
+ " cannot be none",
+ errors.ECODE_INVAL)
+ else:
+ if not netutils.IPAddress.IsValid(ip):
+ raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
+ errors.ECODE_INVAL)
+
+ if constants.INIC_MAC in params:
+ macaddr = params[constants.INIC_MAC]
+ # NOTE(review): the normalized MAC is only kept in a local variable and
+ # not written back to params -- confirm this is intended
+ if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+ macaddr = utils.NormalizeAndValidateMac(macaddr)
+
+ if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
+ raise errors.OpPrereqError("'auto' is not a valid MAC address when"
+ " modifying an existing NIC",
+ errors.ECODE_INVAL)
+
+ def CheckArguments(self):
+ """Checks the opcode arguments that need no instance information.
+
+ Converts disk and NIC modifications to the three-element format, verifies
+ the NIC modifications and expands the name of the new primary node, if
+ any.
+
+ @raise errors.OpPrereqError: if no change was requested or the requested
+ changes cannot be combined
+
+ """
+ if not (self.op.nics or self.op.disks or self.op.disk_template or
+ self.op.hvparams or self.op.beparams or self.op.os_name or
+ self.op.offline is not None or self.op.runtime_mem or
+ self.op.pnode):
+ raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
+
+ if self.op.hvparams:
+ CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
+ "hypervisor", "instance", "cluster")
+
+ self.op.disks = self._UpgradeDiskNicMods(
+ "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
+ self.op.nics = self._UpgradeDiskNicMods(
+ "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
+
+ if self.op.disks and self.op.disk_template is not None:
+ raise errors.OpPrereqError("Disk template conversion and other disk"
+ " changes not supported at the same time",
+ errors.ECODE_INVAL)
+
+ if (self.op.disk_template and
+ self.op.disk_template in constants.DTS_INT_MIRROR and
+ self.op.remote_node is None):
+ raise errors.OpPrereqError("Changing the disk template to a mirrored"
+ " one requires specifying a secondary node",
+ errors.ECODE_INVAL)
+
+ # Check NIC modifications
+ self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
+ self._VerifyNicModification)
+
+ if self.op.pnode:
+ self.op.pnode = ExpandNodeName(self.cfg, self.op.pnode)
+
+   def ExpandNames(self):
+     """Sets up the lock declarations for this LU.
+
+     Calls C{_ExpandAndLockInstance}, starts with empty lock lists for node
+     groups, nodes and node resources, marks the node locks to be
+     recalculated with L{constants.LOCKS_REPLACE} and requests the node
+     group locks in shared mode.
+
+     """
+     self._ExpandAndLockInstance()
+     # Can't even acquire node locks in shared mode as upcoming changes in
+     # Ganeti 2.6 will start to modify the node object on disk conversion
+     for level in (locking.LEVEL_NODEGROUP, locking.LEVEL_NODE,
+                   locking.LEVEL_NODE_RES):
+       self.needed_locks[level] = []
+     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+     # Lock node group to look up the ipolicy
+     self.share_locks[locking.LEVEL_NODEGROUP] = 1
+
+ def DeclareLocks(self, level):
+ """Fills in the locks needed at the given locking level.
+
+ @param level: the locking level to declare locks for
+
+ """
+ if level == locking.LEVEL_NODEGROUP:
+ assert not self.needed_locks[locking.LEVEL_NODEGROUP]
+ # Acquire locks for the instance's nodegroups optimistically. Needs
+ # to be verified in CheckPrereq
+ self.needed_locks[locking.LEVEL_NODEGROUP] = \
+ self.cfg.GetInstanceNodeGroups(self.op.instance_name)
+ elif level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
+ # A disk template conversion additionally needs the new secondary node
+ if self.op.disk_template and self.op.remote_node:
+ self.op.remote_node = ExpandNodeName(self.cfg, self.op.remote_node)
+ self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
+ elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
+ # Copy node locks
+ self.needed_locks[locking.LEVEL_NODE_RES] = \
+ CopyLockList(self.needed_locks[locking.LEVEL_NODE])
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on the master, primary and secondaries.
+
+ The environment is built from the instance object, overridden with the
+ new memory, VCPU and NIC values where those are about to change.
+
+ """
+ args = {}
+ # Only backend parameters present in self.be_new are overridden
+ if constants.BE_MINMEM in self.be_new:
+ args["minmem"] = self.be_new[constants.BE_MINMEM]
+ if constants.BE_MAXMEM in self.be_new:
+ args["maxmem"] = self.be_new[constants.BE_MAXMEM]
+ if constants.BE_VCPUS in self.be_new:
+ args["vcpus"] = self.be_new[constants.BE_VCPUS]
+ # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
+ # information at all.
+
+ if self._new_nics is not None:
+ nics = []
+
+ for nic in self._new_nics:
+ # Work on a copy, the NIC object itself must keep its unfilled parameters
+ n = copy.deepcopy(nic)
+ nicparams = self.cluster.SimpleFillNIC(n.nicparams)
+ n.nicparams = nicparams
+ nics.append(NICToTuple(self, n))
+
+ args["nics"] = nics
+
+ env = BuildInstanceHookEnvByObject(self, self.instance, override=args)
+ if self.op.disk_template:
+ env["NEW_DISK_TEMPLATE"] = self.op.disk_template
+ if self.op.runtime_mem:
+ env["RUNTIME_MEMORY"] = self.op.runtime_mem
+
+ return env
+
+   def BuildHooksNodes(self):
+     """Build hooks nodes.
+
+     @rtype: tuple
+     @return: the same list twice, holding the master node followed by all
+       nodes of the instance
+
+     """
+     nodes = [self.cfg.GetMasterNode()]
+     nodes.extend(self.instance.all_nodes)
+     return (nodes, nodes)
+
+ def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
+ old_params, cluster, pnode):
+ """Validates a NIC modification and computes the resulting parameters.
+
+ The new NIC parameters are stored in C{private.params} and their filled
+ version in C{private.filled}. The MAC and IP entries of C{params} may be
+ replaced by generated values; IP and MAC addresses are reserved and
+ released through the configuration as needed.
+
+ @type params: dict
+ @param params: the requested NIC parameters, updated in place
+ @param private: object receiving the computed parameters
+ @param old_ip: current IP address of the NIC, if any
+ @param old_net_uuid: UUID of the network the NIC is connected to, if any
+ @param old_params: current NIC parameters
+ @param cluster: cluster object used for filling in the defaults
+ @param pnode: primary node of the instance
+ @raise errors.OpPrereqError: if the modification is not acceptable
+
+ """
+
+ # Only link/mode style parameters take part in the nicparams update
+ update_params_dict = dict([(key, params[key])
+ for key in constants.NICS_PARAMETERS
+ if key in params])
+
+ req_link = update_params_dict.get(constants.NIC_LINK, None)
+ req_mode = update_params_dict.get(constants.NIC_MODE, None)
+
+ new_net_uuid = None
+ # Without an explicit network the NIC stays in its current one
+ new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
+ if new_net_uuid_or_name:
+ new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
+ new_net_obj = self.cfg.GetNetwork(new_net_uuid)
+
+ if old_net_uuid:
+ old_net_obj = self.cfg.GetNetwork(old_net_uuid)
+
+ # A NIC in a network takes its parameters from the group's netparams
+ if new_net_uuid:
+ netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
+ if not netparams:
+ raise errors.OpPrereqError("No netparams found for the network"
+ " %s, probably not connected" %
+ new_net_obj.name, errors.ECODE_INVAL)
+ new_params = dict(netparams)
+ else:
+ new_params = GetUpdatedParams(old_params, update_params_dict)
+
+ utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
+
+ new_filled_params = cluster.SimpleFillNIC(new_params)
+ objects.NIC.CheckParameterSyntax(new_filled_params)
+
+ new_mode = new_filled_params[constants.NIC_MODE]
+ if new_mode == constants.NIC_MODE_BRIDGED:
+ bridge = new_filled_params[constants.NIC_LINK]
+ msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
+ if msg:
+ msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
+ # With force the failed check is only reported as warning
+ if self.op.force:
+ self.warn.append(msg)
+ else:
+ raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
+
+ elif new_mode == constants.NIC_MODE_ROUTED:
+ ip = params.get(constants.INIC_IP, old_ip)
+ if ip is None:
+ raise errors.OpPrereqError("Cannot set the NIC IP address to None"
+ " on a routed NIC", errors.ECODE_INVAL)
+
+ elif new_mode == constants.NIC_MODE_OVS:
+ # TODO: check OVS link
+ self.LogInfo("OVS links are currently not checked for correctness")
+
+ if constants.INIC_MAC in params:
+ mac = params[constants.INIC_MAC]
+ if mac is None:
+ raise errors.OpPrereqError("Cannot unset the NIC MAC address",
+ errors.ECODE_INVAL)
+ elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+ # otherwise generate the MAC address
+ params[constants.INIC_MAC] = \
+ self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
+ else:
+ # or validate/reserve the current one
+ try:
+ self.cfg.ReserveMAC(mac, self.proc.GetECId())
+ except errors.ReservationError:
+ raise errors.OpPrereqError("MAC address '%s' already in use"
+ " in cluster" % mac,
+ errors.ECODE_NOTUNIQUE)
+ elif new_net_uuid != old_net_uuid:
+
+ def get_net_prefix(net_uuid):
+ mac_prefix = None
+ if net_uuid:
+ nobj = self.cfg.GetNetwork(net_uuid)
+ mac_prefix = nobj.mac_prefix
+
+ return mac_prefix
+
+ # Changing to a network with another MAC prefix requires a new MAC
+ new_prefix = get_net_prefix(new_net_uuid)
+ old_prefix = get_net_prefix(old_net_uuid)
+ if old_prefix != new_prefix:
+ params[constants.INIC_MAC] = \
+ self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
+
+ # if there is a change in (ip, network) tuple
+ new_ip = params.get(constants.INIC_IP, old_ip)
+ if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
+ if new_ip:
+ # if IP is pool then require a network and generate one IP
+ if new_ip.lower() == constants.NIC_IP_POOL:
+ if new_net_uuid:
+ try:
+ new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
+ except errors.ReservationError:
+ raise errors.OpPrereqError("Unable to get a free IP"
+ " from the address pool",
+ errors.ECODE_STATE)
+ self.LogInfo("Chose IP %s from network %s",
+ new_ip,
+ new_net_obj.name)
+ params[constants.INIC_IP] = new_ip
+ else:
+ raise errors.OpPrereqError("ip=pool, but no network found",
+ errors.ECODE_INVAL)
+ # Reserve new IP if in the new network if any
+ elif new_net_uuid:
+ try:
+ self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
+ self.LogInfo("Reserving IP %s in network %s",
+ new_ip, new_net_obj.name)
+ except errors.ReservationError:
+ raise errors.OpPrereqError("IP %s not available in network %s" %
+ (new_ip, new_net_obj.name),
+ errors.ECODE_NOTUNIQUE)
+ # new network is None so check if new IP is a conflicting IP
+ elif self.op.conflicts_check:
+ _CheckForConflictingIp(self, new_ip, pnode)
+
+ # release old IP if old network is not None
+ if old_ip and old_net_uuid:
+ try:
+ self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
+ except errors.AddressPoolError:
+ logging.warning("Release IP %s not contained in network %s",
+ old_ip, old_net_obj.name)
+
+ # there are no changes in (ip, network) tuple and old network is not None
+ elif (old_net_uuid is not None and
+ (req_link is not None or req_mode is not None)):
+ raise errors.OpPrereqError("Not allowed to change link or mode of"
+ " a NIC that is connected to a network",
+ errors.ECODE_INVAL)
+
+ private.params = new_params
+ private.filled = new_filled_params
+
+ def _PreCheckDiskTemplate(self, pnode_info):
+ """CheckPrereq checks related to a new disk template.
+
+ @param pnode_info: node object of the instance's primary node
+ @raise errors.OpPrereqError: if the conversion to the requested disk
+ template is not possible
+
+ """
+ # Arguments are passed to avoid configuration lookups
+ instance = self.instance
+ pnode = instance.primary_node
+ cluster = self.cluster
+ if instance.disk_template == self.op.disk_template:
+ raise errors.OpPrereqError("Instance already has disk template %s" %
+ instance.disk_template, errors.ECODE_INVAL)
+
+ if (instance.disk_template,
+ self.op.disk_template) not in self._DISK_CONVERSIONS:
+ raise errors.OpPrereqError("Unsupported disk template conversion from"
+ " %s to %s" % (instance.disk_template,
+ self.op.disk_template),
+ errors.ECODE_INVAL)
+ CheckInstanceState(self, instance, INSTANCE_DOWN,
+ msg="cannot change disk template")
+ # Mirrored templates need a usable secondary node with enough free space
+ if self.op.disk_template in constants.DTS_INT_MIRROR:
+ if self.op.remote_node == pnode:
+ raise errors.OpPrereqError("Given new secondary node %s is the same"
+ " as the primary node of the instance" %
+ self.op.remote_node, errors.ECODE_STATE)
+ CheckNodeOnline(self, self.op.remote_node)
+ CheckNodeNotDrained(self, self.op.remote_node)
+ # FIXME: here we assume that the old instance type is DT_PLAIN
+ assert instance.disk_template == constants.DT_PLAIN
+ disks = [{constants.IDISK_SIZE: d.size,
+ constants.IDISK_VG: d.logical_id[0]}
+ for d in instance.disks]
+ required = ComputeDiskSizePerVG(self.op.disk_template, disks)
+ CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
+
+ # The instance must also fit the instance policy of the secondary's group
+ snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
+ snode_group = self.cfg.GetNodeGroup(snode_info.group)
+ ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
+ snode_group)
+ CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg,
+ ignore=self.op.ignore_ipolicy)
+ if pnode_info.group != snode_info.group:
+ self.LogWarning("The primary and secondary nodes are in two"
+ " different node groups; the disk parameters"
+ " from the first disk's node group will be"
+ " used")
+
+ if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
+ # Make sure none of the nodes require exclusive storage
+ nodes = [pnode_info]
+ if self.op.disk_template in constants.DTS_INT_MIRROR:
+ assert snode_info
+ nodes.append(snode_info)
+ has_es = lambda n: IsExclusiveStorageEnabledNode(self.cfg, n)
+ if compat.any(map(has_es, nodes)):
+ errmsg = ("Cannot convert disk template from %s to %s when exclusive"
+ " storage is enabled" % (instance.disk_template,
+ self.op.disk_template))
+ raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
+
- def CheckPrereq(self):
- """Check prerequisites.
++  def _PreCheckDisks(self, ispec):
++    """CheckPrereq checks related to disk changes.
+
- This only checks the instance list against the existing names.
++    Sets C{self.diskparams} and C{self.diskmod} and verifies the requested
++    disk modifications on a copy of the instance's disks.
++
++    @type ispec: dict
++    @param ispec: instance specs to be updated with the new disks
++    @raise errors.OpPrereqError: if the disk modifications are not valid
++      for the instance
+
+     """
- assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
- instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
-
- cluster = self.cluster = self.cfg.GetClusterInfo()
- assert self.instance is not None, \
- "Cannot retrieve locked instance %s" % self.op.instance_name
-
- pnode = instance.primary_node
-
- self.warn = []
-
- if (self.op.pnode is not None and self.op.pnode != pnode and
- not self.op.force):
- # verify that the instance is not up
- instance_info = self.rpc.call_instance_info(pnode, instance.name,
- instance.hypervisor)
- if instance_info.fail_msg:
- self.warn.append("Can't get instance runtime information: %s" %
- instance_info.fail_msg)
- elif instance_info.payload:
- raise errors.OpPrereqError("Instance is still running on %s" % pnode,
- errors.ECODE_STATE)
-
- assert pnode in self.owned_locks(locking.LEVEL_NODE)
- nodelist = list(instance.all_nodes)
- pnode_info = self.cfg.GetNodeInfo(pnode)
++    instance = self.instance
+     self.diskparams = self.cfg.GetInstanceDiskParams(instance)
+
- #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
- assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
- group_info = self.cfg.GetNodeGroup(pnode_info.group)
-
- # dictionary with instance information after the modification
- ispec = {}
-
+     # Check disk modifications. This is done here and not in CheckArguments
+     # (as with NICs), because we need to know the instance's disk template
+     if instance.disk_template == constants.DT_EXT:
+       self._CheckMods("disk", self.op.disks, {},
+                       self._VerifyDiskModification)
+     else:
+       self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
+                       self._VerifyDiskModification)
+
- # Prepare disk/NIC modifications
+     self.diskmod = _PrepareContainerMods(self.op.disks, None)
- self.nicmod = _PrepareContainerMods(self.op.nics, _InstNicModPrivate)
+
+     # Check the validity of the `provider' parameter
++    # (the template is compared for equality, as above; a membership test
++    # against the template name would be a substring match)
+     if instance.disk_template == constants.DT_EXT:
+       for mod in self.diskmod:
+         ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
+         if mod[0] == constants.DDM_ADD:
+           if ext_provider is None:
+             raise errors.OpPrereqError("Instance template is '%s' and parameter"
+                                        " '%s' missing, during disk add" %
+                                        (constants.DT_EXT,
+                                         constants.IDISK_PROVIDER),
+                                        errors.ECODE_NOENT)
+         elif mod[0] == constants.DDM_MODIFY:
+           if ext_provider:
+             raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
+                                        " modification" %
+                                        constants.IDISK_PROVIDER,
+                                        errors.ECODE_INVAL)
+     else:
+       for mod in self.diskmod:
+         ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
+         if ext_provider is not None:
+           raise errors.OpPrereqError("Parameter '%s' is only valid for"
+                                      " instances of type '%s'" %
+                                      (constants.IDISK_PROVIDER,
+                                       constants.DT_EXT),
+                                      errors.ECODE_INVAL)
+
++    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
++      raise errors.OpPrereqError("Disk operations not supported for"
++                                 " diskless instances", errors.ECODE_INVAL)
++
++    def _PrepareDiskMod(_, disk, params, __):
++      disk.name = params.get(constants.IDISK_NAME, None)
++
++    # Verify disk changes (operating on a copy)
++    disks = copy.deepcopy(instance.disks)
++    _ApplyContainerMods("disk", disks, None, self.diskmod, None,
++                        _PrepareDiskMod, None)
++    utils.ValidateDeviceNames("disk", disks)
++    if len(disks) > constants.MAX_DISKS:
++      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
++                                 " more" % constants.MAX_DISKS,
++                                 errors.ECODE_STATE)
++    # Sizes of the existing disks plus those of the disks being added
++    disk_sizes = [disk.size for disk in instance.disks]
++    disk_sizes.extend(params["size"] for (op, idx, params, private) in
++                      self.diskmod if op == constants.DDM_ADD)
++    ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
++    ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
++
++    if self.op.offline is not None and self.op.offline:
++      CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
++                         msg="can't change to offline")
++
++ def CheckPrereq(self):
++ """Check prerequisites.
++
++ Validates the requested modifications against the current state of the
++ instance and the cluster: primary node change, OS change, disk template
++ and disk changes, hypervisor/backend/OS parameters, memory availability,
++ NIC changes and the instance policy of the primary node's group.
++
++ Problems retrieving runtime information are collected in C{self.warn};
++ fatal ones raise L{errors.OpPrereqError}.
++
++ """
++ assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
++ instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
++
++ cluster = self.cluster = self.cfg.GetClusterInfo()
++ assert self.instance is not None, \
++ "Cannot retrieve locked instance %s" % self.op.instance_name
++
++ pnode = instance.primary_node
++
++ self.warn = []
++
++ if (self.op.pnode is not None and self.op.pnode != pnode and
++ not self.op.force):
++ # verify that the instance is not up
++ instance_info = self.rpc.call_instance_info(pnode, instance.name,
++ instance.hypervisor)
++ if instance_info.fail_msg:
++ self.warn.append("Can't get instance runtime information: %s" %
++ instance_info.fail_msg)
++ elif instance_info.payload:
++ raise errors.OpPrereqError("Instance is still running on %s" % pnode,
++ errors.ECODE_STATE)
++
++ assert pnode in self.owned_locks(locking.LEVEL_NODE)
++ nodelist = list(instance.all_nodes)
++ pnode_info = self.cfg.GetNodeInfo(pnode)
++
++ #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
++ assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
++ group_info = self.cfg.GetNodeGroup(pnode_info.group)
++
++ # dictionary with instance information after the modification
++ ispec = {}
++
++ # Prepare NIC modifications
++ self.nicmod = _PrepareContainerMods(self.op.nics, _InstNicModPrivate)
++
+ # OS change
+ if self.op.os_name and not self.op.force:
+ CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
+ self.op.force_variant)
+ instance_os = self.op.os_name
+ else:
+ instance_os = instance.os
+
+ assert not (self.op.disk_template and self.op.disks), \
+ "Can't modify disk template and apply disk changes at the same time"
+
+ if self.op.disk_template:
+ self._PreCheckDiskTemplate(pnode_info)
+
++ self._PreCheckDisks(ispec)
++
+ # hvparams processing
+ if self.op.hvparams:
+ hv_type = instance.hypervisor
+ i_hvdict = GetUpdatedParams(instance.hvparams, self.op.hvparams)
+ utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
+ hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
+
+ # local check
+ hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
+ CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
+ self.hv_proposed = self.hv_new = hv_new # the new actual values
+ self.hv_inst = i_hvdict # the new dict (without defaults)
+ else:
+ self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
+ instance.hvparams)
+ self.hv_new = self.hv_inst = {}
+
+ # beparams processing
+ if self.op.beparams:
+ i_bedict = GetUpdatedParams(instance.beparams, self.op.beparams,
+ use_none=True)
+ objects.UpgradeBeParams(i_bedict)
+ utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
+ be_new = cluster.SimpleFillBE(i_bedict)
+ self.be_proposed = self.be_new = be_new # the new actual values
+ self.be_inst = i_bedict # the new dict (without defaults)
+ else:
+ self.be_new = self.be_inst = {}
+ self.be_proposed = cluster.SimpleFillBE(instance.beparams)
+ # backend parameters as currently configured; compared against be_new in
+ # the memory check below
+ be_old = cluster.FillBE(instance)
+
+ # CPU param validation -- checking every time a parameter is
+ # changed to cover all cases where either CPU mask or vcpus have
+ # changed
+ if (constants.BE_VCPUS in self.be_proposed and
+ constants.HV_CPU_MASK in self.hv_proposed):
+ cpu_list = \
+ utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
+ # Verify mask is consistent with number of vCPUs. Can skip this
+ # test if only 1 entry in the CPU mask, which means same mask
+ # is applied to all vCPUs.
+ if (len(cpu_list) > 1 and
+ len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
+ raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
+ " CPU mask [%s]" %
+ (self.be_proposed[constants.BE_VCPUS],
+ self.hv_proposed[constants.HV_CPU_MASK]),
+ errors.ECODE_INVAL)
+
+ # Only perform this test if a new CPU mask is given
+ if constants.HV_CPU_MASK in self.hv_new:
+ # Calculate the largest CPU number requested
+ max_requested_cpu = max(map(max, cpu_list))
+ # Check that all of the instance's nodes have enough physical CPUs to
+ # satisfy the requested CPU mask
+ _CheckNodesPhysicalCPUs(self, instance.all_nodes,
+ max_requested_cpu + 1, instance.hypervisor)
+
+ # osparams processing
+ if self.op.osparams:
+ i_osdict = GetUpdatedParams(instance.osparams, self.op.osparams)
+ CheckOSParams(self, True, nodelist, instance_os, i_osdict)
+ self.os_inst = i_osdict # the new dict (without defaults)
+ else:
+ self.os_inst = {}
+
+ #TODO(dynmem): do the appropriate check involving MINMEM
+ if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
+ be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
+ mem_check_list = [pnode]
+ if be_new[constants.BE_AUTO_BALANCE]:
+ # either we changed auto_balance to yes or it was from before
+ mem_check_list.extend(instance.secondary_nodes)
+ instance_info = self.rpc.call_instance_info(pnode, instance.name,
+ instance.hypervisor)
+ nodeinfo = self.rpc.call_node_info(mem_check_list, None,
+ [instance.hypervisor], False)
+ pninfo = nodeinfo[pnode]
+ msg = pninfo.fail_msg
+ if msg:
+ # Assume the primary node is unreachable and go ahead
+ self.warn.append("Can't get info from primary node %s: %s" %
+ (pnode, msg))
+ else:
+ (_, _, (pnhvinfo, )) = pninfo.payload
+ if not isinstance(pnhvinfo.get("memory_free", None), int):
+ self.warn.append("Node data from primary node %s doesn't contain"
+ " free memory information" % pnode)
+ elif instance_info.fail_msg:
+ self.warn.append("Can't get instance runtime information: %s" %
+ instance_info.fail_msg)
+ else:
+ if instance_info.payload:
+ current_mem = int(instance_info.payload["memory"])
+ else:
+ # Assume instance not running
+ # (there is a slight race condition here, but it's not very
+ # probable, and we have no other way to check)
+ # TODO: Describe race condition
+ current_mem = 0
+ #TODO(dynmem): do the appropriate check involving MINMEM
+ miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
+ pnhvinfo["memory_free"])
+ if miss_mem > 0:
+ raise errors.OpPrereqError("This change will prevent the instance"
+ " from starting, due to %d MB of memory"
+ " missing on its primary node" %
+ miss_mem, errors.ECODE_NORES)
+
+ if be_new[constants.BE_AUTO_BALANCE]:
+ for node, nres in nodeinfo.items():
+ if node not in instance.secondary_nodes:
+ continue
+ nres.Raise("Can't get info from secondary node %s" % node,
+ prereq=True, ecode=errors.ECODE_STATE)
+ (_, _, (nhvinfo, )) = nres.payload
+ if not isinstance(nhvinfo.get("memory_free", None), int):
+ raise errors.OpPrereqError("Secondary node %s didn't return free"
+ " memory information" % node,
+ errors.ECODE_STATE)
+ #TODO(dynmem): do the appropriate check involving MINMEM
+ elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
+ raise errors.OpPrereqError("This change will prevent the instance"
+ " from failover to its secondary node"
+ " %s, due to not enough memory" % node,
+ errors.ECODE_STATE)
+
+ # runtime memory (ballooning) checks: the instance must be running on its
+ # primary node and, unless forced, the new value must lie between the
+ # proposed minimum and maximum memory
+ if self.op.runtime_mem:
+ remote_info = self.rpc.call_instance_info(instance.primary_node,
+ instance.name,
+ instance.hypervisor)
+ remote_info.Raise("Error checking node %s" % instance.primary_node)
+ if not remote_info.payload: # not running already
+ raise errors.OpPrereqError("Instance %s is not running" %
+ instance.name, errors.ECODE_STATE)
+
+ current_memory = remote_info.payload["memory"]
+ if (not self.op.force and
+ (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
+ self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
+ raise errors.OpPrereqError("Instance %s must have memory between %d"
+ " and %d MB of memory unless --force is"
+ " given" %
+ (instance.name,
+ self.be_proposed[constants.BE_MINMEM],
+ self.be_proposed[constants.BE_MAXMEM]),
+ errors.ECODE_INVAL)
+
+ delta = self.op.runtime_mem - current_memory
+ if delta > 0:
+ CheckNodeFreeMemory(self, instance.primary_node,
+ "ballooning memory for instance %s" %
+ instance.name, delta, instance.hypervisor)
+
- if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
- raise errors.OpPrereqError("Disk operations not supported for"
- " diskless instances", errors.ECODE_INVAL)
-
+ def _PrepareNicCreate(_, params, private):
+ self._PrepareNicModification(params, private, None, None,
+ {}, cluster, pnode)
+ return (None, None)
+
+ def _PrepareNicMod(_, nic, params, private):
+ self._PrepareNicModification(params, private, nic.ip, nic.network,
+ nic.nicparams, cluster, pnode)
+ return None
+
+ def _PrepareNicRemove(_, params, __):
+ ip = params.ip
+ net = params.network
+ if net is not None and ip is not None:
+ self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
+
+ # Verify NIC changes (operating on copy)
+ nics = instance.nics[:]
+ _ApplyContainerMods("NIC", nics, None, self.nicmod,
+ _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
+ if len(nics) > constants.MAX_NICS:
+ raise errors.OpPrereqError("Instance has too many network interfaces"
+ " (%d), cannot add more" % constants.MAX_NICS,
+ errors.ECODE_STATE)
+
- def _PrepareDiskMod(_, disk, params, __):
- disk.name = params.get(constants.IDISK_NAME, None)
-
- # Verify disk changes (operating on a copy)
- disks = copy.deepcopy(instance.disks)
- _ApplyContainerMods("disk", disks, None, self.diskmod, None,
- _PrepareDiskMod, None)
- utils.ValidateDeviceNames("disk", disks)
- if len(disks) > constants.MAX_DISKS:
- raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
- " more" % constants.MAX_DISKS,
- errors.ECODE_STATE)
- disk_sizes = [disk.size for disk in instance.disks]
- disk_sizes.extend(params["size"] for (op, idx, params, private) in
- self.diskmod if op == constants.DDM_ADD)
- ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
- ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
-
- if self.op.offline is not None and self.op.offline:
- CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
- msg="can't change to offline")
-
+ # Pre-compute NIC changes (necessary to use result in hooks)
+ self._nic_chgdesc = []
+ if self.nicmod:
+ # Operate on copies as this is still in prereq
+ nics = [nic.Copy() for nic in instance.nics]
+ _ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
+ self._CreateNewNic, self._ApplyNicMods, None)
+ # Verify that NIC names are unique and valid
+ utils.ValidateDeviceNames("NIC", nics)
+ self._new_nics = nics
+ ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
+ else:
+ self._new_nics = None
+ ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
+
+ # instance policy check, skipped when ignore_ipolicy is set
+ if not self.op.ignore_ipolicy:
+ ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
+ group_info)
+
+ # Fill ispec with backend parameters
+ ispec[constants.ISPEC_SPINDLE_USE] = \
+ self.be_new.get(constants.BE_SPINDLE_USE, None)
+ ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
+ None)
+
+ # Copy ispec to verify parameters with min/max values separately
+ if self.op.disk_template:
+ new_disk_template = self.op.disk_template
+ else:
+ new_disk_template = instance.disk_template
+ ispec_max = ispec.copy()
+ ispec_max[constants.ISPEC_MEM_SIZE] = \
+ self.be_new.get(constants.BE_MAXMEM, None)
+ res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max,
+ new_disk_template)
+ ispec_min = ispec.copy()
+ ispec_min[constants.ISPEC_MEM_SIZE] = \
+ self.be_new.get(constants.BE_MINMEM, None)
+ res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min,
+ new_disk_template)
+
+ if (res_max or res_min):
+ # FIXME: Improve error message by including information about whether
+ # the upper or lower limit of the parameter fails the ipolicy.
+ msg = ("Instance allocation to group %s (%s) violates policy: %s" %
+ (group_info, group_info.name,
+ utils.CommaJoin(set(res_max + res_min))))
+ raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
+
+ def _ConvertPlainToDrbd(self, feedback_fn):
+ """Converts an instance from plain to drbd.
+
+ Creates the additional volumes on the primary and the new secondary
+ node, renames the original volumes on the primary node to the logical
+ IDs of the new disks' first children, creates the DRBD devices, updates
+ the configuration and finally waits for the disks to sync. If creating
+ the DRBD devices fails, the original volumes are renamed back before the
+ error is re-raised.
+
+ @param feedback_fn: function called with progress messages
+ @raise errors.OpExecError: if disks are reported as degraded after
+ waiting for sync
+
+ """
+ feedback_fn("Converting template to drbd")
+ instance = self.instance
+ pnode = instance.primary_node
+ snode = self.op.remote_node
+
+ assert instance.disk_template == constants.DT_PLAIN
+
+ # create a fake disk info for _GenerateDiskTemplate
+ disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
+ constants.IDISK_VG: d.logical_id[0],
+ constants.IDISK_NAME: d.name}
+ for d in instance.disks]
+ new_disks = GenerateDiskTemplate(self, self.op.disk_template,
+ instance.name, pnode, [snode],
+ disk_info, None, None, 0, feedback_fn,
+ self.diskparams)
+ anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
+ self.diskparams)
+ p_excl_stor = IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
+ s_excl_stor = IsExclusiveStorageEnabledNodeName(self.cfg, snode)
+ info = GetInstanceInfoText(instance)
+ feedback_fn("Creating additional volumes...")
+ # first, create the missing data and meta devices
+ for disk in anno_disks:
+ # unfortunately this is... not too nice
+ # only the second child is created on the primary node; the first one is
+ # obtained below by renaming the original volume
+ CreateSingleBlockDev(self, pnode, instance, disk.children[1],
+ info, True, p_excl_stor)
+ for child in disk.children:
+ CreateSingleBlockDev(self, snode, instance, child, info, True,
+ s_excl_stor)
+ # at this stage, all new LVs have been created, we can rename the
+ # old ones
+ feedback_fn("Renaming original volumes...")
+ rename_list = [(o, n.children[0].logical_id)
+ for (o, n) in zip(instance.disks, new_disks)]
+ result = self.rpc.call_blockdev_rename(pnode, rename_list)
+ result.Raise("Failed to rename original LVs")
+
+ feedback_fn("Initializing DRBD devices...")
+ # all child devices are in place, we can now create the DRBD devices
+ try:
+ for disk in anno_disks:
+ for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
+ # force_open is only requested on the primary node
+ f_create = node == pnode
+ CreateSingleBlockDev(self, node, instance, disk, info, f_create,
+ excl_stor)
+ except errors.GenericError, e:
+ feedback_fn("Initializing of DRBD devices failed;"
+ " renaming back original volumes...")
+ for disk in new_disks:
+ self.cfg.SetDiskID(disk, pnode)
+ rename_back_list = [(n.children[0], o.logical_id)
+ for (n, o) in zip(new_disks, instance.disks)]
+ result = self.rpc.call_blockdev_rename(pnode, rename_back_list)
+ result.Raise("Failed to rename LVs back after error %s" % str(e))
+ raise
+
+ # at this point, the instance has been modified
+ instance.disk_template = constants.DT_DRBD8
+ instance.disks = new_disks
+ self.cfg.Update(instance, feedback_fn)
+
+ # Release node locks while waiting for sync
+ ReleaseLocks(self, locking.LEVEL_NODE)
+
+ # disks are created, waiting for sync
+ disk_abort = not WaitForSync(self, instance,
+ oneshot=not self.op.wait_for_sync)
+ if disk_abort:
+ raise errors.OpExecError("There are some degraded disks for"
+ " this instance, please cleanup manually")
+
+ # Node resource locks will be released by caller
+
+ def _ConvertDrbdToPlain(self, feedback_fn):
+ """Converts an instance from drbd to plain.
+
+ The first child of every DRBD disk becomes the new plain disk and
+ inherits size, mode and name from its parent. The configuration is
+ updated first; afterwards the volumes on the secondary node and the
+ second child of each old disk on the primary node are removed. Removal
+ failures are only logged as warnings.
+
+ @param feedback_fn: function called with progress messages
+
+ """
+ instance = self.instance
+
+ assert len(instance.secondary_nodes) == 1
+ assert instance.disk_template == constants.DT_DRBD8
+
+ pnode = instance.primary_node
+ snode = instance.secondary_nodes[0]
+ feedback_fn("Converting template to plain")
+
+ old_disks = AnnotateDiskParams(instance, instance.disks, self.cfg)
+ new_disks = [d.children[0] for d in instance.disks]
+
+ # copy over size, mode and name
+ for parent, child in zip(old_disks, new_disks):
+ child.size = parent.size
+ child.mode = parent.mode
+ child.name = parent.name
+
+ # this is a DRBD disk, return its port to the pool
+ # NOTE: this must be done right before the call to cfg.Update!
+ for disk in old_disks:
+ tcp_port = disk.logical_id[2]
+ self.cfg.AddTcpUdpPort(tcp_port)
+
+ # update instance structure
+ instance.disks = new_disks
+ instance.disk_template = constants.DT_PLAIN
+ _UpdateIvNames(0, instance.disks)
+ self.cfg.Update(instance, feedback_fn)
+
+ # Release locks in case removing disks takes a while
+ ReleaseLocks(self, locking.LEVEL_NODE)
+
+ feedback_fn("Removing volumes on the secondary node...")
+ for disk in old_disks:
+ self.cfg.SetDiskID(disk, snode)
+ msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
+ if msg:
+ self.LogWarning("Could not remove block device %s on node %s,"
+ " continuing anyway: %s", disk.iv_name, snode, msg)
+
+ feedback_fn("Removing unneeded volumes on the primary node...")
+ for idx, disk in enumerate(old_disks):
+ # children[0] is kept as the new plain disk; only children[1] is removed
+ meta = disk.children[1]
+ self.cfg.SetDiskID(meta, pnode)
+ msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
+ if msg:
+ self.LogWarning("Could not remove metadata for disk %d on node %s,"
+ " continuing anyway: %s", idx, pnode, msg)
+
+ def _CreateNewDisk(self, idx, params, _):
+   """Creates a new disk for the instance.
+
+   @param idx: index of the new disk
+   @param params: dict with the parameters of the new disk
+   @return: tuple of the new disk object and a list with one
+       (name, description) change entry
+
+   """
+   inst = self.instance
+
+   # File-based templates reuse the driver and the directory of the
+   # instance's first disk; all other templates need neither
+   file_driver = None
+   file_dir = None
+   if inst.disk_template in constants.DTS_FILEBASED:
+     (file_driver, first_path) = inst.disks[0].logical_id
+     file_dir = os.path.dirname(first_path)
+
+   generated = GenerateDiskTemplate(self, inst.disk_template, inst.name,
+                                    inst.primary_node, inst.secondary_nodes,
+                                    [params], file_dir, file_driver, idx,
+                                    self.Log, self.diskparams)
+   disk = generated[0]
+
+   created = CreateDisks(self, inst, disks=[disk])
+
+   if self.cluster.prealloc_wipe_disks:
+     # Wipe new disk
+     WipeOrCleanupDisks(self, inst, disks=[(idx, disk, 0)], cleanup=created)
+
+   change_name = "disk/%d" % idx
+   change_desc = "add:size=%s,mode=%s" % (disk.size, disk.mode)
+   return (disk, [(change_name, change_desc)])
+
+ @staticmethod
+ def _ModifyDisk(idx, disk, params, _):
+   """Modifies a disk.
+
+   Only the attributes present in C{params} are changed.
+
+   @param idx: index of the disk, used in the change descriptions
+   @param disk: the disk object, modified in place
+   @param params: dict of requested disk parameters
+   @rtype: list
+   @return: list of (name, new value) tuples, one per changed attribute
+
+   """
+   changes = []
+
+   mode = params.get(constants.IDISK_MODE, None)
+   if mode:
+     disk.mode = mode
+     changes.append(("disk.mode/%d" % idx, disk.mode))
+
+   # Touch the name only when the caller supplied one. Assigning
+   # unconditionally reset the name to None, and reported a bogus name
+   # change, whenever only the mode was being modified.
+   if constants.IDISK_NAME in params:
+     disk.name = params[constants.IDISK_NAME]
+     changes.append(("disk.name/%d" % idx, disk.name))
+
+   return changes
+
+ def _RemoveDisk(self, idx, root, _):
+   """Removes a disk from all nodes it lives on.
+
+   Removal failures are logged as warnings and do not abort the operation.
+
+   @param idx: index of the disk, used in warning messages
+   @param root: the disk object to remove
+
+   """
+   (annotated_root,) = AnnotateDiskParams(self.instance, [root], self.cfg)
+   node_tree = annotated_root.ComputeNodeTree(self.instance.primary_node)
+
+   for (node, device) in node_tree:
+     self.cfg.SetDiskID(device, node)
+     fail_msg = self.rpc.call_blockdev_remove(node, device).fail_msg
+     if not fail_msg:
+       continue
+     self.LogWarning("Could not remove disk/%d on node '%s': %s,"
+                     " continuing anyway", idx, node, fail_msg)
+
+   # DRBD disks hold a port which has to go back to the pool
+   is_drbd = root.dev_type in constants.LDS_DRBD
+   if is_drbd:
+     self.cfg.AddTcpUdpPort(root.logical_id[2])
+
+ def _CreateNewNic(self, idx, params, private):
+   """Builds the object for a new network interface.
+
+   @param idx: index of the new NIC, used in the change description
+   @param params: dict with the NIC parameters; the MAC entry is required
+   @param private: object whose C{filled} attribute holds the nicparams
+   @return: tuple of the new L{objects.NIC} and a list with one
+       (name, description) change entry
+
+   """
+   mac = params[constants.INIC_MAC]
+   ip = params.get(constants.INIC_IP, None)
+   net = params.get(constants.INIC_NETWORK, None)
+   nic_name = params.get(constants.INIC_NAME, None)
+   network_uuid = self.cfg.LookupNetwork(net)
+   #TODO: not private.filled?? can a nic have no nicparams??
+   filled = private.filled
+
+   new_nic = objects.NIC(mac=mac, ip=ip, network=network_uuid,
+                         name=nic_name, nicparams=filled)
+   new_nic.uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
+
+   change_name = "nic.%d" % idx
+   change_desc = ("add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
+                  (mac, ip, private.filled[constants.NIC_MODE],
+                   private.filled[constants.NIC_LINK], net))
+   return (new_nic, [(change_name, change_desc)])
+
+ def _ApplyNicMods(self, idx, nic, params, private):
+ """Modifies a network interface.
+
+ @param idx: index of the NIC, used in the change descriptions
+ @param nic: the NIC object, modified in place
+ @param params: dict of requested NIC parameters
+ @param private: object whose C{filled} attribute holds the new nicparams
+ @rtype: list
+ @return: list of (name, new value) tuples describing the changes
+
+ """
+ changes = []
+
+ # the parameter keys double as attribute names on the NIC object
+ for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NAME]:
+ if key in params:
+ changes.append(("nic.%s/%d" % (key, idx), params[key]))
+ setattr(nic, key, params[key])
+
+ # without an explicit network parameter the current value is looked up again
+ new_net = params.get(constants.INIC_NETWORK, nic.network)
+ new_net_uuid = self.cfg.LookupNetwork(new_net)
+ if new_net_uuid != nic.network:
+ changes.append(("nic.network/%d" % idx, new_net))
+ nic.network = new_net_uuid
+
+ if private.filled:
+ nic.nicparams = private.filled
+
+ # NOTE(review): indentation is not visible in this rendering; presumably
+ # this loop is nested under the "if private.filled" above -- confirm
+ for (key, val) in nic.nicparams.items():
+ changes.append(("nic.%s/%d" % (key, idx), val))
+
+ return changes
+
+ def Exec(self, feedback_fn):
+ """Modifies an instance.
+
+ All parameters take effect only at the next restart of the instance.
+
+ @param feedback_fn: function called with warning and progress messages
+ @rtype: list
+ @return: list of (name, value) tuples describing the applied changes
+ @raise errors.OpExecError: if the instance's disks cannot be shut down
+ before a disk template conversion
+
+ """
+ # Process here the warnings from CheckPrereq, as we don't have a
+ # feedback_fn there.
+ # TODO: Replace with self.LogWarning
+ for warn in self.warn:
+ feedback_fn("WARNING: %s" % warn)
+
+ # node resource locks must be owned exactly when a disk template change
+ # was requested
+ assert ((self.op.disk_template is None) ^
+ bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
+ "Not owning any node resource locks"
+
+ result = []
+ instance = self.instance
+
+ # New primary node
+ if self.op.pnode:
+ instance.primary_node = self.op.pnode
+
+ # runtime memory
+ if self.op.runtime_mem:
+ rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
+ instance,
+ self.op.runtime_mem)
+ rpcres.Raise("Cannot modify instance runtime memory")
+ result.append(("runtime_memory", self.op.runtime_mem))
+
+ # Apply disk changes
+ _ApplyContainerMods("disk", instance.disks, result, self.diskmod,
+ self._CreateNewDisk, self._ModifyDisk,
+ self._RemoveDisk)
+ _UpdateIvNames(0, instance.disks)
+
+ if self.op.disk_template:
+ if __debug__:
+ check_nodes = set(instance.all_nodes)
+ if self.op.remote_node:
+ check_nodes.add(self.op.remote_node)
+ for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
+ owned = self.owned_locks(level)
+ assert not (check_nodes - owned), \
+ ("Not owning the correct locks, owning %r, expected at least %r" %
+ (owned, check_nodes))
+
+ r_shut = ShutdownInstanceDisks(self, instance)
+ if not r_shut:
+ raise errors.OpExecError("Cannot shutdown instance disks, unable to"
+ " proceed with disk template conversion")
+ # the (current, requested) template pair selects the conversion method
+ mode = (instance.disk_template, self.op.disk_template)
+ try:
+ self._DISK_CONVERSIONS[mode](self, feedback_fn)
+ except:
+ # on any failure release the instance's DRBD minors, then re-raise
+ self.cfg.ReleaseDRBDMinors(instance.name)
+ raise
+ result.append(("disk_template", self.op.disk_template))
+
+ assert instance.disk_template == self.op.disk_template, \
+ ("Expected disk template '%s', found '%s'" %
+ (self.op.disk_template, instance.disk_template))
+
+ # Release node and resource locks if there are any (they might already have
+ # been released during disk conversion)
+ ReleaseLocks(self, locking.LEVEL_NODE)
+ ReleaseLocks(self, locking.LEVEL_NODE_RES)
+
+ # Apply NIC changes
+ if self._new_nics is not None:
+ instance.nics = self._new_nics
+ result.extend(self._nic_chgdesc)
+
+ # hvparams changes
+ if self.op.hvparams:
+ instance.hvparams = self.hv_inst
+ for key, val in self.op.hvparams.iteritems():
+ result.append(("hv/%s" % key, val))
+
+ # beparams changes
+ if self.op.beparams:
+ instance.beparams = self.be_inst
+ for key, val in self.op.beparams.iteritems():
+ result.append(("be/%s" % key, val))
+
+ # OS change
+ if self.op.os_name:
+ instance.os = self.op.os_name
+
+ # osparams changes
+ if self.op.osparams:
+ instance.osparams = self.os_inst
+ for key, val in self.op.osparams.iteritems():
+ result.append(("os/%s" % key, val))
+
+ if self.op.offline is None:
+ # Ignore
+ pass
+ elif self.op.offline:
+ # Mark instance as offline
+ self.cfg.MarkInstanceOffline(instance.name)
+ result.append(("admin_state", constants.ADMINST_OFFLINE))
+ else:
+ # Mark instance as online, but stopped
+ self.cfg.MarkInstanceDown(instance.name)
+ result.append(("admin_state", constants.ADMINST_DOWN))
+
+ self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
+
+ assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
+ self.owned_locks(locking.LEVEL_NODE)), \
+ "All node locks should have been released by now"
+
+ return result
+
+ # Maps (current disk template, requested disk template) to the function
+ # performing the conversion; the functions are stored unbound, so they are
+ # called with the logical unit passed explicitly as first argument
+ _DISK_CONVERSIONS = {
+ (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
+ (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
+ }
+
+
+ class LUInstanceChangeGroup(LogicalUnit):
+ """Logical unit computing the jobs needed to change an instance's group.
+
+ The target groups are either given in the opcode or default to all groups
+ not currently used by the instance; an iallocator computes the solution.
+
+ """
+ HPATH = "instance-change-group"
+ HTYPE = constants.HTYPE_INSTANCE
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ """Declares the initial locks and resolves the requested target groups.
+
+ """
+ self.share_locks = ShareAll()
+
+ # node group and node locks are filled in by DeclareLocks
+ self.needed_locks = {
+ locking.LEVEL_NODEGROUP: [],
+ locking.LEVEL_NODE: [],
+ locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
+ }
+
+ self._ExpandAndLockInstance()
+
+ if self.op.target_groups:
+ self.req_target_uuids = map(self.cfg.LookupNodeGroup,
+ self.op.target_groups)
+ else:
+ self.req_target_uuids = None
+
+ self.op.iallocator = GetDefaultIAllocator(self.cfg, self.op.iallocator)
+
+ def DeclareLocks(self, level):
+ """Computes the node group and node locks for the given level.
+
+ With explicit target groups only those groups, the instance's own groups
+ and their nodes are locked; otherwise all groups and all nodes are.
+
+ """
+ if level == locking.LEVEL_NODEGROUP:
+ assert not self.needed_locks[locking.LEVEL_NODEGROUP]
+
+ if self.req_target_uuids:
+ lock_groups = set(self.req_target_uuids)
+
+ # Lock all groups used by instance optimistically; this requires going
+ # via the node before it's locked, requiring verification later on
+ instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
+ lock_groups.update(instance_groups)
+ else:
+ # No target groups, need to lock all of them
+ lock_groups = locking.ALL_SET
+
+ self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
+
+ elif level == locking.LEVEL_NODE:
+ if self.req_target_uuids:
+ # Lock all nodes used by instances
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+ self._LockInstancesNodes()
+
+ # Lock all nodes in all potential target groups
+ lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
+ self.cfg.GetInstanceNodeGroups(self.op.instance_name))
+ member_nodes = [node_name
+ for group in lock_groups
+ for node_name in self.cfg.GetNodeGroup(group).members]
+ self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
+ else:
+ # Lock all nodes as all groups are potential targets
+ self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+
+ def CheckPrereq(self):
+ """Verifies the locks and computes the set of target groups.
+
+ Stores the result in C{self.target_uuids}.
+
+ @raise errors.OpPrereqError: if a target group is already used by the
+ instance or if no target group is left
+
+ """
+ owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
+ owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
+ owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
+
+ assert (self.req_target_uuids is None or
+ owned_groups.issuperset(self.req_target_uuids))
+ assert owned_instances == set([self.op.instance_name])
+
+ # Get instance information
+ self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+
+ # Check if node groups for locked instance are still correct
+ assert owned_nodes.issuperset(self.instance.all_nodes), \
+ ("Instance %s's nodes changed while we kept the lock" %
+ self.op.instance_name)
+
+ inst_groups = CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
+ owned_groups)
+
+ if self.req_target_uuids:
+ # User requested specific target groups
+ self.target_uuids = frozenset(self.req_target_uuids)
+ else:
+ # All groups except those used by the instance are potential targets
+ self.target_uuids = owned_groups - inst_groups
+
+ conflicting_groups = self.target_uuids & inst_groups
+ if conflicting_groups:
+ raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
+ " used by the instance '%s'" %
+ (utils.CommaJoin(conflicting_groups),
+ self.op.instance_name),
+ errors.ECODE_INVAL)
+
+ if not self.target_uuids:
+ raise errors.OpPrereqError("There are no possible target groups",
+ errors.ECODE_INVAL)
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ @return: dict with the space-separated target group UUIDs plus the
+ instance's hook environment
+
+ """
+ assert self.target_uuids
+
+ env = {
+ "TARGET_GROUPS": " ".join(self.target_uuids),
+ }
+
+ env.update(BuildInstanceHookEnvByObject(self, self.instance))
+
+ return env
+
+ def BuildHooksNodes(self):
+ """Build hooks nodes.
+
+ @return: tuple of two lists, each containing only the master node
+
+ """
+ mn = self.cfg.GetMasterNode()
+ return ([mn], [mn])
+
+ def Exec(self, feedback_fn):
+ """Runs the iallocator and returns the resulting jobs.
+
+ @raise errors.OpPrereqError: if the iallocator cannot compute a solution
+
+ """
+ instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
+
+ assert instances == [self.op.instance_name], "Instance not locked"
+
+ req = iallocator.IAReqGroupChange(instances=instances,
+ target_groups=list(self.target_uuids))
+ ial = iallocator.IAllocator(self.cfg, self.rpc, req)
+
+ ial.Run(self.op.iallocator)
+
+ if not ial.success:
+ raise errors.OpPrereqError("Can't compute solution for changing group of"
+ " instance '%s' using iallocator '%s': %s" %
+ (self.op.instance_name, self.op.iallocator,
+ ial.info), errors.ECODE_NORES)
+
+ jobs = LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
+
+ self.LogInfo("Iallocator returned %s job(s) for changing group of"
+ " instance '%s'", len(jobs), self.op.instance_name)
+
+ return ResultWithJobs(jobs)
--- /dev/null
+ #
+ #
+
+ # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
+ #
+ # This program is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ # General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program; if not, write to the Free Software
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ # 02110-1301, USA.
+
+
+ """Logical units dealing with storage of instances."""
+
+ import itertools
+ import logging
+ import os
+ import time
+
+ from ganeti import compat
+ from ganeti import constants
+ from ganeti import errors
+ from ganeti import ht
+ from ganeti import locking
+ from ganeti.masterd import iallocator
+ from ganeti import objects
+ from ganeti import utils
+ from ganeti import opcodes
+ from ganeti import rpc
+ from ganeti.cmdlib.base import LogicalUnit, NoHooksLU, Tasklet
+ from ganeti.cmdlib.common import INSTANCE_DOWN, INSTANCE_NOT_RUNNING, \
+ AnnotateDiskParams, CheckIAllocatorOrNode, ExpandNodeName, \
+ CheckNodeOnline, CheckInstanceNodeGroups, CheckInstanceState, \
+ IsExclusiveStorageEnabledNode, FindFaultyInstanceDisks
+ from ganeti.cmdlib.instance_utils import GetInstanceInfoText, \
+ CopyLockList, ReleaseLocks, CheckNodeVmCapable, \
+ BuildInstanceHookEnvByObject, CheckNodeNotDrained, CheckTargetNodeIPolicy
+
+ import ganeti.masterd.instance
+
+
+ # Per-disk-template string fragment
+ # NOTE(review): presumably used when generating disk names; despite the
+ # "PREFIX" in the name the values look like suffixes -- confirm at use site
+ _DISK_TEMPLATE_NAME_PREFIX = {
+ constants.DT_PLAIN: "",
+ constants.DT_RBD: ".rbd",
+ constants.DT_EXT: ".ext",
+ }
+
+
+ # Maps a disk template to the logical device type (constants.LD_*) of its
+ # disks; both file-based templates share the same device type
+ _DISK_TEMPLATE_DEVICE_TYPE = {
+ constants.DT_PLAIN: constants.LD_LV,
+ constants.DT_FILE: constants.LD_FILE,
+ constants.DT_SHARED_FILE: constants.LD_FILE,
+ constants.DT_BLOCK: constants.LD_BLOCKDEV,
+ constants.DT_RBD: constants.LD_RBD,
+ constants.DT_EXT: constants.LD_EXT,
+ }
+
+
+ def CreateSingleBlockDev(lu, node, instance, device, info, force_open,
+ excl_stor):
+ """Create a single block device on a given node.
+
+ This will not recurse over children of the device, so they must be
+ created in advance.
+
+ @param lu: the lu on whose behalf we execute
+ @param node: the node on which to create the device
+ @type instance: L{objects.Instance}
+ @param instance: the instance which owns the device
+ @type device: L{objects.Disk}
+ @param device: the device to create
+ @param info: the extra 'metadata' we should attach to the device
+ (this will be represented as a LVM tag)
+ @type force_open: boolean
+ @param force_open: this parameter will be passed to the
+ L{backend.BlockdevCreate} function where it specifies
+ whether we run on primary or not, and it affects both
+ the child assembly and the device's own Open() execution
+ @type excl_stor: boolean
+ @param excl_stor: Whether exclusive_storage is active for the node
+
+ """
+ lu.cfg.SetDiskID(device, node)
+ result = lu.rpc.call_blockdev_create(node, device, device.size,
+ instance.name, force_open, info,
+ excl_stor)
+ result.Raise("Can't create block device %s on"
+ " node %s for instance %s" % (device, node, instance.name))
+ # keep an already set physical_id; otherwise use the RPC result's payload
+ if device.physical_id is None:
+ device.physical_id = result.payload
+
+
+ def _CreateBlockDevInner(lu, node, instance, device, force_create,
+                          info, force_open, excl_stor):
+   """Create a tree of block devices on a given node.
+
+   If this device type has to be created on secondaries, create it and
+   all its children.
+
+   If not, just recurse to children keeping the same 'force' value.
+
+   @attention: The device has to be annotated already.
+
+   @param lu: the lu on whose behalf we execute
+   @param node: the node on which to create the device
+   @type instance: L{objects.Instance}
+   @param instance: the instance which owns the device
+   @type device: L{objects.Disk}
+   @param device: the device to create
+   @type force_create: boolean
+   @param force_create: whether to force creation of this device; this
+       will be changed to True whenever we find a device which has
+       CreateOnSecondary() attribute
+   @param info: the extra 'metadata' we should attach to the device
+       (this will be represented as a LVM tag)
+   @type force_open: boolean
+   @param force_open: this parameter will be passed to the
+       L{backend.BlockdevCreate} function where it specifies
+       whether we run on primary or not, and it affects both
+       the child assembly and the device's own Open() execution
+   @type excl_stor: boolean
+   @param excl_stor: Whether exclusive_storage is active for the node
+
+   @rtype: list
+   @return: list of created devices, as (node, device) tuples
+   @raise errors.DeviceCreationError: if the creation fails; the exception
+       carries the devices created up to that point
+
+   """
+   created_devices = []
+   try:
+     if device.CreateOnSecondary():
+       force_create = True
+
+     if device.children:
+       for child in device.children:
+         devs = _CreateBlockDevInner(lu, node, instance, child, force_create,
+                                     info, force_open, excl_stor)
+         created_devices.extend(devs)
+
+     if not force_create:
+       return created_devices
+
+     CreateSingleBlockDev(lu, node, instance, device, info, force_open,
+                          excl_stor)
+     # The device has been completely created, so there is no point in keeping
+     # its subdevices in the list. We just add the device itself instead.
+     created_devices = [(node, device)]
+     return created_devices
+
+   except errors.DeviceCreationError, e:
+     # A nested call failed: add what this level created to the exception
+     e.created_devices.extend(created_devices)
+     # Bare raise re-raises the same exception with its original traceback
+     # ("raise e" would restart the traceback at this line)
+     raise
+   except errors.OpExecError, e:
+     raise errors.DeviceCreationError(str(e), created_devices)
+
+
def IsExclusiveStorageEnabledNodeName(cfg, nodename):
  """Tell whether exclusive_storage applies to the node with the given name.

  The node object is looked up in the configuration and then handed over to
  L{IsExclusiveStorageEnabledNode}.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type nodename: string
  @param nodename: The node
  @rtype: bool
  @return: The effective value of exclusive_storage
  @raise errors.OpPrereqError: if no node exists with the given name

  """
  node_info = cfg.GetNodeInfo(nodename)
  if node_info is not None:
    return IsExclusiveStorageEnabledNode(cfg, node_info)

  # The configuration does not know this node
  raise errors.OpPrereqError("Invalid node name %s" % nodename,
                             errors.ECODE_NOENT)
+
+
def _CreateBlockDev(lu, node, instance, device, force_create, info,
                    force_open):
  """Annotate the root device and create its whole device tree.

  This is a thin wrapper around L{_CreateBlockDevInner}: the disk
  parameters are annotated on the root device and the exclusive_storage
  flag of the target node is looked up before delegating.

  @return: whatever L{_CreateBlockDevInner} returns

  """
  annotated_disks = AnnotateDiskParams(instance, [device], lu.cfg)
  # Exactly one disk went in, exactly one must come out
  (root_disk,) = annotated_disks
  node_excl_stor = IsExclusiveStorageEnabledNodeName(lu.cfg, node)

  return _CreateBlockDevInner(lu, node, instance, root_disk, force_create,
                              info, force_open, node_excl_stor)
+
+
def _UndoCreateDisks(lu, disks_created):
  """Remove the block devices created by L{CreateDisks}.

  Meant to be called on errors, to roll back what L{CreateDisks} did.
  Removal failures are only logged, so that the remaining devices are
  still attempted.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @param disks_created: the result returned by L{CreateDisks}

  """
  for (node, disk) in disks_created:
    lu.cfg.SetDiskID(disk, node)
    removal = lu.rpc.call_blockdev_remove(node, disk)
    if not removal.fail_msg:
      continue
    logging.warning("Failed to remove newly-created disk %s on node %s:"
                    " %s", disk, node, removal.fail_msg)
+
+
+ def CreateDisks(lu, instance, to_skip=None, target_node=None, disks=None):
+ """Create all disks for an instance.
+
+ This abstracts away some work from AddInstance.
+
+ @type lu: L{LogicalUnit}
+ @param lu: the logical unit on whose behalf we execute
+ @type instance: L{objects.Instance}
+ @param instance: the instance whose disks we should create
+ @type to_skip: list
+ @param to_skip: list of indices to skip
+ @type target_node: string
+ @param target_node: if passed, overrides the target node for creation
+ @type disks: list of {objects.Disk}
+ @param disks: the disks to create; if not specified, all the disks of the
+ instance are created
+ @return: information about the created disks, to be used to call
+ L{_UndoCreateDisks}
+ @raise errors.OpPrereqError: in case of error
+
+ """
+ info = GetInstanceInfoText(instance)
+ if target_node is None:
+ pnode = instance.primary_node
+ all_nodes = instance.all_nodes
+ else:
+ pnode = target_node
+ all_nodes = [pnode]
+
+ if disks is None:
+ disks = instance.disks
+
+ if instance.disk_template in constants.DTS_FILEBASED:
+ file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
+ result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
+
+ result.Raise("Failed to create directory '%s' on"
+ " node %s" % (file_storage_dir, pnode))
+
+ disks_created = []
+ for idx, device in enumerate(disks):
+ if to_skip and idx in to_skip:
+ continue
+ logging.info("Creating disk %s for instance '%s'", idx, instance.name)
+ for node in all_nodes:
+ f_create = node == pnode
+ try:
+ _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
+ disks_created.append((node, device))
+ except errors.DeviceCreationError, e:
+ logging.warning("Creating disk %s for instance '%s' failed",
+ idx, instance.name)
+ disks_created.extend(e.created_devices)
+ _UndoCreateDisks(lu, disks_created)
+ raise errors.OpExecError(e.message)
+ return disks_created
+
+
def ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group.

  @param disk_template: the disk template the disks will use
  @param disks: list of disk definitions (dicts); each one must provide the
      L{constants.IDISK_VG} and L{constants.IDISK_SIZE} keys
  @rtype: dict
  @return: dictionary mapping each volume group name to the total size
      required in it; empty for templates not using volume groups
  @raise errors.ProgrammerError: if the disk template is not known here

  """
  def _compute(disks, payload):
    """Sum up, per volume group, the size of each disk plus C{payload}.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # Accumulate on the entry of this disk's volume group; looking up the
      # literal constants.IDISK_VG key would always yield 0 and drop the
      # sizes of earlier disks in the same volume group.
      vgs[vg_name] = \
        vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # constants.DRBD_META_SIZE is added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
+
+
def ComputeDisks(op, default_vg):
  """Validate and normalise the disk definitions of an instance opcode.

  Every entry of C{op.disks} is checked (access mode, size, provider) and
  converted into a fresh dictionary holding the size, mode, volume group and
  name, plus the optional meta volume group, adoption and ext-storage
  parameters.

  @param op: The instance opcode
  @param default_vg: The default_vg to assume
  @return: The computed disks
  @raise errors.OpPrereqError: if a disk definition is invalid

  """
  computed = []

  for spec in op.disks:
    access = spec.get(constants.IDISK_MODE, constants.DISK_RDWR)
    if access not in constants.DISK_ACCESS_SET:
      raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                 access, errors.ECODE_INVAL)

    raw_size = spec.get(constants.IDISK_SIZE, None)
    if raw_size is None:
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
    try:
      size = int(raw_size)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid disk size '%s'" % raw_size,
                                 errors.ECODE_INVAL)

    ext_provider = spec.get(constants.IDISK_PROVIDER, None)
    if ext_provider and op.disk_template != constants.DT_EXT:
      raise errors.OpPrereqError("The '%s' option is only valid for the %s"
                                 " disk template, not %s" %
                                 (constants.IDISK_PROVIDER, constants.DT_EXT,
                                  op.disk_template), errors.ECODE_INVAL)

    # A name spelled like the "none" value means "no name"
    name = spec.get(constants.IDISK_NAME, None)
    if name is not None and name.lower() == constants.VALUE_NONE:
      name = None

    new_disk = {
      constants.IDISK_SIZE: size,
      constants.IDISK_MODE: access,
      constants.IDISK_VG: spec.get(constants.IDISK_VG, default_vg),
      constants.IDISK_NAME: name,
      }

    # Optional settings are copied only when given
    for optional_key in (constants.IDISK_METAVG, constants.IDISK_ADOPT):
      if optional_key in spec:
        new_disk[optional_key] = spec[optional_key]

    # For extstorage, demand the `provider' option and add any
    # additional parameters (ext-params) to the dict
    if op.disk_template == constants.DT_EXT:
      if not ext_provider:
        raise errors.OpPrereqError("Missing provider for template '%s'" %
                                   constants.DT_EXT, errors.ECODE_INVAL)
      new_disk[constants.IDISK_PROVIDER] = ext_provider
      for key in spec:
        if key not in constants.IDISK_PARAMS:
          new_disk[key] = spec[key]

    computed.append(new_disk)

  return computed
+
+
def CheckRADOSFreeSpace():
  """Check the free space inside the RADOS cluster.

  Nothing is actually verified: for the RADOS cluster we assume there is
  always enough space.

  """
  return None
+
+
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Build a DRBD8 disk object together with its data and metadata LVs.

  A port and a shared secret are allocated first; then the data volume, the
  metadata volume and finally the DRBD device are instantiated, each one
  receiving its own unique ID.

  @param vgnames: pair of volume group names (data, metadata)
  @param names: pair of logical volume names (data, metadata)
  @return: the DRBD8 L{objects.Disk}, having the two LVs as children

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  def _WithUniqueId(disk):
    """Assign a newly generated unique ID to the disk and return it.

    """
    disk.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    return disk

  data_lv = _WithUniqueId(objects.Disk(dev_type=constants.LD_LV, size=size,
                                       logical_id=(vgnames[0], names[0]),
                                       params={}))
  meta_lv = _WithUniqueId(objects.Disk(dev_type=constants.LD_LV,
                                       size=constants.DRBD_META_SIZE,
                                       logical_id=(vgnames[1], names[1]),
                                       params={}))

  drbd_logical_id = (primary, secondary, port, p_minor, s_minor,
                     shared_secret)
  return _WithUniqueId(objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                                    logical_id=drbd_logical_id,
                                    children=[data_lv, meta_lv],
                                    iv_name=iv_name, params={}))
+
+
def GenerateDiskTemplate(
  lu, template_name, instance_name, primary_node, secondary_nodes,
  disk_info, file_storage_dir, file_driver, base_index,
  feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
  _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template to generate disks for
  @param instance_name: name of the instance; used when allocating DRBD
      minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: list of secondary nodes; must contain exactly one
      node for DRBD8 and be empty for all other templates
  @param disk_info: list of disk definitions (dicts)
  @param file_storage_dir: directory used in the logical ID of file-based
      disks
  @param file_driver: driver used in the logical ID of file-based disks
  @param base_index: index of the first generated disk
  @param feedback_fn: function called with a progress message for each
      non-DRBD disk
  @param full_disk_params: disk parameters from which the DRBD defaults are
      computed
  @param _req_file_storage: check run for the file template
  @param _req_shr_file_storage: check run for the shared file template
  @return: list of L{objects.Disk}; empty for the diskless template
  @raise errors.ProgrammerError: on an inconsistent node/template
      combination, an unknown template, or an ext disk without provider

  """
  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []

  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # One minor per node and disk, laid out as [pri, sec, pri, sec, ...]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
                                                       full_disk_params)
    drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]

    # Two LV names per disk: names[2 * i] is data, names[2 * i + 1] is meta
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disk_dev.name = disk.get(constants.IDISK_NAME, None)
      disks.append(disk_dev)
  else:
    if secondary_nodes:
      raise errors.ProgrammerError("Wrong template configuration")

    if template_name == constants.DT_FILE:
      _req_file_storage()
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    # Templates without a name prefix don't get generated names
    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = None
    else:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    # logical_id_fn(idx, disk_index, disk) computes the logical ID of one
    # disk; idx is relative to disk_info, disk_index includes base_index
    if template_name == constants.DT_PLAIN:

      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])

    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
      logical_id_fn = \
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
                                                    disk_index))
    elif template_name == constants.DT_BLOCK:
      logical_id_fn = \
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
    elif template_name == constants.DT_EXT:
      def logical_id_fn(idx, _, disk):
        provider = disk.get(constants.IDISK_PROVIDER, None)
        if provider is None:
          # The arguments must be interpolated here: exceptions don't apply
          # format arguments on their own
          raise errors.ProgrammerError("Disk template is %s, but '%s' is"
                                       " not found" %
                                       (constants.DT_EXT,
                                        constants.IDISK_PROVIDER))
        return (provider, names[idx])
    else:
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    for idx, disk in enumerate(disk_info):
      params = {}
      # Only for the Ext template add disk_info to params
      if template_name == constants.DT_EXT:
        params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
        for key in disk:
          if key not in constants.IDISK_PARAMS:
            params[key] = disk[key]
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disk_dev = objects.Disk(dev_type=dev_type, size=size,
                              logical_id=logical_id_fn(idx, disk_index, disk),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE],
                              params=params)
      disk_dev.name = disk.get(constants.IDISK_NAME, None)
      disk_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
      disks.append(disk_dev)

  return disks
+
+
class LUInstanceRecreateDisks(LogicalUnit):
  """Logical unit recreating the missing disks of an instance.

  The disks can be recreated in place, on an explicitly given set of
  nodes, or on nodes chosen by an iallocator.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  # Disk parameters which may be changed while recreating
  _MODIFYABLE = compat.UniqueFrozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,
    ])

  # New or changed disk parameters may have different semantics, hence make
  # sure every known parameter has been classified here
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_VG,
    constants.IDISK_METAVG,
    constants.IDISK_PROVIDER,
    constants.IDISK_NAME,
    ]))

  def _RunAllocator(self):
    """Ask the iallocator named in the opcode for the new nodes.

    On success the chosen nodes are stored in C{self.op.nodes}.

    """
    be_full = self.cfg.GetClusterInfo().FillBE(self.instance)

    # FIXME
    # The allocator should actually run in "relocate" mode, but current
    # allocators don't support relocating all the nodes of an instance at
    # the same time. As a workaround we use "allocate" mode, but this is
    # suboptimal for two reasons:
    # - The instance name passed to the allocator is present in the list of
    #   existing instances, so there could be a conflict within the
    #   internal structures of the allocator. This doesn't happen with the
    #   current allocators, but it's a liability.
    # - The allocator counts the resources used by the instance twice: once
    #   because the instance exists already, and once because it tries to
    #   allocate a new instance.
    # The allocator could choose some of the nodes on which the instance is
    # running, but that's not a problem. If the instance nodes are broken,
    # they should be already be marked as drained or offline, and hence
    # skipped by the allocator. If instance disks have been lost for other
    # reasons, then recreating the disks on the same nodes should be fine.
    requested_disks = [{constants.IDISK_SIZE: d.size,
                        constants.IDISK_MODE: d.mode}
                       for d in self.instance.disks]
    req = iallocator.IAReqInstanceAlloc(
      name=self.op.instance_name,
      disk_template=self.instance.disk_template,
      tags=list(self.instance.GetTags()),
      os=self.instance.os,
      nics=[{}],
      vcpus=be_full[constants.BE_VCPUS],
      memory=be_full[constants.BE_MAXMEM],
      spindle_use=be_full[constants.BE_SPINDLE_USE],
      disks=requested_disks,
      hypervisor=self.instance.hypervisor,
      node_whitelist=None)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    assert req.RequiredNodes() == len(self.instance.all_nodes)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.op.nodes = ial.result
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

  def CheckArguments(self):
    """Normalize the disk list and validate the opcode arguments.

    """
    if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      unique_indices = sorted(frozenset(self.op.disks))
      self.op.disks = [(idx, {}) for idx in unique_indices]

    duplicates = utils.FindDuplicates([compat.fst(entry)
                                       for entry in self.op.disks])
    if duplicates:
      raise errors.OpPrereqError("Some disks have been specified more than"
                                 " once: %s" % utils.CommaJoin(duplicates),
                                 errors.ECODE_INVAL)

    # We don't want _CheckIAllocatorOrNode selecting the default iallocator
    # when neither iallocator nor nodes are specified
    if self.op.iallocator or self.op.nodes:
      CheckIAllocatorOrNode(self, "iallocator", "nodes")

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params) - self._MODIFYABLE
      if unsupported:
        raise errors.OpPrereqError("Parameters for disk %s try to change"
                                   " unmodifyable parameter(s): %s" %
                                   (idx, utils.CommaJoin(unsupported)),
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Lock the instance and set up the initial lock declarations.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    if not self.op.nodes:
      # Node locks are computed later, in DeclareLocks
      self.needed_locks[locking.LEVEL_NODE] = []
      if self.op.iallocator:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
    else:
      self.op.nodes = [ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)

    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    """Compute the locks needed at the given locking level.

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.iallocator is not None
      assert not self.op.nodes
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock the primary group used by the instance optimistically; this
      # requires going via the node before it's locked, requiring
      # verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)

    elif level == locking.LEVEL_NODE:
      # If an allocator is used, then we lock all the nodes in the current
      # instance group, as we don't know yet which ones will be selected;
      # if we replace the nodes without using an allocator, locks are
      # already declared in ExpandNames; otherwise, we need to lock all the
      # instance nodes for disk re-creation
      if self.op.iallocator:
        assert not self.op.nodes
        assert not self.needed_locks[locking.LEVEL_NODE]
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1

        # Lock member nodes of the group of the primary node
        node_locks = self.needed_locks[locking.LEVEL_NODE]
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
          node_locks.extend(self.cfg.GetNodeGroup(group_uuid).members)

        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
      elif not self.op.nodes:
        self._LockInstancesNodes(primary_only=False)

    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment is derived from the instance object.

    """
    return BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (the master node followed by all nodes of the instance)
    is returned twice.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that the requested nodes and disk indices are consistent, and runs the
    iallocator if one was requested.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    if not self.op.nodes:
      primary_node = instance.primary_node
    else:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
             len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
             len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]

    if not self.op.iallocator:
      CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      # Node group locks are acquired only for the primary node (and only
      # when the allocator is used)
      CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
                              primary_only=True)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
      CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                         msg="cannot recreate disks")

    # Map of disk index to the requested parameter changes
    if self.op.disks:
      self.disks = dict(self.op.disks)
    else:
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    num_disks = len(instance.disks)
    maxidx = max(self.disks.keys())
    if maxidx >= num_disks:
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
                                 errors.ECODE_INVAL)

    if ((self.op.nodes or self.op.iallocator) and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)

    self.instance = instance

    if self.op.iallocator:
      self._RunAllocator()
      # Release unneeded node and node resource locks
      for lock_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
        ReleaseLocks(self, lock_level, keep=self.op.nodes)
      ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)

    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    # Pending modifications, as (disk index, new logical ID, changes)
    mods = []

    for (idx, disk) in enumerate(instance.disks):
      if idx not in self.disks:
        # Disk should not be recreated
        to_skip.append(idx)
        continue

      changes = self.disks[idx]
      new_id = None

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        # otherwise disk internals have changed
        assert len(disk.logical_id) == 6
        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for (idx, new_id, changes) in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      if changes:
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                    mode=changes.get(constants.IDISK_MODE, None))

    if self.op.nodes:
      # change primary node, if needed
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")
      self.cfg.Update(instance, feedback_fn)

    # All touched nodes must be locked
    mylocks = self.owned_locks(locking.LEVEL_NODE)
    assert mylocks.issuperset(frozenset(instance.all_nodes))
    new_disks = CreateDisks(self, instance, to_skip=to_skip)

    # TODO: Release node locks before wiping, or explain why it's not possible
    if self.cfg.GetClusterInfo().prealloc_wipe_disks:
      # Wipe every recreated disk from its very beginning
      wipedisks = [(idx, disk, 0)
                   for (idx, disk) in enumerate(instance.disks)
                   if idx not in to_skip]
      WipeOrCleanupDisks(self, instance, disks=wipedisks, cleanup=new_disks)
+
+
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Verify that every node has enough free space in one volume group.

  The node information is queried via RPC; a node for which the data can't
  be retrieved, whose free space is not an integer, or which has less free
  space than requested makes the check fail.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
  all_node_info = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)

  for node in nodenames:
    node_result = all_node_info[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    # Only the volume group part of the payload matters here, and it must
    # describe exactly the one volume group we asked for
    (_, (vg_info, ), _) = node_result.payload
    free_space = vg_info.get("vg_free", None)

    if not isinstance(free_space, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, free_space), errors.ECODE_ENVIRON)

    if requested > free_space:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, free_space),
                                 errors.ECODE_NORES)
+
+
def CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Verify that every node has enough free space in each volume group.

  L{_CheckNodesFreeDiskOnVG} is run once for every volume group listed in
  C{req_sizes}.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  per_vg_requirements = req_sizes.items()
  for (vg_name, needed) in per_vg_requirements:
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, needed)
+
+
+ def _DiskSizeInBytesToMebibytes(lu, size):
+ """Converts a disk size in bytes to mebibytes.
+
+ Warns and rounds up if the size isn't an even multiple of 1 MiB.
+
+ """
+ (mib, remainder) = divmod(size, 1024 * 1024)
+
+ if remainder != 0:
+ lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
+ " to not overwrite existing data (%s bytes will not be"
+ " wiped)", (1024 * 1024) - remainder)
+ mib += 1
+
+ return mib
+
+
+ def _CalcEta(time_taken, written, total_size):
+ """Calculates the ETA based on size written and total size.
+
+ @param time_taken: The time taken so far
+ @param written: amount written so far
+ @param total_size: The total size of data to be written
+ @return: The remaining time in seconds
+
+ """
+ avg_time = time_taken / float(written)
+ return (total_size - written) * avg_time
+
+
def WipeDisks(lu, instance, disks=None):
  """Wipes instance disks.

  Disk synchronization is paused on the primary node while wiping and is
  resumed afterwards, also in case of errors. The disks are wiped in chunks
  and progress is reported at most once a minute.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @type disks: None or list of tuple of (number, L{objects.Disk}, number)
  @param disks: Disk details; tuple contains disk index, disk object and the
      start offset; if None, all disks of the instance are wiped from
      offset 0

  """
  node = instance.primary_node

  if disks is None:
    disks = [(idx, disk, 0)
             for (idx, disk) in enumerate(instance.disks)]

  for (_, device, _) in disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pausing synchronization of disks of instance '%s'",
               instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (map(compat.snd, disks),
                                                   instance),
                                                  True)
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  # A failed pause of a single disk is not fatal
  for idx, success in enumerate(result.payload):
    if not success:
      logging.warning("Pausing synchronization of disk %s of instance '%s'"
                      " failed", idx, instance.name)

  try:
    for (idx, device, offset) in disks:
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
      # The chunk must never be zero (which the truncation would produce for
      # small disks), as the loop below would then not make any progress.
      wipe_chunk_size = \
        max(1, int(min(constants.MAX_WIPE_CHUNK,
                       device.size / 100.0 *
                       constants.MIN_WIPE_CHUNK_PERCENT)))

      size = device.size
      # Zero makes the first finished chunk report progress right away
      last_output = 0
      start_time = time.time()

      if offset == 0:
        info_text = ""
      else:
        info_text = (" (from %s to %s)" %
                     (utils.FormatUnit(offset, "h"),
                      utils.FormatUnit(size, "h")))

      lu.LogInfo("* Wiping disk %s%s", idx, info_text)

      logging.info("Wiping disk %d for instance %s on node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      while offset < size:
        # The last chunk may be shorter than the others
        wipe_size = min(wipe_chunk_size, size - offset)

        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)

        result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
                                           wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))

        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s",
                     offset / float(size) * 100, utils.FormatSeconds(eta))
          last_output = now
  finally:
    # Synchronization must be resumed even if wiping failed
    logging.info("Resuming synchronization of disks for instance '%s'",
                 instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                    (map(compat.snd, disks),
                                                     instance),
                                                    False)

    if result.fail_msg:
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                    node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
                        " failed", idx, instance.name)
+
+
def WipeOrCleanupDisks(lu, instance, disks=None, cleanup=None):
  """Wipe disks via L{WipeDisks}, removing the given disks on failure.

  If wiping fails with an execution error, the disks listed in C{cleanup}
  are removed again and the error is propagated unchanged.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @param disks: see L{WipeDisks}
  @param cleanup: the result returned by L{CreateDisks}, used for cleanup in
      case of error
  @raise errors.OpExecError: in case of failure

  """
  try:
    WipeDisks(lu, instance, disks=disks)
  except errors.OpExecError:
    instance_name = instance.name
    logging.warning("Wiping disks for instance '%s' failed",
                    instance_name)
    _UndoCreateDisks(lu, cleanup)
    # Let the caller see the original error
    raise
+
+
def ExpandCheckDisks(instance, disks):
  """Resolve a disk selection against the disks of an instance.

  C{None} selects all disks of the instance; any other selection must
  consist only of disks belonging to the instance.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on
  @raise errors.ProgrammerError: if a selected disk doesn't belong to the
      instance

  """
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
+
+
def WaitForSync(lu, instance, disks=None, oneshot=False):
  """Poll the mirror status of an instance's disks until they are in sync.

  The primary node is queried repeatedly; with C{oneshot} the status is
  only sampled instead of waiting for the sync to finish. A degraded state
  is re-checked a few times to tell transient situations from stable ones.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are checked
  @param disks: the disks to check, or None for all disks of the instance
  @param oneshot: whether to return without waiting for the sync to finish
  @return: False if a degraded disk was seen in the last poll, True
      otherwise (also when there is nothing to check)
  @raise errors.RemoteError: if the node can't be contacted repeatedly

  """
  if not instance.disks:
    return True
  if disks is not None and not disks:
    # An explicitly empty selection leaves nothing to wait for
    return True

  disks = ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.LogInfo("Waiting for instance %s to sync disks", instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  rpc_failures = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False

    reply = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
    msg = reply.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      rpc_failures += 1
      if rpc_failures >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue

    # A successful call resets the failure counter
    rpc_failures = 0

    for (i, mstat) in enumerate(reply.payload):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      # Degraded without a sync in progress counts as really degraded
      if mstat.is_degraded and mstat.sync_percent is None:
        cumul_degraded = True

      if mstat.sync_percent is None:
        continue

      done = False
      if mstat.estimated_time is None:
        rem_time = "no time estimate"
      else:
        rem_time = ("%s remaining (estimated)" %
                    utils.FormatSeconds(mstat.estimated_time))
        max_time = mstat.estimated_time
      lu.LogInfo("- device %s: %5.2f%% done, %s",
                 disks[i].iv_name, mstat.sync_percent, rem_time)

    finished = done or oneshot

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if finished and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if finished:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.LogInfo("Instance %s's disks are in sync", instance.name)

  return not cumul_degraded
+
+
+ def ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
+ """Shutdown block devices of an instance.
+
+ This does the shutdown on all nodes of the instance.
+
+ If ignore_primary is true, errors on the primary node are
+ ignored.
+
+ """
+ all_result = True
+ disks = ExpandCheckDisks(instance, disks)
+
+ for disk in disks:
+ for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
+ lu.cfg.SetDiskID(top_disk, node)
+ result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
+ msg = result.fail_msg
+ if msg:
+ lu.LogWarning("Could not shutdown block device %s on node %s: %s",
+ disk.iv_name, node, msg)
+ if ((node == instance.primary_node and not ignore_primary) or
+ (node != instance.primary_node and not result.offline)):
+ all_result = False
+ return all_result
+
+
+ def _SafeShutdownInstanceDisks(lu, instance, disks=None):
+ """Shutdown block devices of an instance.
+
+ This function checks that the instance is not running, before calling
+ L{ShutdownInstanceDisks}.
+
+ """
+ CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
+ ShutdownInstanceDisks(lu, instance, disks=disks)
+
+
+ def AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
+ ignore_size=False):
+ """Prepare the block devices for an instance.
+
+ This sets up the block devices on all nodes.
+
+ @type lu: L{LogicalUnit}
+ @param lu: the logical unit on whose behalf we execute
+ @type instance: L{objects.Instance}
+ @param instance: the instance for whose disks we assemble
+ @type disks: list of L{objects.Disk} or None
+ @param disks: which disks to assemble (or all, if None)
+ @type ignore_secondaries: boolean
+ @param ignore_secondaries: if true, errors on secondary nodes
+ won't result in an error return from the function
+ @type ignore_size: boolean
+ @param ignore_size: if true, the current known size of the disk
+ will not be used during the disk activation, useful for cases
+ when the size is wrong
+ @return: a tuple of (disks_ok, device_info); disks_ok is False if the
+ operation failed, and device_info is a list of
+ (host, instance_visible_name, node_visible_name)
+ with the mapping from node devices to instance devices
+
+ """
+ device_info = []
+ disks_ok = True
+ iname = instance.name
+ disks = ExpandCheckDisks(instance, disks)
+
+ # With the two passes mechanism we try to reduce the window of
+ # opportunity for the race condition of switching DRBD to primary
+ # before handshaking occurred, but we do not eliminate it
+
+ # The proper fix would be to wait (with some limits) until the
+ # connection has been made and drbd transitions from WFConnection
+ # into any other network-connected state (Connected, SyncTarget,
+ # SyncSource, etc.)
+
+ # 1st pass, assemble on all nodes in secondary mode
+ for idx, inst_disk in enumerate(disks):
+ for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
+ if ignore_size:
+ node_disk = node_disk.Copy()
+ node_disk.UnsetSize()
+ lu.cfg.SetDiskID(node_disk, node)
+ result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
+ False, idx)
+ msg = result.fail_msg
+ if msg:
+ is_offline_secondary = (node in instance.secondary_nodes and
+ result.offline)
+ lu.LogWarning("Could not prepare block device %s on node %s"
+ " (is_primary=False, pass=1): %s",
+ inst_disk.iv_name, node, msg)
+ if not (ignore_secondaries or is_offline_secondary):
+ disks_ok = False
+
+ # FIXME: race condition on drbd migration to primary
+
+ # 2nd pass, do only the primary node
+ for idx, inst_disk in enumerate(disks):
+ dev_path = None
+
+ for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
+ if node != instance.primary_node:
+ continue
+ if ignore_size:
+ node_disk = node_disk.Copy()
+ node_disk.UnsetSize()
+ lu.cfg.SetDiskID(node_disk, node)
+ result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
+ True, idx)
+ msg = result.fail_msg
+ if msg:
+ lu.LogWarning("Could not prepare block device %s on node %s"
+ " (is_primary=True, pass=2): %s",
+ inst_disk.iv_name, node, msg)
+ disks_ok = False
+ else:
+ dev_path = result.payload
+
+ device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
+
+ # leave the disks configured for the primary node
+ # this is a workaround that would be fixed better by
+ # improving the logical/physical id handling
+ for disk in disks:
+ lu.cfg.SetDiskID(disk, instance.primary_node)
+
+ return disks_ok, device_info
+
+
+ def StartInstanceDisks(lu, instance, force):
+ """Start the disks of an instance.
+
+ """
+ disks_ok, _ = AssembleInstanceDisks(lu, instance,
+ ignore_secondaries=force)
+ if not disks_ok:
+ ShutdownInstanceDisks(lu, instance)
+ if force is not None and not force:
+ lu.LogWarning("",
+ hint=("If the message above refers to a secondary node,"
+ " you can retry the operation using '--force'"))
+ raise errors.OpExecError("Disk consistency error")
+
+
+ class LUInstanceGrowDisk(LogicalUnit):
+ """Grow a disk of an instance.
+
+ """
+ HPATH = "disk-grow"
+ HTYPE = constants.HTYPE_INSTANCE
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.needed_locks[locking.LEVEL_NODE_RES] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+ self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
+ elif level == locking.LEVEL_NODE_RES:
+ # Copy node locks
+ self.needed_locks[locking.LEVEL_NODE_RES] = \
+ CopyLockList(self.needed_locks[locking.LEVEL_NODE])
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on the master, the primary and all the secondaries.
+
+ """
+ env = {
+ "DISK": self.op.disk,
+ "AMOUNT": self.op.amount,
+ "ABSOLUTE": self.op.absolute,
+ }
+ env.update(BuildInstanceHookEnvByObject(self, self.instance))
+ return env
+
+ def BuildHooksNodes(self):
+ """Build hooks nodes.
+
+ """
+ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
+ return (nl, nl)
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster.
+
+ """
+ instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
+ nodenames = list(instance.all_nodes)
+ for node in nodenames:
+ CheckNodeOnline(self, node)
+
+ self.instance = instance
+
+ if instance.disk_template not in constants.DTS_GROWABLE:
+ raise errors.OpPrereqError("Instance's disk layout does not support"
+ " growing", errors.ECODE_INVAL)
+
+ self.disk = instance.FindDisk(self.op.disk)
+
+ if self.op.absolute:
+ self.target = self.op.amount
+ self.delta = self.target - self.disk.size
+ if self.delta < 0:
+ raise errors.OpPrereqError("Requested size (%s) is smaller than "
+ "current disk size (%s)" %
+ (utils.FormatUnit(self.target, "h"),
+ utils.FormatUnit(self.disk.size, "h")),
+ errors.ECODE_STATE)
+ else:
+ self.delta = self.op.amount
+ self.target = self.disk.size + self.delta
+ if self.delta < 0:
+ raise errors.OpPrereqError("Requested increment (%s) is negative" %
+ utils.FormatUnit(self.delta, "h"),
+ errors.ECODE_INVAL)
+
+ self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
+
+ def _CheckDiskSpace(self, nodenames, req_vgspace):
+ template = self.instance.disk_template
+ if template not in (constants.DTS_NO_FREE_SPACE_CHECK):
+ # TODO: check the free disk space for file, when that feature will be
+ # supported
+ nodes = map(self.cfg.GetNodeInfo, nodenames)
+ es_nodes = filter(lambda n: IsExclusiveStorageEnabledNode(self.cfg, n),
+ nodes)
+ if es_nodes:
+ # With exclusive storage we need to do something smarter than just looking
+ # at free space; for now, let's simply abort the operation.
+ raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
+ " is enabled", errors.ECODE_STATE)
+ CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
+
+ def Exec(self, feedback_fn):
+ """Execute disk grow.
+
+ """
+ instance = self.instance
+ disk = self.disk
+
+ assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
+ assert (self.owned_locks(locking.LEVEL_NODE) ==
+ self.owned_locks(locking.LEVEL_NODE_RES))
+
+ wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
+
+ disks_ok, _ = AssembleInstanceDisks(self, self.instance, disks=[disk])
+ if not disks_ok:
+ raise errors.OpExecError("Cannot activate block device to grow")
+
+ feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
+ (self.op.disk, instance.name,
+ utils.FormatUnit(self.delta, "h"),
+ utils.FormatUnit(self.target, "h")))
+
+ # First run all grow ops in dry-run mode
+ for node in instance.all_nodes:
+ self.cfg.SetDiskID(disk, node)
+ result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
+ True, True)
+ result.Raise("Dry-run grow request failed to node %s" % node)
+
+ if wipe_disks:
+ # Get disk size from primary node for wiping
+ result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
+ result.Raise("Failed to retrieve disk size from node '%s'" %
+ instance.primary_node)
+
+ (disk_size_in_bytes, ) = result.payload
+
+ if disk_size_in_bytes is None:
+ raise errors.OpExecError("Failed to retrieve disk size from primary"
+ " node '%s'" % instance.primary_node)
+
+ old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
+
+ assert old_disk_size >= disk.size, \
+ ("Retrieved disk size too small (got %s, should be at least %s)" %
+ (old_disk_size, disk.size))
+ else:
+ old_disk_size = None
+
+ # We know that (as far as we can test) operations across different
+ # nodes will succeed, time to run it for real on the backing storage
+ for node in instance.all_nodes:
+ self.cfg.SetDiskID(disk, node)
+ result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
+ False, True)
+ result.Raise("Grow request failed to node %s" % node)
+
+ # And now execute it for logical storage, on the primary node
+ node = instance.primary_node
+ self.cfg.SetDiskID(disk, node)
+ result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
+ False, False)
+ result.Raise("Grow request failed to node %s" % node)
+
+ disk.RecordGrow(self.delta)
+ self.cfg.Update(instance, feedback_fn)
+
+ # Changes have been recorded, release node lock
+ ReleaseLocks(self, locking.LEVEL_NODE)
+
+ # Downgrade lock while waiting for sync
+ self.glm.downgrade(locking.LEVEL_INSTANCE)
+
+ assert wipe_disks ^ (old_disk_size is None)
+
+ if wipe_disks:
+ assert instance.disks[self.op.disk] == disk
+
+ # Wipe newly added disk space
+ WipeDisks(self, instance,
+ disks=[(self.op.disk, disk, old_disk_size)])
+
+ if self.op.wait_for_sync:
+ disk_abort = not WaitForSync(self, instance, disks=[disk])
+ if disk_abort:
+ self.LogWarning("Disk syncing has not returned a good status; check"
+ " the instance")
+ if instance.admin_state != constants.ADMINST_UP:
+ _SafeShutdownInstanceDisks(self, instance, disks=[disk])
+ elif instance.admin_state != constants.ADMINST_UP:
+ self.LogWarning("Not shutting down the disk even if the instance is"
+ " not supposed to be running because no wait for"
+ " sync mode was requested")
+
+ assert self.owned_locks(locking.LEVEL_NODE_RES)
+ assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
+
+
+ class LUInstanceReplaceDisks(LogicalUnit):
+ """Replace the disks of an instance.
+
+ """
+ HPATH = "mirrors-replace"
+ HTYPE = constants.HTYPE_INSTANCE
+ REQ_BGL = False
+
+ def CheckArguments(self):
+ """Check arguments.
+
+ """
+ remote_node = self.op.remote_node
+ ialloc = self.op.iallocator
+ if self.op.mode == constants.REPLACE_DISK_CHG:
+ if remote_node is None and ialloc is None:
+ raise errors.OpPrereqError("When changing the secondary either an"
+ " iallocator script must be used or the"
+ " new node given", errors.ECODE_INVAL)
+ else:
+ CheckIAllocatorOrNode(self, "iallocator", "remote_node")
+
+ elif remote_node is not None or ialloc is not None:
+ # Not replacing the secondary
+ raise errors.OpPrereqError("The iallocator and new node options can"
+ " only be used when changing the"
+ " secondary node", errors.ECODE_INVAL)
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+
+ assert locking.LEVEL_NODE not in self.needed_locks
+ assert locking.LEVEL_NODE_RES not in self.needed_locks
+ assert locking.LEVEL_NODEGROUP not in self.needed_locks
+
+ assert self.op.iallocator is None or self.op.remote_node is None, \
+ "Conflicting options"
+
+ if self.op.remote_node is not None:
+ self.op.remote_node = ExpandNodeName(self.cfg, self.op.remote_node)
+
+ # Warning: do not remove the locking of the new secondary here
- # unless DRBD8.AddChildren is changed to work in parallel;
++ # unless DRBD8Dev.AddChildren is changed to work in parallel;
+ # currently it doesn't since parallel invocations of
+ # FindUnusedMinor will conflict
+ self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+ else:
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+
+ if self.op.iallocator is not None:
+ # iallocator will select a new node in the same group
+ self.needed_locks[locking.LEVEL_NODEGROUP] = []
+ self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
+
+ self.needed_locks[locking.LEVEL_NODE_RES] = []
+
+ self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
+ self.op.iallocator, self.op.remote_node,
+ self.op.disks, self.op.early_release,
+ self.op.ignore_ipolicy)
+
+ self.tasklets = [self.replacer]
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODEGROUP:
+ assert self.op.remote_node is None
+ assert self.op.iallocator is not None
+ assert not self.needed_locks[locking.LEVEL_NODEGROUP]
+
+ self.share_locks[locking.LEVEL_NODEGROUP] = 1
+ # Lock all groups used by instance optimistically; this requires going
+ # via the node before it's locked, requiring verification later on
+ self.needed_locks[locking.LEVEL_NODEGROUP] = \
+ self.cfg.GetInstanceNodeGroups(self.op.instance_name)
+
+ elif level == locking.LEVEL_NODE:
+ if self.op.iallocator is not None:
+ assert self.op.remote_node is None
+ assert not self.needed_locks[locking.LEVEL_NODE]
+ assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
+
+ # Lock member nodes of all locked groups
+ self.needed_locks[locking.LEVEL_NODE] = \
+ [node_name
+ for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
+ for node_name in self.cfg.GetNodeGroup(group_uuid).members]
+ else:
+ assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
+
+ self._LockInstancesNodes()
+
+ elif level == locking.LEVEL_NODE_RES:
+ # Reuse node locks
+ self.needed_locks[locking.LEVEL_NODE_RES] = \
+ self.needed_locks[locking.LEVEL_NODE]
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on the master, the primary and all the secondaries.
+
+ """
+ instance = self.replacer.instance
+ env = {
+ "MODE": self.op.mode,
+ "NEW_SECONDARY": self.op.remote_node,
+ "OLD_SECONDARY": instance.secondary_nodes[0],
+ }
+ env.update(BuildInstanceHookEnvByObject(self, instance))
+ return env
+
+ def BuildHooksNodes(self):
+ """Build hooks nodes.
+
+ """
+ instance = self.replacer.instance
+ nl = [
+ self.cfg.GetMasterNode(),
+ instance.primary_node,
+ ]
+ if self.op.remote_node is not None:
+ nl.append(self.op.remote_node)
+ return nl, nl
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ """
+ assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
+ self.op.iallocator is None)
+
+ # Verify if node group locks are still correct
+ owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
+ if owned_groups:
+ CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
+
+ return LogicalUnit.CheckPrereq(self)
+
+
+ class LUInstanceActivateDisks(NoHooksLU):
+ """Bring up an instance's disks.
+
+ """
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster.
+
+ """
+ self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
+ CheckNodeOnline(self, self.instance.primary_node)
+
+ def Exec(self, feedback_fn):
+ """Activate the disks.
+
+ """
+ disks_ok, disks_info = \
+ AssembleInstanceDisks(self, self.instance,
+ ignore_size=self.op.ignore_size)
+ if not disks_ok:
+ raise errors.OpExecError("Cannot activate block devices")
+
+ if self.op.wait_for_sync:
+ if not WaitForSync(self, self.instance):
+ raise errors.OpExecError("Some disks of the instance are degraded!")
+
+ return disks_info
+
+
+ class LUInstanceDeactivateDisks(NoHooksLU):
+ """Shutdown an instance's disks.
+
+ """
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster.
+
+ """
+ self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ assert self.instance is not None, \
+ "Cannot retrieve locked instance %s" % self.op.instance_name
+
+ def Exec(self, feedback_fn):
+ """Deactivate the disks
+
+ """
+ instance = self.instance
+ if self.op.force:
+ ShutdownInstanceDisks(self, instance)
+ else:
+ _SafeShutdownInstanceDisks(self, instance)
+
+
+ def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
+ ldisk=False):
+ """Check that mirrors are not degraded.
+
+ @attention: The device has to be annotated already.
+
+ The ldisk parameter, if True, will change the test from the
+ is_degraded attribute (which represents overall non-ok status for
+ the device(s)) to the ldisk (representing the local storage status).
+
+ """
+ lu.cfg.SetDiskID(dev, node)
+
+ result = True
+
+ if on_primary or dev.AssembleOnSecondary():
+ rstats = lu.rpc.call_blockdev_find(node, dev)
+ msg = rstats.fail_msg
+ if msg:
+ lu.LogWarning("Can't find disk on node %s: %s", node, msg)
+ result = False
+ elif not rstats.payload:
+ lu.LogWarning("Can't find disk on node %s", node)
+ result = False
+ else:
+ if ldisk:
+ result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
+ else:
+ result = result and not rstats.payload.is_degraded
+
+ if dev.children:
+ for child in dev.children:
+ result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
+ on_primary)
+
+ return result
+
+
+ def CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
+ """Wrapper around L{_CheckDiskConsistencyInner}.
+
+ """
+ (disk,) = AnnotateDiskParams(instance, [dev], lu.cfg)
+ return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
+ ldisk=ldisk)
+
+
+ def _BlockdevFind(lu, node, dev, instance):
+ """Wrapper around call_blockdev_find to annotate diskparams.
+
+ @param lu: A reference to the lu object
+ @param node: The node to call out
+ @param dev: The device to find
+ @param instance: The instance object the device belongs to
+ @return: The result of the rpc call
+
+ """
+ (disk,) = AnnotateDiskParams(instance, [dev], lu.cfg)
+ return lu.rpc.call_blockdev_find(node, disk)
+
+
+ def _GenerateUniqueNames(lu, exts):
+ """Generate suitable LV names.
+
+ This will generate a unique logical volume name for each given extension.
+
+ """
+ results = []
+ for val in exts:
+ new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
+ results.append("%s%s" % (new_id, val))
+ return results
+
+
+ class TLReplaceDisks(Tasklet):
+ """Replaces disks for an instance.
+
+ Note: Locking is not within the scope of this class.
+
+ """
+ def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
+ disks, early_release, ignore_ipolicy):
+ """Initializes this class.
+
+ """
+ Tasklet.__init__(self, lu)
+
+ # Parameters
+ self.instance_name = instance_name
+ self.mode = mode
+ self.iallocator_name = iallocator_name
+ self.remote_node = remote_node
+ self.disks = disks
+ self.early_release = early_release
+ self.ignore_ipolicy = ignore_ipolicy
+
+ # Runtime data
+ self.instance = None
+ self.new_node = None
+ self.target_node = None
+ self.other_node = None
+ self.remote_node_info = None
+ self.node_secondary_ip = None
+
+ @staticmethod
+ def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
+ """Compute a new secondary node using an IAllocator.
+
+ """
+ req = iallocator.IAReqRelocate(name=instance_name,
+ relocate_from=list(relocate_from))
+ ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
+
+ ial.Run(iallocator_name)
+
+ if not ial.success:
+ raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
+ " %s" % (iallocator_name, ial.info),
+ errors.ECODE_NORES)
+
+ remote_node_name = ial.result[0]
+
+ lu.LogInfo("Selected new secondary for instance '%s': %s",
+ instance_name, remote_node_name)
+
+ return remote_node_name
+
+ def _FindFaultyDisks(self, node_name):
+ """Wrapper for L{FindFaultyInstanceDisks}.
+
+ """
+ return FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
+ node_name, True)
+
+ def _CheckDisksActivated(self, instance):
+ """Checks if the instance disks are activated.
+
+ @param instance: The instance to check disks
+ @return: True if they are activated, False otherwise
+
+ """
+ nodes = instance.all_nodes
+
+ for idx, dev in enumerate(instance.disks):
+ for node in nodes:
+ self.lu.LogInfo("Checking disk/%d on %s", idx, node)
+ self.cfg.SetDiskID(dev, node)
+
+ result = _BlockdevFind(self, node, dev, instance)
+
+ if result.offline:
+ continue
+ elif result.fail_msg or not result.payload:
+ return False
+
+ return True
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster.
+
+ """
+ self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
+ assert instance is not None, \
+ "Cannot retrieve locked instance %s" % self.instance_name
+
+ if instance.disk_template != constants.DT_DRBD8:
+ raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
+ " instances", errors.ECODE_INVAL)
+
+ if len(instance.secondary_nodes) != 1:
+ raise errors.OpPrereqError("The instance has a strange layout,"
+ " expected one secondary but found %d" %
+ len(instance.secondary_nodes),
+ errors.ECODE_FAULT)
+
+ instance = self.instance
+ secondary_node = instance.secondary_nodes[0]
+
+ if self.iallocator_name is None:
+ remote_node = self.remote_node
+ else:
+ remote_node = self._RunAllocator(self.lu, self.iallocator_name,
+ instance.name, instance.secondary_nodes)
+
+ if remote_node is None:
+ self.remote_node_info = None
+ else:
+ assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
+ "Remote node '%s' is not locked" % remote_node
+
+ self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
+ assert self.remote_node_info is not None, \
+ "Cannot retrieve locked node %s" % remote_node
+
+ if remote_node == self.instance.primary_node:
+ raise errors.OpPrereqError("The specified node is the primary node of"
+ " the instance", errors.ECODE_INVAL)
+
+ if remote_node == secondary_node:
+ raise errors.OpPrereqError("The specified node is already the"
+ " secondary node of the instance",
+ errors.ECODE_INVAL)
+
+ if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
+ constants.REPLACE_DISK_CHG):
+ raise errors.OpPrereqError("Cannot specify disks to be replaced",
+ errors.ECODE_INVAL)
+
+ if self.mode == constants.REPLACE_DISK_AUTO:
+ if not self._CheckDisksActivated(instance):
+ raise errors.OpPrereqError("Please run activate-disks on instance %s"
+ " first" % self.instance_name,
+ errors.ECODE_STATE)
+ faulty_primary = self._FindFaultyDisks(instance.primary_node)
+ faulty_secondary = self._FindFaultyDisks(secondary_node)
+
+ if faulty_primary and faulty_secondary:
+ raise errors.OpPrereqError("Instance %s has faulty disks on more than"
+ " one node and can not be repaired"
+ " automatically" % self.instance_name,
+ errors.ECODE_STATE)
+
+ if faulty_primary:
+ self.disks = faulty_primary
+ self.target_node = instance.primary_node
+ self.other_node = secondary_node
+ check_nodes = [self.target_node, self.other_node]
+ elif faulty_secondary:
+ self.disks = faulty_secondary
+ self.target_node = secondary_node
+ self.other_node = instance.primary_node
+ check_nodes = [self.target_node, self.other_node]
+ else:
+ self.disks = []
+ check_nodes = []
+
+ else:
+ # Non-automatic modes
+ if self.mode == constants.REPLACE_DISK_PRI:
+ self.target_node = instance.primary_node
+ self.other_node = secondary_node
+ check_nodes = [self.target_node, self.other_node]
+
+ elif self.mode == constants.REPLACE_DISK_SEC:
+ self.target_node = secondary_node
+ self.other_node = instance.primary_node
+ check_nodes = [self.target_node, self.other_node]
+
+ elif self.mode == constants.REPLACE_DISK_CHG:
+ self.new_node = remote_node
+ self.other_node = instance.primary_node
+ self.target_node = secondary_node
+ check_nodes = [self.new_node, self.other_node]
+
+ CheckNodeNotDrained(self.lu, remote_node)
+ CheckNodeVmCapable(self.lu, remote_node)
+
+ old_node_info = self.cfg.GetNodeInfo(secondary_node)
+ assert old_node_info is not None
+ if old_node_info.offline and not self.early_release:
+ # doesn't make sense to delay the release
+ self.early_release = True
+ self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
+ " early-release mode", secondary_node)
+
+ else:
+ raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
+ self.mode)
+
+ # If not specified all disks should be replaced
+ if not self.disks:
+ self.disks = range(len(self.instance.disks))
+
+ # TODO: This is ugly, but right now we can't distinguish between internal
+ # submitted opcode and external one. We should fix that.
+ if self.remote_node_info:
+ # We change the node, lets verify it still meets instance policy
+ new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
+ cluster = self.cfg.GetClusterInfo()
+ ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
+ new_group_info)
+ CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
+ self.cfg, ignore=self.ignore_ipolicy)
+
+ for node in check_nodes:
+ CheckNodeOnline(self.lu, node)
+
+ touched_nodes = frozenset(node_name for node_name in [self.new_node,
+ self.other_node,
+ self.target_node]
+ if node_name is not None)
+
+ # Release unneeded node and node resource locks
+ ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
+ ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
+ ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
+
+ # Release any owned node group
+ ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
+
+ # Check whether disks are valid
+ for disk_idx in self.disks:
+ instance.FindDisk(disk_idx)
+
+ # Get secondary node IP addresses
+ self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
+ in self.cfg.GetMultiNodeInfo(touched_nodes))
+
+ def Exec(self, feedback_fn):
+ """Execute disk replacement.
+
+ This dispatches the disk replacement to the appropriate handler.
+
+ """
+ if __debug__:
+ # Verify owned locks before starting operation
+ owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
+ assert set(owned_nodes) == set(self.node_secondary_ip), \
+ ("Incorrect node locks, owning %s, expected %s" %
+ (owned_nodes, self.node_secondary_ip.keys()))
+ assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
+ self.lu.owned_locks(locking.LEVEL_NODE_RES))
+ assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
+
+ owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
+ assert list(owned_instances) == [self.instance_name], \
+ "Instance '%s' not locked" % self.instance_name
+
+ assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
+ "Should not own any node group lock at this point"
+
+ if not self.disks:
+ feedback_fn("No disks need replacement for instance '%s'" %
+ self.instance.name)
+ return
+
+ feedback_fn("Replacing disk(s) %s for instance '%s'" %
+ (utils.CommaJoin(self.disks), self.instance.name))
+ feedback_fn("Current primary node: %s" % self.instance.primary_node)
+ feedback_fn("Current seconary node: %s" %
+ utils.CommaJoin(self.instance.secondary_nodes))
+
+ activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
+
+ # Activate the instance disks if we're replacing them on a down instance
+ if activate_disks:
+ StartInstanceDisks(self.lu, self.instance, True)
+
+ try:
+ # Should we replace the secondary node?
+ if self.new_node is not None:
+ fn = self._ExecDrbd8Secondary
+ else:
+ fn = self._ExecDrbd8DiskOnly
+
+ result = fn(feedback_fn)
+ finally:
+ # Deactivate the instance disks if we're replacing them on a
+ # down instance
+ if activate_disks:
+ _SafeShutdownInstanceDisks(self.lu, self.instance)
+
+ assert not self.lu.owned_locks(locking.LEVEL_NODE)
+
+ if __debug__:
+ # Verify owned locks
+ owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
+ nodes = frozenset(self.node_secondary_ip)
+ assert ((self.early_release and not owned_nodes) or
+ (not self.early_release and not (set(owned_nodes) - nodes))), \
+ ("Not owning the correct locks, early_release=%s, owned=%r,"
+ " nodes=%r" % (self.early_release, owned_nodes, nodes))
+
+ return result
+
+ def _CheckVolumeGroup(self, nodes):
+ self.lu.LogInfo("Checking volume groups")
+
+ vgname = self.cfg.GetVGName()
+
+ # Make sure volume group exists on all involved nodes
+ results = self.rpc.call_vg_list(nodes)
+ if not results:
+ raise errors.OpExecError("Can't list volume groups on the nodes")
+
+ for node in nodes:
+ res = results[node]
+ res.Raise("Error checking node %s" % node)
+ if vgname not in res.payload:
+ raise errors.OpExecError("Volume group '%s' not found on node %s" %
+ (vgname, node))
+
+ def _CheckDisksExistence(self, nodes):
+ # Check disk existence
+ for idx, dev in enumerate(self.instance.disks):
+ if idx not in self.disks:
+ continue
+
+ for node in nodes:
+ self.lu.LogInfo("Checking disk/%d on %s", idx, node)
+ self.cfg.SetDiskID(dev, node)
+
+ result = _BlockdevFind(self, node, dev, self.instance)
+
+ msg = result.fail_msg
+ if msg or not result.payload:
+ if not msg:
+ msg = "disk not found"
+ raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
+ (idx, node, msg))
+
+ def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
+ for idx, dev in enumerate(self.instance.disks):
+ if idx not in self.disks:
+ continue
+
+ self.lu.LogInfo("Checking disk/%d consistency on node %s" %
+ (idx, node_name))
+
+ if not CheckDiskConsistency(self.lu, self.instance, dev, node_name,
+ on_primary, ldisk=ldisk):
+ raise errors.OpExecError("Node %s has degraded storage, unsafe to"
+ " replace disks for instance %s" %
+ (node_name, self.instance.name))
+
+ def _CreateNewStorage(self, node_name):
+ """Create new storage on the primary or secondary node.
+
+ This is only used for same-node replaces, not for changing the
+ secondary node, hence we don't want to modify the existing disk.
+
+ For every disk whose index is in self.disks a fresh data LV and a fresh
+ meta LV are described and created on node_name.
+
+ Returns a dict mapping each disk's iv_name to a tuple
+ (dev, old_lvs, new_lvs), where old_lvs are copies of the disk's current
+ children and new_lvs are the newly created LV objects.
+
+ """
+ iv_names = {}
+
+ disks = AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
+ for idx, dev in enumerate(disks):
+ if idx not in self.disks:
+ continue
+
+ self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
+
+ self.cfg.SetDiskID(dev, node_name)
+
+ # One name for the data volume and one for the metadata volume
+ lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
+ names = _GenerateUniqueNames(self.lu, lv_names)
+
+ # The new LVs reuse the volume group (first element of logical_id) and
+ # the parameters of the children they are going to replace
+ (data_disk, meta_disk) = dev.children
+ vg_data = data_disk.logical_id[0]
+ lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
+ logical_id=(vg_data, names[0]),
+ params=data_disk.params)
+ vg_meta = meta_disk.logical_id[0]
+ lv_meta = objects.Disk(dev_type=constants.LD_LV,
+ size=constants.DRBD_META_SIZE,
+ logical_id=(vg_meta, names[1]),
+ params=meta_disk.params)
+
+ new_lvs = [lv_data, lv_meta]
+ # Copies, so that callers can alter them without touching dev.children
+ old_lvs = [child.Copy() for child in dev.children]
+ iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
+ excl_stor = IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
+
+ # we pass force_create=True to force the LVM creation
+ for new_lv in new_lvs:
+ _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
+ GetInstanceInfoText(self.instance), False,
+ excl_stor)
+
+ return iv_names
+
+ def _CheckDevices(self, node_name, iv_names):
+ """Check that every device recorded in iv_names is present and healthy.
+
+ iv_names maps a name to a 3-tuple whose first element is the device to
+ look up on node_name; the other two elements are ignored here.
+
+ Raises errors.OpExecError if a device cannot be found or if the lookup
+ result reports it as degraded.
+
+ """
+ for name, (dev, _, _) in iv_names.iteritems():
+ self.cfg.SetDiskID(dev, node_name)
+
+ result = _BlockdevFind(self, node_name, dev, self.instance)
+
+ msg = result.fail_msg
+ # A successful call with an empty payload is also treated as "not found"
+ if msg or not result.payload:
+ if not msg:
+ msg = "disk not found"
+ raise errors.OpExecError("Can't find DRBD device %s: %s" %
+ (name, msg))
+
+ if result.payload.is_degraded:
+ raise errors.OpExecError("DRBD device %s is degraded!" % name)
+
+ def _RemoveOldStorage(self, node_name, iv_names):
+ """Remove the old logical volumes recorded in iv_names from node_name.
+
+ iv_names maps a name to a 3-tuple whose second element is the list of
+ volumes to remove. Removal is best effort: a failed removal is only
+ reported through a warning and does not abort the loop.
+
+ """
+ for name, (_, old_lvs, _) in iv_names.iteritems():
+ self.lu.LogInfo("Remove logical volumes for %s", name)
+
+ for lv in old_lvs:
+ self.cfg.SetDiskID(lv, node_name)
+
+ msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
+ if msg:
+ # Not fatal: the operator is asked to clean up by hand
+ self.lu.LogWarning("Can't remove old LV: %s", msg,
+ hint="remove unused LVs manually")
+
+ def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
+ """Replace a disk on the primary or secondary for DRBD 8.
+
+ The algorithm for replace is quite complicated:
+
+ 1. for each disk to be replaced:
+
+ 1. create new LVs on the target node with unique names
+ 1. detach old LVs from the drbd device
+ 1. rename old LVs to name_replaced.<time_t>
+ 1. rename new LVs to old LVs
+ 1. attach the new LVs (with the old names now) to the drbd device
+
+ 1. wait for sync across all devices
+
+ 1. for each modified disk:
+
+ 1. remove old LVs (which have the name name_replaces.<time_t>)
+
+ Failures are not very well handled.
+
+ """
+ steps_total = 6
+
+ # Step: check device activation
+ self.lu.LogStep(1, steps_total, "Check device existence")
+ self._CheckDisksExistence([self.other_node, self.target_node])
+ self._CheckVolumeGroup([self.target_node, self.other_node])
+
+ # Step: check other node consistency
+ self.lu.LogStep(2, steps_total, "Check peer consistency")
+ self._CheckDisksConsistency(self.other_node,
+ self.other_node == self.instance.primary_node,
+ False)
+
+ # Step: create new storage
+ self.lu.LogStep(3, steps_total, "Allocate new storage")
+ iv_names = self._CreateNewStorage(self.target_node)
+
+ # Step: for each lv, detach+rename*2+attach
+ self.lu.LogStep(4, steps_total, "Changing drbd configuration")
+ for dev, old_lvs, new_lvs in iv_names.itervalues():
+ self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
+
+ result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
+ old_lvs)
+ result.Raise("Can't detach drbd from local storage on node"
+ " %s for device %s" % (self.target_node, dev.iv_name))
+ #dev.children = []
+ #cfg.Update(instance)
+
+ # ok, we created the new LVs, so now we know we have the needed
+ # storage; as such, we proceed on the target node to rename
+ # old_lv to _old, and new_lv to old_lv; note that we rename LVs
+ # using the assumption that logical_id == physical_id (which in
+ # turn is the unique_id on that node)
+
+ # FIXME(iustin): use a better name for the replaced LVs
+ temp_suffix = int(time.time())
+ ren_fn = lambda d, suff: (d.physical_id[0],
+ d.physical_id[1] + "_replaced-%s" % suff)
+
+ # Build the rename list based on what LVs exist on the node
+ rename_old_to_new = []
+ for to_ren in old_lvs:
+ result = self.rpc.call_blockdev_find(self.target_node, to_ren)
+ if not result.fail_msg and result.payload:
+ # device exists
+ rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
+
+ self.lu.LogInfo("Renaming the old LVs on the target node")
+ result = self.rpc.call_blockdev_rename(self.target_node,
+ rename_old_to_new)
+ result.Raise("Can't rename old LVs on node %s" % self.target_node)
+
+ # Now we rename the new LVs to the old LVs
+ self.lu.LogInfo("Renaming the new LVs on the target node")
+ rename_new_to_old = [(new, old.physical_id)
+ for old, new in zip(old_lvs, new_lvs)]
+ result = self.rpc.call_blockdev_rename(self.target_node,
+ rename_new_to_old)
+ result.Raise("Can't rename new LVs on node %s" % self.target_node)
+
+ # Intermediate steps of in memory modifications
+ for old, new in zip(old_lvs, new_lvs):
+ new.logical_id = old.logical_id
+ self.cfg.SetDiskID(new, self.target_node)
+
+ # We need to modify old_lvs so that removal later removes the
+ # right LVs, not the newly added ones; note that old_lvs is a
+ # copy here
+ for disk in old_lvs:
+ disk.logical_id = ren_fn(disk, temp_suffix)
+ self.cfg.SetDiskID(disk, self.target_node)
+
+ # Now that the new lvs have the old name, we can add them to the device
+ self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
+ result = self.rpc.call_blockdev_addchildren(self.target_node,
+ (dev, self.instance), new_lvs)
+ msg = result.fail_msg
+ if msg:
+ for new_lv in new_lvs:
+ msg2 = self.rpc.call_blockdev_remove(self.target_node,
+ new_lv).fail_msg
+ if msg2:
+ self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
+ hint=("cleanup manually the unused logical"
+ "volumes"))
+ raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
+
+ cstep = itertools.count(5)
+
+ if self.early_release:
+ self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
+ self._RemoveOldStorage(self.target_node, iv_names)
+ # TODO: Check if releasing locks early still makes sense
+ ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
+ else:
+ # Release all resource locks except those used by the instance
+ ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
+ keep=self.node_secondary_ip.keys())
+
+ # Release all node locks while waiting for sync
+ ReleaseLocks(self.lu, locking.LEVEL_NODE)
+
+ # TODO: Can the instance lock be downgraded here? Take the optional disk
+ # shutdown in the caller into consideration.
+
+ # Wait for sync
+ # This can fail as the old devices are degraded and _WaitForSync
+ # does a combined result over all disks, so we don't check its return value
+ self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
+ WaitForSync(self.lu, self.instance)
+
+ # Check all devices manually
+ self._CheckDevices(self.instance.primary_node, iv_names)
+
+ # Step: remove old storage
+ if not self.early_release:
+ self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
+ self._RemoveOldStorage(self.target_node, iv_names)
+
+ def _ExecDrbd8Secondary(self, feedback_fn):
+ """Replace the secondary node for DRBD 8.
+
+ The algorithm for replace is quite complicated:
+ - for all disks of the instance:
+ - create new LVs on the new node with same names
+ - shutdown the drbd device on the old secondary
+ - disconnect the drbd network on the primary
+ - create the drbd device on the new secondary
+ - network attach the drbd on the primary, using an artifice:
+ the drbd code for Attach() will connect to the network if it
+ finds a device which is connected to the good local disks but
+ not network enabled
+ - wait for sync across all devices
+ - remove all disks from the old secondary
+
+ Failures are not very well handled.
+
+ """
+ steps_total = 6
+
+ pnode = self.instance.primary_node
+
+ # Step: check device activation
+ self.lu.LogStep(1, steps_total, "Check device existence")
+ self._CheckDisksExistence([self.instance.primary_node])
+ self._CheckVolumeGroup([self.instance.primary_node])
+
+ # Step: check other node consistency
+ self.lu.LogStep(2, steps_total, "Check peer consistency")
+ self._CheckDisksConsistency(self.instance.primary_node, True, True)
+
+ # Step: create new storage
+ self.lu.LogStep(3, steps_total, "Allocate new storage")
+ disks = AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
+ excl_stor = IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
+ for idx, dev in enumerate(disks):
+ self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
+ (self.new_node, idx))
+ # we pass force_create=True to force LVM creation
+ for new_lv in dev.children:
+ _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
+ True, GetInstanceInfoText(self.instance), False,
+ excl_stor)
+
+ # Step 4: dbrd minors and drbd setups changes
+ # after this, we must manually remove the drbd minors on both the
+ # error and the success paths
+ self.lu.LogStep(4, steps_total, "Changing drbd configuration")
+ minors = self.cfg.AllocateDRBDMinor([self.new_node
+ for dev in self.instance.disks],
+ self.instance.name)
+ logging.debug("Allocated minors %r", minors)
+
+ iv_names = {}
+ for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
+ self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
+ (self.new_node, idx))
+ # create new devices on new_node; note that we create two IDs:
+ # one without port, so the drbd will be activated without
+ # networking information on the new node at this stage, and one
+ # with network, for the latter activation in step 4
+ (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
+ if self.instance.primary_node == o_node1:
+ p_minor = o_minor1
+ else:
+ assert self.instance.primary_node == o_node2, "Three-node instance?"
+ p_minor = o_minor2
+
+ new_alone_id = (self.instance.primary_node, self.new_node, None,
+ p_minor, new_minor, o_secret)
+ new_net_id = (self.instance.primary_node, self.new_node, o_port,
+ p_minor, new_minor, o_secret)
+
+ iv_names[idx] = (dev, dev.children, new_net_id)
+ logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
+ new_net_id)
+ new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
+ logical_id=new_alone_id,
+ children=dev.children,
+ size=dev.size,
+ params={})
+ (anno_new_drbd,) = AnnotateDiskParams(self.instance, [new_drbd],
+ self.cfg)
+ try:
+ CreateSingleBlockDev(self.lu, self.new_node, self.instance,
+ anno_new_drbd,
+ GetInstanceInfoText(self.instance), False,
+ excl_stor)
+ except errors.GenericError:
+ self.cfg.ReleaseDRBDMinors(self.instance.name)
+ raise
+
+ # We have new devices, shutdown the drbd on the old secondary
+ for idx, dev in enumerate(self.instance.disks):
+ self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
+ self.cfg.SetDiskID(dev, self.target_node)
+ msg = self.rpc.call_blockdev_shutdown(self.target_node,
+ (dev, self.instance)).fail_msg
+ if msg:
+ self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
+ "node: %s" % (idx, msg),
+ hint=("Please cleanup this device manually as"
+ " soon as possible"))
+
+ self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
+ result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
+ self.instance.disks)[pnode]
+
+ msg = result.fail_msg
+ if msg:
+ # detaches didn't succeed (unlikely)
+ self.cfg.ReleaseDRBDMinors(self.instance.name)
+ raise errors.OpExecError("Can't detach the disks from the network on"
+ " old node: %s" % (msg,))
+
+ # if we managed to detach at least one, we update all the disks of
+ # the instance to point to the new secondary
+ self.lu.LogInfo("Updating instance configuration")
+ for dev, _, new_logical_id in iv_names.itervalues():
+ dev.logical_id = new_logical_id
+ self.cfg.SetDiskID(dev, self.instance.primary_node)
+
+ self.cfg.Update(self.instance, feedback_fn)
+
+ # Release all node locks (the configuration has been updated)
+ ReleaseLocks(self.lu, locking.LEVEL_NODE)
+
+ # and now perform the drbd attach
+ self.lu.LogInfo("Attaching primary drbds to new secondary"
+ " (standalone => connected)")
+ result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
+ self.new_node],
+ self.node_secondary_ip,
+ (self.instance.disks, self.instance),
+ self.instance.name,
+ False)
+ for to_node, to_result in result.items():
+ msg = to_result.fail_msg
+ if msg:
+ self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
+ to_node, msg,
+ hint=("please do a gnt-instance info to see the"
+ " status of disks"))
+
+ cstep = itertools.count(5)
+
+ if self.early_release:
+ self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
+ self._RemoveOldStorage(self.target_node, iv_names)
+ # TODO: Check if releasing locks early still makes sense
+ ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
+ else:
+ # Release all resource locks except those used by the instance
+ ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
+ keep=self.node_secondary_ip.keys())
+
+ # TODO: Can the instance lock be downgraded here? Take the optional disk
+ # shutdown in the caller into consideration.
+
+ # Wait for sync
+ # This can fail as the old devices are degraded and _WaitForSync
+ # does a combined result over all disks, so we don't check its return value
+ self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
+ WaitForSync(self.lu, self.instance)
+
+ # Check all devices manually
+ self._CheckDevices(self.instance.primary_node, iv_names)
+
+ # Step: remove old storage
+ if not self.early_release:
+ self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
+ self._RemoveOldStorage(self.target_node, iv_names)