import os
import os.path
-import sha
import time
import tempfile
import re
import platform
import logging
import copy
-import itertools
+import random
from ganeti import ssh
from ganeti import utils
from ganeti import objects
from ganeti import opcodes
from ganeti import serializer
+from ganeti import ssconf
class LogicalUnit(object):
- implement BuildHooksEnv
- redefine HPATH and HTYPE
- optionally redefine their run requirements:
- REQ_MASTER: the LU needs to run on the master node
REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
Note that all commands require root permissions.
HPATH = None
HTYPE = None
_OP_REQP = []
- REQ_MASTER = True
REQ_BGL = True
def __init__(self, processor, op, context, rpc):
if attr_val is None:
raise errors.OpPrereqError("Required parameter '%s' missing" %
attr_name)
-
- if not self.cfg.IsCluster():
- raise errors.OpPrereqError("Cluster not initialized yet,"
- " use 'gnt-cluster init' first.")
- if self.REQ_MASTER:
- master = self.cfg.GetMasterNode()
- if master != utils.HostInfo().name:
- raise errors.OpPrereqError("Commands must be run on the master"
- " node %s" % master)
+ self.CheckArguments()
def __GetSSH(self):
"""Returns the SshRunner object
ssh = property(fget=__GetSSH)
+ def CheckArguments(self):
+ """Check syntactic validity for the opcode arguments.
+
+ This method is for doing a simple syntactic check and ensuring
+ the validity of opcode parameters, without any cluster-related
+ checks. While the same can be accomplished in ExpandNames and/or
+ CheckPrereq, doing these separately is better because:
+
+ - ExpandNames is left purely as a lock-related function
+ - CheckPrereq is run after we have acquired locks (and possibly
+ waited for them)
+
+ The function is allowed to change the self.op attribute so that
+ later methods no longer need to worry about missing parameters.
+
+ """
+ pass
+
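# Illustrative sketch (editor's example, not part of the patch): a
# minimal CheckArguments override following the contract above. The LU
# name and the "force" parameter are hypothetical.
#
#   class LUExampleOp(LogicalUnit):
#     def CheckArguments(self):
#       # default the missing optional parameter so that CheckPrereq
#       # and Exec never have to guard against it
#       if not hasattr(self.op, "force"):
#         self.op.force = False
#       if not isinstance(self.op.force, bool):
#         raise errors.OpPrereqError("'force' must be a boolean")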
def ExpandNames(self):
"""Expand names for this LU.
LUs which implement this method must also populate the self.needed_locks
member, as a dict with lock levels as keys, and a list of needed lock names
as values. Rules:
- - Use an empty dict if you don't need any lock
- - If you don't need any lock at a particular level omit that level
- - Don't put anything for the BGL level
- - If you want all locks at a level use locking.ALL_SET as a value
+
+ - use an empty dict if you don't need any lock
+ - if you don't need any lock at a particular level omit that level
+ - don't put anything for the BGL level
+ - if you want all locks at a level use locking.ALL_SET as a value
If you need to share locks (rather than acquire them exclusively) at one
level you can modify self.share_locks, setting a true value (usually 1) for
that level. By default locks are not shared.
- Examples:
- # Acquire all nodes and one instance
- self.needed_locks = {
- locking.LEVEL_NODE: locking.ALL_SET,
- locking.LEVEL_INSTANCE: ['instance1.example.tld'],
- }
- # Acquire just two nodes
- self.needed_locks = {
- locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
- }
- # Acquire no locks
- self.needed_locks = {} # No, you can't leave it to the default value None
+ Examples::
+
+ # Acquire all nodes and one instance
+ self.needed_locks = {
+ locking.LEVEL_NODE: locking.ALL_SET,
+ locking.LEVEL_INSTANCE: ['instance1.example.tld'],
+ }
+ # Acquire just two nodes
+ self.needed_locks = {
+ locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
+ }
+ # Acquire no locks
+ self.needed_locks = {} # No, you can't leave it to the default value None
"""
# The implementation of this method is mandatory only if the new LU is
previous result is passed back unchanged but any LU can define it if it
wants to use the local cluster hook-scripts somehow.
- Args:
- phase: the hooks phase that has just been run
- hooks_results: the results of the multi-node hooks rpc call
- feedback_fn: function to send feedback back to the caller
- lu_result: the previous result this LU had, or None in the PRE phase.
+ @param phase: one of L{constants.HOOKS_PHASE_POST} or
+ L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
+ @param hook_results: the results of the multi-node hooks rpc call
+ @param feedback_fn: function used to send feedback back to the caller
+ @param lu_result: the previous Exec result this LU had, or None
+ in the PRE phase
+ @return: the new Exec result, based on the previous result
+ and hook results
"""
return lu_result
In the future it may grow parameters to just lock some instance's nodes, or
to just lock primaries or secondary nodes, if needed.
- If should be called in DeclareLocks in a way similar to:
+ It should be called in DeclareLocks in a way similar to::
- if level == locking.LEVEL_NODE:
- self._LockInstancesNodes()
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
@type primary_only: boolean
@param primary_only: only lock primary nodes of locked instances
HTYPE = None
-class _FieldSet(object):
- """A simple field set.
-
- Among the features are:
- - checking if a string is among a list of static string or regex objects
- - checking if a whole list of string matches
- - returning the matching groups from a regex match
-
- Internally, all fields are held as regular expression objects.
-
- """
- def __init__(self, *items):
- self.items = [re.compile("^%s$" % value) for value in items]
-
- def Extend(self, other_set):
- """Extend the field set with the items from another one"""
- self.items.extend(other_set.items)
-
- def Matches(self, field):
- """Checks if a field matches the current set
-
- @type field: str
- @param field: the string to match
- @return: either False or a regular expression match object
-
- """
- for m in itertools.ifilter(None, (val.match(field) for val in self.items)):
- return m
- return False
-
- def NonMatching(self, items):
- """Returns the list of fields not matching the current set
-
- @type items: list
- @param items: the list of fields to check
- @rtype: list
- @return: list of non-matching fields
-
- """
- return [val for val in items if not self.Matches(val)]
-
-
def _GetWantedNodes(lu, nodes):
"""Returns list of checked and expanded node names.
- Args:
- nodes: List of nodes (strings) or None for all
+ @type lu: L{LogicalUnit}
+ @param lu: the logical unit on whose behalf we execute
+ @type nodes: list
+ @param nodes: list of node names or None for all nodes
+ @rtype: list
+ @return: the list of nodes, sorted
+ @raise errors.ProgrammerError: if the nodes parameter is wrong type
"""
if not isinstance(nodes, list):
def _GetWantedInstances(lu, instances):
"""Returns list of checked and expanded instance names.
- Args:
- instances: List of instances (strings) or None for all
+ @type lu: L{LogicalUnit}
+ @param lu: the logical unit on whose behalf we execute
+ @type instances: list
+ @param instances: list of instance names or None for all instances
+ @rtype: list
+ @return: the list of instances, sorted
+ @raise errors.OpPrereqError: if the instances parameter is wrong type
+ @raise errors.OpPrereqError: if any of the passed instances is not found
"""
if not isinstance(instances, list):
wanted.append(instance)
else:
- wanted = lu.cfg.GetInstanceList()
- return utils.NiceSort(wanted)
+ wanted = utils.NiceSort(lu.cfg.GetInstanceList())
+ return wanted
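# Illustrative note (not part of the patch): utils.NiceSort orders names
# with embedded numbers naturally, which is why the list is sorted
# before being returned:
#
#   utils.NiceSort(["inst10", "inst1", "inst2"])
#     == ["inst1", "inst2", "inst10"]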
def _CheckOutputFields(static, dynamic, selected):
"""Checks whether all selected fields are valid.
- @type static: L{_FieldSet}
+ @type static: L{utils.FieldSet}
@param static: static fields set
- @type dynamic: L{_FieldSet}
+ @type dynamic: L{utils.FieldSet}
@param dynamic: dynamic fields set
"""
- f = _FieldSet()
+ f = utils.FieldSet()
f.Extend(static)
f.Extend(dynamic)
% ",".join(delta))
+def _CheckBooleanOpField(op, name):
+ """Validates boolean opcode parameters.
+
+ This will ensure that an opcode parameter is either a boolean value,
+ or None (but that it always exists).
+
+ """
+ val = getattr(op, name, None)
+ if not (val is None or isinstance(val, bool)):
+ raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
+ (name, str(val)))
+ setattr(op, name, val)
+
+
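# Illustrative sketch (not part of the patch): intended use of
# _CheckBooleanOpField on a stand-in opcode object.
#
#   class _FakeOp(object):
#     offline = True                      # a real boolean: accepted
#
#   op = _FakeOp()
#   _CheckBooleanOpField(op, "offline")   # leaves True in place
#   _CheckBooleanOpField(op, "drained")   # missing: normalized to None
#   # a non-boolean value (e.g. op.offline = 1) raises OpPrereqError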
+def _CheckNodeOnline(lu, node):
+ """Ensure that a given node is online.
+
+ @param lu: the LU on behalf of which we make the check
+ @param node: the node to check
+ @raise errors.OpPrereqError: if the node is offline
+
+ """
+ if lu.cfg.GetNodeInfo(node).offline:
+ raise errors.OpPrereqError("Can't use offline node %s" % node)
+
+
+def _CheckNodeNotDrained(lu, node):
+ """Ensure that a given node is not drained.
+
+ @param lu: the LU on behalf of which we make the check
+ @param node: the node to check
+ @raise errors.OpPrereqError: if the node is drained
+
+ """
+ if lu.cfg.GetNodeInfo(node).drained:
+ raise errors.OpPrereqError("Can't use drained node %s" % node)
+
+
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
- memory, vcpus, nics):
- """Builds instance related env variables for hooks from single variables.
+ memory, vcpus, nics, disk_template, disks):
+ """Builds instance related env variables for hooks
+
+ This builds the hook environment from individual variables.
+
+ @type name: string
+ @param name: the name of the instance
+ @type primary_node: string
+ @param primary_node: the name of the instance's primary node
+ @type secondary_nodes: list
+ @param secondary_nodes: list of secondary nodes as strings
+ @type os_type: string
+ @param os_type: the name of the instance's OS
+ @type status: boolean
+ @param status: the should_run status of the instance
+ @type memory: string
+ @param memory: the memory size of the instance
+ @type vcpus: string
+ @param vcpus: the count of VCPUs the instance has
+ @type nics: list
+ @param nics: list of tuples (ip, mac, mode, link) representing
+ the NICs the instance has
+ @type disk_template: string
+ @param disk_template: the disk template of the instance
+ @type disks: list
+ @param disks: the list of (size, mode) pairs
+ @rtype: dict
+ @return: the hook environment for this instance
- Args:
- secondary_nodes: List of secondary nodes as strings
"""
+ if status:
+ str_status = "up"
+ else:
+ str_status = "down"
env = {
"OP_TARGET": name,
"INSTANCE_NAME": name,
"INSTANCE_PRIMARY": primary_node,
"INSTANCE_SECONDARIES": " ".join(secondary_nodes),
"INSTANCE_OS_TYPE": os_type,
- "INSTANCE_STATUS": status,
+ "INSTANCE_STATUS": str_status,
"INSTANCE_MEMORY": memory,
"INSTANCE_VCPUS": vcpus,
+ "INSTANCE_DISK_TEMPLATE": disk_template,
}
if nics:
nic_count = len(nics)
- for idx, (ip, bridge, mac) in enumerate(nics):
+ for idx, (ip, mac, mode, link) in enumerate(nics):
if ip is None:
ip = ""
env["INSTANCE_NIC%d_IP" % idx] = ip
- env["INSTANCE_NIC%d_BRIDGE" % idx] = bridge
- env["INSTANCE_NIC%d_HWADDR" % idx] = mac
+ env["INSTANCE_NIC%d_MAC" % idx] = mac
+ env["INSTANCE_NIC%d_MODE" % idx] = mode
+ env["INSTANCE_NIC%d_LINK" % idx] = link
+ if mode == constants.NIC_MODE_BRIDGED:
+ env["INSTANCE_NIC%d_BRIDGE" % idx] = link
else:
nic_count = 0
env["INSTANCE_NIC_COUNT"] = nic_count
+ if disks:
+ disk_count = len(disks)
+ for idx, (size, mode) in enumerate(disks):
+ env["INSTANCE_DISK%d_SIZE" % idx] = size
+ env["INSTANCE_DISK%d_MODE" % idx] = mode
+ else:
+ disk_count = 0
+
+ env["INSTANCE_DISK_COUNT"] = disk_count
+
return env
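# Illustrative sketch (not part of the patch): the environment built for
# a one-NIC, one-disk instance; all values below are made up.
#
#   env = _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
#                               ["node2.example.com"], "debian-etch",
#                               True, "128", "1",
#                               [("198.51.100.10", "aa:00:00:11:22:33",
#                                 constants.NIC_MODE_BRIDGED, "xen-br0")],
#                               "drbd", [(10240, "rw")])
#   # among others: INSTANCE_STATUS=up, INSTANCE_NIC_COUNT=1,
#   # INSTANCE_NIC0_BRIDGE=xen-br0 (set for bridged mode only),
#   # INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE=10240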
+def _PreBuildNICHooksList(lu, nics):
+ """Build a list of nic information tuples.
+
+ This list is suitable to be passed to _BuildInstanceHookEnv.
+
+ @type lu: L{LogicalUnit}
+ @param lu: the logical unit on whose behalf we execute
+ @type nics: list of L{objects.NIC}
+ @param nics: list of nics to convert to hooks tuples
+
+ """
+ hooks_nics = []
+ c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
+ for nic in nics:
+ ip = nic.ip
+ mac = nic.mac
+ filled_params = objects.FillDict(c_nicparams, nic.nicparams)
+ mode = filled_params[constants.NIC_MODE]
+ link = filled_params[constants.NIC_LINK]
+ hooks_nics.append((ip, mac, mode, link))
+ return hooks_nics
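# Illustrative sketch (not part of the patch): the FillDict overlay
# relied upon above - cluster defaults overridden per NIC (made-up
# values).
#
#   c_nicparams = {constants.NIC_MODE: constants.NIC_MODE_BRIDGED,
#                  constants.NIC_LINK: "xen-br0"}
#   nic.nicparams = {constants.NIC_LINK: "xen-br1"}
#   filled = objects.FillDict(c_nicparams, nic.nicparams)
#   # filled keeps the default mode but uses the per-NIC link "xen-br1"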
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
"""Builds instance related env variables for hooks from an object.
- Args:
- instance: objects.Instance object of instance
- override: dict of values to override
+ @type lu: L{LogicalUnit}
+ @param lu: the logical unit on whose behalf we execute
+ @type instance: L{objects.Instance}
+ @param instance: the instance for which we should build the
+ environment
+ @type override: dict
+ @param override: dictionary with key/values that will override
+ our values
+ @rtype: dict
+ @return: the hook environment dictionary
+
"""
bep = lu.cfg.GetClusterInfo().FillBE(instance)
args = {
'primary_node': instance.primary_node,
'secondary_nodes': instance.secondary_nodes,
'os_type': instance.os,
- 'status': instance.os,
+ 'status': instance.admin_up,
'memory': bep[constants.BE_MEMORY],
'vcpus': bep[constants.BE_VCPUS],
- 'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
+ 'nics': _PreBuildNICHooksList(lu, instance.nics),
+ 'disk_template': instance.disk_template,
+ 'disks': [(disk.size, disk.mode) for disk in instance.disks],
}
if override:
args.update(override)
return _BuildInstanceHookEnv(**args)
-def _CheckInstanceBridgesExist(lu, instance):
+def _AdjustCandidatePool(lu):
+ """Adjust the candidate pool after node operations.
+
+ """
+ mod_list = lu.cfg.MaintainCandidatePool()
+ if mod_list:
+ lu.LogInfo("Promoted nodes to master candidate role: %s",
+ ", ".join(node.name for node in mod_list))
+ for name in mod_list:
+ lu.context.ReaddNode(name)
+ mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
+ if mc_now > mc_max:
+ lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
+ (mc_now, mc_max))
+
+
+def _CheckNicsBridgesExist(lu, target_nics, target_node,
+ profile=constants.PP_DEFAULT):
+ """Check that the brigdes needed by a list of nics exist.
+
+ """
+ c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
+ paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
+ for nic in target_nics]
+ brlist = [params[constants.NIC_LINK] for params in paramslist
+ if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
+ if brlist:
+ result = lu.rpc.call_bridges_exist(target_node, brlist)
+ result.Raise()
+ if not result.data:
+ raise errors.OpPrereqError("One or more target bridges %s does not"
+ " exist on destination node '%s'" %
+ (brlist, target_node))
+
+
+def _CheckInstanceBridgesExist(lu, instance, node=None):
"""Check that the brigdes needed by an instance exist.
"""
- # check bridges existance
- brlist = [nic.bridge for nic in instance.nics]
- if not lu.rpc.call_bridges_exist(instance.primary_node, brlist):
- raise errors.OpPrereqError("one or more target bridges %s does not"
- " exist on destination node '%s'" %
- (brlist, instance.primary_node))
+ if node is None:
+ node = instance.primary_node
+ _CheckNicsBridgesExist(lu, instance.nics, node)
class LUDestroyCluster(NoHooksLU):
"""
master = self.cfg.GetMasterNode()
- if not self.rpc.call_node_stop_master(master, False):
+ result = self.rpc.call_node_stop_master(master, False)
+ result.Raise()
+ if not result.data:
raise errors.OpExecError("Could not disable the master role")
priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
utils.CreateBackup(priv_key)
}
self.share_locks = dict(((i, 1) for i in locking.LEVELS))
- def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result,
- remote_version, feedback_fn):
+ def _VerifyNode(self, nodeinfo, file_list, local_cksum,
+ node_result, feedback_fn, master_files,
+ drbd_map, vg_name):
"""Run multiple tests against a node.
Test list:
+
- compares ganeti version
- checks vg existence and size > 20G
- checks config file checksum
- checks ssh to other nodes
- Args:
- node: name of the node to check
- file_list: required list of files
- local_cksum: dictionary of local files and their checksums
+ @type nodeinfo: L{objects.Node}
+ @param nodeinfo: the node to check
+ @param file_list: required list of files
+ @param local_cksum: dictionary of local files and their checksums
+ @param node_result: the results from the node
+ @param feedback_fn: function used to accumulate results
+ @param master_files: list of files that only masters should have
+ @param drbd_map: the used DRBD minors for this node, in
+ the form of minor: (instance, must_exist), which correspond to instances
+ and their running status
+ @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
"""
+ node = nodeinfo.name
+
+ # main result, node_result should be a non-empty dict
+ if not node_result or not isinstance(node_result, dict):
+ feedback_fn(" - ERROR: unable to verify node %s." % (node,))
+ return True
+
# compares ganeti version
local_version = constants.PROTOCOL_VERSION
- if not remote_version:
+ remote_version = node_result.get('version', None)
+ if not (remote_version and isinstance(remote_version, (list, tuple)) and
+ len(remote_version) == 2):
feedback_fn(" - ERROR: connection to %s failed" % (node))
return True
- if local_version != remote_version:
- feedback_fn(" - ERROR: sw version mismatch: master %s, node(%s) %s" %
- (local_version, node, remote_version))
+ if local_version != remote_version[0]:
+ feedback_fn(" - ERROR: incompatible protocol versions: master %s,"
+ " node %s %s" % (local_version, node, remote_version[0]))
return True
- # checks vg existance and size > 20G
+ # node seems compatible, we can actually try to look into its results
bad = False
- if not vglist:
- feedback_fn(" - ERROR: unable to check volume groups on node %s." %
- (node,))
- bad = True
- else:
- vgstatus = utils.CheckVolumeGroupSize(vglist, self.cfg.GetVGName(),
- constants.MIN_VG_SIZE)
- if vgstatus:
- feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node))
- bad = True
- if not node_result:
- feedback_fn(" - ERROR: unable to verify node %s." % (node,))
- return True
+ # full package version
+ if constants.RELEASE_VERSION != remote_version[1]:
+ feedback_fn(" - WARNING: software version mismatch: master %s,"
+ " node %s %s" %
+ (constants.RELEASE_VERSION, node, remote_version[1]))
+
+ # checks vg existence and size > 20G
+ if vg_name is not None:
+ vglist = node_result.get(constants.NV_VGLIST, None)
+ if not vglist:
+ feedback_fn(" - ERROR: unable to check volume groups on node %s." %
+ (node,))
+ bad = True
+ else:
+ vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
+ constants.MIN_VG_SIZE)
+ if vgstatus:
+ feedback_fn(" - ERROR: %s on node %s" % (vgstatus, node))
+ bad = True
# checks config file checksum
- # checks ssh to any
- if 'filelist' not in node_result:
+ remote_cksum = node_result.get(constants.NV_FILELIST, None)
+ if not isinstance(remote_cksum, dict):
bad = True
feedback_fn(" - ERROR: node hasn't returned file checksum data")
else:
- remote_cksum = node_result['filelist']
for file_name in file_list:
+ node_is_mc = nodeinfo.master_candidate
+ must_have_file = file_name not in master_files
if file_name not in remote_cksum:
- bad = True
- feedback_fn(" - ERROR: file '%s' missing" % file_name)
+ if node_is_mc or must_have_file:
+ bad = True
+ feedback_fn(" - ERROR: file '%s' missing" % file_name)
elif remote_cksum[file_name] != local_cksum[file_name]:
- bad = True
- feedback_fn(" - ERROR: file '%s' has wrong checksum" % file_name)
+ if node_is_mc or must_have_file:
+ bad = True
+ feedback_fn(" - ERROR: file '%s' has wrong checksum" % file_name)
+ else:
+ # not candidate and this is not a must-have file
+ bad = True
+ feedback_fn(" - ERROR: non master-candidate has old/wrong file"
+ " '%s'" % file_name)
+ else:
+ # all good, except non-master/non-must have combination
+ if not node_is_mc and not must_have_file:
+ feedback_fn(" - ERROR: file '%s' should not exist on non master"
+ " candidates" % file_name)
+
+ # checks ssh to any
- if 'nodelist' not in node_result:
+ if constants.NV_NODELIST not in node_result:
bad = True
feedback_fn(" - ERROR: node hasn't returned node ssh connectivity data")
else:
- if node_result['nodelist']:
+ if node_result[constants.NV_NODELIST]:
bad = True
- for node in node_result['nodelist']:
+ for node in node_result[constants.NV_NODELIST]:
feedback_fn(" - ERROR: ssh communication with node '%s': %s" %
- (node, node_result['nodelist'][node]))
- if 'node-net-test' not in node_result:
+ (node, node_result[constants.NV_NODELIST][node]))
+
+ if constants.NV_NODENETTEST not in node_result:
bad = True
feedback_fn(" - ERROR: node hasn't returned node tcp connectivity data")
else:
- if node_result['node-net-test']:
+ if node_result[constants.NV_NODENETTEST]:
bad = True
- nlist = utils.NiceSort(node_result['node-net-test'].keys())
+ nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
for node in nlist:
feedback_fn(" - ERROR: tcp communication with node '%s': %s" %
- (node, node_result['node-net-test'][node]))
+ (node, node_result[constants.NV_NODENETTEST][node]))
- hyp_result = node_result.get('hypervisor', None)
+ hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
if isinstance(hyp_result, dict):
for hv_name, hv_result in hyp_result.iteritems():
if hv_result is not None:
feedback_fn(" - ERROR: hypervisor %s verify failure: '%s'" %
(hv_name, hv_result))
+
+ # check used drbd list
+ if vg_name is not None:
+ used_minors = node_result.get(constants.NV_DRBDLIST, [])
+ if not isinstance(used_minors, (tuple, list)):
+ feedback_fn(" - ERROR: cannot parse drbd status file: %s" %
+ str(used_minors))
+ else:
+ for minor, (iname, must_exist) in drbd_map.items():
+ if minor not in used_minors and must_exist:
+ feedback_fn(" - ERROR: drbd minor %d of instance %s is"
+ " not active" % (minor, iname))
+ bad = True
+ for minor in used_minors:
+ if minor not in drbd_map:
+ feedback_fn(" - ERROR: unallocated drbd minor %d is in use" %
+ minor)
+ bad = True
+
return bad
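# Illustrative sketch (not part of the patch): the two-way DRBD minor
# check above, on made-up data.
#
#   drbd_map    = {0: ("inst1", True),   # inst1 up: minor 0 must exist
#                  1: ("inst2", False)}  # inst2 down: minor 1 may be absent
#   used_minors = [0, 2]
#   # minor 0: expected and in use             -> ok
#   # minor 1: absent, but must_exist is False -> ok
#   # minor 2: in use but not in drbd_map      -> "unallocated drbd minor"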
def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
- node_instance, feedback_fn):
+ node_instance, feedback_fn, n_offline):
"""Verify an instance.
This function checks to see if the required block devices are
instanceconfig.MapLVsByNode(node_vol_should)
for node in node_vol_should:
+ if node in n_offline:
+ # ignore missing volumes on offline nodes
+ continue
for volume in node_vol_should[node]:
if node not in node_vol_is or volume not in node_vol_is[node]:
feedback_fn(" - ERROR: volume %s missing on node %s" %
(volume, node))
bad = True
- if not instanceconfig.status == 'down':
- if (node_current not in node_instance or
- not instance in node_instance[node_current]):
+ if instanceconfig.admin_up:
+ if ((node_current not in node_instance or
+ not instance in node_instance[node_current]) and
+ node_current not in n_offline):
feedback_fn(" - ERROR: instance %s not running on node %s" %
(instance, node_current))
bad = True
"""
all_nodes = self.cfg.GetNodeList()
- # TODO: populate the environment with useful information for verify hooks
- env = {}
+ env = {
+ "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
+ }
+ for node in self.cfg.GetAllNodesInfo().values():
+ env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
+
return env, [], all_nodes
def Exec(self, feedback_fn):
nodelist = utils.NiceSort(self.cfg.GetNodeList())
nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
instancelist = utils.NiceSort(self.cfg.GetInstanceList())
+ instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
+ for iname in instancelist)
i_non_redundant = [] # Non redundant instances
i_non_a_balanced = [] # Non auto-balanced instances
+ n_offline = [] # List of offline nodes
+ n_drained = [] # List of nodes being drained
node_volume = {}
node_instance = {}
node_info = {}
# FIXME: verify OS list
# do local checksums
- file_names = []
+ master_files = [constants.CLUSTER_CONF_FILE]
+
+ file_names = ssconf.SimpleStore().GetFileList()
file_names.append(constants.SSL_CERT_FILE)
- file_names.append(constants.CLUSTER_CONF_FILE)
+ file_names.append(constants.RAPI_CERT_FILE)
+ file_names.extend(master_files)
+
local_checksums = utils.FingerprintFiles(file_names)
feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
- all_volumeinfo = self.rpc.call_volume_list(nodelist, vg_name)
- all_instanceinfo = self.rpc.call_instance_list(nodelist, hypervisors)
- all_vglist = self.rpc.call_vg_list(nodelist)
node_verify_param = {
- 'filelist': file_names,
- 'nodelist': nodelist,
- 'hypervisor': hypervisors,
- 'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
- for node in nodeinfo]
+ constants.NV_FILELIST: file_names,
+ constants.NV_NODELIST: [node.name for node in nodeinfo
+ if not node.offline],
+ constants.NV_HYPERVISOR: hypervisors,
+ constants.NV_NODENETTEST: [(node.name, node.primary_ip,
+ node.secondary_ip) for node in nodeinfo
+ if not node.offline],
+ constants.NV_INSTANCELIST: hypervisors,
+ constants.NV_VERSION: None,
+ constants.NV_HVINFO: self.cfg.GetHypervisorType(),
}
+ if vg_name is not None:
+ node_verify_param[constants.NV_VGLIST] = None
+ node_verify_param[constants.NV_LVLIST] = vg_name
+ node_verify_param[constants.NV_DRBDLIST] = None
all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
self.cfg.GetClusterName())
- all_rversion = self.rpc.call_version(nodelist)
- all_ninfo = self.rpc.call_node_info(nodelist, self.cfg.GetVGName(),
- self.cfg.GetHypervisorType())
cluster = self.cfg.GetClusterInfo()
- for node in nodelist:
- feedback_fn("* Verifying node %s" % node)
- result = self._VerifyNode(node, file_names, local_checksums,
- all_vglist[node], all_nvinfo[node],
- all_rversion[node], feedback_fn)
- bad = bad or result
+ master_node = self.cfg.GetMasterNode()
+ all_drbd_map = self.cfg.ComputeDRBDMap()
- # node_volume
- volumeinfo = all_volumeinfo[node]
+ for node_i in nodeinfo:
+ node = node_i.name
+ nresult = all_nvinfo[node].data
+
+ if node_i.offline:
+ feedback_fn("* Skipping offline node %s" % (node,))
+ n_offline.append(node)
+ continue
+
+ if node == master_node:
+ ntype = "master"
+ elif node_i.master_candidate:
+ ntype = "master candidate"
+ elif node_i.drained:
+ ntype = "drained"
+ n_drained.append(node)
+ else:
+ ntype = "regular"
+ feedback_fn("* Verifying node %s (%s)" % (node, ntype))
+
+ if all_nvinfo[node].failed or not isinstance(nresult, dict):
+ feedback_fn(" - ERROR: connection to %s failed" % (node,))
+ bad = True
+ continue
- if isinstance(volumeinfo, basestring):
+ node_drbd = {}
+ for minor, instance in all_drbd_map[node].items():
+ if instance not in instanceinfo:
+ feedback_fn(" - ERROR: ghost instance '%s' in temporary DRBD map" %
+ instance)
+ # ghost instance should not be running, but otherwise we
+ # don't give double warnings (both ghost instance and
+ # unallocated minor in use)
+ node_drbd[minor] = (instance, False)
+ else:
+ instance = instanceinfo[instance]
+ node_drbd[minor] = (instance.name, instance.admin_up)
+ result = self._VerifyNode(node_i, file_names, local_checksums,
+ nresult, feedback_fn, master_files,
+ node_drbd, vg_name)
+ bad = bad or result
+
+ lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
+ if vg_name is None:
+ node_volume[node] = {}
+ elif isinstance(lvdata, basestring):
feedback_fn(" - ERROR: LVM problem on node %s: %s" %
- (node, volumeinfo[-400:].encode('string_escape')))
+ (node, utils.SafeEncode(lvdata)))
bad = True
node_volume[node] = {}
- elif not isinstance(volumeinfo, dict):
- feedback_fn(" - ERROR: connection to %s failed" % (node,))
+ elif not isinstance(lvdata, dict):
+ feedback_fn(" - ERROR: connection to %s failed (lvlist)" % (node,))
bad = True
continue
else:
- node_volume[node] = volumeinfo
+ node_volume[node] = lvdata
# node_instance
- nodeinstance = all_instanceinfo[node]
- if type(nodeinstance) != list:
- feedback_fn(" - ERROR: connection to %s failed" % (node,))
+ idata = nresult.get(constants.NV_INSTANCELIST, None)
+ if not isinstance(idata, list):
+ feedback_fn(" - ERROR: connection to %s failed (instancelist)" %
+ (node,))
bad = True
continue
- node_instance[node] = nodeinstance
+ node_instance[node] = idata
# node_info
- nodeinfo = all_ninfo[node]
+ nodeinfo = nresult.get(constants.NV_HVINFO, None)
if not isinstance(nodeinfo, dict):
- feedback_fn(" - ERROR: connection to %s failed" % (node,))
+ feedback_fn(" - ERROR: connection to %s failed (hvinfo)" % (node,))
bad = True
continue
try:
node_info[node] = {
"mfree": int(nodeinfo['memory_free']),
- "dfree": int(nodeinfo['vg_free']),
"pinst": [],
"sinst": [],
# dictionary holding all instances this node is secondary for,
# secondary.
"sinst-by-pnode": {},
}
- except ValueError:
- feedback_fn(" - ERROR: invalid value returned from node %s" % (node,))
+ # FIXME: devise a free space model for file based instances as well
+ if vg_name is not None:
+ if (constants.NV_VGLIST not in nresult or
+ vg_name not in nresult[constants.NV_VGLIST]):
+ feedback_fn(" - ERROR: node %s didn't return data for the"
+ " volume group '%s' - it is either missing or broken" %
+ (node, vg_name))
+ bad = True
+ continue
+ node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
+ except (ValueError, KeyError):
+ feedback_fn(" - ERROR: invalid nodeinfo value returned"
+ " from node %s" % (node,))
bad = True
continue
for instance in instancelist:
feedback_fn("* Verifying instance %s" % instance)
- inst_config = self.cfg.GetInstanceInfo(instance)
+ inst_config = instanceinfo[instance]
result = self._VerifyInstance(instance, inst_config, node_volume,
- node_instance, feedback_fn)
+ node_instance, feedback_fn, n_offline)
bad = bad or result
+ inst_nodes_offline = []
inst_config.MapLVsByNode(node_vol_should)
pnode = inst_config.primary_node
if pnode in node_info:
node_info[pnode]['pinst'].append(instance)
- else:
+ elif pnode not in n_offline:
feedback_fn(" - ERROR: instance %s, connection to primary node"
" %s failed" % (instance, pnode))
bad = True
+ if pnode in n_offline:
+ inst_nodes_offline.append(pnode)
+
# If the instance is non-redundant we cannot survive losing its primary
# node, so we are not N+1 compliant. On the other hand we have no disk
# templates with more than one secondary so that situation is not well
if pnode not in node_info[snode]['sinst-by-pnode']:
node_info[snode]['sinst-by-pnode'][pnode] = []
node_info[snode]['sinst-by-pnode'][pnode].append(instance)
- else:
+ elif snode not in n_offline:
feedback_fn(" - ERROR: instance %s, connection to secondary node"
" %s failed" % (instance, snode))
+ bad = True
+ if snode in n_offline:
+ inst_nodes_offline.append(snode)
+
+ if inst_nodes_offline:
+ # warn that the instance lives on offline nodes, and set bad=True
+ feedback_fn(" - ERROR: instance lives on offline node(s) %s" %
+ ", ".join(inst_nodes_offline))
+ bad = True
feedback_fn("* Verifying orphan volumes")
result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
% len(i_non_a_balanced))
+ if n_offline:
+ feedback_fn(" - NOTICE: %d offline node(s) found." % len(n_offline))
+
+ if n_drained:
+ feedback_fn(" - NOTICE: %d drained node(s) found." % len(n_drained))
+
return not bad
def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
- """Analize the post-hooks' result, handle it, and send some
+ """Analize the post-hooks' result
+
+ This method analyses the hook result, handles it, and sends some
nicely-formatted feedback back to the user.
- Args:
- phase: the hooks phase that has just been run
- hooks_results: the results of the multi-node hooks rpc call
- feedback_fn: function to send feedback back to the caller
- lu_result: previous Exec result
+ @param phase: one of L{constants.HOOKS_PHASE_POST} or
+ L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
+ @param hooks_results: the results of the multi-node hooks rpc call
+ @param feedback_fn: function used to send feedback back to the caller
+ @param lu_result: previous Exec result
+ @return: the new Exec result, based on the previous result
+ and hook results
"""
# We only really run POST phase hooks, and are only interested in
for node_name in hooks_results:
show_node_header = True
res = hooks_results[node_name]
- if res is False or not isinstance(res, list):
- feedback_fn(" Communication failure")
+ if res.failed or res.data is False or not isinstance(res.data, list):
+ if res.offline:
+ # no need to warn or set fail return value
+ continue
+ feedback_fn(" Communication failure in hooks execution")
lu_result = 1
continue
- for script, hkr, output in res:
+ for script, hkr, output in res.data:
if hkr == constants.HKR_FAIL:
# The node header is only shown once, if there are
# failing hooks on that node
nv_dict = {}
for inst in instances:
inst_lvs = {}
- if (inst.status != "up" or
+ if (not inst.admin_up or
inst.disk_template not in constants.DTS_NET_MIRROR):
continue
inst.MapLVsByNode(inst_lvs)
for node in nodes:
# node_volume
lvs = node_lvs[node]
-
+ if lvs.failed:
+ if not lvs.offline:
+ self.LogWarning("Connection to node %s failed: %s" %
+ (node, lvs.data))
+ continue
+ lvs = lvs.data
if isinstance(lvs, basestring):
logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
res_nlvm[node] = lvs
+ continue
elif not isinstance(lvs, dict):
logging.warning("Connection to node %s failed or invalid data"
" returned", node)
# shutdown the master IP
master = self.cfg.GetMasterNode()
- if not self.rpc.call_node_stop_master(master, False):
+ result = self.rpc.call_node_stop_master(master, False)
+ if result.failed or not result.data:
raise errors.OpExecError("Could not disable the master role")
try:
- # modify the sstore
- # TODO: sstore
- ss.SetKey(ss.SS_MASTER_IP, ip)
- ss.SetKey(ss.SS_CLUSTER_NAME, clustername)
-
- # Distribute updated ss config to all nodes
- myself = self.cfg.GetNodeInfo(master)
- dist_nodes = self.cfg.GetNodeList()
- if myself.name in dist_nodes:
- dist_nodes.remove(myself.name)
-
- logging.debug("Copying updated ssconf data to all nodes")
- for keyname in [ss.SS_CLUSTER_NAME, ss.SS_MASTER_IP]:
- fname = ss.KeyToFilename(keyname)
- result = self.rpc.call_upload_file(dist_nodes, fname)
- for to_node in dist_nodes:
- if not result[to_node]:
- self.LogWarning("Copy of file %s to node %s failed",
- fname, to_node)
+ cluster = self.cfg.GetClusterInfo()
+ cluster.cluster_name = clustername
+ cluster.master_ip = ip
+ self.cfg.Update(cluster)
+
+ # update the known hosts file
+ ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
+ node_list = self.cfg.GetNodeList()
+ try:
+ node_list.remove(master)
+ except ValueError:
+ pass
+ result = self.rpc.call_upload_file(node_list,
+ constants.SSH_KNOWN_HOSTS_FILE)
+ for to_node, to_result in result.iteritems():
+ msg = to_result.RemoteFailMsg()
+ if msg:
+ msg = ("Copy of file %s to node %s failed: %s" %
+ (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
+ self.proc.LogWarning(msg)
+
finally:
- if not self.rpc.call_node_start_master(master, False):
+ result = self.rpc.call_node_start_master(master, False)
+ if result.failed or not result.data:
self.LogWarning("Could not re-enable the master role on"
" the master, please restart manually.")
def _RecursiveCheckIfLVMBased(disk):
"""Check if the given disk or its children are lvm-based.
- Args:
- disk: ganeti.objects.Disk object
-
- Returns:
- boolean indicating whether a LD_LV dev_type was found or not
+ @type disk: L{objects.Disk}
+ @param disk: the disk to check
+ @rtype: boolean
+ @return: boolean indicating whether a LD_LV dev_type was found or not
"""
if disk.children:
_OP_REQP = []
REQ_BGL = False
+ def CheckArguments(self):
+ """Check parameters
+
+ """
+ if not hasattr(self.op, "candidate_pool_size"):
+ self.op.candidate_pool_size = None
+ if self.op.candidate_pool_size is not None:
+ try:
+ self.op.candidate_pool_size = int(self.op.candidate_pool_size)
+ except (ValueError, TypeError), err:
+ raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
+ str(err))
+ if self.op.candidate_pool_size < 1:
+ raise errors.OpPrereqError("At least one master candidate needed")
+
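# Illustrative note (not part of the patch): what the coercion above
# accepts and rejects.
#
#   candidate_pool_size = "10" -> int("10") == 10, accepted
#   candidate_pool_size = "x"  -> ValueError, wrapped in OpPrereqError
#   candidate_pool_size = 0    -> rejected: at least one candidate needed
#   candidate_pool_size absent -> defaulted to None, setting left unchanged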
def ExpandNames(self):
# FIXME: in the future maybe other cluster params won't require checking on
# all nodes to be modified.
if the given volume group is valid.
"""
- # FIXME: This only works because there is only one parameter that can be
- # changed or removed.
if self.op.vg_name is not None and not self.op.vg_name:
instances = self.cfg.GetAllInstancesInfo().values()
for inst in instances:
if self.op.vg_name:
vglist = self.rpc.call_vg_list(node_list)
for node in node_list:
- vgstatus = utils.CheckVolumeGroupSize(vglist[node], self.op.vg_name,
+ if vglist[node].failed:
+ # ignoring down node
+ self.LogWarning("Node %s unreachable/error, ignoring" % node)
+ continue
+ vgstatus = utils.CheckVolumeGroupSize(vglist[node].data,
+ self.op.vg_name,
constants.MIN_VG_SIZE)
if vgstatus:
raise errors.OpPrereqError("Error on node '%s': %s" %
(node, vgstatus))
self.cluster = cluster = self.cfg.GetClusterInfo()
- # beparams changes do not need validation (we can't validate?),
- # but we still process here
+ # validate params changes
if self.op.beparams:
- self.new_beparams = cluster.FillDict(
- cluster.beparams[constants.BEGR_DEFAULT], self.op.beparams)
+ utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
+ self.new_beparams = objects.FillDict(
+ cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
+
+ if self.op.nicparams:
+ utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
+ self.new_nicparams = objects.FillDict(
+ cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
+ objects.NIC.CheckParameterSyntax(self.new_nicparams)
# hypervisor list/parameters
- self.new_hvparams = cluster.FillDict(cluster.hvparams, {})
+ self.new_hvparams = objects.FillDict(cluster.hvparams, {})
if self.op.hvparams:
if not isinstance(self.op.hvparams, dict):
raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
hv_name in self.op.enabled_hypervisors)):
# either this is a new hypervisor, or its parameters have changed
hv_class = hypervisor.GetHypervisor(hv_name)
+ utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
hv_class.CheckParameterSyntax(hv_params)
_CheckHVParams(self, node_list, hv_name, hv_params)
"""
if self.op.vg_name is not None:
- if self.op.vg_name != self.cfg.GetVGName():
- self.cfg.SetVGName(self.op.vg_name)
+ new_volume = self.op.vg_name
+ if not new_volume:
+ new_volume = None
+ if new_volume != self.cfg.GetVGName():
+ self.cfg.SetVGName(new_volume)
else:
feedback_fn("Cluster LVM configuration already in desired"
" state, not changing")
if self.op.enabled_hypervisors is not None:
self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
if self.op.beparams:
- self.cluster.beparams[constants.BEGR_DEFAULT] = self.new_beparams
+ self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
+ if self.op.nicparams:
+ self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
+
+ if self.op.candidate_pool_size is not None:
+ self.cluster.candidate_pool_size = self.op.candidate_pool_size
+
self.cfg.Update(self.cluster)
+ # we want to update nodes after the cluster so that if any errors
+ # happen, we have recorded and saved the cluster info
+ if self.op.candidate_pool_size is not None:
+ _AdjustCandidatePool(self)
+
+
+def _RedistributeAncillaryFiles(lu, additional_nodes=None):
+ """Distribute additional files which are part of the cluster configuration.
+
+ ConfigWriter takes care of distributing the config and ssconf files, but
+ there are more files which should be distributed to all nodes. This function
+ makes sure those are copied.
+
+ @param lu: calling logical unit
+ @param additional_nodes: list of nodes not in the config to distribute to
+
+ """
+ # 1. Gather target nodes
+ myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
+ dist_nodes = lu.cfg.GetNodeList()
+ if additional_nodes is not None:
+ dist_nodes.extend(additional_nodes)
+ if myself.name in dist_nodes:
+ dist_nodes.remove(myself.name)
+ # 2. Gather files to distribute
+ dist_files = set([constants.ETC_HOSTS,
+ constants.SSH_KNOWN_HOSTS_FILE,
+ constants.RAPI_CERT_FILE,
+ constants.RAPI_USERS_FILE,
+ ])
+
+ enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
+ for hv_name in enabled_hypervisors:
+ hv_class = hypervisor.GetHypervisor(hv_name)
+ dist_files.update(hv_class.GetAncillaryFiles())
+
+ # 3. Perform the files upload
+ for fname in dist_files:
+ if os.path.exists(fname):
+ result = lu.rpc.call_upload_file(dist_nodes, fname)
+ for to_node, to_result in result.items():
+ msg = to_result.RemoteFailMsg()
+ if msg:
+ msg = ("Copy of file %s to node %s failed: %s" %
+ (fname, to_node, msg))
+ lu.proc.LogWarning(msg)
+
+
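# Illustrative note (not part of the patch): the two call forms used
# later in this patch - after changing cluster-wide files, and when
# adding a node that is not yet part of the configuration:
#
#   _RedistributeAncillaryFiles(self)
#   _RedistributeAncillaryFiles(self, additional_nodes=[node])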
+class LURedistributeConfig(NoHooksLU):
+ """Force the redistribution of cluster configuration.
+
+ This is a very simple LU.
+
+ """
+ _OP_REQP = []
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self.needed_locks = {
+ locking.LEVEL_NODE: locking.ALL_SET,
+ }
+ self.share_locks[locking.LEVEL_NODE] = 1
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ """
+
+ def Exec(self, feedback_fn):
+ """Redistribute the configuration.
+
+ """
+ self.cfg.Update(self.cfg.GetClusterInfo())
+ _RedistributeAncillaryFiles(self)
+
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
"""Sleep and poll for an instance's disk to sync.
done = True
cumul_degraded = False
rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
- if not rstats:
- lu.LogWarning("Can't get any data from node %s", node)
+ msg = rstats.RemoteFailMsg()
+ if msg:
+ lu.LogWarning("Can't get any data from node %s: %s", node, msg)
retries += 1
if retries >= 10:
raise errors.RemoteError("Can't contact node %s for mirror data,"
" aborting." % node)
time.sleep(6)
continue
+ rstats = rstats.payload
retries = 0
- for i in range(len(rstats)):
- mstat = rstats[i]
+ for i, mstat in enumerate(rstats):
if mstat is None:
lu.LogWarning("Can't compute data for node %s/%s",
node, instance.disks[i].iv_name)
result = True
if on_primary or dev.AssembleOnSecondary():
rstats = lu.rpc.call_blockdev_find(node, dev)
- if not rstats:
- logging.warning("Node %s: disk degraded, not found or node down", node)
+ msg = rstats.RemoteFailMsg()
+ if msg:
+ lu.LogWarning("Can't find disk on node %s: %s", node, msg)
+ result = False
+ elif not rstats.payload:
+ lu.LogWarning("Can't find disk on node %s", node)
result = False
else:
- result = result and (not rstats[idx])
+ result = result and (not rstats.payload[idx])
if dev.children:
for child in dev.children:
result = result and _CheckDiskConsistency(lu, child, node, on_primary)
"""
_OP_REQP = ["output_fields", "names"]
REQ_BGL = False
- _FIELDS_STATIC = _FieldSet()
- _FIELDS_DYNAMIC = _FieldSet("name", "valid", "node_status")
+ _FIELDS_STATIC = utils.FieldSet()
+ _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
def ExpandNames(self):
if self.op.names:
selected=self.op.output_fields)
# Lock all nodes, in shared mode
+ # Temporary removal of locks, should be reverted later
+ # TODO: reintroduce locks when they are lighter-weight
self.needed_locks = {}
- self.share_locks[locking.LEVEL_NODE] = 1
- self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+ #self.share_locks[locking.LEVEL_NODE] = 1
+ #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
def CheckPrereq(self):
"""Check prerequisites.
def _DiagnoseByOS(node_list, rlist):
"""Remaps a per-node return list into an a per-os per-node dictionary
- Args:
- node_list: a list with the names of all nodes
- rlist: a map with node names as keys and OS objects as values
+ @param node_list: a list with the names of all nodes
+ @param rlist: a map with node names as keys and OS objects as values
- Returns:
- map: a map with osnames as keys and as value another map, with
- nodes as
- keys and list of OS objects as values
- e.g. {"debian-etch": {"node1": [<object>,...],
- "node2": [<object>,]}
- }
+ @rtype: dict
+ @return: a dictionary with osnames as keys and as value another map, with
+ nodes as keys and list of OS objects as values, eg::
+
+ {"debian-etch": {"node1": [<object>,...],
+ "node2": [<object>,]}
+ }
"""
all_os = {}
+ # we build here the list of nodes that didn't fail the RPC (at RPC
+ # level), so that nodes with a non-responding node daemon don't
+ # make all OSes invalid
+ good_nodes = [node_name for node_name in rlist
+ if not rlist[node_name].failed]
for node_name, nr in rlist.iteritems():
- if not nr:
+ if nr.failed or not nr.data:
continue
- for os_obj in nr:
+ for os_obj in nr.data:
if os_obj.name not in all_os:
# build a list of nodes for this os containing empty lists
# for each node in node_list
all_os[os_obj.name] = {}
- for nname in node_list:
+ for nname in good_nodes:
all_os[os_obj.name][nname] = []
all_os[os_obj.name][node_name].append(os_obj)
return all_os
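# Illustrative sketch (not part of the patch): shape of the remapping,
# on made-up results. node3's RPC failed, so it is dropped from
# good_nodes and never appears in the per-OS node lists.
#
#   rlist = {"node1": [os_a], "node2": [os_a, os_b], "node3": <failed>}
#   _DiagnoseByOS(node_list, rlist) ==
#     {"os_a": {"node1": [os_a], "node2": [os_a]},
#      "os_b": {"node1": [],     "node2": [os_b]}}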
"""Compute the list of OSes.
"""
- node_list = self.acquired_locks[locking.LEVEL_NODE]
- node_data = self.rpc.call_os_diagnose(node_list)
+ valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
+ node_data = self.rpc.call_os_diagnose(valid_nodes)
if node_data == False:
raise errors.OpExecError("Can't gather the list of OSes")
- pol = self._DiagnoseByOS(node_list, node_data)
+ pol = self._DiagnoseByOS(valid_nodes, node_data)
output = []
for os_name, os_data in pol.iteritems():
row = []
for instance_name in instance_list:
instance = self.cfg.GetInstanceInfo(instance_name)
- if node.name == instance.primary_node:
- raise errors.OpPrereqError("Instance %s still running on the node,"
- " please remove first." % instance_name)
- if node.name in instance.secondary_nodes:
- raise errors.OpPrereqError("Instance %s has node as a secondary,"
+ if node.name in instance.all_nodes:
+ raise errors.OpPrereqError("Instance %s is still running on the node,"
" please remove first." % instance_name)
self.op.node_name = node.name
self.node = node
self.rpc.call_node_leave_cluster(node.name)
+ # Promote nodes to master candidate as needed
+ _AdjustCandidatePool(self)
+
class LUQueryNodes(NoHooksLU):
"""Logical unit for querying nodes.
"""
- _OP_REQP = ["output_fields", "names"]
+ _OP_REQP = ["output_fields", "names", "use_locking"]
REQ_BGL = False
- _FIELDS_DYNAMIC = _FieldSet(
+ _FIELDS_DYNAMIC = utils.FieldSet(
"dtotal", "dfree",
"mtotal", "mnode", "mfree",
"bootid",
- "ctotal",
+ "ctotal", "cnodes", "csockets",
)
- _FIELDS_STATIC = _FieldSet(
+ _FIELDS_STATIC = utils.FieldSet(
"name", "pinst_cnt", "sinst_cnt",
"pinst_list", "sinst_list",
"pip", "sip", "tags",
"serial_no",
+ "master_candidate",
+ "master",
+ "offline",
+ "drained",
)
def ExpandNames(self):
else:
self.wanted = locking.ALL_SET
- self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+ self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+ self.do_locking = self.do_node_query and self.op.use_locking
if self.do_locking:
# if we don't request only static fields, we need to lock the nodes
self.needed_locks[locking.LEVEL_NODE] = self.wanted
# begin data gathering
- if self.do_locking:
+ if self.do_node_query:
live_data = {}
node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
self.cfg.GetHypervisorType())
for name in nodenames:
- nodeinfo = node_data.get(name, None)
- if nodeinfo:
+ nodeinfo = node_data[name]
+ if not nodeinfo.failed and nodeinfo.data:
+ nodeinfo = nodeinfo.data
+ fn = utils.TryConvert
live_data[name] = {
- "mtotal": utils.TryConvert(int, nodeinfo['memory_total']),
- "mnode": utils.TryConvert(int, nodeinfo['memory_dom0']),
- "mfree": utils.TryConvert(int, nodeinfo['memory_free']),
- "dtotal": utils.TryConvert(int, nodeinfo['vg_size']),
- "dfree": utils.TryConvert(int, nodeinfo['vg_free']),
- "ctotal": utils.TryConvert(int, nodeinfo['cpu_total']),
- "bootid": nodeinfo['bootid'],
+ "mtotal": fn(int, nodeinfo.get('memory_total', None)),
+ "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
+ "mfree": fn(int, nodeinfo.get('memory_free', None)),
+ "dtotal": fn(int, nodeinfo.get('vg_size', None)),
+ "dfree": fn(int, nodeinfo.get('vg_free', None)),
+ "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
+ "bootid": nodeinfo.get('bootid', None),
+ "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
+ "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
}
else:
live_data[name] = {}
if secnode in node_to_secondary:
node_to_secondary[secnode].add(inst.name)
+ master_node = self.cfg.GetMasterNode()
+
# end data gathering
output = []
val = list(node.GetTags())
elif field == "serial_no":
val = node.serial_no
+ elif field == "master_candidate":
+ val = node.master_candidate
+ elif field == "master":
+ val = node.name == master_node
+ elif field == "offline":
+ val = node.offline
+ elif field == "drained":
+ val = node.drained
elif self._FIELDS_DYNAMIC.Matches(field):
val = live_data[node.name].get(field, None)
else:
"""
_OP_REQP = ["nodes", "output_fields"]
REQ_BGL = False
- _FIELDS_DYNAMIC = _FieldSet("phys", "vg", "name", "size", "instance")
- _FIELDS_STATIC = _FieldSet("node")
+ _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
+ _FIELDS_STATIC = utils.FieldSet("node")
def ExpandNames(self):
_CheckOutputFields(static=self._FIELDS_STATIC,
output = []
for node in nodenames:
- if node not in volumes or not volumes[node]:
+ if node not in volumes or volumes[node].failed or not volumes[node].data:
continue
- node_vols = volumes[node][:]
+ node_vols = volumes[node].data[:]
node_vols.sort(key=lambda vol: vol['dev'])
for vol in node_vols:
raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
" based ping to noded port")
+ cp_size = self.cfg.GetClusterInfo().candidate_pool_size
+ mc_now, _ = self.cfg.GetMasterCandidateStats()
+ master_candidate = mc_now < cp_size
+
self.new_node = objects.Node(name=node,
primary_ip=primary_ip,
- secondary_ip=secondary_ip)
+ secondary_ip=secondary_ip,
+ master_candidate=master_candidate,
+ offline=False, drained=False)
def Exec(self, feedback_fn):
"""Adds the new node to the cluster.
# check connectivity
result = self.rpc.call_version([node])[node]
- if result:
- if constants.PROTOCOL_VERSION == result:
+ result.Raise()
+ if result.data:
+ if constants.PROTOCOL_VERSION == result.data:
logging.info("Communication to node %s fine, sw version %s match",
- node, result)
+ node, result.data)
else:
raise errors.OpExecError("Version mismatch master version %s,"
" node version %s" %
- (constants.PROTOCOL_VERSION, result))
+ (constants.PROTOCOL_VERSION, result.data))
else:
raise errors.OpExecError("Cannot get version from the new node")
keyarray[2],
keyarray[3], keyarray[4], keyarray[5])
- if not result:
- raise errors.OpExecError("Cannot transfer ssh keys to the new node")
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Cannot transfer ssh keys to the"
+ " new node: %s" % msg)
# Add node to our /etc/hosts, and add key to known_hosts
- utils.AddHostToEtcHosts(new_node.name)
+ if self.cfg.GetClusterInfo().modify_etc_hosts:
+ utils.AddHostToEtcHosts(new_node.name)
if new_node.secondary_ip != new_node.primary_ip:
- if not self.rpc.call_node_has_ip_address(new_node.name,
- new_node.secondary_ip):
+ result = self.rpc.call_node_has_ip_address(new_node.name,
+ new_node.secondary_ip)
+ if result.failed or not result.data:
raise errors.OpExecError("Node claims it doesn't have the secondary ip"
" you gave (%s). Please fix and re-run this"
" command." % new_node.secondary_ip)
result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
self.cfg.GetClusterName())
for verifier in node_verify_list:
- if not result[verifier]:
+ if result[verifier].failed or not result[verifier].data:
raise errors.OpExecError("Cannot communicate with %s's node daemon"
" for remote verification" % verifier)
- if result[verifier]['nodelist']:
- for failed in result[verifier]['nodelist']:
+ if result[verifier].data['nodelist']:
+ for failed in result[verifier].data['nodelist']:
feedback_fn("ssh/hostname verification failed %s -> %s" %
- (verifier, result[verifier]['nodelist'][failed]))
+ (verifier, result[verifier].data['nodelist'][failed]))
raise errors.OpExecError("ssh/hostname verification failed.")
- # Distribute updated /etc/hosts and known_hosts to all nodes,
- # including the node just added
- myself = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
- dist_nodes = self.cfg.GetNodeList()
- if not self.op.readd:
- dist_nodes.append(node)
- if myself.name in dist_nodes:
- dist_nodes.remove(myself.name)
-
- logging.debug("Copying hosts and known_hosts to all nodes")
- for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
- result = self.rpc.call_upload_file(dist_nodes, fname)
- for to_node in dist_nodes:
- if not result[to_node]:
- logging.error("Copy of file %s to node %s failed", fname, to_node)
-
- to_copy = []
- if constants.HT_XEN_HVM in self.cfg.GetClusterInfo().enabled_hypervisors:
- to_copy.append(constants.VNC_PASSWORD_FILE)
- for fname in to_copy:
- result = self.rpc.call_upload_file([node], fname)
- if not result[node]:
- logging.error("Could not copy file %s to node %s", fname, node)
-
if self.op.readd:
+ _RedistributeAncillaryFiles(self)
self.context.ReaddNode(new_node)
else:
+ _RedistributeAncillaryFiles(self, additional_nodes=[node])
self.context.AddNode(new_node)
+class LUSetNodeParams(LogicalUnit):
+ """Modifies the parameters of a node.
+
+ """
+ HPATH = "node-modify"
+ HTYPE = constants.HTYPE_NODE
+ _OP_REQP = ["node_name"]
+ REQ_BGL = False
+
+ def CheckArguments(self):
+ node_name = self.cfg.ExpandNodeName(self.op.node_name)
+ if node_name is None:
+ raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
+ self.op.node_name = node_name
+ _CheckBooleanOpField(self.op, 'master_candidate')
+ _CheckBooleanOpField(self.op, 'offline')
+ _CheckBooleanOpField(self.op, 'drained')
+ all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
+ if all_mods.count(None) == 3:
+ raise errors.OpPrereqError("Please pass at least one modification")
+ if all_mods.count(True) > 1:
+ raise errors.OpPrereqError("Can't set the node into more than one"
+ " state at the same time")
+
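# Illustrative note (not part of the patch): the all_mods screening
# above, with each flag a tri-state (True/False/None after
# _CheckBooleanOpField):
#
#   offline=None, master_candidate=None, drained=None
#     -> count(None) == 3: rejected, nothing to modify
#   offline=True, drained=True
#     -> count(True) > 1: rejected, only one such state at a time
#   master_candidate=False (others None)
#     -> accepted: a single demotion request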
+ def ExpandNames(self):
+ self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on the master node.
+
+ """
+ env = {
+ "OP_TARGET": self.op.node_name,
+ "MASTER_CANDIDATE": str(self.op.master_candidate),
+ "OFFLINE": str(self.op.offline),
+ "DRAINED": str(self.op.drained),
+ }
+ nl = [self.cfg.GetMasterNode(),
+ self.op.node_name]
+ return env, nl, nl
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks the requested node state changes against the current
+ node and cluster state.
+
+ """
+ node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
+
+ if ((self.op.master_candidate == False or self.op.offline == True or
+ self.op.drained == True) and node.master_candidate):
+ # we will demote the node from master_candidate
+ if self.op.node_name == self.cfg.GetMasterNode():
+ raise errors.OpPrereqError("The master node has to be a"
+ " master candidate, online and not drained")
+ cp_size = self.cfg.GetClusterInfo().candidate_pool_size
+ num_candidates, _ = self.cfg.GetMasterCandidateStats()
+ if num_candidates <= cp_size:
+ msg = ("Not enough master candidates (desired"
+ " %d, new value will be %d)" % (cp_size, num_candidates-1))
+ if self.op.force:
+ self.LogWarning(msg)
+ else:
+ raise errors.OpPrereqError(msg)
+
+ if (self.op.master_candidate == True and
+ ((node.offline and not self.op.offline == False) or
+ (node.drained and not self.op.drained == False))):
+ raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
+ " to master_candidate" % node.name)
+
+ return
+
+ def Exec(self, feedback_fn):
+ """Modifies a node.
+
+ """
+ node = self.node
+
+ result = []
+ changed_mc = False
+
+ if self.op.offline is not None:
+ node.offline = self.op.offline
+ result.append(("offline", str(self.op.offline)))
+ if self.op.offline == True:
+ if node.master_candidate:
+ node.master_candidate = False
+ changed_mc = True
+ result.append(("master_candidate", "auto-demotion due to offline"))
+ if node.drained:
+ node.drained = False
+ result.append(("drained", "clear drained status due to offline"))
+
+ if self.op.master_candidate is not None:
+ node.master_candidate = self.op.master_candidate
+ changed_mc = True
+ result.append(("master_candidate", str(self.op.master_candidate)))
+ if self.op.master_candidate == False:
+ rrc = self.rpc.call_node_demote_from_mc(node.name)
+ msg = rrc.RemoteFailMsg()
+ if msg:
+ self.LogWarning("Node failed to demote itself: %s" % msg)
+
+ if self.op.drained is not None:
+ node.drained = self.op.drained
+ result.append(("drained", str(self.op.drained)))
+ if self.op.drained == True:
+ if node.master_candidate:
+ node.master_candidate = False
+ changed_mc = True
+ result.append(("master_candidate", "auto-demotion due to drain"))
+ if node.offline:
+ node.offline = False
+ result.append(("offline", "clear offline status due to drain"))
+
+ # this will trigger configuration file update, if needed
+ self.cfg.Update(node)
+ # this will trigger job queue propagation or cleanup
+ if changed_mc:
+ self.context.ReaddNode(node)
+
+ return result
+
+
+class LUPowercycleNode(NoHooksLU):
+ """Powercycles a node.
+
+ """
+ _OP_REQP = ["node_name", "force"]
+ REQ_BGL = False
+
+ def CheckArguments(self):
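+    """Check the arguments.
+
+    This checks that the node name is valid and refuses to powercycle
+    the master node unless the force parameter is set.
+
+    """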
+ node_name = self.cfg.ExpandNodeName(self.op.node_name)
+ if node_name is None:
+ raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
+ self.op.node_name = node_name
+ if node_name == self.cfg.GetMasterNode() and not self.op.force:
+ raise errors.OpPrereqError("The node is the master and the force"
+ " parameter was not set")
+
+ def ExpandNames(self):
+ """Locking for PowercycleNode.
+
+    This is a last-resort option and shouldn't block on other
+    jobs. Therefore, we grab no locks.
+
+ """
+ self.needed_locks = {}
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This LU has no prereqs.
+
+ """
+ pass
+
+ def Exec(self, feedback_fn):
+ """Reboots a node.
+
+ """
+ result = self.rpc.call_node_powercycle(self.op.node_name,
+ self.cfg.GetHypervisorType())
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Failed to schedule the reboot: %s" % msg)
+ return result.payload
+
+
class LUQueryClusterInfo(NoHooksLU):
"""Query cluster configuration.
"""
_OP_REQP = []
- REQ_MASTER = False
REQ_BGL = False
def ExpandNames(self):
"master": cluster.master_node,
"default_hypervisor": cluster.default_hypervisor,
"enabled_hypervisors": cluster.enabled_hypervisors,
- "hvparams": cluster.hvparams,
+ "hvparams": dict([(hypervisor, cluster.hvparams[hypervisor])
+ for hypervisor in cluster.enabled_hypervisors]),
"beparams": cluster.beparams,
+ "nicparams": cluster.nicparams,
+ "candidate_pool_size": cluster.candidate_pool_size,
+ "master_netdev": cluster.master_netdev,
+ "volume_group_name": cluster.volume_group_name,
+ "file_storage_dir": cluster.file_storage_dir,
}
return result
"""
_OP_REQP = []
REQ_BGL = False
- _FIELDS_DYNAMIC = _FieldSet()
- _FIELDS_STATIC = _FieldSet("cluster_name", "master_node", "drain_flag")
+ _FIELDS_DYNAMIC = utils.FieldSet()
+ _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
def ExpandNames(self):
self.needed_locks = {}
self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
+ _CheckNodeOnline(self, self.instance.primary_node)
def Exec(self, feedback_fn):
"""Activate the disks.
This sets up the block devices on all nodes.
- Args:
- instance: a ganeti.objects.Instance object
- ignore_secondaries: if true, errors on secondary nodes won't result
- in an error return from the function
+ @type lu: L{LogicalUnit}
+ @param lu: the logical unit on whose behalf we execute
+ @type instance: L{objects.Instance}
+ @param instance: the instance for whose disks we assemble
+ @type ignore_secondaries: boolean
+ @param ignore_secondaries: if true, errors on secondary nodes
+ won't result in an error return from the function
+ @return: False if the operation failed, otherwise a list of
+ (host, instance_visible_name, node_visible_name)
+ with the mapping from node devices to instance devices
- Returns:
- false if the operation failed
- list of (host, instance_visible_name, node_visible_name) if the operation
- suceeded with the mapping from node devices to instance devices
"""
device_info = []
disks_ok = True
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
lu.cfg.SetDiskID(node_disk, node)
result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
- if not result:
+ msg = result.RemoteFailMsg()
+ if msg:
lu.proc.LogWarning("Could not prepare block device %s on node %s"
- " (is_primary=False, pass=1)",
- inst_disk.iv_name, node)
+ " (is_primary=False, pass=1): %s",
+ inst_disk.iv_name, node, msg)
if not ignore_secondaries:
disks_ok = False
continue
lu.cfg.SetDiskID(node_disk, node)
result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
- if not result:
+ msg = result.RemoteFailMsg()
+ if msg:
lu.proc.LogWarning("Could not prepare block device %s on node %s"
- " (is_primary=True, pass=2)",
- inst_disk.iv_name, node)
+ " (is_primary=True, pass=2): %s",
+ inst_disk.iv_name, node, msg)
disks_ok = False
- device_info.append((instance.primary_node, inst_disk.iv_name, result))
+ device_info.append((instance.primary_node, inst_disk.iv_name,
+ result.payload))
# leave the disks configured for the primary node
# this is a workaround that would be fixed better by
ins_l = lu.rpc.call_instance_list([instance.primary_node],
[instance.hypervisor])
ins_l = ins_l[instance.primary_node]
- if not type(ins_l) is list:
+ if ins_l.failed or not isinstance(ins_l.data, list):
raise errors.OpExecError("Can't contact node '%s'" %
instance.primary_node)
- if instance.name in ins_l:
+ if instance.name in ins_l.data:
raise errors.OpExecError("Instance is running, can't shutdown"
" block devices.")
ignored.
"""
- result = True
+ all_result = True
for disk in instance.disks:
for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
lu.cfg.SetDiskID(top_disk, node)
- if not lu.rpc.call_blockdev_shutdown(node, top_disk):
- logging.error("Could not shutdown block device %s on node %s",
- disk.iv_name, node)
+ result = lu.rpc.call_blockdev_shutdown(node, top_disk)
+ msg = result.RemoteFailMsg()
+ if msg:
+ lu.LogWarning("Could not shutdown block device %s on node %s: %s",
+ disk.iv_name, node, msg)
if not ignore_primary or node != instance.primary_node:
- result = False
- return result
+ all_result = False
+ return all_result
-def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor):
+def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
"""Checks if a node has enough free memory.
  This function checks if a given node has the needed amount of free
@param reason: string to use in the error message
@type requested: C{int}
@param requested: the amount of memory in MiB to check for
- @type hypervisor: C{str}
- @param hypervisor: the hypervisor to ask for memory stats
+ @type hypervisor_name: C{str}
+ @param hypervisor_name: the hypervisor to ask for memory stats
@raise errors.OpPrereqError: if the node doesn't have enough memory, or
we cannot check the node
"""
- nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor)
- if not nodeinfo or not isinstance(nodeinfo, dict):
- raise errors.OpPrereqError("Could not contact node %s for resource"
- " information" % (node,))
-
- free_mem = nodeinfo[node].get('memory_free')
+ nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
+ nodeinfo[node].Raise()
+ free_mem = nodeinfo[node].data.get('memory_free')
if not isinstance(free_mem, int):
raise errors.OpPrereqError("Can't compute free memory on node %s, result"
" was '%s'" % (node, free_mem))
def ExpandNames(self):
self._ExpandAndLockInstance()
- self.needed_locks[locking.LEVEL_NODE] = []
- self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
-
- def DeclareLocks(self, level):
- if level == locking.LEVEL_NODE:
- self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
"FORCE": self.op.force,
}
env.update(_BuildInstanceHookEnvByObject(self, self.instance))
- nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
- list(self.instance.secondary_nodes))
+ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return env, nl, nl
def CheckPrereq(self):
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
+ # extra beparams
+ self.beparams = getattr(self.op, "beparams", {})
+ if self.beparams:
+ if not isinstance(self.beparams, dict):
+ raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
+ " dict" % (type(self.beparams), ))
+ # fill the beparams dict
+ utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
+ self.op.beparams = self.beparams
+
+ # extra hvparams
+ self.hvparams = getattr(self.op, "hvparams", {})
+ if self.hvparams:
+ if not isinstance(self.hvparams, dict):
+ raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
+ " dict" % (type(self.hvparams), ))
+
+ # check hypervisor parameter syntax (locally)
+ cluster = self.cfg.GetClusterInfo()
+ utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
+ filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
+ instance.hvparams)
+ filled_hvp.update(self.hvparams)
+ hv_type = hypervisor.GetHypervisor(instance.hypervisor)
+ hv_type.CheckParameterSyntax(filled_hvp)
+ _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
+ self.op.hvparams = self.hvparams
+
+ _CheckNodeOnline(self, instance.primary_node)
+
bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
_CheckInstanceBridgesExist(self, instance)
- _CheckNodeFreeMemory(self, instance.primary_node,
- "starting instance %s" % instance.name,
- bep[constants.BE_MEMORY], instance.hypervisor)
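+    # the free memory check only matters if the instance is not
+    # already running, since a running instance already holds its memory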
+ remote_info = self.rpc.call_instance_info(instance.primary_node,
+ instance.name,
+ instance.hypervisor)
+ remote_info.Raise()
+ if not remote_info.data:
+ _CheckNodeFreeMemory(self, instance.primary_node,
+ "starting instance %s" % instance.name,
+ bep[constants.BE_MEMORY], instance.hypervisor)
def Exec(self, feedback_fn):
"""Start the instance.
"""
instance = self.instance
force = self.op.force
- extra_args = getattr(self.op, "extra_args", "")
self.cfg.MarkInstanceUp(instance.name)
_StartInstanceDisks(self, instance, force)
- if not self.rpc.call_instance_start(node_current, instance, extra_args):
+ result = self.rpc.call_instance_start(node_current, instance,
+ self.hvparams, self.beparams)
+ msg = result.RemoteFailMsg()
+ if msg:
_ShutdownInstanceDisks(self, instance)
- raise errors.OpExecError("Could not start instance")
+ raise errors.OpExecError("Could not start instance: %s" % msg)
class LURebootInstance(LogicalUnit):
constants.INSTANCE_REBOOT_HARD,
constants.INSTANCE_REBOOT_FULL))
self._ExpandAndLockInstance()
- self.needed_locks[locking.LEVEL_NODE] = []
- self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
-
- def DeclareLocks(self, level):
- if level == locking.LEVEL_NODE:
- primary_only = not constants.INSTANCE_REBOOT_FULL
- self._LockInstancesNodes(primary_only=primary_only)
def BuildHooksEnv(self):
"""Build hooks env.
"""
env = {
"IGNORE_SECONDARIES": self.op.ignore_secondaries,
+ "REBOOT_TYPE": self.op.reboot_type,
}
env.update(_BuildInstanceHookEnvByObject(self, self.instance))
- nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
- list(self.instance.secondary_nodes))
+ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return env, nl, nl
def CheckPrereq(self):
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
+ _CheckNodeOnline(self, instance.primary_node)
+
# check bridges existance
_CheckInstanceBridgesExist(self, instance)
instance = self.instance
ignore_secondaries = self.op.ignore_secondaries
reboot_type = self.op.reboot_type
- extra_args = getattr(self.op, "extra_args", "")
node_current = instance.primary_node
if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
constants.INSTANCE_REBOOT_HARD]:
- if not self.rpc.call_instance_reboot(node_current, instance,
- reboot_type, extra_args):
- raise errors.OpExecError("Could not reboot instance")
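+      # refresh the disks' physical IDs, which the reboot RPC may need
+      # in order to rebuild the hypervisor configuration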
+ for disk in instance.disks:
+ self.cfg.SetDiskID(disk, node_current)
+ result = self.rpc.call_instance_reboot(node_current, instance,
+ reboot_type)
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Could not reboot instance: %s" % msg)
else:
- if not self.rpc.call_instance_shutdown(node_current, instance):
- raise errors.OpExecError("could not shutdown instance for full reboot")
+ result = self.rpc.call_instance_shutdown(node_current, instance)
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Could not shutdown instance for"
+ " full reboot: %s" % msg)
_ShutdownInstanceDisks(self, instance)
_StartInstanceDisks(self, instance, ignore_secondaries)
- if not self.rpc.call_instance_start(node_current, instance, extra_args):
+ result = self.rpc.call_instance_start(node_current, instance, None, None)
+ msg = result.RemoteFailMsg()
+ if msg:
_ShutdownInstanceDisks(self, instance)
- raise errors.OpExecError("Could not start instance for full reboot")
+ raise errors.OpExecError("Could not start instance for"
+ " full reboot: %s" % msg)
self.cfg.MarkInstanceUp(instance.name)
def ExpandNames(self):
self._ExpandAndLockInstance()
- self.needed_locks[locking.LEVEL_NODE] = []
- self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
-
- def DeclareLocks(self, level):
- if level == locking.LEVEL_NODE:
- self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
"""
env = _BuildInstanceHookEnvByObject(self, self.instance)
- nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
- list(self.instance.secondary_nodes))
+ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return env, nl, nl
def CheckPrereq(self):
self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
+ _CheckNodeOnline(self, self.instance.primary_node)
def Exec(self, feedback_fn):
"""Shutdown the instance.
instance = self.instance
node_current = instance.primary_node
self.cfg.MarkInstanceDown(instance.name)
- if not self.rpc.call_instance_shutdown(node_current, instance):
- self.proc.LogWarning("Could not shutdown instance")
+ result = self.rpc.call_instance_shutdown(node_current, instance)
+ msg = result.RemoteFailMsg()
+ if msg:
+ self.proc.LogWarning("Could not shutdown instance: %s" % msg)
_ShutdownInstanceDisks(self, instance)
def ExpandNames(self):
self._ExpandAndLockInstance()
- self.needed_locks[locking.LEVEL_NODE] = []
- self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
-
- def DeclareLocks(self, level):
- if level == locking.LEVEL_NODE:
- self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
"""
env = _BuildInstanceHookEnvByObject(self, self.instance)
- nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
- list(self.instance.secondary_nodes))
+ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return env, nl, nl
def CheckPrereq(self):
instance = self.cfg.GetInstanceInfo(self.op.instance_name)
assert instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
+ _CheckNodeOnline(self, instance.primary_node)
if instance.disk_template == constants.DT_DISKLESS:
raise errors.OpPrereqError("Instance '%s' has no disks" %
self.op.instance_name)
- if instance.status != "down":
+ if instance.admin_up:
raise errors.OpPrereqError("Instance '%s' is marked to be up" %
self.op.instance_name)
remote_info = self.rpc.call_instance_info(instance.primary_node,
instance.name,
instance.hypervisor)
- if remote_info:
+ remote_info.Raise()
+ if remote_info.data:
raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
(self.op.instance_name,
instance.primary_node))
if pnode is None:
raise errors.OpPrereqError("Primary node '%s' is unknown" %
self.op.pnode)
- os_obj = self.rpc.call_os_get(pnode.name, self.op.os_type)
- if not os_obj:
+ result = self.rpc.call_os_get(pnode.name, self.op.os_type)
+ result.Raise()
+ if not isinstance(result.data, objects.OS):
raise errors.OpPrereqError("OS '%s' not in supported OS list for"
" primary node" % self.op.os_type)
_StartInstanceDisks(self, inst, None)
try:
feedback_fn("Running the instance OS create scripts...")
- if not self.rpc.call_instance_os_add(inst.primary_node, inst):
+ result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
+ msg = result.RemoteFailMsg()
+ if msg:
raise errors.OpExecError("Could not install OS for instance %s"
- " on node %s" %
- (inst.name, inst.primary_node))
+ " on node %s: %s" %
+ (inst.name, inst.primary_node, msg))
finally:
_ShutdownInstanceDisks(self, inst)
"""
env = _BuildInstanceHookEnvByObject(self, self.instance)
env["INSTANCE_NEW_NAME"] = self.op.new_name
- nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
- list(self.instance.secondary_nodes))
+ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return env, nl, nl
def CheckPrereq(self):
if instance is None:
raise errors.OpPrereqError("Instance '%s' not known" %
self.op.instance_name)
- if instance.status != "down":
+ _CheckNodeOnline(self, instance.primary_node)
+
+ if instance.admin_up:
raise errors.OpPrereqError("Instance '%s' is marked to be up" %
self.op.instance_name)
remote_info = self.rpc.call_instance_info(instance.primary_node,
instance.name,
instance.hypervisor)
- if remote_info:
+ remote_info.Raise()
+ if remote_info.data:
raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
(self.op.instance_name,
instance.primary_node))
result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
old_file_storage_dir,
new_file_storage_dir)
-
- if not result:
+ result.Raise()
+ if not result.data:
raise errors.OpExecError("Could not connect to node '%s' to rename"
" directory '%s' to '%s' (but the instance"
" has been renamed in Ganeti)" % (
inst.primary_node, old_file_storage_dir,
new_file_storage_dir))
- if not result[0]:
+ if not result.data[0]:
raise errors.OpExecError("Could not rename directory '%s' to '%s'"
" (but the instance has been renamed in"
" Ganeti)" % (old_file_storage_dir,
_StartInstanceDisks(self, inst, None)
try:
- if not self.rpc.call_instance_run_rename(inst.primary_node, inst,
- old_name):
+ result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
+ old_name)
+ msg = result.RemoteFailMsg()
+ if msg:
msg = ("Could not run OS rename script for instance %s on node %s"
- " (but the instance has been renamed in Ganeti)" %
- (inst.name, inst.primary_node))
+ " (but the instance has been renamed in Ganeti): %s" %
+ (inst.name, inst.primary_node, msg))
self.proc.LogWarning(msg)
finally:
_ShutdownInstanceDisks(self, inst)
logging.info("Shutting down instance %s on node %s",
instance.name, instance.primary_node)
- if not self.rpc.call_instance_shutdown(instance.primary_node, instance):
+ result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
+ msg = result.RemoteFailMsg()
+ if msg:
if self.op.ignore_failures:
- feedback_fn("Warning: can't shutdown instance")
+ feedback_fn("Warning: can't shutdown instance: %s" % msg)
else:
- raise errors.OpExecError("Could not shutdown instance %s on node %s" %
- (instance.name, instance.primary_node))
+ raise errors.OpExecError("Could not shutdown instance %s on"
+ " node %s: %s" %
+ (instance.name, instance.primary_node, msg))
logging.info("Removing block devices for instance %s", instance.name)
"""Logical unit for querying instances.
"""
- _OP_REQP = ["output_fields", "names"]
+ _OP_REQP = ["output_fields", "names", "use_locking"]
REQ_BGL = False
- _FIELDS_STATIC = _FieldSet(*["name", "os", "pnode", "snodes",
- "admin_state", "admin_ram",
- "disk_template", "ip", "mac", "bridge",
- "sda_size", "sdb_size", "vcpus", "tags",
- "network_port", "beparams",
- "(disk).(size)/([0-9]+)",
- "(disk).(sizes)",
- "(nic).(mac|ip|bridge)/([0-9]+)",
- "(nic).(macs|ips|bridges)",
- "(disk|nic).(count)",
- "serial_no", "hypervisor", "hvparams",] +
- ["hv/%s" % name
- for name in constants.HVS_PARAMETERS] +
- ["be/%s" % name
- for name in constants.BES_PARAMETERS])
- _FIELDS_DYNAMIC = _FieldSet("oper_state", "oper_ram", "status")
+ _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
+ "admin_state",
+ "disk_template", "ip", "mac", "bridge",
+ "sda_size", "sdb_size", "vcpus", "tags",
+ "network_port", "beparams",
+ r"(disk)\.(size)/([0-9]+)",
+ r"(disk)\.(sizes)", "disk_usage",
+ r"(nic)\.(mac|ip|bridge)/([0-9]+)",
+ r"(nic)\.(macs|ips|bridges)",
+ r"(disk|nic)\.(count)",
+ "serial_no", "hypervisor", "hvparams",] +
+ ["hv/%s" % name
+ for name in constants.HVS_PARAMETERS] +
+ ["be/%s" % name
+ for name in constants.BES_PARAMETERS])
+ _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
def ExpandNames(self):
else:
self.wanted = locking.ALL_SET
- self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
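+    # node data is only needed for non-static fields; locking, in turn,
+    # is only useful if we are actually going to query the nodes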
+ self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
+ self.do_locking = self.do_node_query and self.op.use_locking
if self.do_locking:
self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
self.needed_locks[locking.LEVEL_NODE] = []
"""
all_info = self.cfg.GetAllInstancesInfo()
- if self.do_locking:
- instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
- elif self.wanted != locking.ALL_SET:
- instance_names = self.wanted
- missing = set(instance_names).difference(all_info.keys())
- if missing:
- raise errors.OpExecError(
- "Some instances were removed before retrieving their data: %s"
- % missing)
+ if self.wanted == locking.ALL_SET:
+ # caller didn't specify instance names, so ordering is not important
+ if self.do_locking:
+ instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
+ else:
+ instance_names = all_info.keys()
+ instance_names = utils.NiceSort(instance_names)
else:
- instance_names = all_info.keys()
+ # caller did specify names, so we must keep the ordering
+ if self.do_locking:
+ tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
+ else:
+ tgt_set = all_info.keys()
+ missing = set(self.wanted).difference(tgt_set)
+ if missing:
+ raise errors.OpExecError("Some instances were removed before"
+ " retrieving their data: %s" % missing)
+ instance_names = self.wanted
- instance_names = utils.NiceSort(instance_names)
instance_list = [all_info[iname] for iname in instance_names]
# begin data gathering
hv_list = list(set([inst.hypervisor for inst in instance_list]))
bad_nodes = []
- if self.do_locking:
+ off_nodes = []
+ if self.do_node_query:
live_data = {}
node_data = self.rpc.call_all_instances_info(nodes, hv_list)
for name in nodes:
result = node_data[name]
- if result:
- live_data.update(result)
- elif result == False:
+ if result.offline:
+ # offline nodes will be in both lists
+ off_nodes.append(name)
+ if result.failed:
bad_nodes.append(name)
- # else no instance is alive
+ else:
+ if result.data:
+ live_data.update(result.data)
+ # else no instance is alive
else:
live_data = dict([(name, {}) for name in instance_names])
elif field == "snodes":
val = list(instance.secondary_nodes)
elif field == "admin_state":
- val = (instance.status != "down")
+ val = instance.admin_up
elif field == "oper_state":
if instance.primary_node in bad_nodes:
val = None
else:
val = bool(live_data.get(instance.name))
elif field == "status":
- if instance.primary_node in bad_nodes:
+ if instance.primary_node in off_nodes:
+ val = "ERROR_nodeoffline"
+ elif instance.primary_node in bad_nodes:
val = "ERROR_nodedown"
else:
running = bool(live_data.get(instance.name))
if running:
- if instance.status != "down":
+ if instance.admin_up:
val = "running"
else:
val = "ERROR_up"
else:
- if instance.status != "down":
+ if instance.admin_up:
val = "ERROR_down"
else:
val = "ADMIN_down"
val = instance.FindDisk(idx).size
except errors.OpPrereqError:
val = None
+ elif field == "disk_usage": # total disk usage per node
+ disk_sizes = [{'size': disk.size} for disk in instance.disks]
+ val = _ComputeDiskSize(instance.disk_template, disk_sizes)
elif field == "tags":
val = list(instance.GetTags())
elif field == "serial_no":
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
- bep = self.cfg.GetClusterInfo().FillBE(instance)
- if instance.disk_template not in constants.DTS_NET_MIRROR:
+ bep = self.cfg.GetClusterInfo().FillBE(instance)
+ if instance.disk_template not in constants.DTS_NET_MIRROR:
+ raise errors.OpPrereqError("Instance's disk layout is not"
+ " network mirrored, cannot failover.")
+
+ secondary_nodes = instance.secondary_nodes
+ if not secondary_nodes:
+ raise errors.ProgrammerError("no secondary node but using "
+ "a mirrored disk template")
+
+ target_node = secondary_nodes[0]
+ _CheckNodeOnline(self, target_node)
+ _CheckNodeNotDrained(self, target_node)
+ # check memory requirements on the secondary node
+ _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
+ instance.name, bep[constants.BE_MEMORY],
+ instance.hypervisor)
+    # check bridge existence
+ _CheckInstanceBridgesExist(self, instance, node=target_node)
+
+ def Exec(self, feedback_fn):
+ """Failover an instance.
+
+ The failover is done by shutting it down on its present node and
+ starting it on the secondary.
+
+ """
+ instance = self.instance
+
+ source_node = instance.primary_node
+ target_node = instance.secondary_nodes[0]
+
+ feedback_fn("* checking disk consistency between source and target")
+ for dev in instance.disks:
+ # for drbd, these are drbd over lvm
+ if not _CheckDiskConsistency(self, dev, target_node, False):
+ if instance.admin_up and not self.op.ignore_consistency:
+ raise errors.OpExecError("Disk %s is degraded on target node,"
+ " aborting failover." % dev.iv_name)
+
+ feedback_fn("* shutting down instance on source node")
+ logging.info("Shutting down instance %s on node %s",
+ instance.name, source_node)
+
+ result = self.rpc.call_instance_shutdown(source_node, instance)
+ msg = result.RemoteFailMsg()
+ if msg:
+ if self.op.ignore_consistency:
+ self.proc.LogWarning("Could not shutdown instance %s on node %s."
+ " Proceeding anyway. Please make sure node"
+ " %s is down. Error details: %s",
+ instance.name, source_node, source_node, msg)
+ else:
+ raise errors.OpExecError("Could not shutdown instance %s on"
+ " node %s: %s" %
+ (instance.name, source_node, msg))
+
+ feedback_fn("* deactivating the instance's disks on source node")
+ if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
+ raise errors.OpExecError("Can't shut down the instance's disks.")
+
+ instance.primary_node = target_node
+ # distribute new instance config to the other nodes
+ self.cfg.Update(instance)
+
+ # Only start the instance if it's marked as up
+ if instance.admin_up:
+ feedback_fn("* activating the instance's disks on target node")
+ logging.info("Starting instance %s on node %s",
+ instance.name, target_node)
+
+ disks_ok, dummy = _AssembleInstanceDisks(self, instance,
+ ignore_secondaries=True)
+ if not disks_ok:
+ _ShutdownInstanceDisks(self, instance)
+ raise errors.OpExecError("Can't activate the instance's disks")
+
+ feedback_fn("* starting the instance on the target node")
+ result = self.rpc.call_instance_start(target_node, instance, None, None)
+ msg = result.RemoteFailMsg()
+ if msg:
+ _ShutdownInstanceDisks(self, instance)
+ raise errors.OpExecError("Could not start instance %s on node %s: %s" %
+ (instance.name, target_node, msg))
+
+
+class LUMigrateInstance(LogicalUnit):
+ """Migrate an instance.
+
+ This is migration without shutting down, compared to the failover,
+ which is done with shutdown.
+
+ """
+ HPATH = "instance-migrate"
+ HTYPE = constants.HTYPE_INSTANCE
+ _OP_REQP = ["instance_name", "live", "cleanup"]
+
+ REQ_BGL = False
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+
+ def DeclareLocks(self, level):
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on master, primary and secondary nodes of the instance.
+
+ """
+ env = _BuildInstanceHookEnvByObject(self, self.instance)
+ env["MIGRATE_LIVE"] = self.op.live
+ env["MIGRATE_CLEANUP"] = self.op.cleanup
+ nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
+ return env, nl, nl
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster.
+
+ """
+ instance = self.cfg.GetInstanceInfo(
+ self.cfg.ExpandInstanceName(self.op.instance_name))
+ if instance is None:
+ raise errors.OpPrereqError("Instance '%s' not known" %
+ self.op.instance_name)
+
+ if instance.disk_template != constants.DT_DRBD8:
raise errors.OpPrereqError("Instance's disk layout is not"
- " network mirrored, cannot failover.")
+ " drbd8, cannot migrate.")
secondary_nodes = instance.secondary_nodes
if not secondary_nodes:
- raise errors.ProgrammerError("no secondary node but using "
- "a mirrored disk template")
+ raise errors.ConfigurationError("No secondary node but using"
+ " drbd8 disk template")
+
+ i_be = self.cfg.GetClusterInfo().FillBE(instance)
target_node = secondary_nodes[0]
# check memory requirements on the secondary node
- _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
- instance.name, bep[constants.BE_MEMORY],
+ _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
+ instance.name, i_be[constants.BE_MEMORY],
instance.hypervisor)
    # check bridge existence
- brlist = [nic.bridge for nic in instance.nics]
- if not self.rpc.call_bridges_exist(target_node, brlist):
- raise errors.OpPrereqError("One or more target bridges %s does not"
- " exist on destination node '%s'" %
- (brlist, target_node))
+ _CheckInstanceBridgesExist(self, instance, node=target_node)
- def Exec(self, feedback_fn):
- """Failover an instance.
+ if not self.op.cleanup:
+ _CheckNodeNotDrained(self, target_node)
+ result = self.rpc.call_instance_migratable(instance.primary_node,
+ instance)
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
+ msg)
- The failover is done by shutting it down on its present node and
- starting it on the secondary.
+ self.instance = instance
+
+ def _WaitUntilSync(self):
+ """Poll with custom rpc for disk sync.
+
+ This uses our own step-based rpc call.
"""
- instance = self.instance
+ self.feedback_fn("* wait until resync is done")
+ all_done = False
+ while not all_done:
+ all_done = True
+ result = self.rpc.call_drbd_wait_sync(self.all_nodes,
+ self.nodes_ip,
+ self.instance.disks)
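+      # track the lowest sync percentage across all nodes, i.e. the
+      # slowest device, for progress reporting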
+ min_percent = 100
+ for node, nres in result.items():
+ msg = nres.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Cannot resync disks on node %s: %s" %
+ (node, msg))
+ node_done, node_percent = nres.payload
+ all_done = all_done and node_done
+ if node_percent is not None:
+ min_percent = min(min_percent, node_percent)
+ if not all_done:
+ if min_percent < 100:
+ self.feedback_fn(" - progress: %.1f%%" % min_percent)
+ time.sleep(2)
+
+ def _EnsureSecondary(self, node):
+ """Demote a node to secondary.
- source_node = instance.primary_node
- target_node = instance.secondary_nodes[0]
+ """
+ self.feedback_fn("* switching node %s to secondary mode" % node)
- feedback_fn("* checking disk consistency between source and target")
- for dev in instance.disks:
- # for drbd, these are drbd over lvm
- if not _CheckDiskConsistency(self, dev, target_node, False):
- if instance.status == "up" and not self.op.ignore_consistency:
- raise errors.OpExecError("Disk %s is degraded on target node,"
- " aborting failover." % dev.iv_name)
+ for dev in self.instance.disks:
+ self.cfg.SetDiskID(dev, node)
- feedback_fn("* shutting down instance on source node")
- logging.info("Shutting down instance %s on node %s",
- instance.name, source_node)
+ result = self.rpc.call_blockdev_close(node, self.instance.name,
+ self.instance.disks)
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Cannot change disk to secondary on node %s,"
+ " error %s" % (node, msg))
- if not self.rpc.call_instance_shutdown(source_node, instance):
- if self.op.ignore_consistency:
- self.proc.LogWarning("Could not shutdown instance %s on node %s."
- " Proceeding"
- " anyway. Please make sure node %s is down",
- instance.name, source_node, source_node)
- else:
- raise errors.OpExecError("Could not shutdown instance %s on node %s" %
- (instance.name, source_node))
+ def _GoStandalone(self):
+ """Disconnect from the network.
- feedback_fn("* deactivating the instance's disks on source node")
- if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
- raise errors.OpExecError("Can't shut down the instance's disks.")
+ """
+ self.feedback_fn("* changing into standalone mode")
+ result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
+ self.instance.disks)
+ for node, nres in result.items():
+ msg = nres.RemoteFailMsg()
+ if msg:
+        raise errors.OpExecError("Cannot disconnect disks on node %s,"
+                                 " error %s" % (node, msg))
+
+ def _GoReconnect(self, multimaster):
+ """Reconnect to the network.
- instance.primary_node = target_node
- # distribute new instance config to the other nodes
- self.cfg.Update(instance)
+ """
+ if multimaster:
+ msg = "dual-master"
+ else:
+ msg = "single-master"
+ self.feedback_fn("* changing disks into %s mode" % msg)
+ result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
+ self.instance.disks,
+ self.instance.name, multimaster)
+ for node, nres in result.items():
+ msg = nres.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Cannot change disks config on node %s,"
+ " error: %s" % (node, msg))
+
+ def _ExecCleanup(self):
+ """Try to cleanup after a failed migration.
+
+ The cleanup is done by:
+ - check that the instance is running only on one node
+ (and update the config if needed)
+ - change disks on its secondary node to secondary
+ - wait until disks are fully synchronized
+ - disconnect from the network
+ - change disks into single-master mode
+ - wait again until disks are fully synchronized
- # Only start the instance if it's marked as up
- if instance.status == "up":
- feedback_fn("* activating the instance's disks on target node")
- logging.info("Starting instance %s on node %s",
- instance.name, target_node)
+ """
+ instance = self.instance
+ target_node = self.target_node
+ source_node = self.source_node
+
+ # check running on only one node
+ self.feedback_fn("* checking where the instance actually runs"
+ " (if this hangs, the hypervisor might be in"
+ " a bad state)")
+ ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
+ for node, result in ins_l.items():
+ result.Raise()
+ if not isinstance(result.data, list):
+ raise errors.OpExecError("Can't contact node '%s'" % node)
+
+ runningon_source = instance.name in ins_l[source_node].data
+ runningon_target = instance.name in ins_l[target_node].data
+
+ if runningon_source and runningon_target:
+ raise errors.OpExecError("Instance seems to be running on two nodes,"
+ " or the hypervisor is confused. You will have"
+ " to ensure manually that it runs only on one"
+ " and restart this operation.")
+
+ if not (runningon_source or runningon_target):
+ raise errors.OpExecError("Instance does not seem to be running at all."
+ " In this case, it's safer to repair by"
+ " running 'gnt-instance stop' to ensure disk"
+ " shutdown, and then restarting it.")
+
+ if runningon_target:
+ # the migration has actually succeeded, we need to update the config
+ self.feedback_fn("* instance running on secondary node (%s),"
+ " updating config" % target_node)
+ instance.primary_node = target_node
+ self.cfg.Update(instance)
+ demoted_node = source_node
+ else:
+ self.feedback_fn("* instance confirmed to be running on its"
+ " primary node (%s)" % source_node)
+ demoted_node = target_node
- disks_ok, dummy = _AssembleInstanceDisks(self, instance,
- ignore_secondaries=True)
- if not disks_ok:
- _ShutdownInstanceDisks(self, instance)
- raise errors.OpExecError("Can't activate the instance's disks")
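+    # demote the node not running the instance to secondary, then
+    # resync the disks in single-master mode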
+ self._EnsureSecondary(demoted_node)
+ try:
+ self._WaitUntilSync()
+ except errors.OpExecError:
+ # we ignore here errors, since if the device is standalone, it
+ # won't be able to sync
+ pass
+ self._GoStandalone()
+ self._GoReconnect(False)
+ self._WaitUntilSync()
- feedback_fn("* starting the instance on the target node")
- if not self.rpc.call_instance_start(target_node, instance, None):
- _ShutdownInstanceDisks(self, instance)
- raise errors.OpExecError("Could not start instance %s on node %s." %
- (instance.name, target_node))
+ self.feedback_fn("* done")
+ def _RevertDiskStatus(self):
+ """Try to revert the disk status after a failed migration.
-def _CreateBlockDevOnPrimary(lu, node, instance, device, info):
- """Create a tree of block devices on the primary node.
+ """
+ target_node = self.target_node
+ try:
+ self._EnsureSecondary(target_node)
+ self._GoStandalone()
+ self._GoReconnect(False)
+ self._WaitUntilSync()
+ except errors.OpExecError, err:
+ self.LogWarning("Migration failed and I can't reconnect the"
+ " drives: error '%s'\n"
+                      "Please inspect and recover the instance status" %
+ str(err))
+
+ def _AbortMigration(self):
+ """Call the hypervisor code to abort a started migration.
- This always creates all devices.
+ """
+ instance = self.instance
+ target_node = self.target_node
+ migration_info = self.migration_info
+
+ abort_result = self.rpc.call_finalize_migration(target_node,
+ instance,
+ migration_info,
+ False)
+ abort_msg = abort_result.RemoteFailMsg()
+ if abort_msg:
+      logging.error("Aborting migration failed on target node %s: %s",
+                    target_node, abort_msg)
+      # Don't raise an exception here, as we still have to try to revert the
+ # disk status, even if this step failed.
+
+ def _ExecMigration(self):
+ """Migrate an instance.
+
+ The migrate is done by:
+ - change the disks into dual-master mode
+ - wait until disks are fully synchronized again
+ - migrate the instance
+ - change disks on the new secondary node (the old primary) to secondary
+ - wait until disks are fully synchronized
+ - change disks into single-master mode
- """
- if device.children:
- for child in device.children:
- if not _CreateBlockDevOnPrimary(lu, node, instance, child, info):
- return False
+ """
+ instance = self.instance
+ target_node = self.target_node
+ source_node = self.source_node
- lu.cfg.SetDiskID(device, node)
- new_id = lu.rpc.call_blockdev_create(node, device, device.size,
- instance.name, True, info)
- if not new_id:
- return False
- if device.physical_id is None:
- device.physical_id = new_id
- return True
+ self.feedback_fn("* checking disk consistency between source and target")
+ for dev in instance.disks:
+ if not _CheckDiskConsistency(self, dev, target_node, False):
+ raise errors.OpExecError("Disk %s is degraded or not fully"
+ " synchronized on target node,"
+ " aborting migrate." % dev.iv_name)
+
+ # First get the migration information from the remote node
+ result = self.rpc.call_migration_info(source_node, instance)
+ msg = result.RemoteFailMsg()
+ if msg:
+ log_err = ("Failed fetching source migration information from %s: %s" %
+ (source_node, msg))
+ logging.error(log_err)
+ raise errors.OpExecError(log_err)
+
+ self.migration_info = migration_info = result.payload
+
+ # Then switch the disks to master/master mode
+ self._EnsureSecondary(target_node)
+ self._GoStandalone()
+ self._GoReconnect(True)
+ self._WaitUntilSync()
+
+ self.feedback_fn("* preparing %s to accept the instance" % target_node)
+ result = self.rpc.call_accept_instance(target_node,
+ instance,
+ migration_info,
+ self.nodes_ip[target_node])
+
+ msg = result.RemoteFailMsg()
+ if msg:
+ logging.error("Instance pre-migration failed, trying to revert"
+ " disk status: %s", msg)
+ self._AbortMigration()
+ self._RevertDiskStatus()
+ raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
+ (instance.name, msg))
+
+ self.feedback_fn("* migrating instance to %s" % target_node)
+ time.sleep(10)
+ result = self.rpc.call_instance_migrate(source_node, instance,
+ self.nodes_ip[target_node],
+ self.op.live)
+ msg = result.RemoteFailMsg()
+ if msg:
+ logging.error("Instance migration failed, trying to revert"
+ " disk status: %s", msg)
+ self._AbortMigration()
+ self._RevertDiskStatus()
+ raise errors.OpExecError("Could not migrate instance %s: %s" %
+ (instance.name, msg))
+ time.sleep(10)
+
+ instance.primary_node = target_node
+ # distribute new instance config to the other nodes
+ self.cfg.Update(instance)
+
+ result = self.rpc.call_finalize_migration(target_node,
+ instance,
+ migration_info,
+ True)
+ msg = result.RemoteFailMsg()
+ if msg:
+      logging.error("Instance migration succeeded, but finalization failed:"
+                    " %s", msg)
+ raise errors.OpExecError("Could not finalize instance migration: %s" %
+ msg)
+
+ self._EnsureSecondary(source_node)
+ self._WaitUntilSync()
+ self._GoStandalone()
+ self._GoReconnect(False)
+ self._WaitUntilSync()
+
+ self.feedback_fn("* done")
+
+ def Exec(self, feedback_fn):
+ """Perform the migration.
+
+ """
+ self.feedback_fn = feedback_fn
+
+ self.source_node = self.instance.primary_node
+ self.target_node = self.instance.secondary_nodes[0]
+ self.all_nodes = [self.source_node, self.target_node]
+ self.nodes_ip = {
+ self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
+ self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
+ }
+ if self.op.cleanup:
+ return self._ExecCleanup()
+ else:
+ return self._ExecMigration()
-def _CreateBlockDevOnSecondary(lu, node, instance, device, force, info):
- """Create a tree of block devices on a secondary node.
+def _CreateBlockDev(lu, node, instance, device, force_create,
+ info, force_open):
+ """Create a tree of block devices on a given node.
If this device type has to be created on secondaries, create it and
all its children.
If not, just recurse to children keeping the same 'force' value.
+ @param lu: the lu on whose behalf we execute
+ @param node: the node on which to create the device
+ @type instance: L{objects.Instance}
+ @param instance: the instance which owns the device
+ @type device: L{objects.Disk}
+ @param device: the device to create
+ @type force_create: boolean
+  @param force_create: whether to force creation of this device; this
+      will be changed to True whenever we find a device for which
+      CreateOnSecondary() returns True
+ @param info: the extra 'metadata' we should attach to the device
+ (this will be represented as a LVM tag)
+ @type force_open: boolean
+  @param force_open: this parameter will be passed to the
+      L{backend.BlockdevCreate} function where it specifies
+      whether we run on primary or not, and it affects both
+      the child assembly and the device's own Open() execution
+
"""
if device.CreateOnSecondary():
- force = True
+ force_create = True
+
if device.children:
for child in device.children:
- if not _CreateBlockDevOnSecondary(lu, node, instance,
- child, force, info):
- return False
+ _CreateBlockDev(lu, node, instance, child, force_create,
+ info, force_open)
- if not force:
- return True
+ if not force_create:
+ return
+
+ _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
+
+
+def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
+ """Create a single block device on a given node.
+
+ This will not recurse over children of the device, so they must be
+ created in advance.
+
+ @param lu: the lu on whose behalf we execute
+ @param node: the node on which to create the device
+ @type instance: L{objects.Instance}
+ @param instance: the instance which owns the device
+ @type device: L{objects.Disk}
+ @param device: the device to create
+ @param info: the extra 'metadata' we should attach to the device
+ (this will be represented as a LVM tag)
+ @type force_open: boolean
+  @param force_open: this parameter will be passed to the
+      L{backend.BlockdevCreate} function where it specifies
+      whether we run on primary or not, and it affects both
+      the child assembly and the device's own Open() execution
+
+ """
lu.cfg.SetDiskID(device, node)
- new_id = lu.rpc.call_blockdev_create(node, device, device.size,
- instance.name, False, info)
- if not new_id:
- return False
+ result = lu.rpc.call_blockdev_create(node, device, device.size,
+ instance.name, force_open, info)
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Can't create block device %s on"
+ " node %s for instance %s: %s" %
+ (device, node, instance.name, msg))
if device.physical_id is None:
- device.physical_id = new_id
- return True
+ device.physical_id = result.payload
def _GenerateUniqueNames(lu, exts):
def _GenerateDiskTemplate(lu, template_name,
instance_name, primary_node,
secondary_nodes, disk_info,
- file_storage_dir, file_driver):
+ file_storage_dir, file_driver,
+ base_index):
"""Generate the entire disk layout for a given template type.
"""
names = _GenerateUniqueNames(lu, [".disk%d" % i
for i in range(disk_count)])
for idx, disk in enumerate(disk_info):
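+      # base_index offsets the instance-visible disk index, so that
+      # disks added to an existing instance continue its numbering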
+ disk_index = idx + base_index
disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
logical_id=(vgname, names[idx]),
- iv_name = "disk/%d" % idx)
+ iv_name="disk/%d" % disk_index,
+ mode=disk["mode"])
disks.append(disk_dev)
elif template_name == constants.DT_DRBD8:
if len(secondary_nodes) != 1:
minors = lu.cfg.AllocateDRBDMinor(
[primary_node, remote_node] * len(disk_info), instance_name)
- names = _GenerateUniqueNames(lu,
- [".disk%d_%s" % (i, s)
- for i in range(disk_count)
- for s in ("data", "meta")
- ])
+ names = []
+ for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % i
+ for i in range(disk_count)]):
+ names.append(lv_prefix + "_data")
+ names.append(lv_prefix + "_meta")
for idx, disk in enumerate(disk_info):
+ disk_index = idx + base_index
disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
disk["size"], names[idx*2:idx*2+2],
- "disk/%d" % idx,
+ "disk/%d" % disk_index,
minors[idx*2], minors[idx*2+1])
+ disk_dev.mode = disk["mode"]
disks.append(disk_dev)
elif template_name == constants.DT_FILE:
if len(secondary_nodes) != 0:
raise errors.ProgrammerError("Wrong template configuration")
for idx, disk in enumerate(disk_info):
-
+ disk_index = idx + base_index
disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
- iv_name="disk/%d" % idx,
+ iv_name="disk/%d" % disk_index,
logical_id=(file_driver,
"%s/disk%d" % (file_storage_dir,
- idx)))
+ disk_index)),
+ mode=disk["mode"])
disks.append(disk_dev)
else:
raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
This abstracts away some work from AddInstance.
- Args:
- instance: the instance object
-
- Returns:
- True or False showing the success of the creation process
+ @type lu: L{LogicalUnit}
+ @param lu: the logical unit on whose behalf we execute
+ @type instance: L{objects.Instance}
+ @param instance: the instance whose disks we should create
+  @raise errors.OpExecError: if any of the disks cannot be created
"""
info = _GetInstanceInfoText(instance)
+ pnode = instance.primary_node
if instance.disk_template == constants.DT_FILE:
file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
- result = lu.rpc.call_file_storage_dir_create(instance.primary_node,
- file_storage_dir)
+ result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
- if not result:
- logging.error("Could not connect to node '%s'", instance.primary_node)
- return False
+ if result.failed or not result.data:
+ raise errors.OpExecError("Could not connect to node '%s'" % pnode)
- if not result[0]:
- logging.error("Failed to create directory '%s'", file_storage_dir)
- return False
+ if not result.data[0]:
+ raise errors.OpExecError("Failed to create directory '%s'" %
+ file_storage_dir)
+ # Note: this needs to be kept in sync with adding of disks in
+ # LUSetInstanceParams
for device in instance.disks:
logging.info("Creating volume %s for instance %s",
device.iv_name, instance.name)
#HARDCODE
- for secondary_node in instance.secondary_nodes:
- if not _CreateBlockDevOnSecondary(lu, secondary_node, instance,
- device, False, info):
- logging.error("Failed to create volume %s (%s) on secondary node %s!",
- device.iv_name, device, secondary_node)
- return False
- #HARDCODE
- if not _CreateBlockDevOnPrimary(lu, instance.primary_node,
- instance, device, info):
- logging.error("Failed to create volume %s on primary!", device.iv_name)
- return False
-
- return True
+ for node in instance.all_nodes:
+ f_create = node == pnode
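+      # on the primary node we force both creation and opening of the
+      # device; on secondaries, creation happens only for device types
+      # that require it (CreateOnSecondary)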
+ _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance):
be removed, the removal will continue with the other ones (compare
with `_CreateDisks()`).
- Args:
- instance: the instance object
-
- Returns:
- True or False showing the success of the removal proces
+ @type lu: L{LogicalUnit}
+ @param lu: the logical unit on whose behalf we execute
+ @type instance: L{objects.Instance}
+ @param instance: the instance whose disks we should remove
+ @rtype: boolean
+ @return: the success of the removal
"""
logging.info("Removing block devices for instance %s", instance.name)
- result = True
+ all_result = True
for device in instance.disks:
for node, disk in device.ComputeNodeTree(instance.primary_node):
lu.cfg.SetDiskID(disk, node)
- if not lu.rpc.call_blockdev_remove(node, disk):
- lu.proc.LogWarning("Could not remove block device %s on node %s,"
- " continuing anyway", device.iv_name, node)
- result = False
+ msg = lu.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
+ if msg:
+ lu.LogWarning("Could not remove block device %s on node %s,"
+ " continuing anyway: %s", device.iv_name, node, msg)
+ all_result = False
if instance.disk_template == constants.DT_FILE:
file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
- if not lu.rpc.call_file_storage_dir_remove(instance.primary_node,
- file_storage_dir):
+ result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
+ file_storage_dir)
+ if result.failed or not result.data:
logging.error("Could not remove directory '%s'", file_storage_dir)
- result = False
+ all_result = False
- return result
+ return all_result
def _ComputeDiskSize(disk_template, disks):
"""Compute disk size requirements in the volume group
- This is currently hard-coded for the two-drive layout.
-
"""
  # Required free disk space as a function of disk template and disk sizes
req_size_dict = {
hvname,
hvparams)
for node in nodenames:
- info = hvinfo.get(node, None)
- if not info or not isinstance(info, (tuple, list)):
- raise errors.OpPrereqError("Cannot get current information"
- " from node '%s' (%s)" % (node, info))
- if not info[0]:
- raise errors.OpPrereqError("Hypervisor parameter validation failed:"
- " %s" % info[1])
+ info = hvinfo[node]
+ if info.offline:
+ continue
+ msg = info.RemoteFailMsg()
+ if msg:
+ raise errors.OpPrereqError("Hypervisor parameter validation"
+ " failed on node %s: %s" % (node, msg))
class LUCreateInstance(LogicalUnit):
",".join(enabled_hvs)))
# check hypervisor parameter syntax (locally)
-
- filled_hvp = cluster.FillDict(cluster.hvparams[self.op.hypervisor],
+ utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
+ filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
self.op.hvparams)
hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
hv_type.CheckParameterSyntax(filled_hvp)
# fill and remember the beparams dict
- self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
+ utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
+ self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
self.op.beparams)
#### instance parameters check
# NIC buildup
self.nics = []
- for nic in self.op.nics:
+ for idx, nic in enumerate(self.op.nics):
+ nic_mode_req = nic.get("mode", None)
+ nic_mode = nic_mode_req
+ if nic_mode is None:
+ nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
+
+ # in routed mode, for the first nic, the default ip is 'auto'
+ if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
+ default_ip_mode = constants.VALUE_AUTO
+ else:
+ default_ip_mode = constants.VALUE_NONE
+
# ip validity checks
- ip = nic.get("ip", None)
- if ip is None or ip.lower() == "none":
+ ip = nic.get("ip", default_ip_mode)
+ if ip is None or ip.lower() == constants.VALUE_NONE:
nic_ip = None
elif ip.lower() == constants.VALUE_AUTO:
nic_ip = hostname1.ip
" like a valid IP" % ip)
nic_ip = ip
+ # TODO: check the ip for uniqueness !!
+ if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
+ raise errors.OpPrereqError("Routed nic mode requires an ip address")
+
# MAC address verification
mac = nic.get("mac", constants.VALUE_AUTO)
if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
raise errors.OpPrereqError("Invalid MAC address specified: %s" %
mac)
# bridge verification
- bridge = nic.get("bridge", self.cfg.GetDefBridge())
- self.nics.append(objects.NIC(mac=mac, ip=nic_ip, bridge=bridge))
+ bridge = nic.get("bridge", None)
+ link = nic.get("link", None)
+ if bridge and link:
+        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
+                                   " at the same time")
+ elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
+ raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
+ elif bridge:
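+        # accept 'bridge' as a backwards-compatible alias for 'link'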
+ link = bridge
+
+ nicparams = {}
+ if nic_mode_req:
+ nicparams[constants.NIC_MODE] = nic_mode_req
+ if link:
+ nicparams[constants.NIC_LINK] = link
+
+ check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
+ nicparams)
+ objects.NIC.CheckParameterSyntax(check_params)
+ self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
# disk checks/pre-build
self.disks = []
src_node = getattr(self.op, "src_node", None)
src_path = getattr(self.op, "src_path", None)
- if src_node is None or src_path is None:
- raise errors.OpPrereqError("Importing an instance requires source"
- " node and path options")
+ if src_path is None:
+ self.op.src_path = src_path = self.op.instance_name
- if not os.path.isabs(src_path):
- raise errors.OpPrereqError("The source path must be absolute")
-
- self.op.src_node = src_node = self._ExpandNode(src_node)
- if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
- self.needed_locks[locking.LEVEL_NODE].append(src_node)
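+    # without an explicit source node we search all nodes for the
+    # export, which only works with a relative (export name) path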
+ if src_node is None:
+ self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+ self.op.src_node = None
+ if os.path.isabs(src_path):
+ raise errors.OpPrereqError("Importing an instance from an absolute"
+ " path requires a source node option.")
+ else:
+ self.op.src_node = src_node = self._ExpandNode(src_node)
+ if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
+ self.needed_locks[locking.LEVEL_NODE].append(src_node)
+ if not os.path.isabs(src_path):
+ self.op.src_path = src_path = \
+ os.path.join(constants.EXPORT_DIR, src_path)
else: # INSTANCE_CREATE
if getattr(self.op, "os_type", None) is None:
mem_size=self.be_full[constants.BE_MEMORY],
disks=self.disks,
nics=nics,
+ hypervisor=self.op.hypervisor,
)
ial.Run(self.op.iallocator)
"""
env = {
- "INSTANCE_DISK_TEMPLATE": self.op.disk_template,
- "INSTANCE_DISK_SIZE": ",".join(str(d["size"]) for d in self.disks),
- "INSTANCE_ADD_MODE": self.op.mode,
+ "ADD_MODE": self.op.mode,
}
if self.op.mode == constants.INSTANCE_IMPORT:
- env["INSTANCE_SRC_NODE"] = self.op.src_node
- env["INSTANCE_SRC_PATH"] = self.op.src_path
- env["INSTANCE_SRC_IMAGES"] = self.src_images
+ env["SRC_NODE"] = self.op.src_node
+ env["SRC_PATH"] = self.op.src_path
+ env["SRC_IMAGES"] = self.src_images
- env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
+ env.update(_BuildInstanceHookEnv(
+ name=self.op.instance_name,
primary_node=self.op.pnode,
secondary_nodes=self.secondaries,
- status=self.instance_status,
+ status=self.op.start,
os_type=self.op.os_type,
memory=self.be_full[constants.BE_MEMORY],
vcpus=self.be_full[constants.BE_VCPUS],
- nics=[(n.ip, n.bridge, n.mac) for n in self.nics],
+ nics=_PreBuildNICHooksList(self, self.nics),
+ disk_template=self.op.disk_template,
+ disks=[(d["size"], d["mode"]) for d in self.disks],
))
nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
raise errors.OpPrereqError("Cluster does not support lvm-based"
" instances")
-
if self.op.mode == constants.INSTANCE_IMPORT:
src_node = self.op.src_node
src_path = self.op.src_path
- export_info = self.rpc.call_export_info(src_node, src_path)
-
- if not export_info:
+ if src_node is None:
+ exp_list = self.rpc.call_export_list(
+ self.acquired_locks[locking.LEVEL_NODE])
+ found = False
+ for node in exp_list:
+ if not exp_list[node].failed and src_path in exp_list[node].data:
+ found = True
+ self.op.src_node = src_node = node
+ self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
+ src_path)
+ break
+ if not found:
+ raise errors.OpPrereqError("No export found for relative path %s" %
+ src_path)
+
+ _CheckNodeOnline(self, src_node)
+ result = self.rpc.call_export_info(src_node, src_path)
+ result.Raise()
+ if not result.data:
raise errors.OpPrereqError("No export found in dir %s" % src_path)
+ export_info = result.data
if not export_info.has_section(constants.INISECT_EXP):
raise errors.ProgrammerError("Corrupted export config")
if instance_disks < export_disks:
raise errors.OpPrereqError("Not enough disks to import."
" (instance: %d, export: %d)" %
- (2, export_disks))
+ (instance_disks, export_disks))
self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
disk_images = []
self.src_images = disk_images
- if self.op.mac == constants.VALUE_AUTO:
- old_name = export_info.get(constants.INISECT_INS, 'name')
- if self.op.instance_name == old_name:
- # FIXME: adjust every nic, when we'll be able to create instances
- # with more than one
- if int(export_info.get(constants.INISECT_INS, 'nic_count')) >= 1:
- self.op.mac = export_info.get(constants.INISECT_INS, 'nic_0_mac')
+ old_name = export_info.get(constants.INISECT_INS, 'name')
+ # FIXME: int() here could throw a ValueError on broken exports
+ exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
+ if self.op.instance_name == old_name:
+ for idx, nic in enumerate(self.nics):
+        if nic.mac == constants.VALUE_AUTO and exp_nic_count > idx:
+ nic_mac_ini = 'nic%d_mac' % idx
+ nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
+ # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
# ip ping checks (we use the same ip that was resolved in ExpandNames)
-
if self.op.start and not self.op.ip_check:
raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
" adding an instance in start mode")
raise errors.OpPrereqError("IP %s of instance %s already in use" %
(self.check_ip, self.op.instance_name))
+ #### mac address generation
+    # By generating the mac address here, both the allocator and the hooks
+    # get the real final mac address rather than the 'auto' or 'generate'
+    # value. There is a race condition between the generation and the
+    # instance object creation, which means that we know the mac is valid
+    # now, but we're not sure it will still be when we actually add the
+    # instance. If things go bad, adding the instance will abort because
+    # of a duplicate mac, and the creation job will fail.
+ for nic in self.nics:
+ if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+ nic.mac = self.cfg.GenerateMAC()
+
#### allocator run
if self.op.iallocator is not None:
self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
assert self.pnode is not None, \
"Cannot retrieve locked node %s" % self.op.pnode
+ if pnode.offline:
+ raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
+ pnode.name)
+ if pnode.drained:
+ raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
+ pnode.name)
+
self.secondaries = []
# mirror node verification
if self.op.snode == pnode.name:
raise errors.OpPrereqError("The secondary node cannot be"
" the primary node.")
+ _CheckNodeOnline(self, self.op.snode)
+ _CheckNodeNotDrained(self, self.op.snode)
self.secondaries.append(self.op.snode)
nodenames = [pnode.name] + self.secondaries
nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
self.op.hypervisor)
for node in nodenames:
- info = nodeinfo.get(node, None)
+ info = nodeinfo[node]
+ info.Raise()
+ info = info.data
if not info:
raise errors.OpPrereqError("Cannot get current information"
" from node '%s'" % node)
_CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# os verification
- os_obj = self.rpc.call_os_get(pnode.name, self.op.os_type)
- if not os_obj:
+ result = self.rpc.call_os_get(pnode.name, self.op.os_type)
+ result.Raise()
+ if not isinstance(result.data, objects.OS):
raise errors.OpPrereqError("OS '%s' not in supported os list for"
" primary node" % self.op.os_type)
- # bridge check on primary node
- bridges = [n.bridge for n in self.nics]
- if not self.rpc.call_bridges_exist(self.pnode.name, bridges):
- raise errors.OpPrereqError("one of the target bridges '%s' does not"
- " exist on"
- " destination node '%s'" %
- (",".join(bridges), pnode.name))
+ _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
# memory check on primary node
if self.op.start:
self.be_full[constants.BE_MEMORY],
self.op.hypervisor)
- if self.op.start:
- self.instance_status = 'up'
- else:
- self.instance_status = 'down'
-
def Exec(self, feedback_fn):
"""Create and add the instance to the cluster.
instance = self.op.instance_name
pnode_name = self.pnode.name
- for nic in self.nics:
- if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
- nic.mac = self.cfg.GenerateMAC()
-
ht_kind = self.op.hypervisor
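+    # hypervisors in HTS_REQ_PORT need a cluster-wide allocated network
+    # port (e.g. for their remote console)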
if ht_kind in constants.HTS_REQ_PORT:
network_port = self.cfg.AllocatePort()
self.secondaries,
self.disks,
file_storage_dir,
- self.op.file_driver)
+ self.op.file_driver,
+ 0)
iobj = objects.Instance(name=instance, os=self.op.os_type,
primary_node=pnode_name,
nics=self.nics, disks=disks,
disk_template=self.op.disk_template,
- status=self.instance_status,
+ admin_up=False,
network_port=network_port,
beparams=self.op.beparams,
hvparams=self.op.hvparams,
)
feedback_fn("* creating instance disks...")
- if not _CreateDisks(self, iobj):
- _RemoveDisks(self, iobj)
- self.cfg.ReleaseDRBDMinors(instance)
- raise errors.OpExecError("Device creation failed, reverting...")
+ try:
+ _CreateDisks(self, iobj)
+ except errors.OpExecError:
+ self.LogWarning("Device creation failed, reverting...")
+ try:
+ _RemoveDisks(self, iobj)
+ finally:
+ self.cfg.ReleaseDRBDMinors(instance)
+ raise
feedback_fn("adding instance %s to cluster config" % instance)
# Declare that we don't want to remove the instance lock anymore, as we've
# added the instance to the config
del self.remove_locks[locking.LEVEL_INSTANCE]
- # Remove the temp. assignements for the instance's drbds
- self.cfg.ReleaseDRBDMinors(instance)
+ # Unlock all the nodes
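+    # (in the import case the source node stays locked, as the os import
+    # below still reads the export from it)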
+ if self.op.mode == constants.INSTANCE_IMPORT:
+ nodes_keep = [self.op.src_node]
+ nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
+ if node != self.op.src_node]
+ self.context.glm.release(locking.LEVEL_NODE, nodes_release)
+ self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
+ else:
+ self.context.glm.release(locking.LEVEL_NODE)
+ del self.acquired_locks[locking.LEVEL_NODE]
if self.op.wait_for_sync:
disk_abort = not _WaitForSync(self, iobj)
if iobj.disk_template != constants.DT_DISKLESS:
if self.op.mode == constants.INSTANCE_CREATE:
feedback_fn("* running the instance OS create scripts...")
- if not self.rpc.call_instance_os_add(pnode_name, iobj):
- raise errors.OpExecError("could not add os for instance %s"
- " on node %s" %
- (instance, pnode_name))
+ result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Could not add os for instance %s"
+ " on node %s: %s" %
+ (instance, pnode_name, msg))
elif self.op.mode == constants.INSTANCE_IMPORT:
feedback_fn("* running the instance OS import scripts...")
import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
src_node, src_images,
cluster_name)
- for idx, result in enumerate(import_result):
+ import_result.Raise()
+ for idx, result in enumerate(import_result.data):
if not result:
- self.LogWarning("Could not image %s for on instance %s, disk %d,"
- " on node %s" % (src_images[idx], instance, idx,
- pnode_name))
+ self.LogWarning("Could not import the image %s for instance"
+ " %s, disk %d, on node %s" %
+ (src_images[idx], instance, idx, pnode_name))
else:
# also checked in the prereq part
raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
% self.op.mode)
if self.op.start:
+ iobj.admin_up = True
+ self.cfg.Update(iobj)
logging.info("Starting instance %s on node %s", instance, pnode_name)
feedback_fn("* starting instance...")
- if not self.rpc.call_instance_start(pnode_name, iobj, None):
- raise errors.OpExecError("Could not start instance")
+ result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Could not start instance: %s" % msg)
class LUConnectConsole(NoHooksLU):
self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
+ _CheckNodeOnline(self, self.instance.primary_node)
def Exec(self, feedback_fn):
"""Connect to the console of an instance
node_insts = self.rpc.call_instance_list([node],
[instance.hypervisor])[node]
- if node_insts is False:
- raise errors.OpExecError("Can't connect to node %s." % node)
+ node_insts.Raise()
- if instance.name not in node_insts:
+ if instance.name not in node_insts.data:
raise errors.OpExecError("Instance %s is not running." % instance.name)
logging.debug("Connecting to console of %s on %s", instance.name, node)
hyper = hypervisor.GetHypervisor(instance.hypervisor)
- console_cmd = hyper.GetShellCommandForConsole(instance)
+ cluster = self.cfg.GetClusterInfo()
+ # beparams and hvparams are passed separately, to avoid editing the
+ # instance and then saving the defaults in the instance itself.
+ hvparams = cluster.FillHV(instance)
+ beparams = cluster.FillBE(instance)
+ console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
# build ssh cmdline
return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
_OP_REQP = ["instance_name", "mode", "disks"]
REQ_BGL = False
- def ExpandNames(self):
- self._ExpandAndLockInstance()
-
+ def CheckArguments(self):
if not hasattr(self.op, "remote_node"):
self.op.remote_node = None
-
- ia_name = getattr(self.op, "iallocator", None)
- if ia_name is not None:
- if self.op.remote_node is not None:
+ if not hasattr(self.op, "iallocator"):
+ self.op.iallocator = None
+
+ # check for valid parameter combination
+ cnt = [self.op.remote_node, self.op.iallocator].count(None)
+ if self.op.mode == constants.REPLACE_DISK_CHG:
+ if cnt == 2:
+ raise errors.OpPrereqError("When changing the secondary either an"
+ " iallocator script must be used or the"
+ " new node given")
+ elif cnt == 0:
raise errors.OpPrereqError("Give either the iallocator or the new"
" secondary, not both")
+ else: # not replacing the secondary
+ if cnt != 2:
+ raise errors.OpPrereqError("The iallocator and new node options can"
+ " be used only when changing the"
+ " secondary node")
+
+ def ExpandNames(self):
+ self._ExpandAndLockInstance()
+
+ if self.op.iallocator is not None:
self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
elif self.op.remote_node is not None:
remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
raise errors.OpPrereqError("Node '%s' not known" %
self.op.remote_node)
self.op.remote_node = remote_node
+ # Warning: do not remove the locking of the new secondary here
+ # unless DRBD8.AddChildren is changed to work in parallel;
+ # currently it doesn't since parallel invocations of
+ # FindUnusedMinor will conflict
self.needed_locks[locking.LEVEL_NODE] = [remote_node]
self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
else:
"Cannot retrieve locked instance %s" % self.op.instance_name
self.instance = instance
- if instance.disk_template not in constants.DTS_NET_MIRROR:
- raise errors.OpPrereqError("Instance's disk layout is not"
- " network mirrored.")
+ if instance.disk_template != constants.DT_DRBD8:
+ raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
+ " instances")
if len(instance.secondary_nodes) != 1:
raise errors.OpPrereqError("The instance has a strange layout,"
self.sec_node = instance.secondary_nodes[0]
- ia_name = getattr(self.op, "iallocator", None)
- if ia_name is not None:
+ if self.op.iallocator is not None:
self._RunAllocator()
remote_node = self.op.remote_node
raise errors.OpPrereqError("The specified node is the primary node of"
" the instance.")
elif remote_node == self.sec_node:
- if self.op.mode == constants.REPLACE_DISK_SEC:
- # this is for DRBD8, where we can't execute the same mode of
- # replacement as for drbd7 (no different port allocated)
- raise errors.OpPrereqError("Same secondary given, cannot execute"
- " replacement")
- if instance.disk_template == constants.DT_DRBD8:
- if (self.op.mode == constants.REPLACE_DISK_ALL and
- remote_node is not None):
- # switch to replace secondary mode
- self.op.mode = constants.REPLACE_DISK_SEC
-
- if self.op.mode == constants.REPLACE_DISK_ALL:
- raise errors.OpPrereqError("Template 'drbd' only allows primary or"
- " secondary disk replacement, not"
- " both at once")
- elif self.op.mode == constants.REPLACE_DISK_PRI:
- if remote_node is not None:
- raise errors.OpPrereqError("Template 'drbd' does not allow changing"
- " the secondary while doing a primary"
- " node disk replacement")
- self.tgt_node = instance.primary_node
- self.oth_node = instance.secondary_nodes[0]
- elif self.op.mode == constants.REPLACE_DISK_SEC:
- self.new_node = remote_node # this can be None, in which case
- # we don't change the secondary
- self.tgt_node = instance.secondary_nodes[0]
- self.oth_node = instance.primary_node
- else:
- raise errors.ProgrammerError("Unhandled disk replace mode")
+ raise errors.OpPrereqError("The specified node is already the"
+ " secondary node of the instance.")
+
+ if self.op.mode == constants.REPLACE_DISK_PRI:
+ n1 = self.tgt_node = instance.primary_node
+ n2 = self.oth_node = self.sec_node
+ elif self.op.mode == constants.REPLACE_DISK_SEC:
+ n1 = self.tgt_node = self.sec_node
+ n2 = self.oth_node = instance.primary_node
+ elif self.op.mode == constants.REPLACE_DISK_CHG:
+ n1 = self.new_node = remote_node
+ n2 = self.oth_node = instance.primary_node
+ self.tgt_node = self.sec_node
+ _CheckNodeNotDrained(self, remote_node)
+ else:
+ raise errors.ProgrammerError("Unhandled disk replace mode")
+
+ _CheckNodeOnline(self, n1)
+ _CheckNodeOnline(self, n2)
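+    # an empty disks list means "replace all disks"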
if not self.op.disks:
self.op.disks = range(len(instance.disks))
"""Replace a disk on the primary or secondary for dbrd8.
The algorithm for replace is quite complicated:
- - for each disk to be replaced:
- - create new LVs on the target node with unique names
- - detach old LVs from the drbd device
- - rename old LVs to name_replaced.<time_t>
- - rename new LVs to old LVs
- - attach the new LVs (with the old names now) to the drbd device
- - wait for sync across all devices
- - for each modified disk:
- - remove old LVs (which have the name name_replaces.<time_t>)
+
+ 1. for each disk to be replaced:
+
+ 1. create new LVs on the target node with unique names
+ 1. detach old LVs from the drbd device
+ 1. rename old LVs to name_replaced.<time_t>
+ 1. rename new LVs to old LVs
+ 1. attach the new LVs (with the old names now) to the drbd device
+
+ 1. wait for sync across all devices
+
+ 1. for each modified disk:
+
+      1. remove old LVs (which have the name name_replaced.<time_t>)
Failures are not very well handled.
if not results:
raise errors.OpExecError("Can't list volume groups on the nodes")
for node in oth_node, tgt_node:
- res = results.get(node, False)
- if not res or my_vg not in res:
+ res = results[node]
+ if res.failed or not res.data or my_vg not in res.data:
raise errors.OpExecError("Volume group '%s' not found on %s" %
(my_vg, node))
for idx, dev in enumerate(instance.disks):
for node in tgt_node, oth_node:
info("checking disk/%d on %s" % (idx, node))
cfg.SetDiskID(dev, node)
- if not self.rpc.call_blockdev_find(node, dev):
- raise errors.OpExecError("Can't find disk/%d on node %s" %
- (idx, node))
+ result = self.rpc.call_blockdev_find(node, dev)
+ msg = result.RemoteFailMsg()
+ if not msg and not result.payload:
+ msg = "disk not found"
+ if msg:
+ raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
+ (idx, node, msg))
# Step: check other node consistency
self.proc.LogStep(2, steps_total, "check peer consistency")
iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
info("creating new local storage on %s for %s" %
(tgt_node, dev.iv_name))
- # since we *always* want to create this LV, we use the
- # _Create...OnPrimary (which forces the creation), even if we
- # are talking about the secondary node
+ # we pass force_create=True to force the LVM creation
for new_lv in new_lvs:
- if not _CreateBlockDevOnPrimary(self, tgt_node, instance, new_lv,
- _GetInstanceInfoText(instance)):
- raise errors.OpExecError("Failed to create new LV named '%s' on"
- " node '%s'" %
- (new_lv.logical_id[1], tgt_node))
+ _CreateBlockDev(self, tgt_node, instance, new_lv, True,
+ _GetInstanceInfoText(instance), False)
# Step: for each lv, detach+rename*2+attach
self.proc.LogStep(4, steps_total, "change drbd configuration")
for dev, old_lvs, new_lvs in iv_names.itervalues():
info("detaching %s drbd from local storage" % dev.iv_name)
- if not self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs):
+ result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
+ msg = result.RemoteFailMsg()
+ if msg:
raise errors.OpExecError("Can't detach drbd from local storage on node"
- " %s for device %s" % (tgt_node, dev.iv_name))
+ " %s for device %s: %s" %
+ (tgt_node, dev.iv_name, msg))
#dev.children = []
#cfg.Update(instance)
# build the rename list based on what LVs exist on the node
rlist = []
for to_ren in old_lvs:
- find_res = self.rpc.call_blockdev_find(tgt_node, to_ren)
- if find_res is not None: # device exists
+ result = self.rpc.call_blockdev_find(tgt_node, to_ren)
+ if not result.RemoteFailMsg() and result.payload:
+ # device exists
rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
info("renaming the old LVs on the target node")
- if not self.rpc.call_blockdev_rename(tgt_node, rlist):
- raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
+ result = self.rpc.call_blockdev_rename(tgt_node, rlist)
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Can't rename old LVs on node %s: %s" %
+ (tgt_node, msg))
# now we rename the new LVs to the old LVs
info("renaming the new LVs on the target node")
rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
- if not self.rpc.call_blockdev_rename(tgt_node, rlist):
- raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)
+ result = self.rpc.call_blockdev_rename(tgt_node, rlist)
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Can't rename new LVs on node %s: %s" %
+ (tgt_node, msg))
for old, new in zip(old_lvs, new_lvs):
new.logical_id = old.logical_id
# now that the new lvs have the old name, we can add them to the device
info("adding new mirror component on %s" % tgt_node)
- if not self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs):
+ result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
+ msg = result.RemoteFailMsg()
+ if msg:
for new_lv in new_lvs:
- if not self.rpc.call_blockdev_remove(tgt_node, new_lv):
- warning("Can't rollback device %s", hint="manually cleanup unused"
- " logical volumes")
- raise errors.OpExecError("Can't add local storage to drbd")
+        msg2 = self.rpc.call_blockdev_remove(tgt_node, new_lv).RemoteFailMsg()
+        if msg2:
+          warning("Can't rollback device %s: %s", dev, msg2,
+                  hint="cleanup manually the unused logical volumes")
+ raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
dev.children = new_lvs
cfg.Update(instance)
# so check manually all the devices
for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
cfg.SetDiskID(dev, instance.primary_node)
- is_degr = self.rpc.call_blockdev_find(instance.primary_node, dev)[5]
- if is_degr:
+ result = self.rpc.call_blockdev_find(instance.primary_node, dev)
+ msg = result.RemoteFailMsg()
+ if not msg and not result.payload:
+ msg = "disk not found"
+ if msg:
+ raise errors.OpExecError("Can't find DRBD device %s: %s" %
+ (name, msg))
+ if result.payload[5]:
raise errors.OpExecError("DRBD device %s is degraded!" % name)
# Step: remove old storage
info("remove logical volumes for %s" % name)
for lv in old_lvs:
cfg.SetDiskID(lv, tgt_node)
- if not self.rpc.call_blockdev_remove(tgt_node, lv):
- warning("Can't remove old LV", hint="manually remove unused LVs")
+ msg = self.rpc.call_blockdev_remove(tgt_node, lv).RemoteFailMsg()
+ if msg:
+ warning("Can't remove old LV: %s" % msg,
+ hint="manually remove unused LVs")
continue
def _ExecD8Secondary(self, feedback_fn):
warning, info = (self.proc.LogWarning, self.proc.LogInfo)
instance = self.instance
iv_names = {}
- vgname = self.cfg.GetVGName()
# start of work
cfg = self.cfg
old_node = self.tgt_node
new_node = self.new_node
pri_node = instance.primary_node
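+    # secondary IPs, needed by the drbd_disconnect_net/drbd_attach_net
+    # RPCs to (de)configure the network part of the drbd devices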
+ nodes_ip = {
+ old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
+ new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
+ pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
+ }
# Step: check device activation
self.proc.LogStep(1, steps_total, "check device existence")
info("checking volume groups")
my_vg = cfg.GetVGName()
results = self.rpc.call_vg_list([pri_node, new_node])
- if not results:
- raise errors.OpExecError("Can't list volume groups on the nodes")
for node in pri_node, new_node:
- res = results.get(node, False)
- if not res or my_vg not in res:
+ res = results[node]
+ if res.failed or not res.data or my_vg not in res.data:
raise errors.OpExecError("Volume group '%s' not found on %s" %
(my_vg, node))
for idx, dev in enumerate(instance.disks):
continue
info("checking disk/%d on %s" % (idx, pri_node))
cfg.SetDiskID(dev, pri_node)
- if not self.rpc.call_blockdev_find(pri_node, dev):
- raise errors.OpExecError("Can't find disk/%d on node %s" %
- (idx, pri_node))
+ result = self.rpc.call_blockdev_find(pri_node, dev)
+ msg = result.RemoteFailMsg()
+ if not msg and not result.payload:
+ msg = "disk not found"
+ if msg:
+ raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
+ (idx, pri_node, msg))
# Step: check other node consistency
self.proc.LogStep(2, steps_total, "check peer consistency")
# Step: create new storage
self.proc.LogStep(3, steps_total, "allocate new storage")
for idx, dev in enumerate(instance.disks):
- size = dev.size
info("adding new local storage on %s for disk/%d" %
(new_node, idx))
- # since we *always* want to create this LV, we use the
- # _Create...OnPrimary (which forces the creation), even if we
- # are talking about the secondary node
+ # we pass force_create=True to force LVM creation
for new_lv in dev.children:
- if not _CreateBlockDevOnPrimary(self, new_node, instance, new_lv,
- _GetInstanceInfoText(instance)):
- raise errors.OpExecError("Failed to create new LV named '%s' on"
- " node '%s'" %
- (new_lv.logical_id[1], new_node))
+ _CreateBlockDev(self, new_node, instance, new_lv, True,
+ _GetInstanceInfoText(instance), False)
# Step 4: drbd minors and drbd setup changes
# after this, we must manually remove the drbd minors on both the
for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
size = dev.size
info("activating a new drbd on %s for disk/%d" % (new_node, idx))
- # create new devices on new_node
- if pri_node == dev.logical_id[0]:
- new_logical_id = (pri_node, new_node,
- dev.logical_id[2], dev.logical_id[3], new_minor,
- dev.logical_id[5])
+ # create new devices on new_node; note that we create two IDs:
+ # one without port, so the drbd will be activated without
+ # networking information on the new node at this stage, and one
+      # with network, for the later activation in step 4
+ (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
+ if pri_node == o_node1:
+ p_minor = o_minor1
else:
- new_logical_id = (new_node, pri_node,
- dev.logical_id[2], new_minor, dev.logical_id[4],
- dev.logical_id[5])
- iv_names[idx] = (dev, dev.children, new_logical_id)
+ p_minor = o_minor2
+
+ new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
+ new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)
+
+ iv_names[idx] = (dev, dev.children, new_net_id)
logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
- new_logical_id)
+ new_net_id)
new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
- logical_id=new_logical_id,
+ logical_id=new_alone_id,
children=dev.children)
- if not _CreateBlockDevOnSecondary(self, new_node, instance,
- new_drbd, False,
- _GetInstanceInfoText(instance)):
+ try:
+ _CreateSingleBlockDev(self, new_node, instance, new_drbd,
+ _GetInstanceInfoText(instance), False)
+ except errors.GenericError:
self.cfg.ReleaseDRBDMinors(instance.name)
- raise errors.OpExecError("Failed to create new DRBD on"
- " node '%s'" % new_node)
+ raise
for idx, dev in enumerate(instance.disks):
# we have new devices, shutdown the drbd on the old secondary
info("shutting down drbd for disk/%d on old node" % idx)
cfg.SetDiskID(dev, old_node)
- if not self.rpc.call_blockdev_shutdown(old_node, dev):
- warning("Failed to shutdown drbd for disk/%d on old node" % idx,
+ msg = self.rpc.call_blockdev_shutdown(old_node, dev).RemoteFailMsg()
+ if msg:
+ warning("Failed to shutdown drbd for disk/%d on old node: %s" %
+ (idx, msg),
hint="Please cleanup this device manually as soon as possible")
info("detaching primary drbds from the network (=> standalone)")
- done = 0
- for idx, dev in enumerate(instance.disks):
- cfg.SetDiskID(dev, pri_node)
- # set the network part of the physical (unique in bdev terms) id
- # to None, meaning detach from network
- dev.physical_id = (None, None, None, None) + dev.physical_id[4:]
- # and 'find' the device, which will 'fix' it to match the
- # standalone state
- if self.rpc.call_blockdev_find(pri_node, dev):
- done += 1
- else:
- warning("Failed to detach drbd disk/%d from network, unusual case" %
- idx)
-    if not done:
-      # no detaches succeeded (very unlikely)
+    result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
+                                               instance.disks)[pri_node]
+    msg = result.RemoteFailMsg()
+    if msg:
+      # detaches didn't succeed (unlikely)
self.cfg.ReleaseDRBDMinors(instance.name)
- raise errors.OpExecError("Can't detach at least one DRBD from old node")
+ raise errors.OpExecError("Can't detach the disks from the network on"
+ " old node: %s" % (msg,))
# if we managed to detach at least one, we update all the disks of
# the instance to point to the new secondary
dev.logical_id = new_logical_id
cfg.SetDiskID(dev, pri_node)
cfg.Update(instance)
- # we can remove now the temp minors as now the new values are
- # written to the config file (and therefore stable)
- self.cfg.ReleaseDRBDMinors(instance.name)
# and now perform the drbd attach
info("attaching primary drbds to new secondary (standalone => connected)")
- failures = []
- for idx, dev in enumerate(instance.disks):
- info("attaching primary drbd for disk/%d to new secondary node" % idx)
- # since the attach is smart, it's enough to 'find' the device,
- # it will automatically activate the network, if the physical_id
- # is correct
- cfg.SetDiskID(dev, pri_node)
- logging.debug("Disk to attach: %s", dev)
- if not self.rpc.call_blockdev_find(pri_node, dev):
- warning("can't attach drbd disk/%d to new secondary!" % idx,
- "please do a gnt-instance info to see the status of disks")
+ result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
+ instance.disks, instance.name,
+ False)
+ for to_node, to_result in result.items():
+ msg = to_result.RemoteFailMsg()
+ if msg:
+ warning("can't attach drbd disks on node %s: %s", to_node, msg,
+ hint="please do a gnt-instance info to see the"
+ " status of disks")
# this can fail as the old devices are degraded and _WaitForSync
# does a combined result over all disks, so we don't check its
# so check manually all the devices
for idx, (dev, old_lvs, _) in iv_names.iteritems():
cfg.SetDiskID(dev, pri_node)
- is_degr = self.rpc.call_blockdev_find(pri_node, dev)[5]
- if is_degr:
+ result = self.rpc.call_blockdev_find(pri_node, dev)
+ msg = result.RemoteFailMsg()
+ if not msg and not result.payload:
+ msg = "disk not found"
+ if msg:
+ raise errors.OpExecError("Can't find DRBD device disk/%d: %s" %
+ (idx, msg))
+ if result.payload[5]:
raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
self.proc.LogStep(6, steps_total, "removing old storage")
info("remove logical volumes for disk/%d" % idx)
for lv in old_lvs:
cfg.SetDiskID(lv, old_node)
- if not self.rpc.call_blockdev_remove(old_node, lv):
- warning("Can't remove LV on old secondary",
+ msg = self.rpc.call_blockdev_remove(old_node, lv).RemoteFailMsg()
+ if msg:
+ warning("Can't remove LV on old secondary: %s", msg,
hint="Cleanup stale volumes by hand")
def Exec(self, feedback_fn):
instance = self.instance
# Activate the instance disks if we're replacing them on a down instance
- if instance.status == "down":
+ if not instance.admin_up:
_StartInstanceDisks(self, instance, True)
- if instance.disk_template == constants.DT_DRBD8:
- if self.op.remote_node is None:
- fn = self._ExecD8DiskOnly
- else:
- fn = self._ExecD8Secondary
+ if self.op.mode == constants.REPLACE_DISK_CHG:
+ fn = self._ExecD8Secondary
else:
- raise errors.ProgrammerError("Unhandled disk replacement case")
+ fn = self._ExecD8DiskOnly
ret = fn(feedback_fn)
# Deactivate the instance disks if we're replacing them on a down instance
- if instance.status == "down":
+ if not instance.admin_up:
_SafeShutdownInstanceDisks(self, instance)
return ret
instance = self.cfg.GetInstanceInfo(self.op.instance_name)
assert instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
+ nodenames = list(instance.all_nodes)
+ for node in nodenames:
+ _CheckNodeOnline(self, node)
+
self.instance = instance
self.disk = instance.FindDisk(self.op.disk)
- nodenames = [instance.primary_node] + list(instance.secondary_nodes)
nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
instance.hypervisor)
for node in nodenames:
- info = nodeinfo.get(node, None)
- if not info:
+ info = nodeinfo[node]
+ if info.failed or not info.data:
raise errors.OpPrereqError("Cannot get current information"
" from node '%s'" % node)
- vg_free = info.get('vg_free', None)
+ vg_free = info.data.get('vg_free', None)
if not isinstance(vg_free, int):
raise errors.OpPrereqError("Can't compute free disk space on"
" node %s" % node)
- if self.op.amount > info['vg_free']:
+ if self.op.amount > vg_free:
raise errors.OpPrereqError("Not enough disk space on target node %s:"
" %d MiB available, %d MiB required" %
- (node, info['vg_free'], self.op.amount))
+ (node, vg_free, self.op.amount))
def Exec(self, feedback_fn):
"""Execute disk grow.
"""
instance = self.instance
disk = self.disk
- for node in (instance.secondary_nodes + (instance.primary_node,)):
+ for node in instance.all_nodes:
self.cfg.SetDiskID(disk, node)
result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
- if (not result or not isinstance(result, (list, tuple)) or
- len(result) != 2):
- raise errors.OpExecError("grow request failed to node %s" % node)
- elif not result[0]:
- raise errors.OpExecError("grow request failed to node %s: %s" %
- (node, result[1]))
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Grow request failed to node %s: %s" %
+ (node, msg))
disk.RecordGrow(self.op.amount)
self.cfg.Update(instance)
if self.op.wait_for_sync:
for name in self.op.instances:
full_name = self.cfg.ExpandInstanceName(name)
if full_name is None:
- raise errors.OpPrereqError("Instance '%s' not known" %
- self.op.instance_name)
+ raise errors.OpPrereqError("Instance '%s' not known" % name)
self.wanted_names.append(full_name)
self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
else:
if not static:
self.cfg.SetDiskID(dev, instance.primary_node)
dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
+ if dev_pstatus.offline:
+ dev_pstatus = None
+ else:
+ msg = dev_pstatus.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Can't compute disk status for %s: %s" %
+ (instance.name, msg))
+ dev_pstatus = dev_pstatus.payload
else:
dev_pstatus = None
if snode and not static:
self.cfg.SetDiskID(dev, snode)
dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
+ if dev_sstatus.offline:
+ dev_sstatus = None
+ else:
+ msg = dev_sstatus.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Can't compute disk status for %s: %s" %
+ (instance.name, msg))
+ dev_sstatus = dev_sstatus.payload
else:
dev_sstatus = None
"pstatus": dev_pstatus,
"sstatus": dev_sstatus,
"children": dev_children,
+ "mode": dev.mode,
}
return data
remote_info = self.rpc.call_instance_info(instance.primary_node,
instance.name,
instance.hypervisor)
+ remote_info.Raise()
+ remote_info = remote_info.data
if remote_info and "state" in remote_info:
remote_state = "up"
else:
remote_state = "down"
else:
remote_state = None
- if instance.status == "down":
- config_state = "down"
- else:
+ if instance.admin_up:
config_state = "up"
+ else:
+ config_state = "down"
disks = [self._ComputeDiskStatus(instance, None, device)
for device in instance.disks]
"""
HPATH = "instance-modify"
HTYPE = constants.HTYPE_INSTANCE
- _OP_REQP = ["instance_name", "hvparams"]
+ _OP_REQP = ["instance_name"]
REQ_BGL = False
+ def CheckArguments(self):
+ if not hasattr(self.op, 'nics'):
+ self.op.nics = []
+ if not hasattr(self.op, 'disks'):
+ self.op.disks = []
+ if not hasattr(self.op, 'beparams'):
+ self.op.beparams = {}
+ if not hasattr(self.op, 'hvparams'):
+ self.op.hvparams = {}
+ self.op.force = getattr(self.op, "force", False)
+ if not (self.op.nics or self.op.disks or
+ self.op.hvparams or self.op.beparams):
+ raise errors.OpPrereqError("No changes submitted")
+
+ # Disk validation
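+    # self.op.disks is a list of (op, params) pairs, where op is either
+    # constants.DDM_ADD, constants.DDM_REMOVE or the index of an existing
+    # disk; e.g. [(constants.DDM_ADD, {'size': 1024})] adds a 1024 MiB disk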
+ disk_addremove = 0
+ for disk_op, disk_dict in self.op.disks:
+ if disk_op == constants.DDM_REMOVE:
+ disk_addremove += 1
+ continue
+ elif disk_op == constants.DDM_ADD:
+ disk_addremove += 1
+ else:
+ if not isinstance(disk_op, int):
+ raise errors.OpPrereqError("Invalid disk index")
+ if disk_op == constants.DDM_ADD:
+ mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
+ if mode not in constants.DISK_ACCESS_SET:
+ raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
+ size = disk_dict.get('size', None)
+ if size is None:
+ raise errors.OpPrereqError("Required disk parameter size missing")
+ try:
+ size = int(size)
+ except ValueError, err:
+ raise errors.OpPrereqError("Invalid disk size parameter: %s" %
+ str(err))
+ disk_dict['size'] = size
+ else:
+ # modification of disk
+ if 'size' in disk_dict:
+ raise errors.OpPrereqError("Disk size change not possible, use"
+ " grow-disk")
+
+ if disk_addremove > 1:
+ raise errors.OpPrereqError("Only one disk add or remove operation"
+ " supported at a time")
+
+ # NIC validation
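+    # self.op.nics uses the same (op, params) format; e.g.
+    # [(0, {'ip': '192.0.2.10'})] changes the IP of the first nic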
+ nic_addremove = 0
+ for nic_op, nic_dict in self.op.nics:
+ if nic_op == constants.DDM_REMOVE:
+ nic_addremove += 1
+ continue
+ elif nic_op == constants.DDM_ADD:
+ nic_addremove += 1
+ else:
+ if not isinstance(nic_op, int):
+ raise errors.OpPrereqError("Invalid nic index")
+
+ # nic_dict should be a dict
+ nic_ip = nic_dict.get('ip', None)
+ if nic_ip is not None:
+ if nic_ip.lower() == constants.VALUE_NONE:
+ nic_dict['ip'] = None
+ else:
+ if not utils.IsValidIP(nic_ip):
+ raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
+
+ nic_bridge = nic_dict.get('bridge', None)
+ nic_link = nic_dict.get('link', None)
+ if nic_bridge and nic_link:
+ raise errors.OpPrereqError("Cannot pass 'bridge' and 'link' at the same time")
+ elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
+ nic_dict['bridge'] = None
+ elif nic_link and nic_link.lower() == constants.VALUE_NONE:
+ nic_dict['link'] = None
+
+ if nic_op == constants.DDM_ADD:
+ nic_mac = nic_dict.get('mac', None)
+ if nic_mac is None:
+ nic_dict['mac'] = constants.VALUE_AUTO
+
+ if 'mac' in nic_dict:
+ nic_mac = nic_dict['mac']
+ if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+ if not utils.IsValidMac(nic_mac):
+ raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
+ if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
+ raise errors.OpPrereqError("'auto' is not a valid MAC address when"
+ " modifying an existing nic")
+
+ if nic_addremove > 1:
+ raise errors.OpPrereqError("Only one NIC add or remove operation"
+ " supported at a time")
+
def ExpandNames(self):
self._ExpandAndLockInstance()
self.needed_locks[locking.LEVEL_NODE] = []
self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
-
def DeclareLocks(self, level):
if level == locking.LEVEL_NODE:
self._LockInstancesNodes()
args['memory'] = self.be_new[constants.BE_MEMORY]
if constants.BE_VCPUS in self.be_new:
args['vcpus'] = self.be_new[constants.BE_VCPUS]
- if self.do_ip or self.do_bridge or self.mac:
- if self.do_ip:
- ip = self.ip
- else:
- ip = self.instance.nics[0].ip
- if self.bridge:
- bridge = self.bridge
- else:
- bridge = self.instance.nics[0].bridge
- if self.mac:
- mac = self.mac
- else:
- mac = self.instance.nics[0].mac
- args['nics'] = [(ip, bridge, mac)]
+ # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
+ # information at all.
+ if self.op.nics:
+ args['nics'] = []
+ nic_override = dict(self.op.nics)
+ c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
+ for idx, nic in enumerate(self.instance.nics):
+ if idx in nic_override:
+ this_nic_override = nic_override[idx]
+ else:
+ this_nic_override = {}
+ if 'ip' in this_nic_override:
+ ip = this_nic_override['ip']
+ else:
+ ip = nic.ip
+ if 'mac' in this_nic_override:
+ mac = this_nic_override['mac']
+ else:
+ mac = nic.mac
+ if idx in self.nic_pnew:
+ nicparams = self.nic_pnew[idx]
+ else:
+ nicparams = objects.FillDict(c_nicparams, nic.nicparams)
+ mode = nicparams[constants.NIC_MODE]
+ link = nicparams[constants.NIC_LINK]
+ args['nics'].append((ip, mac, mode, link))
+ if constants.DDM_ADD in nic_override:
+ ip = nic_override[constants.DDM_ADD].get('ip', None)
+ mac = nic_override[constants.DDM_ADD]['mac']
+ nicparams = self.nic_pnew[constants.DDM_ADD]
+ mode = nicparams[constants.NIC_MODE]
+ link = nicparams[constants.NIC_LINK]
+ args['nics'].append((ip, mac, mode, link))
+ elif constants.DDM_REMOVE in nic_override:
+ del args['nics'][-1]
+
env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
- nl = [self.cfg.GetMasterNode(),
- self.instance.primary_node] + list(self.instance.secondary_nodes)
+ nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return env, nl, nl
+ def _GetUpdatedParams(self, old_params, update_dict,
+ default_values, parameter_types):
+ """Return the new params dict for the given params.
+
+    @type old_params: dict
+    @param old_params: old parameters
+    @type update_dict: dict
+    @param update_dict: dict containing new parameter values,
+                        or constants.VALUE_DEFAULT to reset the
+                        parameter to its default value
+ @type default_values: dict
+ @param default_values: default values for the filled parameters
+ @type parameter_types: dict
+ @param parameter_types: dict mapping target dict keys to types
+ in constants.ENFORCEABLE_TYPES
+ @rtype: (dict, dict)
+ @return: (new_parameters, filled_parameters)
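+
+    For example (hypothetical values), updating
+    {'kernel_path': '/boot/vm'} with
+    {'kernel_path': constants.VALUE_DEFAULT, 'root_path': '/dev/sda2'}
+    returns ({'root_path': '/dev/sda2'}, that same dict filled with
+    the remaining default_values).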
+
+ """
+ params_copy = copy.deepcopy(old_params)
+ for key, val in update_dict.iteritems():
+ if val == constants.VALUE_DEFAULT:
+ try:
+ del params_copy[key]
+ except KeyError:
+ pass
+ else:
+ params_copy[key] = val
+ utils.ForceDictType(params_copy, parameter_types)
+ params_filled = objects.FillDict(default_values, params_copy)
+ return (params_copy, params_filled)
+
def CheckPrereq(self):
"""Check prerequisites.
This only checks the instance list against the existing names.
"""
- # FIXME: all the parameters could be checked before, in ExpandNames, or in
- # a separate CheckArguments function, if we implement one, so the operation
- # can be aborted without waiting for any lock, should it have an error...
- self.ip = getattr(self.op, "ip", None)
- self.mac = getattr(self.op, "mac", None)
- self.bridge = getattr(self.op, "bridge", None)
- self.kernel_path = getattr(self.op, "kernel_path", None)
- self.initrd_path = getattr(self.op, "initrd_path", None)
- self.force = getattr(self.op, "force", None)
- all_parms = [self.ip, self.bridge, self.mac]
- if (all_parms.count(None) == len(all_parms) and
- not self.op.hvparams and
- not self.op.beparams):
- raise errors.OpPrereqError("No changes submitted")
- for item in (constants.BE_MEMORY, constants.BE_VCPUS):
- val = self.op.beparams.get(item, None)
- if val is not None:
- try:
- val = int(val)
- except ValueError, err:
- raise errors.OpPrereqError("Invalid %s size: %s" % (item, str(err)))
- self.op.beparams[item] = val
- if self.ip is not None:
- self.do_ip = True
- if self.ip.lower() == "none":
- self.ip = None
- else:
- if not utils.IsValidIP(self.ip):
- raise errors.OpPrereqError("Invalid IP address '%s'." % self.ip)
- else:
- self.do_ip = False
- self.do_bridge = (self.bridge is not None)
- if self.mac is not None:
- if self.cfg.IsMacInUse(self.mac):
- raise errors.OpPrereqError('MAC address %s already in use in cluster' %
- self.mac)
- if not utils.IsValidMac(self.mac):
- raise errors.OpPrereqError('Invalid MAC address %s' % self.mac)
+ force = self.force = self.op.force
# checking the new params on the primary/secondary nodes
instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
+ cluster = self.cluster = self.cfg.GetClusterInfo()
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
- pnode = self.instance.primary_node
- nodelist = [pnode]
- nodelist.extend(instance.secondary_nodes)
+ pnode = instance.primary_node
+ nodelist = list(instance.all_nodes)
# hvparams processing
if self.op.hvparams:
- i_hvdict = copy.deepcopy(instance.hvparams)
- for key, val in self.op.hvparams.iteritems():
- if val is None:
- try:
- del i_hvdict[key]
- except KeyError:
- pass
- else:
- i_hvdict[key] = val
- cluster = self.cfg.GetClusterInfo()
- hv_new = cluster.FillDict(cluster.hvparams[instance.hypervisor],
- i_hvdict)
+ i_hvdict, hv_new = self._GetUpdatedParams(
+ instance.hvparams, self.op.hvparams,
+ cluster.hvparams[instance.hypervisor],
+ constants.HVS_PARAMETER_TYPES)
# local check
hypervisor.GetHypervisor(
instance.hypervisor).CheckParameterSyntax(hv_new)
# beparams processing
if self.op.beparams:
- i_bedict = copy.deepcopy(instance.beparams)
- for key, val in self.op.beparams.iteritems():
- if val is None:
- try:
- del i_bedict[key]
- except KeyError:
- pass
- else:
- i_bedict[key] = val
- cluster = self.cfg.GetClusterInfo()
- be_new = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
- i_bedict)
+ i_bedict, be_new = self._GetUpdatedParams(
+ instance.beparams, self.op.beparams,
+ cluster.beparams[constants.PP_DEFAULT],
+ constants.BES_PARAMETER_TYPES)
self.be_new = be_new # the new actual values
self.be_inst = i_bedict # the new dict (without defaults)
else:
- self.hv_new = self.hv_inst = {}
+ self.be_new = self.be_inst = {}
self.warn = []
instance.hypervisor)
nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
instance.hypervisor)
-
- if pnode not in nodeinfo or not isinstance(nodeinfo[pnode], dict):
+ if nodeinfo[pnode].failed or not isinstance(nodeinfo[pnode].data, dict):
# Assume the primary node is unreachable and go ahead
self.warn.append("Can't get info from primary node %s" % pnode)
else:
- if instance_info:
- current_mem = instance_info['memory']
+ if not instance_info.failed and instance_info.data:
+ current_mem = int(instance_info.data['memory'])
else:
# Assume instance not running
# (there is a slight race condition here, but it's not very probable,
# and we have no other way to check)
current_mem = 0
miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
- nodeinfo[pnode]['memory_free'])
+ nodeinfo[pnode].data['memory_free'])
if miss_mem > 0:
raise errors.OpPrereqError("This change will prevent the instance"
" from starting, due to %d MB of memory"
" missing on its primary node" % miss_mem)
if be_new[constants.BE_AUTO_BALANCE]:
- for node in instance.secondary_nodes:
- if node not in nodeinfo or not isinstance(nodeinfo[node], dict):
+ for node, nres in nodeinfo.iteritems():
+ if node not in instance.secondary_nodes:
+ continue
+ if nres.failed or not isinstance(nres.data, dict):
self.warn.append("Can't get info from secondary node %s" % node)
- elif be_new[constants.BE_MEMORY] > nodeinfo[node]['memory_free']:
+ elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
self.warn.append("Not enough memory to failover instance to"
" secondary node %s" % node)
+ # NIC processing
+ self.nic_pnew = {}
+ self.nic_pinst = {}
+ for nic_op, nic_dict in self.op.nics:
+ if nic_op == constants.DDM_REMOVE:
+ if not instance.nics:
+ raise errors.OpPrereqError("Instance has no NICs, cannot remove")
+ continue
+ if nic_op != constants.DDM_ADD:
+ # an existing nic
+ if nic_op < 0 or nic_op >= len(instance.nics):
+ raise errors.OpPrereqError("Invalid NIC index %s, valid values"
+ " are 0 to %d" %
+                                     (nic_op, len(instance.nics) - 1))
+ old_nic_params = instance.nics[nic_op].nicparams
+ old_nic_ip = instance.nics[nic_op].ip
+ else:
+ old_nic_params = {}
+ old_nic_ip = None
+
+ update_params_dict = dict([(key, nic_dict[key])
+ for key in constants.NICS_PARAMETERS
+ if key in nic_dict])
+
+ if 'bridge' in nic_dict:
+ update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
+
+ new_nic_params, new_filled_nic_params = \
+ self._GetUpdatedParams(old_nic_params, update_params_dict,
+ cluster.nicparams[constants.PP_DEFAULT],
+ constants.NICS_PARAMETER_TYPES)
+ objects.NIC.CheckParameterSyntax(new_filled_nic_params)
+ self.nic_pinst[nic_op] = new_nic_params
+ self.nic_pnew[nic_op] = new_filled_nic_params
+ new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
+
+ if new_nic_mode == constants.NIC_MODE_BRIDGED:
+ nic_bridge = new_filled_nic_params[constants.NIC_LINK]
+ result = self.rpc.call_bridges_exist(pnode, [nic_bridge])
+ result.Raise()
+ if not result.data:
+ msg = ("Bridge '%s' doesn't exist on one of"
+ " the instance nodes" % nic_bridge)
+ if self.force:
+ self.warn.append(msg)
+ else:
+ raise errors.OpPrereqError(msg)
+ if new_nic_mode == constants.NIC_MODE_ROUTED:
+ if 'ip' in nic_dict:
+ nic_ip = nic_dict['ip']
+ else:
+ nic_ip = old_nic_ip
+ if nic_ip is None:
+ raise errors.OpPrereqError('Cannot set the nic ip to None'
+ ' on a routed nic')
+ if 'mac' in nic_dict:
+ nic_mac = nic_dict['mac']
+ if nic_mac is None:
+ raise errors.OpPrereqError('Cannot set the nic mac to None')
+ elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+ # otherwise generate the mac
+ nic_dict['mac'] = self.cfg.GenerateMAC()
+ else:
+ # or validate/reserve the current one
+ if self.cfg.IsMacInUse(nic_mac):
+ raise errors.OpPrereqError("MAC address %s already in use"
+ " in cluster" % nic_mac)
+
+ # DISK processing
+ if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
+ raise errors.OpPrereqError("Disk operations not supported for"
+ " diskless instances")
+ for disk_op, disk_dict in self.op.disks:
+ if disk_op == constants.DDM_REMOVE:
+ if len(instance.disks) == 1:
+ raise errors.OpPrereqError("Cannot remove the last disk of"
+ " an instance")
+ ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
+ ins_l = ins_l[pnode]
+ if ins_l.failed or not isinstance(ins_l.data, list):
+ raise errors.OpPrereqError("Can't contact node '%s'" % pnode)
+ if instance.name in ins_l.data:
+ raise errors.OpPrereqError("Instance is running, can't remove"
+ " disks.")
+
+ if (disk_op == constants.DDM_ADD and
+          len(instance.disks) >= constants.MAX_DISKS):
+ raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
+ " add more" % constants.MAX_DISKS)
+ if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
+ # an existing disk
+ if disk_op < 0 or disk_op >= len(instance.disks):
+ raise errors.OpPrereqError("Invalid disk index %s, valid values"
+ " are 0 to %d" %
+                                     (disk_op, len(instance.disks) - 1))
+
return
def Exec(self, feedback_fn):
"""Modifies an instance.
All parameters take effect only at the next restart of the instance.
+
"""
# Process here the warnings from CheckPrereq, as we don't have a
# feedback_fn there.
result = []
instance = self.instance
- if self.do_ip:
- instance.nics[0].ip = self.ip
- result.append(("ip", self.ip))
- if self.bridge:
- instance.nics[0].bridge = self.bridge
- result.append(("bridge", self.bridge))
- if self.mac:
- instance.nics[0].mac = self.mac
- result.append(("mac", self.mac))
+ cluster = self.cluster
+ # disk changes
+ for disk_op, disk_dict in self.op.disks:
+ if disk_op == constants.DDM_REMOVE:
+ # remove the last disk
+ device = instance.disks.pop()
+ device_idx = len(instance.disks)
+ for node, disk in device.ComputeNodeTree(instance.primary_node):
+ self.cfg.SetDiskID(disk, node)
+ msg = self.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
+ if msg:
+ self.LogWarning("Could not remove disk/%d on node %s: %s,"
+ " continuing anyway", device_idx, node, msg)
+ result.append(("disk/%d" % device_idx, "remove"))
+ elif disk_op == constants.DDM_ADD:
+ # add a new disk
+ if instance.disk_template == constants.DT_FILE:
+ file_driver, file_path = instance.disks[0].logical_id
+ file_path = os.path.dirname(file_path)
+ else:
+ file_driver = file_path = None
+ disk_idx_base = len(instance.disks)
+ new_disk = _GenerateDiskTemplate(self,
+ instance.disk_template,
+ instance.name, instance.primary_node,
+ instance.secondary_nodes,
+ [disk_dict],
+ file_path,
+ file_driver,
+ disk_idx_base)[0]
+ instance.disks.append(new_disk)
+ info = _GetInstanceInfoText(instance)
+
+ logging.info("Creating volume %s for instance %s",
+ new_disk.iv_name, instance.name)
+ # Note: this needs to be kept in sync with _CreateDisks
+ #HARDCODE
+ for node in instance.all_nodes:
+ f_create = node == instance.primary_node
+ try:
+ _CreateBlockDev(self, node, instance, new_disk,
+ f_create, info, f_create)
+ except errors.OpExecError, err:
+ self.LogWarning("Failed to create volume %s (%s) on"
+ " node %s: %s",
+ new_disk.iv_name, new_disk, node, err)
+ result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
+ (new_disk.size, new_disk.mode)))
+ else:
+ # change a given disk
+ instance.disks[disk_op].mode = disk_dict['mode']
+ result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
+ # NIC changes
+ for nic_op, nic_dict in self.op.nics:
+ if nic_op == constants.DDM_REMOVE:
+ # remove the last nic
+ del instance.nics[-1]
+ result.append(("nic.%d" % len(instance.nics), "remove"))
+ elif nic_op == constants.DDM_ADD:
+ # mac and bridge should be set, by now
+ mac = nic_dict['mac']
+ ip = nic_dict.get('ip', None)
+ nicparams = self.nic_pinst[constants.DDM_ADD]
+ new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
+ instance.nics.append(new_nic)
+ result.append(("nic.%d" % (len(instance.nics) - 1),
+ "add:mac=%s,ip=%s,mode=%s,link=%s" %
+ (new_nic.mac, new_nic.ip,
+ self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
+ self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
+ )))
+ else:
+ for key in 'mac', 'ip':
+ if key in nic_dict:
+ setattr(instance.nics[nic_op], key, nic_dict[key])
+ if nic_op in self.nic_pnew:
+ instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
+ for key, val in nic_dict.iteritems():
+ result.append(("nic.%s/%d" % (key, nic_op), val))
+
+ # hvparams changes
if self.op.hvparams:
- instance.hvparams = self.hv_new
+ instance.hvparams = self.hv_inst
for key, val in self.op.hvparams.iteritems():
result.append(("hv/%s" % key, val))
+
+ # beparams changes
if self.op.beparams:
instance.beparams = self.be_inst
for key, val in self.op.beparams.iteritems():
def Exec(self, feedback_fn):
"""Compute the list of all the exported system images.
- Returns:
- a dictionary with the structure node->(export-list)
- where export-list is a list of the instances exported on
- that node.
+ @rtype: dict
+ @return: a dictionary with the structure node->(export-list)
+ where export-list is a list of the instances exported on
+ that node.
"""
- return self.rpc.call_export_list(self.nodes)
+ rpcresult = self.rpc.call_export_list(self.nodes)
+ result = {}
+ for node in rpcresult:
+ if rpcresult[node].failed:
+ result[node] = False
+ else:
+ result[node] = rpcresult[node].data
+
+ return result
class LUExportInstance(LogicalUnit):
self.instance = self.cfg.GetInstanceInfo(instance_name)
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
+ _CheckNodeOnline(self, self.instance.primary_node)
self.dst_node = self.cfg.GetNodeInfo(
self.cfg.ExpandNodeName(self.op.target_node))
- assert self.dst_node is not None, \
- "Cannot retrieve locked node %s" % self.op.target_node
+ if self.dst_node is None:
+      # a None here means a wrong node name, not a non-locked node
+ raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
+ _CheckNodeOnline(self, self.dst_node.name)
+ _CheckNodeNotDrained(self, self.dst_node.name)
# instance disk type verification
for disk in self.instance.disks:
src_node = instance.primary_node
if self.op.shutdown:
# shutdown the instance, but not the disks
- if not self.rpc.call_instance_shutdown(src_node, instance):
- raise errors.OpExecError("Could not shutdown instance %s on node %s" %
- (instance.name, src_node))
+ result = self.rpc.call_instance_shutdown(src_node, instance)
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Could not shutdown instance %s on"
+ " node %s: %s" %
+ (instance.name, src_node, msg))
vgname = self.cfg.GetVGName()
snap_disks = []
+ # set the disks ID correctly since call_instance_start needs the
+ # correct drbd minor to create the symlinks
+ for disk in instance.disks:
+ self.cfg.SetDiskID(disk, src_node)
+
try:
for disk in instance.disks:
- # new_dev_name will be a snapshot of an lvm leaf of the one we passed
- new_dev_name = self.rpc.call_blockdev_snapshot(src_node, disk)
-
- if not new_dev_name:
- self.LogWarning("Could not snapshot block device %s on node %s",
- disk.logical_id[1], src_node)
+ # result.payload will be a snapshot of an lvm leaf of the one we passed
+ result = self.rpc.call_blockdev_snapshot(src_node, disk)
+ msg = result.RemoteFailMsg()
+ if msg:
+ self.LogWarning("Could not snapshot block device %s on node %s: %s",
+ disk.logical_id[1], src_node, msg)
snap_disks.append(False)
else:
+ disk_id = (vgname, result.payload)
new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
- logical_id=(vgname, new_dev_name),
- physical_id=(vgname, new_dev_name),
+ logical_id=disk_id, physical_id=disk_id,
iv_name=disk.iv_name)
snap_disks.append(new_dev)
finally:
- if self.op.shutdown and instance.status == "up":
- if not self.rpc.call_instance_start(src_node, instance, None):
+ if self.op.shutdown and instance.admin_up:
+ result = self.rpc.call_instance_start(src_node, instance, None, None)
+ msg = result.RemoteFailMsg()
+ if msg:
_ShutdownInstanceDisks(self, instance)
- raise errors.OpExecError("Could not start instance")
+ raise errors.OpExecError("Could not start instance: %s" % msg)
# TODO: check for size
cluster_name = self.cfg.GetClusterName()
for idx, dev in enumerate(snap_disks):
if dev:
- if not self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
- instance, cluster_name, idx):
+ result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
+ instance, cluster_name, idx)
+ msg = result.RemoteFailMsg()
+ if msg:
self.LogWarning("Could not export block device %s from node %s to"
- " node %s", dev.logical_id[1], src_node,
- dst_node.name)
- if not self.rpc.call_blockdev_remove(src_node, dev):
+ " node %s: %s", dev.logical_id[1], src_node,
+ dst_node.name, msg)
+ msg = self.rpc.call_blockdev_remove(src_node, dev).RemoteFailMsg()
+ if msg:
self.LogWarning("Could not remove snapshot block device %s from node"
- " %s", dev.logical_id[1], src_node)
+ " %s: %s", dev.logical_id[1], src_node, msg)
- if not self.rpc.call_finalize_export(dst_node.name, instance, snap_disks):
+ result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
+ if result.failed or not result.data:
self.LogWarning("Could not finalize export for instance %s on node %s",
instance.name, dst_node.name)
if nodelist:
exportlist = self.rpc.call_export_list(nodelist)
for node in exportlist:
- if instance.name in exportlist[node]:
+ if exportlist[node].failed:
+ continue
+ if instance.name in exportlist[node].data:
if not self.rpc.call_export_remove(node, instance.name):
self.LogWarning("Could not remove older export for instance %s"
" on node %s", instance.name, node)
locking.LEVEL_NODE])
found = False
for node in exportlist:
- if instance_name in exportlist[node]:
+ if exportlist[node].failed:
+ self.LogWarning("Failed to query node %s, continuing" % node)
+ continue
+ if instance_name in exportlist[node].data:
found = True
- if not self.rpc.call_export_remove(node, instance_name):
+ result = self.rpc.call_export_remove(node, instance_name)
+ if result.failed or not result.data:
logging.error("Could not remove export for instance %s"
" on node %s", instance_name, node)
if not result:
raise errors.OpExecError("Complete failure from rpc call")
for node, node_result in result.items():
- if not node_result:
+ node_result.Raise()
+ if not node_result.data:
raise errors.OpExecError("Failure during rpc call to node %s,"
- " result: %s" % (node, node_result))
+ " result: %s" % (node, node_result.data))
class IAllocator(object):
"""
_ALLO_KEYS = [
"mem_size", "disks", "disk_template",
- "os", "tags", "nics", "vcpus",
+ "os", "tags", "nics", "vcpus", "hypervisor",
]
_RELO_KEYS = [
"relocate_from",
self.name = name
self.mem_size = self.disks = self.disk_template = None
self.os = self.tags = self.nics = self.vcpus = None
+ self.hypervisor = None
self.relocate_from = None
# computed fields
self.required_nodes = None
cluster_info = cfg.GetClusterInfo()
# cluster data
data = {
- "version": 1,
+ "version": constants.IALLOCATOR_VERSION,
"cluster_name": cfg.GetClusterName(),
"cluster_tags": list(cluster_info.GetTags()),
- "enable_hypervisors": list(cluster_info.enabled_hypervisors),
+ "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
# we don't have job IDs
}
-
- i_list = []
- cluster = self.cfg.GetClusterInfo()
- for iname in cfg.GetInstanceList():
- i_obj = cfg.GetInstanceInfo(iname)
- i_list.append((i_obj, cluster.FillBE(i_obj)))
+ iinfo = cfg.GetAllInstancesInfo().values()
+ i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
# node data
node_results = {}
node_list = cfg.GetNodeList()
- # FIXME: here we have only one hypervisor information, but
- # instance can belong to different hypervisors
+
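+    # pick the hypervisor whose node data we query: the requested one
+    # for allocations, the instance's own for relocations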
+ if self.mode == constants.IALLOCATOR_MODE_ALLOC:
+ hypervisor_name = self.hypervisor
+ elif self.mode == constants.IALLOCATOR_MODE_RELOC:
+ hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
+
node_data = self.lu.rpc.call_node_info(node_list, cfg.GetVGName(),
- cfg.GetHypervisorType())
- for nname in node_list:
+ hypervisor_name)
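+    # live instance information per node, over all enabled hypervisors;
+    # used below to compute the actual memory usage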
+ node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
+ cluster_info.enabled_hypervisors)
+ for nname, nresult in node_data.items():
+ # first fill in static (config-based) values
ninfo = cfg.GetNodeInfo(nname)
- if nname not in node_data or not isinstance(node_data[nname], dict):
- raise errors.OpExecError("Can't get data for node %s" % nname)
- remote_info = node_data[nname]
- for attr in ['memory_total', 'memory_free', 'memory_dom0',
- 'vg_size', 'vg_free', 'cpu_total']:
- if attr not in remote_info:
- raise errors.OpExecError("Node '%s' didn't return attribute '%s'" %
- (nname, attr))
- try:
- remote_info[attr] = int(remote_info[attr])
- except ValueError, err:
- raise errors.OpExecError("Node '%s' returned invalid value for '%s':"
- " %s" % (nname, attr, str(err)))
- # compute memory used by primary instances
- i_p_mem = i_p_up_mem = 0
- for iinfo, beinfo in i_list:
- if iinfo.primary_node == nname:
- i_p_mem += beinfo[constants.BE_MEMORY]
- if iinfo.status == "up":
- i_p_up_mem += beinfo[constants.BE_MEMORY]
-
- # compute memory used by instances
pnr = {
"tags": list(ninfo.GetTags()),
- "total_memory": remote_info['memory_total'],
- "reserved_memory": remote_info['memory_dom0'],
- "free_memory": remote_info['memory_free'],
- "i_pri_memory": i_p_mem,
- "i_pri_up_memory": i_p_up_mem,
- "total_disk": remote_info['vg_size'],
- "free_disk": remote_info['vg_free'],
"primary_ip": ninfo.primary_ip,
"secondary_ip": ninfo.secondary_ip,
- "total_cpus": remote_info['cpu_total'],
+ "offline": ninfo.offline,
+ "drained": ninfo.drained,
+ "master_candidate": ninfo.master_candidate,
}
+
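+      # the rpc result is only checked and used for online nodes;
+      # offline nodes export just the static data above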
+ if not ninfo.offline:
+ nresult.Raise()
+ if not isinstance(nresult.data, dict):
+ raise errors.OpExecError("Can't get data for node %s" % nname)
+ remote_info = nresult.data
+ for attr in ['memory_total', 'memory_free', 'memory_dom0',
+ 'vg_size', 'vg_free', 'cpu_total']:
+ if attr not in remote_info:
+ raise errors.OpExecError("Node '%s' didn't return attribute"
+ " '%s'" % (nname, attr))
+ try:
+ remote_info[attr] = int(remote_info[attr])
+ except ValueError, err:
+ raise errors.OpExecError("Node '%s' returned invalid value"
+ " for '%s': %s" % (nname, attr, err))
+ # compute memory used by primary instances
+ i_p_mem = i_p_up_mem = 0
+ for iinfo, beinfo in i_list:
+ if iinfo.primary_node == nname:
+ i_p_mem += beinfo[constants.BE_MEMORY]
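+            # correct 'memory_free': if the instance currently uses less
+            # memory than configured, reserve the difference so that the
+            # allocator plans for it growing back to full size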
+ if iinfo.name not in node_iinfo[nname].data:
+ i_used_mem = 0
+ else:
+ i_used_mem = int(node_iinfo[nname].data[iinfo.name]['memory'])
+ i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
+ remote_info['memory_free'] -= max(0, i_mem_diff)
+
+ if iinfo.admin_up:
+ i_p_up_mem += beinfo[constants.BE_MEMORY]
+
+        # dynamic values from the node query, plus the primary-instance
+        # memory totals computed above
+ pnr_dyn = {
+ "total_memory": remote_info['memory_total'],
+ "reserved_memory": remote_info['memory_dom0'],
+ "free_memory": remote_info['memory_free'],
+ "total_disk": remote_info['vg_size'],
+ "free_disk": remote_info['vg_free'],
+ "total_cpus": remote_info['cpu_total'],
+ "i_pri_memory": i_p_mem,
+ "i_pri_up_memory": i_p_up_mem,
+ }
+ pnr.update(pnr_dyn)
+
node_results[nname] = pnr
data["nodes"] = node_results
# instance data
instance_data = {}
for iinfo, beinfo in i_list:
- nic_data = [{"mac": n.mac, "ip": n.ip, "bridge": n.bridge}
- for n in iinfo.nics]
+ nic_data = []
+ for nic in iinfo.nics:
+ filled_params = objects.FillDict(
+ cluster_info.nicparams[constants.PP_DEFAULT],
+ nic.nicparams)
+ nic_dict = {"mac": nic.mac,
+ "ip": nic.ip,
+ "mode": filled_params[constants.NIC_MODE],
+ "link": filled_params[constants.NIC_LINK],
+ }
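+        # for bridged NICs also export the link under the "bridge" key,
+        # which older allocator scripts may still expect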
+ if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
+ nic_dict["bridge"] = filled_params[constants.NIC_LINK]
+ nic_data.append(nic_dict)
pir = {
"tags": list(iinfo.GetTags()),
- "should_run": iinfo.status == "up",
+ "admin_up": iinfo.admin_up,
"vcpus": beinfo[constants.BE_VCPUS],
"memory": beinfo[constants.BE_MEMORY],
"os": iinfo.os,
"nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
"nics": nic_data,
- "disks": [{"size": dsk.size, "mode": "w"} for dsk in iinfo.disks],
+ "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
"disk_template": iinfo.disk_template,
"hypervisor": iinfo.hypervisor,
}
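+      # total disk space taken on the nodes, as computed from the
+      # instance's disk template (which may add per-disk overhead)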
+ pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
+ pir["disks"])
instance_data[iinfo.name] = pir
data["instances"] = instance_data
"""
data = self.in_data
- if len(self.disks) != 2:
- raise errors.OpExecError("Only two-disk configurations supported")
- disk_space = _ComputeDiskSize(self.disk_template,
- self.disks[0]["size"], self.disks[1]["size"])
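+    # _ComputeDiskSize now takes the whole disk list, so the former
+    # two-disk limitation is gone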
+ disk_space = _ComputeDiskSize(self.disk_template, self.disks)
if self.disk_template in constants.DTS_NET_MIRROR:
self.required_nodes = 2
raise errors.OpPrereqError("Instance has not exactly one secondary node")
self.required_nodes = 1
-
- disk_space = _ComputeDiskSize(instance.disk_template,
- instance.disks[0].size,
- instance.disks[1].size)
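+    # convert the instance's disks into the dict form that
+    # _ComputeDiskSize expects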
+ disk_sizes = [{'size': disk.size} for disk in instance.disks]
+ disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
request = {
"type": "relocate",
data = self.in_text
result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
-    if not isinstance(result, (list, tuple)) or len(result) != 4:
+    result.Raise()
+    if not isinstance(result.data, (list, tuple)) or len(result.data) != 4:
raise errors.OpExecError("Invalid result from master iallocator runner")
- rcode, stdout, stderr, fail = result
+ rcode, stdout, stderr, fail = result.data
if rcode == constants.IARUN_NOTFOUND:
raise errors.OpExecError("Can't find allocator '%s'" % name)
" 'nics' parameter")
if not isinstance(self.op.disks, list):
raise errors.OpPrereqError("Invalid parameter 'disks'")
- if len(self.op.disks) != 2:
- raise errors.OpPrereqError("Only two-disk configurations supported")
for row in self.op.disks:
if (not isinstance(row, dict) or
"size" not in row or
row["mode"] not in ['r', 'w']):
raise errors.OpPrereqError("Invalid contents of the"
" 'disks' parameter")
+ if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
+ self.op.hypervisor = self.cfg.GetHypervisorType()
elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
if not hasattr(self.op, "name"):
raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
tags=self.op.tags,
nics=self.op.nics,
vcpus=self.op.vcpus,
+ hypervisor=self.op.hypervisor,
)
else:
ial = IAllocator(self,