import re
import platform
import logging
+import copy
+import random
-from ganeti import rpc
from ganeti import ssh
-from ganeti import logger
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import objects
from ganeti import opcodes
from ganeti import serializer
+from ganeti import ssconf
class LogicalUnit(object):
- implement BuildHooksEnv
- redefine HPATH and HTYPE
- optionally redefine their run requirements:
- REQ_MASTER: the LU needs to run on the master node
- REQ_WSSTORE: the LU needs a writable SimpleStore
REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
Note that all commands require root permissions.
HPATH = None
HTYPE = None
_OP_REQP = []
- REQ_MASTER = True
- REQ_WSSTORE = False
REQ_BGL = True
- def __init__(self, processor, op, context, sstore):
+ def __init__(self, processor, op, context, rpc):
"""Constructor for LogicalUnit.
This needs to be overridden in derived classes in order to check op
self.proc = processor
self.op = op
self.cfg = context.cfg
- self.sstore = sstore
self.context = context
+ self.rpc = rpc
# Dicts used to declare locking needs to mcpu
self.needed_locks = None
self.acquired_locks = {}
# Used to force good behavior when calling helper functions
self.recalculate_locks = {}
self.__ssh = None
+ # logging
+ self.LogWarning = processor.LogWarning
+ self.LogInfo = processor.LogInfo
for attr_name in self._OP_REQP:
attr_val = getattr(op, attr_name, None)
if attr_val is None:
raise errors.OpPrereqError("Required parameter '%s' missing" %
attr_name)
-
- if not self.cfg.IsCluster():
- raise errors.OpPrereqError("Cluster not initialized yet,"
- " use 'gnt-cluster init' first.")
- if self.REQ_MASTER:
- master = sstore.GetMasterNode()
- if master != utils.HostInfo().name:
- raise errors.OpPrereqError("Commands must be run on the master"
- " node %s" % master)
+ self.CheckArguments()
def __GetSSH(self):
"""Returns the SshRunner object
+
+ The runner is created on first access and cached in self.__ssh, so
+ later accesses reuse it instead of building a new one.
+
"""
if not self.__ssh:
- self.__ssh = ssh.SshRunner(self.cfg)
+ self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
return self.__ssh
ssh = property(fget=__GetSSH)
+ def CheckArguments(self):
+ """Check syntactic validity for the opcode arguments.
+
+ This method is for doing a simple syntactic check and ensuring the
+ validity of opcode parameters, without any cluster-related
+ checks. While the same can be accomplished in ExpandNames and/or
+ CheckPrereq, doing these separately is better because:
+
+ - ExpandNames is left as purely a lock-related function
+ - CheckPrereq is run after we have acquired locks (and possibly
+ waited for them)
+
+ The function is allowed to change the self.op attribute so that
+ later methods can no longer worry about missing parameters.
+
+ It is invoked by the constructor once the presence of the _OP_REQP
+ parameters has been verified; this default implementation does
+ nothing.
+
+ """
+ pass
+
def ExpandNames(self):
"""Expand names for this LU.
LUs which implement this method must also populate the self.needed_locks
member, as a dict with lock levels as keys, and a list of needed lock names
as values. Rules:
- - Use an empty dict if you don't need any lock
- - If you don't need any lock at a particular level omit that level
- - Don't put anything for the BGL level
- - If you want all locks at a level use locking.ALL_SET as a value
+
+ - use an empty dict if you don't need any lock
+ - if you don't need any lock at a particular level omit that level
+ - don't put anything for the BGL level
+ - if you want all locks at a level use locking.ALL_SET as a value
If you need to share locks (rather than acquire them exclusively) at one
level you can modify self.share_locks, setting a true value (usually 1) for
that level. By default locks are not shared.
- Examples:
- # Acquire all nodes and one instance
- self.needed_locks = {
- locking.LEVEL_NODE: locking.ALL_SET,
- locking.LEVEL_INSTANCE: ['instance1.example.tld'],
- }
- # Acquire just two nodes
- self.needed_locks = {
- locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
- }
- # Acquire no locks
- self.needed_locks = {} # No, you can't leave it to the default value None
+ Examples::
+
+ # Acquire all nodes and one instance
+ self.needed_locks = {
+ locking.LEVEL_NODE: locking.ALL_SET,
+ locking.LEVEL_INSTANCE: ['instance1.example.tld'],
+ }
+ # Acquire just two nodes
+ self.needed_locks = {
+ locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
+ }
+ # Acquire no locks
+ self.needed_locks = {} # No, you can't leave it to the default value None
"""
# The implementation of this method is mandatory only if the new LU is
previous result is passed back unchanged but any LU can define it if it
wants to use the local cluster hook-scripts somehow.
- Args:
- phase: the hooks phase that has just been run
- hooks_results: the results of the multi-node hooks rpc call
- feedback_fn: function to send feedback back to the caller
- lu_result: the previous result this LU had, or None in the PRE phase.
+ @param phase: one of L{constants.HOOKS_PHASE_POST} or
+ L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
+ @param hooks_results: the results of the multi-node hooks rpc call
+ @param feedback_fn: function used to send feedback back to the caller
+ @param lu_result: the previous Exec result this LU had, or None
+ in the PRE phase
+ @return: the new Exec result, based on the previous result
+ and hook results
"""
return lu_result
In the future it may grow parameters to just lock some instance's nodes, or
to just lock primaries or secondary nodes, if needed.
- If should be called in DeclareLocks in a way similar to:
+ It should be called in DeclareLocks in a way similar to::
- if level == locking.LEVEL_NODE:
- self._LockInstancesNodes()
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
@type primary_only: boolean
@param primary_only: only lock primary nodes of locked instances
def _GetWantedNodes(lu, nodes):
"""Returns list of checked and expanded node names.
- Args:
- nodes: List of nodes (strings) or None for all
+ @type lu: L{LogicalUnit}
+ @param lu: the logical unit on whose behalf we execute
+ @type nodes: list
+ @param nodes: list of node names or None for all nodes
+ @rtype: list
+ @return: the list of nodes, sorted
+ @raise errors.OpProgrammerError: if the nodes parameter is wrong type
"""
if not isinstance(nodes, list):
def _GetWantedInstances(lu, instances):
"""Returns list of checked and expanded instance names.
- Args:
- instances: List of instances (strings) or None for all
+ @type lu: L{LogicalUnit}
+ @param lu: the logical unit on whose behalf we execute
+ @type instances: list
+ @param instances: list of instance names or None for all instances
+ @rtype: list
+ @return: the list of instances, sorted
+ @raise errors.OpPrereqError: if the instances parameter is wrong type
+ @raise errors.OpPrereqError: if any of the passed instances is not found
"""
if not isinstance(instances, list):
def _CheckOutputFields(static, dynamic, selected):
"""Checks whether all selected fields are valid.
- Args:
- static: Static fields
- dynamic: Dynamic fields
+ @type static: L{utils.FieldSet}
+ @param static: static fields set
+ @type dynamic: L{utils.FieldSet}
+ @param dynamic: dynamic fields set
+ @param selected: the selected output fields to validate
+ @raise errors.OpPrereqError: if any of the selected fields is reported
+ as non-matching by the combined static and dynamic field set
"""
- static_fields = frozenset(static)
- dynamic_fields = frozenset(dynamic)
+ f = utils.FieldSet()
+ f.Extend(static)
+ f.Extend(dynamic)
- all_fields = static_fields | dynamic_fields
-
- if not all_fields.issuperset(selected):
+ delta = f.NonMatching(selected)
+ if delta:
raise errors.OpPrereqError("Unknown output fields selected: %s"
- % ",".join(frozenset(selected).
- difference(all_fields)))
+ % ",".join(delta))
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
memory, vcpus, nics):
- """Builds instance related env variables for hooks from single variables.
+ """Builds instance related env variables for hooks
+
+ This builds the hook environment from individual variables.
+
+ @type name: string
+ @param name: the name of the instance
+ @type primary_node: string
+ @param primary_node: the name of the instance's primary node
+ @type secondary_nodes: list
+ @param secondary_nodes: list of secondary nodes as strings
+ @type os_type: string
+ @param os_type: the name of the instance's OS
+ @type status: string
+ @param status: the desired status of the instances
+ @type memory: string
+ @param memory: the memory size of the instance
+ @type vcpus: string
+ @param vcpus: the count of VCPUs the instance has
+ @type nics: list
+ @param nics: list of tuples (ip, bridge, mac) representing
+ the NICs the instance has
+ @rtype: dict
+ @return: the hook environment for this instance
- Args:
- secondary_nodes: List of secondary nodes as strings
"""
env = {
"OP_TARGET": name,
return env
-def _BuildInstanceHookEnvByObject(instance, override=None):
+def _BuildInstanceHookEnvByObject(lu, instance, override=None):
"""Builds instance related env variables for hooks from an object.
- Args:
- instance: objects.Instance object of instance
- override: dict of values to override
+ @type lu: L{LogicalUnit}
+ @param lu: the logical unit on whose behalf we execute
+ @type instance: L{objects.Instance}
+ @param instance: the instance for which we should build the
+ environment
+ @type override: dict
+ @param override: dictionary with key/values that will override
+ our values
+ @rtype: dict
+ @return: the hook environment dictionary
+
"""
+ bep = lu.cfg.GetClusterInfo().FillBE(instance)
args = {
'name': instance.name,
'primary_node': instance.primary_node,
'secondary_nodes': instance.secondary_nodes,
'os_type': instance.os,
'status': instance.os,
- 'memory': instance.memory,
- 'vcpus': instance.vcpus,
+ 'memory': bep[constants.BE_MEMORY],
+ 'vcpus': bep[constants.BE_VCPUS],
'nics': [(nic.ip, nic.bridge, nic.mac) for nic in instance.nics],
}
if override:
return _BuildInstanceHookEnv(**args)
-def _CheckInstanceBridgesExist(instance):
+def _CheckInstanceBridgesExist(lu, instance):
"""Check that the bridges needed by an instance exist.
"""
# check bridges existence
brlist = [nic.bridge for nic in instance.nics]
- if not rpc.call_bridges_exist(instance.primary_node, brlist):
- raise errors.OpPrereqError("one or more target bridges %s does not"
+ result = lu.rpc.call_bridges_exist(instance.primary_node, brlist)
+ result.Raise()
+ if not result.data:
+ raise errors.OpPrereqError("One or more target bridges %s does not"
" exist on destination node '%s'" %
(brlist, instance.primary_node))
Any errors are signalled by raising errors.OpPrereqError.
"""
- master = self.sstore.GetMasterNode()
+ master = self.cfg.GetMasterNode()
nodelist = self.cfg.GetNodeList()
if len(nodelist) != 1 or nodelist[0] != master:
"""Destroys the cluster.
"""
- master = self.sstore.GetMasterNode()
- if not rpc.call_node_stop_master(master, False):
+ master = self.cfg.GetMasterNode()
+ result = self.rpc.call_node_stop_master(master, False)
+ result.Raise()
+ if not result.data:
raise errors.OpExecError("Could not disable the master role")
priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
utils.CreateBackup(priv_key)
}
self.share_locks = dict(((i, 1) for i in locking.LEVELS))
- def _VerifyNode(self, node, file_list, local_cksum, vglist, node_result,
- remote_version, feedback_fn):
+ def _VerifyNode(self, nodeinfo, file_list, local_cksum,
+ node_result, feedback_fn, master_files):
"""Run multiple tests against a node.
Test list:
+
- compares ganeti version
- checks vg existence and size > 20G
- checks config file checksum
- checks ssh to other nodes
- Args:
- node: name of the node to check
- file_list: required list of files
- local_cksum: dictionary of local files and their checksums
+ @type nodeinfo: L{objects.Node}
+ @param nodeinfo: the node to check
+ @param file_list: required list of files
+ @param local_cksum: dictionary of local files and their checksums
+ @param node_result: the results from the node
+ @param feedback_fn: function used to accumulate results
+ @param master_files: list of files that only masters should have
"""
+ node = nodeinfo.name
+
+ # main result, node_result should be a non-empty dict
+ if not node_result or not isinstance(node_result, dict):
+ feedback_fn(" - ERROR: unable to verify node %s." % (node,))
+ return True
+
# compares ganeti version
local_version = constants.PROTOCOL_VERSION
+ remote_version = node_result.get('version', None)
if not remote_version:
feedback_fn(" - ERROR: connection to %s failed" % (node))
return True
# checks vg existence and size > 20G
bad = False
+ vglist = node_result.get(constants.NV_VGLIST, None)
if not vglist:
feedback_fn(" - ERROR: unable to check volume groups on node %s." %
(node,))
bad = True
# checks config file checksum
- # checks ssh to any
- if 'filelist' not in node_result:
+ remote_cksum = node_result.get(constants.NV_FILELIST, None)
+ if not isinstance(remote_cksum, dict):
bad = True
feedback_fn(" - ERROR: node hasn't returned file checksum data")
else:
- remote_cksum = node_result['filelist']
for file_name in file_list:
+ node_is_mc = nodeinfo.master_candidate
+ must_have_file = file_name not in master_files
if file_name not in remote_cksum:
- bad = True
- feedback_fn(" - ERROR: file '%s' missing" % file_name)
+ if node_is_mc or must_have_file:
+ bad = True
+ feedback_fn(" - ERROR: file '%s' missing" % file_name)
elif remote_cksum[file_name] != local_cksum[file_name]:
- bad = True
- feedback_fn(" - ERROR: file '%s' has wrong checksum" % file_name)
+ if node_is_mc or must_have_file:
+ bad = True
+ feedback_fn(" - ERROR: file '%s' has wrong checksum" % file_name)
+ else:
+ # not candidate and this is not a must-have file
+ bad = True
+ feedback_fn(" - ERROR: non master-candidate has old/wrong file"
+ " '%s'" % file_name)
+ else:
+ # all good, except non-master/non-must have combination
+ if not node_is_mc and not must_have_file:
+ feedback_fn(" - ERROR: file '%s' should not exist on non master"
+ " candidates" % file_name)
- if 'nodelist' not in node_result:
+ # checks ssh to any
+
+ if constants.NV_NODELIST not in node_result:
bad = True
feedback_fn(" - ERROR: node hasn't returned node ssh connectivity data")
else:
- if node_result['nodelist']:
+ if node_result[constants.NV_NODELIST]:
bad = True
- for node in node_result['nodelist']:
+ for node in node_result[constants.NV_NODELIST]:
feedback_fn(" - ERROR: ssh communication with node '%s': %s" %
- (node, node_result['nodelist'][node]))
- if 'node-net-test' not in node_result:
+ (node, node_result[constants.NV_NODELIST][node]))
+
+ if constants.NV_NODENETTEST not in node_result:
bad = True
feedback_fn(" - ERROR: node hasn't returned node tcp connectivity data")
else:
- if node_result['node-net-test']:
+ if node_result[constants.NV_NODENETTEST]:
bad = True
- nlist = utils.NiceSort(node_result['node-net-test'].keys())
+ nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
for node in nlist:
feedback_fn(" - ERROR: tcp communication with node '%s': %s" %
- (node, node_result['node-net-test'][node]))
-
- hyp_result = node_result.get('hypervisor', None)
- if hyp_result is not None:
- feedback_fn(" - ERROR: hypervisor verify failure: '%s'" % hyp_result)
+ (node, node_result[constants.NV_NODENETTEST][node]))
+
+ hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
+ if isinstance(hyp_result, dict):
+ for hv_name, hv_result in hyp_result.iteritems():
+ if hv_result is not None:
+ feedback_fn(" - ERROR: hypervisor %s verify failure: '%s'" %
+ (hv_name, hv_result))
return bad
def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
needed_mem = 0
for instance in instances:
- needed_mem += instance_cfg[instance].memory
+ bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
+ if bep[constants.BE_AUTO_BALANCE]:
+ needed_mem += bep[constants.BE_MEMORY]
if nodeinfo['mfree'] < needed_mem:
feedback_fn(" - ERROR: not enough memory on node %s to accomodate"
" failovers should node %s fail" % (node, prinode))
feedback_fn(" - ERROR: %s" % msg)
vg_name = self.cfg.GetVGName()
+ hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
nodelist = utils.NiceSort(self.cfg.GetNodeList())
nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
instancelist = utils.NiceSort(self.cfg.GetInstanceList())
i_non_redundant = [] # Non redundant instances
+ i_non_a_balanced = [] # Non auto-balanced instances
node_volume = {}
node_instance = {}
node_info = {}
# FIXME: verify OS list
# do local checksums
- file_names = list(self.sstore.GetFileList())
+ master_files = [constants.CLUSTER_CONF_FILE]
+
+ file_names = ssconf.SimpleStore().GetFileList()
file_names.append(constants.SSL_CERT_FILE)
- file_names.append(constants.CLUSTER_CONF_FILE)
+ file_names.extend(master_files)
+
local_checksums = utils.FingerprintFiles(file_names)
feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
- all_volumeinfo = rpc.call_volume_list(nodelist, vg_name)
- all_instanceinfo = rpc.call_instance_list(nodelist)
- all_vglist = rpc.call_vg_list(nodelist)
node_verify_param = {
- 'filelist': file_names,
- 'nodelist': nodelist,
- 'hypervisor': None,
- 'node-net-test': [(node.name, node.primary_ip, node.secondary_ip)
- for node in nodeinfo]
+ constants.NV_FILELIST: file_names,
+ constants.NV_NODELIST: nodelist,
+ constants.NV_HYPERVISOR: hypervisors,
+ constants.NV_NODENETTEST: [(node.name, node.primary_ip,
+ node.secondary_ip) for node in nodeinfo],
+ constants.NV_LVLIST: vg_name,
+ constants.NV_INSTANCELIST: hypervisors,
+ constants.NV_VGLIST: None,
+ constants.NV_VERSION: None,
+ constants.NV_HVINFO: self.cfg.GetHypervisorType(),
}
- all_nvinfo = rpc.call_node_verify(nodelist, node_verify_param)
- all_rversion = rpc.call_version(nodelist)
- all_ninfo = rpc.call_node_info(nodelist, self.cfg.GetVGName())
+ all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
+ self.cfg.GetClusterName())
+
+ cluster = self.cfg.GetClusterInfo()
+ master_node = self.cfg.GetMasterNode()
+ for node_i in nodeinfo:
+ node = node_i.name
+ nresult = all_nvinfo[node].data
+
+ if node == master_node:
+ ntype = "master"
+ elif node_i.master_candidate:
+ ntype = "master candidate"
+ else:
+ ntype = "regular"
+ feedback_fn("* Verifying node %s (%s)" % (node, ntype))
- for node in nodelist:
- feedback_fn("* Verifying node %s" % node)
- result = self._VerifyNode(node, file_names, local_checksums,
- all_vglist[node], all_nvinfo[node],
- all_rversion[node], feedback_fn)
- bad = bad or result
+ if all_nvinfo[node].failed or not isinstance(nresult, dict):
+ feedback_fn(" - ERROR: connection to %s failed" % (node,))
+ bad = True
+ continue
- # node_volume
- volumeinfo = all_volumeinfo[node]
+ result = self._VerifyNode(node_i, file_names, local_checksums,
+ nresult, feedback_fn, master_files)
+ bad = bad or result
- if isinstance(volumeinfo, basestring):
+ lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
+ if isinstance(lvdata, basestring):
feedback_fn(" - ERROR: LVM problem on node %s: %s" %
- (node, volumeinfo[-400:].encode('string_escape')))
+ (node, lvdata.encode('string_escape')))
bad = True
node_volume[node] = {}
- elif not isinstance(volumeinfo, dict):
- feedback_fn(" - ERROR: connection to %s failed" % (node,))
+ elif not isinstance(lvdata, dict):
+ feedback_fn(" - ERROR: connection to %s failed (lvlist)" % (node,))
bad = True
continue
else:
- node_volume[node] = volumeinfo
+ node_volume[node] = lvdata
# node_instance
- nodeinstance = all_instanceinfo[node]
- if type(nodeinstance) != list:
- feedback_fn(" - ERROR: connection to %s failed" % (node,))
+ idata = nresult.get(constants.NV_INSTANCELIST, None)
+ if not isinstance(idata, list):
+ feedback_fn(" - ERROR: connection to %s failed (instancelist)" %
+ (node,))
bad = True
continue
- node_instance[node] = nodeinstance
+ node_instance[node] = idata
# node_info
- nodeinfo = all_ninfo[node]
+ nodeinfo = nresult.get(constants.NV_HVINFO, None)
if not isinstance(nodeinfo, dict):
- feedback_fn(" - ERROR: connection to %s failed" % (node,))
+ feedback_fn(" - ERROR: connection to %s failed (hvinfo)" % (node,))
bad = True
continue
try:
node_info[node] = {
"mfree": int(nodeinfo['memory_free']),
- "dfree": int(nodeinfo['vg_free']),
+ "dfree": int(nresult[constants.NV_VGLIST][vg_name]),
"pinst": [],
"sinst": [],
# dictionary holding all instances this node is secondary for,
feedback_fn(" - WARNING: multiple secondaries for instance %s"
% instance)
+ if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
+ i_non_a_balanced.append(instance)
+
for snode in inst_config.secondary_nodes:
if snode in node_info:
node_info[snode]['sinst'].append(instance)
feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
% len(i_non_redundant))
+ if i_non_a_balanced:
+ feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
+ % len(i_non_a_balanced))
+
return not bad
def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
- """Analize the post-hooks' result, handle it, and send some
+ """Analyze the post-hooks' result
+
+ This method analyses the hook result, handles it, and sends some
nicely-formatted feedback back to the user.
- Args:
- phase: the hooks phase that has just been run
- hooks_results: the results of the multi-node hooks rpc call
- feedback_fn: function to send feedback back to the caller
- lu_result: previous Exec result
+ @param phase: one of L{constants.HOOKS_PHASE_POST} or
+ L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
+ @param hooks_results: the results of the multi-node hooks rpc call
+ @param feedback_fn: function used to send feedback back to the caller
+ @param lu_result: previous Exec result
+ @return: the new Exec result, based on the previous result
+ and hook results
"""
# We only really run POST phase hooks, and are only interested in
for node_name in hooks_results:
show_node_header = True
res = hooks_results[node_name]
- if res is False or not isinstance(res, list):
- feedback_fn(" Communication failure")
+ if res.failed or res.data is False or not isinstance(res.data, list):
+ feedback_fn(" Communication failure in hooks execution")
lu_result = 1
continue
- for script, hkr, output in res:
+ for script, hkr, output in res.data:
if hkr == constants.HKR_FAIL:
# The node header is only shown once, if there are
# failing hooks on that node
if not nv_dict:
return result
- node_lvs = rpc.call_volume_list(nodes, vg_name)
+ node_lvs = self.rpc.call_volume_list(nodes, vg_name)
to_act = set()
for node in nodes:
# node_volume
lvs = node_lvs[node]
-
+ if lvs.failed:
+ self.LogWarning("Connection to node %s failed: %s" %
+ (node, lvs.data))
+ continue
+ lvs = lvs.data
if isinstance(lvs, basestring):
- logger.Info("error enumerating LVs on node %s: %s" % (node, lvs))
+ logging.warning("Error enumerating LVs on node %s: %s", node, lvs)
res_nlvm[node] = lvs
elif not isinstance(lvs, dict):
- logger.Info("connection to node %s failed or invalid data returned" %
- (node,))
+ logging.warning("Connection to node %s failed or invalid data"
+ " returned", node)
res_nodes.append(node)
continue
HPATH = "cluster-rename"
HTYPE = constants.HTYPE_CLUSTER
_OP_REQP = ["name"]
- REQ_WSSTORE = True
def BuildHooksEnv(self):
"""Build hooks env.
"""
env = {
- "OP_TARGET": self.sstore.GetClusterName(),
+ "OP_TARGET": self.cfg.GetClusterName(),
"NEW_NAME": self.op.name,
}
- mn = self.sstore.GetMasterNode()
+ mn = self.cfg.GetMasterNode()
return env, [mn], [mn]
def CheckPrereq(self):
new_name = hostname.name
self.ip = new_ip = hostname.ip
- old_name = self.sstore.GetClusterName()
- old_ip = self.sstore.GetMasterIP()
+ old_name = self.cfg.GetClusterName()
+ old_ip = self.cfg.GetMasterIP()
if new_name == old_name and new_ip == old_ip:
raise errors.OpPrereqError("Neither the name nor the IP address of the"
" cluster has changed")
"""
clustername = self.op.name
ip = self.ip
- ss = self.sstore
# shutdown the master IP
- master = ss.GetMasterNode()
- if not rpc.call_node_stop_master(master, False):
+ master = self.cfg.GetMasterNode()
+ result = self.rpc.call_node_stop_master(master, False)
+ if result.failed or not result.data:
raise errors.OpExecError("Could not disable the master role")
try:
- # modify the sstore
- ss.SetKey(ss.SS_MASTER_IP, ip)
- ss.SetKey(ss.SS_CLUSTER_NAME, clustername)
-
- # Distribute updated ss config to all nodes
- myself = self.cfg.GetNodeInfo(master)
- dist_nodes = self.cfg.GetNodeList()
- if myself.name in dist_nodes:
- dist_nodes.remove(myself.name)
-
- logger.Debug("Copying updated ssconf data to all nodes")
- for keyname in [ss.SS_CLUSTER_NAME, ss.SS_MASTER_IP]:
- fname = ss.KeyToFilename(keyname)
- result = rpc.call_upload_file(dist_nodes, fname)
- for to_node in dist_nodes:
- if not result[to_node]:
- logger.Error("copy of file %s to node %s failed" %
- (fname, to_node))
+ cluster = self.cfg.GetClusterInfo()
+ cluster.cluster_name = clustername
+ cluster.master_ip = ip
+ self.cfg.Update(cluster)
+
+      # update the known hosts file
+      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
+      # the file has just been written locally, so it only needs to be
+      # uploaded to the other nodes; the master is dropped from the list
+      # if present
+      node_list = self.cfg.GetNodeList()
+      try:
+        node_list.remove(master)
+      except ValueError:
+        pass
+      result = self.rpc.call_upload_file(node_list,
+                                         constants.SSH_KNOWN_HOSTS_FILE)
+      for to_node, to_result in result.iteritems():
+        if to_result.failed or not to_result.data:
+          # a failed copy is only logged, it does not abort the rename;
+          # the file name is taken from the constant used for the upload
+          # (no local variable holds it in this code path)
+          logging.error("Copy of file %s to node %s failed",
+                        constants.SSH_KNOWN_HOSTS_FILE, to_node)
+
finally:
- if not rpc.call_node_start_master(master, False):
- logger.Error("Could not re-enable the master role on the master,"
- " please restart manually.")
+ result = self.rpc.call_node_start_master(master, False)
+ if result.failed or not result.data:
+ self.LogWarning("Could not re-enable the master role on"
+ " the master, please restart manually.")
def _RecursiveCheckIfLVMBased(disk):
"""Check if the given disk or its children are lvm-based.
- Args:
- disk: ganeti.objects.Disk object
-
- Returns:
- boolean indicating whether a LD_LV dev_type was found or not
+ @type disk: L{objects.Disk}
+ @param disk: the disk to check
+ @rtype: boolean
+ @return: boolean indicating whether a LD_LV dev_type was found or not
"""
if disk.children:
_OP_REQP = []
REQ_BGL = False
+  def CheckArguments(self):
+    """Check and normalize the candidate_pool_size opcode argument.
+
+    The method is named CheckArguments because that is the hook the
+    L{LogicalUnit} constructor invokes; under its previous name,
+    CheckParameters, nothing visible in this module called it, so
+    self.op.candidate_pool_size could be left undefined or unvalidated
+    by the time Exec reads it.
+
+    A missing candidate_pool_size attribute is set to None, which Exec
+    treats as "no change requested"; any other value is converted to an
+    integer and stored back on the opcode.
+
+    @raise errors.OpPrereqError: if the value cannot be converted to an
+        integer or is smaller than one
+
+    """
+    if not hasattr(self.op, "candidate_pool_size"):
+      self.op.candidate_pool_size = None
+    if self.op.candidate_pool_size is not None:
+      try:
+        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
+      except (TypeError, ValueError), err:
+        # int() raises TypeError for non-string/non-number input (e.g. a
+        # list); report it as a bad parameter as well
+        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
+                                   str(err))
+      if self.op.candidate_pool_size < 1:
+        raise errors.OpPrereqError("At least one master candidate needed")
+
+  # backwards-compatible alias for code still using the old method name
+  CheckParameters = CheckArguments
+
def ExpandNames(self):
# FIXME: in the future maybe other cluster params won't require checking on
# all nodes to be modified.
"""
env = {
- "OP_TARGET": self.sstore.GetClusterName(),
+ "OP_TARGET": self.cfg.GetClusterName(),
"NEW_VG_NAME": self.op.vg_name,
}
- mn = self.sstore.GetMasterNode()
+ mn = self.cfg.GetMasterNode()
return env, [mn], [mn]
def CheckPrereq(self):
"""
# FIXME: This only works because there is only one parameter that can be
# changed or removed.
- if not self.op.vg_name:
+ if self.op.vg_name is not None and not self.op.vg_name:
instances = self.cfg.GetAllInstancesInfo().values()
for inst in instances:
for disk in inst.disks:
raise errors.OpPrereqError("Cannot disable lvm storage while"
" lvm-based instances exist")
+ node_list = self.acquired_locks[locking.LEVEL_NODE]
+
# if vg_name not None, checks given volume group on all nodes
if self.op.vg_name:
- node_list = self.acquired_locks[locking.LEVEL_NODE]
- vglist = rpc.call_vg_list(node_list)
+ vglist = self.rpc.call_vg_list(node_list)
for node in node_list:
- vgstatus = utils.CheckVolumeGroupSize(vglist[node], self.op.vg_name,
+ if vglist[node].failed:
+ # ignoring down node
+ self.LogWarning("Node %s unreachable/error, ignoring" % node)
+ continue
+ vgstatus = utils.CheckVolumeGroupSize(vglist[node].data,
+ self.op.vg_name,
constants.MIN_VG_SIZE)
if vgstatus:
raise errors.OpPrereqError("Error on node '%s': %s" %
(node, vgstatus))
- def Exec(self, feedback_fn):
- """Change the parameters of the cluster.
+ self.cluster = cluster = self.cfg.GetClusterInfo()
+ # validate beparams changes
+ if self.op.beparams:
+ utils.CheckBEParams(self.op.beparams)
+ self.new_beparams = cluster.FillDict(
+ cluster.beparams[constants.BEGR_DEFAULT], self.op.beparams)
+
+ # hypervisor list/parameters
+ self.new_hvparams = cluster.FillDict(cluster.hvparams, {})
+ if self.op.hvparams:
+ if not isinstance(self.op.hvparams, dict):
+ raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
+ for hv_name, hv_dict in self.op.hvparams.items():
+ if hv_name not in self.new_hvparams:
+ self.new_hvparams[hv_name] = hv_dict
+ else:
+ self.new_hvparams[hv_name].update(hv_dict)
- """
- if self.op.vg_name != self.cfg.GetVGName():
- self.cfg.SetVGName(self.op.vg_name)
+ if self.op.enabled_hypervisors is not None:
+ self.hv_list = self.op.enabled_hypervisors
else:
- feedback_fn("Cluster LVM configuration already in desired"
- " state, not changing")
+ self.hv_list = cluster.enabled_hypervisors
+
+ if self.op.hvparams or self.op.enabled_hypervisors is not None:
+ # either the enabled list has changed, or the parameters have, validate
+ for hv_name, hv_params in self.new_hvparams.items():
+ if ((self.op.hvparams and hv_name in self.op.hvparams) or
+ (self.op.enabled_hypervisors and
+ hv_name in self.op.enabled_hypervisors)):
+ # either this is a new hypervisor, or its parameters have changed
+ hv_class = hypervisor.GetHypervisor(hv_name)
+ hv_class.CheckParameterSyntax(hv_params)
+ _CheckHVParams(self, node_list, hv_name, hv_params)
+ def Exec(self, feedback_fn):
+ """Change the parameters of the cluster.
-def _WaitForSync(cfgw, instance, proc, oneshot=False, unlock=False):
+ """
+ if self.op.vg_name is not None:
+ if self.op.vg_name != self.cfg.GetVGName():
+ self.cfg.SetVGName(self.op.vg_name)
+ else:
+ feedback_fn("Cluster LVM configuration already in desired"
+ " state, not changing")
+ if self.op.hvparams:
+ self.cluster.hvparams = self.new_hvparams
+ if self.op.enabled_hypervisors is not None:
+ self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
+ if self.op.beparams:
+ self.cluster.beparams[constants.BEGR_DEFAULT] = self.new_beparams
+ if self.op.candidate_pool_size is not None:
+ self.cluster.candidate_pool_size = self.op.candidate_pool_size
+
+ self.cfg.Update(self.cluster)
+
+ # we want to update nodes after the cluster so that if any errors
+ # happen, we have recorded and saved the cluster info
+ if self.op.candidate_pool_size is not None:
+ node_info = self.cfg.GetAllNodesInfo().values()
+ num_candidates = len([node for node in node_info
+ if node.master_candidate])
+ num_nodes = len(node_info)
+ if num_candidates < self.op.candidate_pool_size:
+ random.shuffle(node_info)
+ for node in node_info:
+ if num_candidates >= self.op.candidate_pool_size:
+ break
+ if node.master_candidate:
+ continue
+ node.master_candidate = True
+ self.LogInfo("Promoting node %s to master candidate", node.name)
+ self.cfg.Update(node)
+ self.context.ReaddNode(node)
+ num_candidates += 1
+ elif num_candidates > self.op.candidate_pool_size:
+ self.LogInfo("Note: more nodes are candidates (%d) than the new value"
+ " of candidate_pool_size (%d)" %
+ (num_candidates, self.op.candidate_pool_size))
+
+
+def _WaitForSync(lu, instance, oneshot=False, unlock=False):
"""Sleep and poll for an instance's disk to sync.
"""
return True
if not oneshot:
- proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
+ lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
node = instance.primary_node
for dev in instance.disks:
- cfgw.SetDiskID(dev, node)
+ lu.cfg.SetDiskID(dev, node)
retries = 0
while True:
max_time = 0
done = True
cumul_degraded = False
- rstats = rpc.call_blockdev_getmirrorstatus(node, instance.disks)
- if not rstats:
- proc.LogWarning("Can't get any data from node %s" % node)
+ rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
+ if rstats.failed or not rstats.data:
+ lu.LogWarning("Can't get any data from node %s", node)
retries += 1
if retries >= 10:
raise errors.RemoteError("Can't contact node %s for mirror data,"
" aborting." % node)
time.sleep(6)
continue
+ rstats = rstats.data
retries = 0
for i in range(len(rstats)):
mstat = rstats[i]
if mstat is None:
- proc.LogWarning("Can't compute data for node %s/%s" %
- (node, instance.disks[i].iv_name))
+ lu.LogWarning("Can't compute data for node %s/%s",
+ node, instance.disks[i].iv_name)
continue
# we ignore the ldisk parameter
perc_done, est_time, is_degraded, _ = mstat
max_time = est_time
else:
rem_time = "no time estimate"
- proc.LogInfo("- device %s: %5.2f%% done, %s" %
- (instance.disks[i].iv_name, perc_done, rem_time))
+ lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
+ (instance.disks[i].iv_name, perc_done, rem_time))
if done or oneshot:
break
time.sleep(min(60, max_time))
if done:
- proc.LogInfo("Instance %s's disks are in sync." % instance.name)
+ lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
return not cumul_degraded
-def _CheckDiskConsistency(cfgw, dev, node, on_primary, ldisk=False):
+def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
"""Check that mirrors are not degraded.
The ldisk parameter, if True, will change the test from the
the device(s)) to the ldisk (representing the local storage status).
"""
- cfgw.SetDiskID(dev, node)
+ lu.cfg.SetDiskID(dev, node)
if ldisk:
idx = 6
else:
result = True
if on_primary or dev.AssembleOnSecondary():
- rstats = rpc.call_blockdev_find(node, dev)
- if not rstats:
- logger.ToStderr("Node %s: Disk degraded, not found or node down" % node)
+ rstats = lu.rpc.call_blockdev_find(node, dev)
+ if rstats.failed or not rstats.data:
+ logging.warning("Node %s: disk degraded, not found or node down", node)
result = False
else:
- result = result and (not rstats[idx])
+ result = result and (not rstats.data[idx])
if dev.children:
for child in dev.children:
- result = result and _CheckDiskConsistency(cfgw, child, node, on_primary)
+ result = result and _CheckDiskConsistency(lu, child, node, on_primary)
return result
"""
_OP_REQP = ["output_fields", "names"]
REQ_BGL = False
+ _FIELDS_STATIC = utils.FieldSet()
+ _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
def ExpandNames(self):
if self.op.names:
raise errors.OpPrereqError("Selective OS query not supported")
- self.dynamic_fields = frozenset(["name", "valid", "node_status"])
- _CheckOutputFields(static=[],
- dynamic=self.dynamic_fields,
+ _CheckOutputFields(static=self._FIELDS_STATIC,
+ dynamic=self._FIELDS_DYNAMIC,
selected=self.op.output_fields)
# Lock all nodes, in shared mode
def _DiagnoseByOS(node_list, rlist):
"""Remaps a per-node return list into a per-os per-node dictionary
- Args:
- node_list: a list with the names of all nodes
- rlist: a map with node names as keys and OS objects as values
+ @param node_list: a list with the names of all nodes
+ @param rlist: a map with node names as keys and OS objects as values
+
+ @rtype: dict
+ @returns: a dictionary with osnames as keys and as value another map, with
+ nodes as keys and list of OS objects as values, eg::
- Returns:
- map: a map with osnames as keys and as value another map, with
- nodes as
- keys and list of OS objects as values
- e.g. {"debian-etch": {"node1": [<object>,...],
- "node2": [<object>,]}
- }
+ {"debian-etch": {"node1": [<object>,...],
+ "node2": [<object>,]}
+ }
"""
all_os = {}
for node_name, nr in rlist.iteritems():
- if not nr:
+ if nr.failed or not nr.data:
continue
- for os_obj in nr:
+ for os_obj in nr.data:
if os_obj.name not in all_os:
# build a list of nodes for this os containing empty lists
# for each node in node_list
"""
node_list = self.acquired_locks[locking.LEVEL_NODE]
- node_data = rpc.call_os_diagnose(node_list)
+ node_data = self.rpc.call_os_diagnose(node_list)
if node_data == False:
raise errors.OpExecError("Can't gather the list of OSes")
pol = self._DiagnoseByOS(node_list, node_data)
instance_list = self.cfg.GetInstanceList()
- masternode = self.sstore.GetMasterNode()
+ masternode = self.cfg.GetMasterNode()
if node.name == masternode:
raise errors.OpPrereqError("Node is the master node,"
" you need to failover first.")
"""
node = self.node
- logger.Info("stopping the node daemon and removing configs from node %s" %
- node.name)
+ logging.info("Stopping the node daemon and removing configs from node %s",
+ node.name)
self.context.RemoveNode(node.name)
- rpc.call_node_leave_cluster(node.name)
+ self.rpc.call_node_leave_cluster(node.name)
+
+ # Promote nodes to master candidate as needed
+ cp_size = self.cfg.GetClusterInfo().candidate_pool_size
+ node_info = self.cfg.GetAllNodesInfo().values()
+ num_candidates = len([n for n in node_info
+ if n.master_candidate])
+ num_nodes = len(node_info)
+ random.shuffle(node_info)
+ for node in node_info:
+ if num_candidates >= cp_size or num_candidates >= num_nodes:
+ break
+ if node.master_candidate:
+ continue
+ node.master_candidate = True
+ self.LogInfo("Promoting node %s to master candidate", node.name)
+ self.cfg.Update(node)
+ self.context.ReaddNode(node)
+ num_candidates += 1
class LUQueryNodes(NoHooksLU):
"""
_OP_REQP = ["output_fields", "names"]
REQ_BGL = False
+ _FIELDS_DYNAMIC = utils.FieldSet(
+ "dtotal", "dfree",
+ "mtotal", "mnode", "mfree",
+ "bootid",
+ "ctotal",
+ )
+
+ _FIELDS_STATIC = utils.FieldSet(
+ "name", "pinst_cnt", "sinst_cnt",
+ "pinst_list", "sinst_list",
+ "pip", "sip", "tags",
+ "serial_no",
+ "master_candidate",
+ "master",
+ )
def ExpandNames(self):
- self.dynamic_fields = frozenset([
- "dtotal", "dfree",
- "mtotal", "mnode", "mfree",
- "bootid",
- "ctotal",
- ])
-
- self.static_fields = frozenset([
- "name", "pinst_cnt", "sinst_cnt",
- "pinst_list", "sinst_list",
- "pip", "sip", "tags",
- "serial_no",
- ])
-
- _CheckOutputFields(static=self.static_fields,
- dynamic=self.dynamic_fields,
+ _CheckOutputFields(static=self._FIELDS_STATIC,
+ dynamic=self._FIELDS_DYNAMIC,
selected=self.op.output_fields)
self.needed_locks = {}
else:
self.wanted = locking.ALL_SET
- self.do_locking = not self.static_fields.issuperset(self.op.output_fields)
+ self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
if self.do_locking:
# if we don't request only static fields, we need to lock the nodes
self.needed_locks[locking.LEVEL_NODE] = self.wanted
nodenames = self.wanted
missing = set(nodenames).difference(all_info.keys())
if missing:
- raise self.OpExecError(
+ raise errors.OpExecError(
"Some nodes were removed before retrieving their data: %s" % missing)
else:
nodenames = all_info.keys()
+
+ nodenames = utils.NiceSort(nodenames)
nodelist = [all_info[name] for name in nodenames]
# begin data gathering
- if self.dynamic_fields.intersection(self.op.output_fields):
+ if self.do_locking:
live_data = {}
- node_data = rpc.call_node_info(nodenames, self.cfg.GetVGName())
+ node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
+ self.cfg.GetHypervisorType())
for name in nodenames:
- nodeinfo = node_data.get(name, None)
- if nodeinfo:
+ nodeinfo = node_data[name]
+ if not nodeinfo.failed and nodeinfo.data:
+ nodeinfo = nodeinfo.data
+ fn = utils.TryConvert
live_data[name] = {
- "mtotal": utils.TryConvert(int, nodeinfo['memory_total']),
- "mnode": utils.TryConvert(int, nodeinfo['memory_dom0']),
- "mfree": utils.TryConvert(int, nodeinfo['memory_free']),
- "dtotal": utils.TryConvert(int, nodeinfo['vg_size']),
- "dfree": utils.TryConvert(int, nodeinfo['vg_free']),
- "ctotal": utils.TryConvert(int, nodeinfo['cpu_total']),
- "bootid": nodeinfo['bootid'],
+ "mtotal": fn(int, nodeinfo.get('memory_total', None)),
+ "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
+ "mfree": fn(int, nodeinfo.get('memory_free', None)),
+ "dtotal": fn(int, nodeinfo.get('vg_size', None)),
+ "dfree": fn(int, nodeinfo.get('vg_free', None)),
+ "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
+ "bootid": nodeinfo.get('bootid', None),
}
else:
live_data[name] = {}
if secnode in node_to_secondary:
node_to_secondary[secnode].add(inst.name)
+ master_node = self.cfg.GetMasterNode()
+
# end data gathering
output = []
val = list(node.GetTags())
elif field == "serial_no":
val = node.serial_no
- elif field in self.dynamic_fields:
+ elif field == "master_candidate":
+ val = node.master_candidate
+ elif field == "master":
+ val = node.name == master_node
+ elif self._FIELDS_DYNAMIC.Matches(field):
val = live_data[node.name].get(field, None)
else:
raise errors.ParameterError(field)
"""
_OP_REQP = ["nodes", "output_fields"]
REQ_BGL = False
+ _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
+ _FIELDS_STATIC = utils.FieldSet("node")
def ExpandNames(self):
- _CheckOutputFields(static=["node"],
- dynamic=["phys", "vg", "name", "size", "instance"],
+ _CheckOutputFields(static=self._FIELDS_STATIC,
+ dynamic=self._FIELDS_DYNAMIC,
selected=self.op.output_fields)
self.needed_locks = {}
"""
nodenames = self.nodes
- volumes = rpc.call_node_volumes(nodenames)
+ volumes = self.rpc.call_node_volumes(nodenames)
ilist = [self.cfg.GetInstanceInfo(iname) for iname
in self.cfg.GetInstanceList()]
output = []
for node in nodenames:
- if node not in volumes or not volumes[node]:
+ if node not in volumes or volumes[node].failed or not volumes[node].data:
continue
- node_vols = volumes[node][:]
+ node_vols = volumes[node].data[:]
node_vols.sort(key=lambda vol: vol['dev'])
for vol in node_vols:
# check that the type of the node (single versus dual homed) is the
# same as for the master
- myself = cfg.GetNodeInfo(self.sstore.GetMasterNode())
+ myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
master_singlehomed = myself.secondary_ip == myself.primary_ip
newbie_singlehomed = secondary_ip == primary_ip
if master_singlehomed != newbie_singlehomed:
raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
" based ping to noded port")
+ cp_size = self.cfg.GetClusterInfo().candidate_pool_size
+ node_info = self.cfg.GetAllNodesInfo().values()
+ num_candidates = len([n for n in node_info
+ if n.master_candidate])
+ master_candidate = num_candidates < cp_size
+
self.new_node = objects.Node(name=node,
primary_ip=primary_ip,
- secondary_ip=secondary_ip)
+ secondary_ip=secondary_ip,
+ master_candidate=master_candidate)
def Exec(self, feedback_fn):
"""Adds the new node to the cluster.
node = new_node.name
# check connectivity
- result = rpc.call_version([node])[node]
- if result:
- if constants.PROTOCOL_VERSION == result:
- logger.Info("communication to node %s fine, sw version %s match" %
- (node, result))
+ result = self.rpc.call_version([node])[node]
+ result.Raise()
+ if result.data:
+ if constants.PROTOCOL_VERSION == result.data:
+ logging.info("Communication to node %s fine, sw version %s match",
+ node, result.data)
else:
raise errors.OpExecError("Version mismatch master version %s,"
" node version %s" %
- (constants.PROTOCOL_VERSION, result))
+ (constants.PROTOCOL_VERSION, result.data))
else:
raise errors.OpExecError("Cannot get version from the new node")
# setup ssh on node
- logger.Info("copy ssh key to node %s" % node)
+ logging.info("Copy ssh key to node %s", node)
priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
keyarray = []
keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
finally:
f.close()
- result = rpc.call_node_add(node, keyarray[0], keyarray[1], keyarray[2],
- keyarray[3], keyarray[4], keyarray[5])
+ result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
+ keyarray[2],
+ keyarray[3], keyarray[4], keyarray[5])
- if not result:
+ if result.failed or not result.data:
raise errors.OpExecError("Cannot transfer ssh keys to the new node")
# Add node to our /etc/hosts, and add key to known_hosts
utils.AddHostToEtcHosts(new_node.name)
if new_node.secondary_ip != new_node.primary_ip:
- if not rpc.call_node_tcp_ping(new_node.name,
- constants.LOCALHOST_IP_ADDRESS,
- new_node.secondary_ip,
- constants.DEFAULT_NODED_PORT,
- 10, False):
+ result = self.rpc.call_node_has_ip_address(new_node.name,
+ new_node.secondary_ip)
+ if result.failed or not result.data:
raise errors.OpExecError("Node claims it doesn't have the secondary ip"
" you gave (%s). Please fix and re-run this"
" command." % new_node.secondary_ip)
- node_verify_list = [self.sstore.GetMasterNode()]
+ node_verify_list = [self.cfg.GetMasterNode()]
node_verify_param = {
'nodelist': [node],
# TODO: do a node-net-test as well?
}
- result = rpc.call_node_verify(node_verify_list, node_verify_param)
+ result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
+ self.cfg.GetClusterName())
for verifier in node_verify_list:
- if not result[verifier]:
+ if result[verifier].failed or not result[verifier].data:
raise errors.OpExecError("Cannot communicate with %s's node daemon"
" for remote verification" % verifier)
- if result[verifier]['nodelist']:
- for failed in result[verifier]['nodelist']:
+ if result[verifier].data['nodelist']:
+ for failed in result[verifier].data['nodelist']:
feedback_fn("ssh/hostname verification failed %s -> %s" %
(verifier, result[verifier]['nodelist'][failed]))
raise errors.OpExecError("ssh/hostname verification failed.")
# Distribute updated /etc/hosts and known_hosts to all nodes,
# including the node just added
- myself = self.cfg.GetNodeInfo(self.sstore.GetMasterNode())
+ myself = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
dist_nodes = self.cfg.GetNodeList()
if not self.op.readd:
dist_nodes.append(node)
if myself.name in dist_nodes:
dist_nodes.remove(myself.name)
- logger.Debug("Copying hosts and known_hosts to all nodes")
+ logging.debug("Copying hosts and known_hosts to all nodes")
for fname in (constants.ETC_HOSTS, constants.SSH_KNOWN_HOSTS_FILE):
- result = rpc.call_upload_file(dist_nodes, fname)
- for to_node in dist_nodes:
- if not result[to_node]:
- logger.Error("copy of file %s to node %s failed" %
- (fname, to_node))
-
- to_copy = self.sstore.GetFileList()
- if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
+ result = self.rpc.call_upload_file(dist_nodes, fname)
+ for to_node, to_result in result.iteritems():
+ if to_result.failed or not to_result.data:
+ logging.error("Copy of file %s to node %s failed", fname, to_node)
+
+ to_copy = []
+ if constants.HT_XEN_HVM in self.cfg.GetClusterInfo().enabled_hypervisors:
to_copy.append(constants.VNC_PASSWORD_FILE)
for fname in to_copy:
- result = rpc.call_upload_file([node], fname)
- if not result[node]:
- logger.Error("could not copy file %s to node %s" % (fname, node))
+ result = self.rpc.call_upload_file([node], fname)
+ if result[node].failed or not result[node]:
+ logging.error("Could not copy file %s to node %s", fname, node)
if self.op.readd:
self.context.ReaddNode(new_node)
self.context.AddNode(new_node)
+class LUSetNodeParams(LogicalUnit):
+  """Modifies the parameters of a node.
+
+  The only modification handled here is the C{master_candidate} flag.
+  The optional C{force} opcode field downgrades the "not enough master
+  candidates" error to a warning.
+
+  """
+  HPATH = "node-modify"
+  HTYPE = constants.HTYPE_NODE
+  _OP_REQP = ["node_name"]
+  REQ_BGL = False
+
+  def CheckArguments(self):
+    """Validate and normalize the opcode arguments.
+
+    Expands C{node_name} to the name known to the configuration and
+    normalizes C{master_candidate} to a boolean.
+
+    @raise errors.OpPrereqError: if the node name cannot be expanded or
+        if no modification was requested
+
+    """
+    node_name = self.cfg.ExpandNodeName(self.op.node_name)
+    if node_name is None:
+      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
+    self.op.node_name = node_name
+    # A missing attribute and an explicit None both mean "not requested";
+    # coercing None through bool() would silently turn it into a demotion.
+    if getattr(self.op, 'master_candidate', None) is None:
+      raise errors.OpPrereqError("Please pass at least one modification")
+    self.op.master_candidate = bool(self.op.master_candidate)
+
+  def ExpandNames(self):
+    """Declare the lock on the node being modified."""
+    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
+
+  def BuildHooksEnv(self):
+    """Build hooks env.
+
+    The hooks node list (pre and post) is the master node plus the
+    node being modified.
+
+    """
+    env = {
+      "OP_TARGET": self.op.node_name,
+      "MASTER_CANDIDATE": str(self.op.master_candidate),
+      }
+    nl = [self.cfg.GetMasterNode(),
+          self.op.node_name]
+    return env, nl, nl
+
+  def CheckPrereq(self):
+    """Check prerequisites.
+
+    Refuses to demote the master node, and checks that demoting a node
+    which currently is a master candidate does not leave the cluster
+    with fewer candidates than the configured candidate pool size.
+
+    @raise errors.OpPrereqError: if the master node would be demoted, or
+        if the candidate pool would become too small and C{force} is
+        not set (with C{force} only a warning is logged)
+
+    """
+    # "force" is not listed in _OP_REQP, so it may be unset on the opcode
+    force = self.force = getattr(self.op, "force", False)
+
+    if not self.op.master_candidate:
+      if self.op.node_name == self.cfg.GetMasterNode():
+        raise errors.OpPrereqError("The master node has to be a"
+                                   " master candidate")
+      target = self.cfg.GetNodeInfo(self.op.node_name)
+      # Only an actual demotion shrinks the pool; clearing the flag on a
+      # node that is not a candidate leaves the count unchanged.
+      if target.master_candidate:
+        cp_size = self.cfg.GetClusterInfo().candidate_pool_size
+        node_info = self.cfg.GetAllNodesInfo().values()
+        num_candidates = len([node for node in node_info
+                              if node.master_candidate])
+        if num_candidates <= cp_size:
+          msg = ("Not enough master candidates (desired"
+                 " %d, new value will be %d)" % (cp_size, num_candidates-1))
+          if force:
+            self.LogWarning(msg)
+          else:
+            raise errors.OpPrereqError(msg)
+
+  def Exec(self, feedback_fn):
+    """Modifies a node.
+
+    @return: a list of C{(parameter, new value as string)} pairs
+        describing the changes that were applied
+
+    """
+    node = self.cfg.GetNodeInfo(self.op.node_name)
+
+    result = []
+
+    if self.op.master_candidate is not None:
+      node.master_candidate = self.op.master_candidate
+      result.append(("master_candidate", str(self.op.master_candidate)))
+
+    # this will trigger configuration file update, if needed
+    self.cfg.Update(node)
+    # this will trigger job queue propagation or cleanup
+    if self.op.node_name != self.cfg.GetMasterNode():
+      self.context.ReaddNode(node)
+
+    return result
+
+
class LUQueryClusterInfo(NoHooksLU):
"""Query cluster configuration.
"""
_OP_REQP = []
- REQ_MASTER = False
REQ_BGL = False
def ExpandNames(self):
"""Return cluster config.
"""
+ cluster = self.cfg.GetClusterInfo()
result = {
- "name": self.sstore.GetClusterName(),
"software_version": constants.RELEASE_VERSION,
"protocol_version": constants.PROTOCOL_VERSION,
"config_version": constants.CONFIG_VERSION,
"os_api_version": constants.OS_API_VERSION,
"export_version": constants.EXPORT_VERSION,
- "master": self.sstore.GetMasterNode(),
"architecture": (platform.architecture()[0], platform.machine()),
- "hypervisor_type": self.sstore.GetHypervisorType(),
+ "name": cluster.cluster_name,
+ "master": cluster.master_node,
+ "default_hypervisor": cluster.default_hypervisor,
+ "enabled_hypervisors": cluster.enabled_hypervisors,
+ "hvparams": cluster.hvparams,
+ "beparams": cluster.beparams,
+ "candidate_pool_size": cluster.candidate_pool_size,
}
return result
"""
_OP_REQP = []
REQ_BGL = False
+ _FIELDS_DYNAMIC = utils.FieldSet()
+ _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
def ExpandNames(self):
self.needed_locks = {}
- static_fields = ["cluster_name", "master_node"]
- _CheckOutputFields(static=static_fields,
- dynamic=[],
+ _CheckOutputFields(static=self._FIELDS_STATIC,
+ dynamic=self._FIELDS_DYNAMIC,
selected=self.op.output_fields)
def CheckPrereq(self):
values = []
for field in self.op.output_fields:
if field == "cluster_name":
- values.append(self.cfg.GetClusterName())
+ entry = self.cfg.GetClusterName()
elif field == "master_node":
- values.append(self.cfg.GetMasterNode())
+ entry = self.cfg.GetMasterNode()
+ elif field == "drain_flag":
+ entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
else:
raise errors.ParameterError(field)
+ values.append(entry)
return values
"""Activate the disks.
"""
- disks_ok, disks_info = _AssembleInstanceDisks(self.instance, self.cfg)
+ disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
if not disks_ok:
raise errors.OpExecError("Cannot activate block devices")
return disks_info
-def _AssembleInstanceDisks(instance, cfg, ignore_secondaries=False):
+def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
"""Prepare the block devices for an instance.
This sets up the block devices on all nodes.
- Args:
- instance: a ganeti.objects.Instance object
- ignore_secondaries: if true, errors on secondary nodes won't result
- in an error return from the function
+ @type lu: L{LogicalUnit}
+ @param lu: the logical unit on whose behalf we execute
+ @type instance: L{objects.Instance}
+ @param instance: the instance for whose disks we assemble
+ @type ignore_secondaries: boolean
+ @param ignore_secondaries: if true, errors on secondary nodes
+ won't result in an error return from the function
+ @return: False if the operation failed, otherwise a list of
+ (host, instance_visible_name, node_visible_name)
+ with the mapping from node devices to instance devices
- Returns:
- false if the operation failed
- list of (host, instance_visible_name, node_visible_name) if the operation
- suceeded with the mapping from node devices to instance devices
"""
device_info = []
disks_ok = True
# 1st pass, assemble on all nodes in secondary mode
for inst_disk in instance.disks:
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
- cfg.SetDiskID(node_disk, node)
- result = rpc.call_blockdev_assemble(node, node_disk, iname, False)
- if not result:
- logger.Error("could not prepare block device %s on node %s"
- " (is_primary=False, pass=1)" % (inst_disk.iv_name, node))
+ lu.cfg.SetDiskID(node_disk, node)
+ result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
+ if result.failed or not result:
+ lu.proc.LogWarning("Could not prepare block device %s on node %s"
+ " (is_primary=False, pass=1)",
+ inst_disk.iv_name, node)
if not ignore_secondaries:
disks_ok = False
for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
if node != instance.primary_node:
continue
- cfg.SetDiskID(node_disk, node)
- result = rpc.call_blockdev_assemble(node, node_disk, iname, True)
- if not result:
- logger.Error("could not prepare block device %s on node %s"
- " (is_primary=True, pass=2)" % (inst_disk.iv_name, node))
+ lu.cfg.SetDiskID(node_disk, node)
+ result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
+ if result.failed or not result:
+ lu.proc.LogWarning("Could not prepare block device %s on node %s"
+ " (is_primary=True, pass=2)",
+ inst_disk.iv_name, node)
disks_ok = False
device_info.append((instance.primary_node, inst_disk.iv_name, result))
# this is a workaround that would be fixed better by
# improving the logical/physical id handling
for disk in instance.disks:
- cfg.SetDiskID(disk, instance.primary_node)
+ lu.cfg.SetDiskID(disk, instance.primary_node)
return disks_ok, device_info
-def _StartInstanceDisks(cfg, instance, force):
+def _StartInstanceDisks(lu, instance, force):
"""Start the disks of an instance.
+
+ Assembles the instance's block devices through _AssembleInstanceDisks.
+ If that reports a failure, the disks are shut down again through
+ _ShutdownInstanceDisks and an error is raised.
+
+ @param lu: the logical unit on whose behalf we execute; its C{proc}
+ attribute is used for logging
+ @param instance: the instance whose disks are started
+ @param force: passed on as C{ignore_secondaries}; the '--force' hint
+ is only logged when this is not None and evaluates to false
+ @raise errors.OpExecError: if the disks could not be assembled
+
"""
- disks_ok, dummy = _AssembleInstanceDisks(instance, cfg,
+ disks_ok, dummy = _AssembleInstanceDisks(lu, instance,
ignore_secondaries=force)
if not disks_ok:
- _ShutdownInstanceDisks(instance, cfg)
+ _ShutdownInstanceDisks(lu, instance)
if force is not None and not force:
- logger.Error("If the message above refers to a secondary node,"
- " you can retry the operation using '--force'.")
+ lu.proc.LogWarning("", hint="If the message above refers to a"
+ " secondary node,"
+ " you can retry the operation using '--force'.")
raise errors.OpExecError("Disk consistency error")
"""
instance = self.instance
- _SafeShutdownInstanceDisks(instance, self.cfg)
+ _SafeShutdownInstanceDisks(self, instance)
-def _SafeShutdownInstanceDisks(instance, cfg):
+def _SafeShutdownInstanceDisks(lu, instance):
"""Shutdown block devices of an instance.
This function checks if an instance is running, before calling
_ShutdownInstanceDisks.
+
+ The list of running instances is requested from the instance's
+ primary node, for the instance's hypervisor.
+
+ @param lu: the logical unit on whose behalf we execute; its C{rpc}
+ attribute is used for the node call
+ @param instance: the instance whose disks should be shut down
+ @raise errors.OpExecError: if the node call failed or did not return
+ a list, or if the instance is in the returned list
+
"""
- ins_l = rpc.call_instance_list([instance.primary_node])
+ ins_l = lu.rpc.call_instance_list([instance.primary_node],
+ [instance.hypervisor])
ins_l = ins_l[instance.primary_node]
- if not type(ins_l) is list:
+ if ins_l.failed or not isinstance(ins_l.data, list):
raise errors.OpExecError("Can't contact node '%s'" %
instance.primary_node)
- if instance.name in ins_l:
+ if instance.name in ins_l.data:
raise errors.OpExecError("Instance is running, can't shutdown"
" block devices.")
- _ShutdownInstanceDisks(instance, cfg)
+ _ShutdownInstanceDisks(lu, instance)
-def _ShutdownInstanceDisks(instance, cfg, ignore_primary=False):
+def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
"""Shutdown block devices of an instance.
This does the shutdown on all nodes of the instance.
result = True
for disk in instance.disks:
for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
- cfg.SetDiskID(top_disk, node)
- if not rpc.call_blockdev_shutdown(node, top_disk):
- logger.Error("could not shutdown block device %s on node %s" %
- (disk.iv_name, node))
+ lu.cfg.SetDiskID(top_disk, node)
+ result = lu.rpc.call_blockdev_shutdown(node, top_disk)
+ if result.failed or not result.data:
+ logging.error("Could not shutdown block device %s on node %s",
+ disk.iv_name, node)
if not ignore_primary or node != instance.primary_node:
result = False
return result
-def _CheckNodeFreeMemory(cfg, node, reason, requested):
+def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor):
"""Checks if a node has enough free memory.
This function checks if a given node has the needed amount of free
information from the node, this function raises an OpPrereqError
exception.
- Args:
- - cfg: a ConfigWriter instance
- - node: the node name
- - reason: string to use in the error message
- - requested: the amount of memory in MiB
+ @type lu: C{LogicalUnit}
+ @param lu: a logical unit from which we get configuration data
+ @type node: C{str}
+ @param node: the node to check
+ @type reason: C{str}
+ @param reason: string to use in the error message
+ @type requested: C{int}
+ @param requested: the amount of memory in MiB to check for
+ @type hypervisor: C{str}
+ @param hypervisor: the hypervisor to ask for memory stats
+ @raise errors.OpPrereqError: if the node doesn't have enough memory, or
+ we cannot check the node
"""
- nodeinfo = rpc.call_node_info([node], cfg.GetVGName())
- if not nodeinfo or not isinstance(nodeinfo, dict):
- raise errors.OpPrereqError("Could not contact node %s for resource"
- " information" % (node,))
-
- free_mem = nodeinfo[node].get('memory_free')
+ nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor)
+ nodeinfo[node].Raise()
+ free_mem = nodeinfo[node].data.get('memory_free')
if not isinstance(free_mem, int):
raise errors.OpPrereqError("Can't compute free memory on node %s, result"
" was '%s'" % (node, free_mem))
def ExpandNames(self):
self._ExpandAndLockInstance()
- self.needed_locks[locking.LEVEL_NODE] = []
- self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
-
- def DeclareLocks(self, level):
- if level == locking.LEVEL_NODE:
- self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
env = {
"FORCE": self.op.force,
}
- env.update(_BuildInstanceHookEnvByObject(self.instance))
- nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
+ env.update(_BuildInstanceHookEnvByObject(self, self.instance))
+ nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
list(self.instance.secondary_nodes))
return env, nl, nl
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
+ bep = self.cfg.GetClusterInfo().FillBE(instance)
# check bridges existence
- _CheckInstanceBridgesExist(instance)
+ _CheckInstanceBridgesExist(self, instance)
- _CheckNodeFreeMemory(self.cfg, instance.primary_node,
+ _CheckNodeFreeMemory(self, instance.primary_node,
"starting instance %s" % instance.name,
- instance.memory)
+ bep[constants.BE_MEMORY], instance.hypervisor)
def Exec(self, feedback_fn):
"""Start the instance.
node_current = instance.primary_node
- _StartInstanceDisks(self.cfg, instance, force)
+ _StartInstanceDisks(self, instance, force)
- if not rpc.call_instance_start(node_current, instance, extra_args):
- _ShutdownInstanceDisks(instance, self.cfg)
+ result = self.rpc.call_instance_start(node_current, instance, extra_args)
+ if result.failed or not result.data:
+ _ShutdownInstanceDisks(self, instance)
raise errors.OpExecError("Could not start instance")
constants.INSTANCE_REBOOT_HARD,
constants.INSTANCE_REBOOT_FULL))
self._ExpandAndLockInstance()
- self.needed_locks[locking.LEVEL_NODE] = []
- self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
-
- def DeclareLocks(self, level):
- if level == locking.LEVEL_NODE:
- primary_only = not constants.INSTANCE_REBOOT_FULL
- self._LockInstancesNodes(primary_only=primary_only)
def BuildHooksEnv(self):
"""Build hooks env.
env = {
"IGNORE_SECONDARIES": self.op.ignore_secondaries,
}
- env.update(_BuildInstanceHookEnvByObject(self.instance))
- nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
+ env.update(_BuildInstanceHookEnvByObject(self, self.instance))
+ nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
list(self.instance.secondary_nodes))
return env, nl, nl
"Cannot retrieve locked instance %s" % self.op.instance_name
# check bridges existence
- _CheckInstanceBridgesExist(instance)
+ _CheckInstanceBridgesExist(self, instance)
def Exec(self, feedback_fn):
"""Reboot the instance.
if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
constants.INSTANCE_REBOOT_HARD]:
- if not rpc.call_instance_reboot(node_current, instance,
- reboot_type, extra_args):
+ result = self.rpc.call_instance_reboot(node_current, instance,
+ reboot_type, extra_args)
+ if result.failed or not result.data:
raise errors.OpExecError("Could not reboot instance")
else:
- if not rpc.call_instance_shutdown(node_current, instance):
+ if not self.rpc.call_instance_shutdown(node_current, instance):
raise errors.OpExecError("could not shutdown instance for full reboot")
- _ShutdownInstanceDisks(instance, self.cfg)
- _StartInstanceDisks(self.cfg, instance, ignore_secondaries)
- if not rpc.call_instance_start(node_current, instance, extra_args):
- _ShutdownInstanceDisks(instance, self.cfg)
+ _ShutdownInstanceDisks(self, instance)
+ _StartInstanceDisks(self, instance, ignore_secondaries)
+ result = self.rpc.call_instance_start(node_current, instance, extra_args)
+ if result.failed or not result.data:
+ _ShutdownInstanceDisks(self, instance)
raise errors.OpExecError("Could not start instance for full reboot")
self.cfg.MarkInstanceUp(instance.name)
def ExpandNames(self):
self._ExpandAndLockInstance()
- self.needed_locks[locking.LEVEL_NODE] = []
- self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
-
- def DeclareLocks(self, level):
- if level == locking.LEVEL_NODE:
- self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
This runs on master, primary and secondary nodes of the instance.
"""
- env = _BuildInstanceHookEnvByObject(self.instance)
- nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
+ env = _BuildInstanceHookEnvByObject(self, self.instance)
+ nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
list(self.instance.secondary_nodes))
return env, nl, nl
instance = self.instance
node_current = instance.primary_node
self.cfg.MarkInstanceDown(instance.name)
- if not rpc.call_instance_shutdown(node_current, instance):
- logger.Error("could not shutdown instance")
+ result = self.rpc.call_instance_shutdown(node_current, instance)
+ if result.failed or not result.data:
+ self.proc.LogWarning("Could not shutdown instance")
- _ShutdownInstanceDisks(instance, self.cfg)
+ _ShutdownInstanceDisks(self, instance)
class LUReinstallInstance(LogicalUnit):
def ExpandNames(self):
self._ExpandAndLockInstance()
- self.needed_locks[locking.LEVEL_NODE] = []
- self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
-
- def DeclareLocks(self, level):
- if level == locking.LEVEL_NODE:
- self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
This runs on master, primary and secondary nodes of the instance.
"""
- env = _BuildInstanceHookEnvByObject(self.instance)
- nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
+ env = _BuildInstanceHookEnvByObject(self, self.instance)
+ nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
list(self.instance.secondary_nodes))
return env, nl, nl
if instance.status != "down":
raise errors.OpPrereqError("Instance '%s' is marked to be up" %
self.op.instance_name)
- remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
- if remote_info:
+ remote_info = self.rpc.call_instance_info(instance.primary_node,
+ instance.name,
+ instance.hypervisor)
+ if remote_info.failed or remote_info.data:
raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
(self.op.instance_name,
instance.primary_node))
if pnode is None:
raise errors.OpPrereqError("Primary node '%s' is unknown" %
self.op.pnode)
- os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
- if not os_obj:
+ result = self.rpc.call_os_get(pnode.name, self.op.os_type)
+ result.Raise()
+ if not isinstance(result.data, objects.OS):
raise errors.OpPrereqError("OS '%s' not in supported OS list for"
" primary node" % self.op.os_type)
inst.os = self.op.os_type
self.cfg.Update(inst)
- _StartInstanceDisks(self.cfg, inst, None)
+ _StartInstanceDisks(self, inst, None)
try:
feedback_fn("Running the instance OS create scripts...")
- if not rpc.call_instance_os_add(inst.primary_node, inst, "sda", "sdb"):
+ result = self.rpc.call_instance_os_add(inst.primary_node, inst)
+ result.Raise()
+ if not result.data:
raise errors.OpExecError("Could not install OS for instance %s"
" on node %s" %
(inst.name, inst.primary_node))
finally:
- _ShutdownInstanceDisks(inst, self.cfg)
+ _ShutdownInstanceDisks(self, inst)
class LURenameInstance(LogicalUnit):
This runs on master, primary and secondary nodes of the instance.
"""
- env = _BuildInstanceHookEnvByObject(self.instance)
+ env = _BuildInstanceHookEnvByObject(self, self.instance)
env["INSTANCE_NEW_NAME"] = self.op.new_name
- nl = ([self.sstore.GetMasterNode(), self.instance.primary_node] +
+ nl = ([self.cfg.GetMasterNode(), self.instance.primary_node] +
list(self.instance.secondary_nodes))
return env, nl, nl
if instance.status != "down":
raise errors.OpPrereqError("Instance '%s' is marked to be up" %
self.op.instance_name)
- remote_info = rpc.call_instance_info(instance.primary_node, instance.name)
- if remote_info:
+ remote_info = self.rpc.call_instance_info(instance.primary_node,
+ instance.name,
+ instance.hypervisor)
+ remote_info.Raise()
+ if remote_info.data:
raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
(self.op.instance_name,
instance.primary_node))
self.cfg.RenameInstance(inst.name, self.op.new_name)
# Change the instance lock. This is definitely safe while we hold the BGL
- self.context.glm.remove(locking.LEVEL_INSTANCE, inst.name)
+ self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
# re-read the instance from the configuration after rename
if inst.disk_template == constants.DT_FILE:
new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
- result = rpc.call_file_storage_dir_rename(inst.primary_node,
- old_file_storage_dir,
- new_file_storage_dir)
-
- if not result:
+ result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
+ old_file_storage_dir,
+ new_file_storage_dir)
+ result.Raise()
+ if not result.data:
raise errors.OpExecError("Could not connect to node '%s' to rename"
" directory '%s' to '%s' (but the instance"
" has been renamed in Ganeti)" % (
inst.primary_node, old_file_storage_dir,
new_file_storage_dir))
- if not result[0]:
+ if not result.data[0]:
raise errors.OpExecError("Could not rename directory '%s' to '%s'"
" (but the instance has been renamed in"
" Ganeti)" % (old_file_storage_dir,
new_file_storage_dir))
- _StartInstanceDisks(self.cfg, inst, None)
+ _StartInstanceDisks(self, inst, None)
try:
- if not rpc.call_instance_run_rename(inst.primary_node, inst, old_name,
- "sda", "sdb"):
+ result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
+ old_name)
+ if result.failed or not result.data:
msg = ("Could not run OS rename script for instance %s on node %s"
" (but the instance has been renamed in Ganeti)" %
(inst.name, inst.primary_node))
- logger.Error(msg)
+ self.proc.LogWarning(msg)
finally:
- _ShutdownInstanceDisks(inst, self.cfg)
+ _ShutdownInstanceDisks(self, inst)
class LURemoveInstance(LogicalUnit):
This runs on master, primary and secondary nodes of the instance.
"""
- env = _BuildInstanceHookEnvByObject(self.instance)
- nl = [self.sstore.GetMasterNode()]
+ env = _BuildInstanceHookEnvByObject(self, self.instance)
+ nl = [self.cfg.GetMasterNode()]
return env, nl, nl
def CheckPrereq(self):
"""
instance = self.instance
- logger.Info("shutting down instance %s on node %s" %
- (instance.name, instance.primary_node))
+ logging.info("Shutting down instance %s on node %s",
+ instance.name, instance.primary_node)
- if not rpc.call_instance_shutdown(instance.primary_node, instance):
+ result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
+ if result.failed or not result.data:
if self.op.ignore_failures:
feedback_fn("Warning: can't shutdown instance")
else:
raise errors.OpExecError("Could not shutdown instance %s on node %s" %
(instance.name, instance.primary_node))
- logger.Info("removing block devices for instance %s" % instance.name)
+ logging.info("Removing block devices for instance %s", instance.name)
- if not _RemoveDisks(instance, self.cfg):
+ if not _RemoveDisks(self, instance):
if self.op.ignore_failures:
feedback_fn("Warning: can't remove instance's disks")
else:
raise errors.OpExecError("Can't remove instance's disks")
- logger.Info("removing instance %s out of cluster config" % instance.name)
+ logging.info("Removing instance %s out of cluster config", instance.name)
self.cfg.RemoveInstance(instance.name)
self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
"""
_OP_REQP = ["output_fields", "names"]
REQ_BGL = False
+ _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
+ "admin_state", "admin_ram",
+ "disk_template", "ip", "mac", "bridge",
+ "sda_size", "sdb_size", "vcpus", "tags",
+ "network_port", "beparams",
+ "(disk).(size)/([0-9]+)",
+ "(disk).(sizes)",
+ "(nic).(mac|ip|bridge)/([0-9]+)",
+ "(nic).(macs|ips|bridges)",
+ "(disk|nic).(count)",
+ "serial_no", "hypervisor", "hvparams",] +
+ ["hv/%s" % name
+ for name in constants.HVS_PARAMETERS] +
+ ["be/%s" % name
+ for name in constants.BES_PARAMETERS])
+ _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
+
def ExpandNames(self):
- self.dynamic_fields = frozenset(["oper_state", "oper_ram", "status"])
- self.static_fields = frozenset([
- "name", "os", "pnode", "snodes",
- "admin_state", "admin_ram",
- "disk_template", "ip", "mac", "bridge",
- "sda_size", "sdb_size", "vcpus", "tags",
- "network_port", "kernel_path", "initrd_path",
- "hvm_boot_order", "hvm_acpi", "hvm_pae",
- "hvm_cdrom_image_path", "hvm_nic_type",
- "hvm_disk_type", "vnc_bind_address",
- "serial_no",
- ])
- _CheckOutputFields(static=self.static_fields,
- dynamic=self.dynamic_fields,
+ _CheckOutputFields(static=self._FIELDS_STATIC,
+ dynamic=self._FIELDS_DYNAMIC,
selected=self.op.output_fields)
self.needed_locks = {}
else:
self.wanted = locking.ALL_SET
- self.do_locking = not self.static_fields.issuperset(self.op.output_fields)
+ self.do_locking = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
if self.do_locking:
self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
self.needed_locks[locking.LEVEL_NODE] = []
instance_names = self.wanted
missing = set(instance_names).difference(all_info.keys())
if missing:
- raise self.OpExecError(
+ raise errors.OpExecError(
"Some instances were removed before retrieving their data: %s"
% missing)
else:
instance_names = all_info.keys()
+
+ instance_names = utils.NiceSort(instance_names)
instance_list = [all_info[iname] for iname in instance_names]
# begin data gathering
nodes = frozenset([inst.primary_node for inst in instance_list])
+ hv_list = list(set([inst.hypervisor for inst in instance_list]))
bad_nodes = []
- if self.dynamic_fields.intersection(self.op.output_fields):
+ if self.do_locking:
live_data = {}
- node_data = rpc.call_all_instances_info(nodes)
+ node_data = self.rpc.call_all_instances_info(nodes, hv_list)
for name in nodes:
result = node_data[name]
- if result:
- live_data.update(result)
- elif result == False:
+ if result.failed:
bad_nodes.append(name)
- # else no instance is alive
+ else:
+ if result.data:
+ live_data.update(result.data)
+ # else no instance is alive
else:
live_data = dict([(name, {}) for name in instance_names])
# end data gathering
+ HVPREFIX = "hv/"
+ BEPREFIX = "be/"
output = []
for instance in instance_list:
iout = []
+ i_hv = self.cfg.GetClusterInfo().FillHV(instance)
+ i_be = self.cfg.GetClusterInfo().FillBE(instance)
for field in self.op.output_fields:
+ st_match = self._FIELDS_STATIC.Matches(field)
if field == "name":
val = instance.name
elif field == "os":
val = "ERROR_down"
else:
val = "ADMIN_down"
- elif field == "admin_ram":
- val = instance.memory
elif field == "oper_ram":
if instance.primary_node in bad_nodes:
val = None
elif field == "mac":
val = instance.nics[0].mac
elif field == "sda_size" or field == "sdb_size":
- disk = instance.FindDisk(field[:3])
- if disk is None:
+ idx = ord(field[2]) - ord('a')
+ try:
+ val = instance.FindDisk(idx).size
+ except errors.OpPrereqError:
val = None
- else:
- val = disk.size
- elif field == "vcpus":
- val = instance.vcpus
elif field == "tags":
val = list(instance.GetTags())
elif field == "serial_no":
val = instance.serial_no
- elif field in ("network_port", "kernel_path", "initrd_path",
- "hvm_boot_order", "hvm_acpi", "hvm_pae",
- "hvm_cdrom_image_path", "hvm_nic_type",
- "hvm_disk_type", "vnc_bind_address"):
- val = getattr(instance, field, None)
- if val is not None:
- pass
- elif field in ("hvm_nic_type", "hvm_disk_type",
- "kernel_path", "initrd_path"):
- val = "default"
+ elif field == "network_port":
+ val = instance.network_port
+ elif field == "hypervisor":
+ val = instance.hypervisor
+ elif field == "hvparams":
+ val = i_hv
+ elif (field.startswith(HVPREFIX) and
+ field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
+ val = i_hv.get(field[len(HVPREFIX):], None)
+ elif field == "beparams":
+ val = i_be
+ elif (field.startswith(BEPREFIX) and
+ field[len(BEPREFIX):] in constants.BES_PARAMETERS):
+ val = i_be.get(field[len(BEPREFIX):], None)
+ elif st_match and st_match.groups():
+ # matches a variable list
+ st_groups = st_match.groups()
+ if st_groups and st_groups[0] == "disk":
+ if st_groups[1] == "count":
+ val = len(instance.disks)
+ elif st_groups[1] == "sizes":
+ val = [disk.size for disk in instance.disks]
+ elif st_groups[1] == "size":
+ try:
+ val = instance.FindDisk(st_groups[2]).size
+ except errors.OpPrereqError:
+ val = None
+ else:
+ assert False, "Unhandled disk parameter"
+ elif st_groups[0] == "nic":
+ if st_groups[1] == "count":
+ val = len(instance.nics)
+ elif st_groups[1] == "macs":
+ val = [nic.mac for nic in instance.nics]
+ elif st_groups[1] == "ips":
+ val = [nic.ip for nic in instance.nics]
+ elif st_groups[1] == "bridges":
+ val = [nic.bridge for nic in instance.nics]
+ else:
+ # index-based item
+ nic_idx = int(st_groups[2])
+ if nic_idx >= len(instance.nics):
+ val = None
+ else:
+ if st_groups[1] == "mac":
+ val = instance.nics[nic_idx].mac
+ elif st_groups[1] == "ip":
+ val = instance.nics[nic_idx].ip
+ elif st_groups[1] == "bridge":
+ val = instance.nics[nic_idx].bridge
+ else:
+ assert False, "Unhandled NIC parameter"
else:
- val = "-"
+ assert False, "Unhandled variable parameter"
else:
raise errors.ParameterError(field)
iout.append(val)
env = {
"IGNORE_CONSISTENCY": self.op.ignore_consistency,
}
- env.update(_BuildInstanceHookEnvByObject(self.instance))
- nl = [self.sstore.GetMasterNode()] + list(self.instance.secondary_nodes)
+ env.update(_BuildInstanceHookEnvByObject(self, self.instance))
+ nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
return env, nl, nl
def CheckPrereq(self):
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
+ bep = self.cfg.GetClusterInfo().FillBE(instance)
if instance.disk_template not in constants.DTS_NET_MIRROR:
raise errors.OpPrereqError("Instance's disk layout is not"
" network mirrored, cannot failover.")
target_node = secondary_nodes[0]
# check memory requirements on the secondary node
- _CheckNodeFreeMemory(self.cfg, target_node, "failing over instance %s" %
- instance.name, instance.memory)
+ _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
+ instance.name, bep[constants.BE_MEMORY],
+ instance.hypervisor)
# check bridge existence
brlist = [nic.bridge for nic in instance.nics]
- if not rpc.call_bridges_exist(target_node, brlist):
+ result = self.rpc.call_bridges_exist(target_node, brlist)
+ result.Raise()
+ if not result.data:
raise errors.OpPrereqError("One or more target bridges %s does not"
" exist on destination node '%s'" %
(brlist, target_node))
feedback_fn("* checking disk consistency between source and target")
for dev in instance.disks:
# for drbd, these are drbd over lvm
- if not _CheckDiskConsistency(self.cfg, dev, target_node, False):
+ if not _CheckDiskConsistency(self, dev, target_node, False):
if instance.status == "up" and not self.op.ignore_consistency:
raise errors.OpExecError("Disk %s is degraded on target node,"
" aborting failover." % dev.iv_name)
feedback_fn("* shutting down instance on source node")
- logger.Info("Shutting down instance %s on node %s" %
- (instance.name, source_node))
+ logging.info("Shutting down instance %s on node %s",
+ instance.name, source_node)
- if not rpc.call_instance_shutdown(source_node, instance):
+ result = self.rpc.call_instance_shutdown(source_node, instance)
+ if result.failed or not result.data:
if self.op.ignore_consistency:
- logger.Error("Could not shutdown instance %s on node %s. Proceeding"
- " anyway. Please make sure node %s is down" %
- (instance.name, source_node, source_node))
+ self.proc.LogWarning("Could not shutdown instance %s on node %s."
+ " Proceeding"
+ " anyway. Please make sure node %s is down",
+ instance.name, source_node, source_node)
else:
raise errors.OpExecError("Could not shutdown instance %s on node %s" %
(instance.name, source_node))
feedback_fn("* deactivating the instance's disks on source node")
- if not _ShutdownInstanceDisks(instance, self.cfg, ignore_primary=True):
+ if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
raise errors.OpExecError("Can't shut down the instance's disks.")
instance.primary_node = target_node
# Only start the instance if it's marked as up
if instance.status == "up":
feedback_fn("* activating the instance's disks on target node")
- logger.Info("Starting instance %s on node %s" %
- (instance.name, target_node))
+ logging.info("Starting instance %s on node %s",
+ instance.name, target_node)
- disks_ok, dummy = _AssembleInstanceDisks(instance, self.cfg,
+ disks_ok, dummy = _AssembleInstanceDisks(self, instance,
ignore_secondaries=True)
if not disks_ok:
- _ShutdownInstanceDisks(instance, self.cfg)
+ _ShutdownInstanceDisks(self, instance)
raise errors.OpExecError("Can't activate the instance's disks")
feedback_fn("* starting the instance on the target node")
- if not rpc.call_instance_start(target_node, instance, None):
- _ShutdownInstanceDisks(instance, self.cfg)
+ result = self.rpc.call_instance_start(target_node, instance, None)
+ if result.failed or not result.data:
+ _ShutdownInstanceDisks(self, instance)
raise errors.OpExecError("Could not start instance %s on node %s." %
(instance.name, target_node))
def _CreateBlockDevOnPrimary(lu, node, instance, device, info):
  """Create a tree of block devices on the primary node.

  This always creates all devices: the children of C{device} are
  created first (recursively), then the device itself.

  @param lu: the logical unit on whose behalf we execute; its C{cfg}
      and C{rpc} attributes are used
  @param node: the node on which the devices are created
  @param instance: the instance owning the device; only its name is
      passed to the node
  @param device: the device to create; its C{children}, C{size} and
      C{physical_id} attributes are used
  @param info: passed through unchanged to the blockdev_create call
  @rtype: boolean
  @return: True if the whole tree was created, False as soon as one
      creation fails

  """
  for child in device.children or ():
    if not _CreateBlockDevOnPrimary(lu, node, instance, child, info):
      return False

  lu.cfg.SetDiskID(device, node)
  # NOTE(review): the literal True is presumably the "on primary" flag
  # of the RPC call -- confirm against the rpc module
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, True, info)
  if result.failed or not result.data:
    return False
  if device.physical_id is None:
    # store the payload returned by the node, not the RPC result wrapper
    device.physical_id = result.data
  return True
-def _CreateBlockDevOnSecondary(cfg, node, instance, device, force, info):
+def _CreateBlockDevOnSecondary(lu, node, instance, device, force, info):
"""Create a tree of block devices on a secondary node.
If this device type has to be created on secondaries, create it and
force = True
if device.children:
for child in device.children:
- if not _CreateBlockDevOnSecondary(cfg, node, instance,
+ if not _CreateBlockDevOnSecondary(lu, node, instance,
child, force, info):
return False
if not force:
return True
- cfg.SetDiskID(device, node)
- new_id = rpc.call_blockdev_create(node, device, device.size,
- instance.name, False, info)
- if not new_id:
+ lu.cfg.SetDiskID(device, node)
+ new_id = lu.rpc.call_blockdev_create(node, device, device.size,
+ instance.name, False, info)
+ if new_id.failed or not new_id.data:
return False
if device.physical_id is None:
device.physical_id = new_id
return True
def _GenerateUniqueNames(lu, exts):
  """Build one unique name for each of the given suffixes.

  Every suffix gets its own fresh ID from the configuration, to which
  the suffix is appended.

  @param lu: the logical unit whose configuration supplies the IDs
  @param exts: the suffixes to generate names for
  @rtype: list
  @return: the generated names, in the order of C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(), suffix) for suffix in exts]
-def _GenerateDRBD8Branch(cfg, primary, secondary, size, names, iv_name,
+def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
p_minor, s_minor):
"""Generate a drbd8 device complete with its children.
"""
- port = cfg.AllocatePort()
- vgname = cfg.GetVGName()
- shared_secret = cfg.GenerateDRBDSecret()
+ port = lu.cfg.AllocatePort()
+ vgname = lu.cfg.GetVGName()
+ shared_secret = lu.cfg.GenerateDRBDSecret()
dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
logical_id=(vgname, names[0]))
dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
return drbd_dev
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template, one of the C{constants.DT_*}
      values handled below
  @param instance_name: the instance name, used when allocating DRBD
      minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: the secondary nodes; must be empty for the
      plain and file templates and hold exactly one node for drbd8
  @param disk_info: sequence of dicts, one per disk, each holding at
      least a C{"size"} key
  @param file_storage_dir: directory of the disk files (file template)
  @param file_driver: file driver, stored in the logical id of file
      based disks
  @param base_index: index of the first generated disk; it offsets the
      C{disk/N} names (and the file names of file based disks)
  @rtype: list
  @return: the generated disk objects, in the order of C{disk_info}
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % i
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index)
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # one (primary, secondary) pair of minors per disk
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * disk_count, instance_name)

    # two names (data and meta volume) per disk
    names = _GenerateUniqueNames(lu,
                                 [".disk%d_%s" % (i, s)
                                  for i in range(disk_count)
                                  for s in ("data", "meta")
                                  ])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      # the file name must follow the final disk index, otherwise disks
      # generated with a non-zero base_index would reuse the path of an
      # already existing disk
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)))
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
return "originstname+%s" % instance.name
def _CreateDisks(lu, instance):
  """Create all disks for an instance.

  For file based instances the storage directory is created first.
  Then every disk is created on all secondary nodes and finally on the
  primary node; the first failure aborts the whole process.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @rtype: boolean
  @return: the success of the creation

  """
  info_text = _GetInstanceInfoText(instance)
  pnode = instance.primary_node

  if instance.disk_template == constants.DT_FILE:
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    mkdir_result = lu.rpc.call_file_storage_dir_create(pnode, storage_dir)

    if mkdir_result.failed or not mkdir_result.data:
      logging.error("Could not connect to node '%s'", pnode)
      return False

    if not mkdir_result.data[0]:
      logging.error("Failed to create directory '%s'", storage_dir)
      return False

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for disk in instance.disks:
    logging.info("Creating volume %s for instance %s",
                 disk.iv_name, instance.name)

    # secondaries first (without forcing the creation) ...
    for snode in instance.secondary_nodes:
      created = _CreateBlockDevOnSecondary(lu, snode, instance,
                                           disk, False, info_text)
      if not created:
        logging.error("Failed to create volume %s (%s) on secondary node %s!",
                      disk.iv_name, disk, snode)
        return False

    # ... then the primary
    created = _CreateBlockDevOnPrimary(lu, pnode, instance, disk, info_text)
    if not created:
      logging.error("Failed to create volume %s on primary!", disk.iv_name)
      return False

  return True
def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  The removal is best-effort: if a device cannot be removed, a warning
  is emitted and the removal continues with the other ones (compare
  with `_CreateDisks()`). For file based instances the storage
  directory is removed as well.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: the success of the removal; False if any single removal
      failed

  """
  logging.info("Removing block devices for instance %s", instance.name)

  # Overall status; kept separate from the per-call RPC results so that
  # a later successful call cannot hide an earlier failure
  all_ok = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      rpc_result = lu.rpc.call_blockdev_remove(node, disk)
      if rpc_result.failed or not rpc_result.data:
        lu.proc.LogWarning("Could not remove block device %s on node %s,"
                           " continuing anyway", device.iv_name, node)
        all_ok = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    rpc_result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                     file_storage_dir)
    if rpc_result.failed or not rpc_result.data:
      logging.error("Could not remove directory '%s'", file_storage_dir)
      all_ok = False

  return all_ok
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group.

  @param disk_template: the disk template to compute the size for
  @param disks: the disks, each a dict holding a C{"size"} key
  @return: the required size for the plain and drbd8 templates, None
      for the diskless and file templates
  @raise KeyError: if the disk template is not one of the four above

  """
  sizes = [disk["size"] for disk in disks]
  plain_size = sum(sizes)
  # 128 MB are added for drbd metadata for each disk
  drbd8_size = sum([size + 128 for size in sizes])

  # Required free space in the volume group, per disk template
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_size,
    constants.DT_DRBD8: drbd8_size,
    constants.DT_FILE: None,
  }
  return required[disk_template]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation so that
  it can be used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  per_node = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                    hvname,
                                                    hvparams)
  for node_name in nodenames:
    node_result = per_node[node_name]
    node_result.Raise()
    payload = node_result.data
    # a usable answer is a non-empty tuple/list: (status, message)
    if not (payload and isinstance(payload, (tuple, list))):
      raise errors.OpPrereqError("Cannot get current information"
                                 " from node '%s' (%s)" %
                                 (node_name, payload))
    if payload[0]:
      continue
    raise errors.OpPrereqError("Hypervisor parameter validation failed:"
                               " %s" % payload[1])
+
+
class LUCreateInstance(LogicalUnit):
"""Create an instance.
"""
HPATH = "instance-add"
HTYPE = constants.HTYPE_INSTANCE
- _OP_REQP = ["instance_name", "mem_size", "disk_size",
- "disk_template", "swap_size", "mode", "start", "vcpus",
- "wait_for_sync", "ip_check", "mac"]
+ _OP_REQP = ["instance_name", "disks", "disk_template",
+ "mode", "start",
+ "wait_for_sync", "ip_check", "nics",
+ "hvparams", "beparams"]
REQ_BGL = False
def _ExpandNode(self, node):
self.needed_locks = {}
# set optional parameters to none if they don't exist
- for attr in ["kernel_path", "initrd_path", "pnode", "snode",
- "iallocator", "hvm_boot_order", "hvm_acpi", "hvm_pae",
- "hvm_cdrom_image_path", "hvm_nic_type", "hvm_disk_type",
- "vnc_bind_address"]:
+ for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
if not hasattr(self.op, attr):
setattr(self.op, attr, None)
+ # cheap checks, mostly valid constants given
+
# verify creation mode
if self.op.mode not in (constants.INSTANCE_CREATE,
constants.INSTANCE_IMPORT):
raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
self.op.mode)
+
# disk template and mirror node verification
if self.op.disk_template not in constants.DISK_TEMPLATES:
raise errors.OpPrereqError("Invalid disk template name")
+ if self.op.hypervisor is None:
+ self.op.hypervisor = self.cfg.GetHypervisorType()
+
+ cluster = self.cfg.GetClusterInfo()
+ enabled_hvs = cluster.enabled_hypervisors
+ if self.op.hypervisor not in enabled_hvs:
+ raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
+ " cluster (%s)" % (self.op.hypervisor,
+ ",".join(enabled_hvs)))
+
+ # check hypervisor parameter syntax (locally)
+
+ filled_hvp = cluster.FillDict(cluster.hvparams[self.op.hypervisor],
+ self.op.hvparams)
+ hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
+ hv_type.CheckParameterSyntax(filled_hvp)
+
+ # fill and remember the beparams dict
+ utils.CheckBEParams(self.op.beparams)
+ self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
+ self.op.beparams)
+
#### instance parameters check
# instance name verification
self.add_locks[locking.LEVEL_INSTANCE] = instance_name
- # ip validity checks
- ip = getattr(self.op, "ip", None)
- if ip is None or ip.lower() == "none":
- inst_ip = None
- elif ip.lower() == "auto":
- inst_ip = hostname1.ip
- else:
- if not utils.IsValidIP(ip):
- raise errors.OpPrereqError("given IP address '%s' doesn't look"
- " like a valid IP" % ip)
- inst_ip = ip
- self.inst_ip = self.op.ip = inst_ip
+ # NIC buildup
+ self.nics = []
+ for nic in self.op.nics:
+ # ip validity checks
+ ip = nic.get("ip", None)
+ if ip is None or ip.lower() == "none":
+ nic_ip = None
+ elif ip.lower() == constants.VALUE_AUTO:
+ nic_ip = hostname1.ip
+ else:
+ if not utils.IsValidIP(ip):
+ raise errors.OpPrereqError("Given IP address '%s' doesn't look"
+ " like a valid IP" % ip)
+ nic_ip = ip
+
+ # MAC address verification
+ mac = nic.get("mac", constants.VALUE_AUTO)
+ if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+ if not utils.IsValidMac(mac.lower()):
+ raise errors.OpPrereqError("Invalid MAC address specified: %s" %
+ mac)
+ # bridge verification
+ bridge = nic.get("bridge", self.cfg.GetDefBridge())
+ self.nics.append(objects.NIC(mac=mac, ip=nic_ip, bridge=bridge))
+
+ # disk checks/pre-build
+ self.disks = []
+ for disk in self.op.disks:
+ mode = disk.get("mode", constants.DISK_RDWR)
+ if mode not in constants.DISK_ACCESS_SET:
+ raise errors.OpPrereqError("Invalid disk access mode '%s'" %
+ mode)
+ size = disk.get("size", None)
+ if size is None:
+ raise errors.OpPrereqError("Missing disk size")
+ try:
+ size = int(size)
+ except ValueError:
+ raise errors.OpPrereqError("Invalid disk size '%s'" % size)
+ self.disks.append({"size": size, "mode": mode})
+
# used in CheckPrereq for ip ping check
self.check_ip = hostname1.ip
- # MAC address verification
- if self.op.mac != "auto":
- if not utils.IsValidMac(self.op.mac.lower()):
- raise errors.OpPrereqError("invalid MAC address specified: %s" %
- self.op.mac)
-
- # boot order verification
- if self.op.hvm_boot_order is not None:
- if len(self.op.hvm_boot_order.strip("acdn")) != 0:
- raise errors.OpPrereqError("invalid boot order specified,"
- " must be one or more of [acdn]")
# file storage checks
if (self.op.file_driver and
not self.op.file_driver in constants.FILE_DRIVER):
src_node = getattr(self.op, "src_node", None)
src_path = getattr(self.op, "src_path", None)
- if src_node is None or src_path is None:
- raise errors.OpPrereqError("Importing an instance requires source"
- " node and path options")
-
- if not os.path.isabs(src_path):
- raise errors.OpPrereqError("The source path must be absolute")
+ if src_path is None:
+ self.op.src_path = src_path = self.op.instance_name
- self.op.src_node = src_node = self._ExpandNode(src_node)
- if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
- self.needed_locks[locking.LEVEL_NODE].append(src_node)
+ if src_node is None:
+ self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+ self.op.src_node = None
+ if os.path.isabs(src_path):
+ raise errors.OpPrereqError("Importing an instance from an absolute"
+ " path requires a source node option.")
+ else:
+ self.op.src_node = src_node = self._ExpandNode(src_node)
+ if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
+ self.needed_locks[locking.LEVEL_NODE].append(src_node)
+ if not os.path.isabs(src_path):
+ self.op.src_path = src_path = \
+ os.path.join(constants.EXPORT_DIR, src_path)
else: # INSTANCE_CREATE
if getattr(self.op, "os_type", None) is None:
"""Run the allocator based on input opcode.
"""
- disks = [{"size": self.op.disk_size, "mode": "w"},
- {"size": self.op.swap_size, "mode": "w"}]
- nics = [{"mac": self.op.mac, "ip": getattr(self.op, "ip", None),
- "bridge": self.op.bridge}]
- ial = IAllocator(self.cfg, self.sstore,
+ nics = [n.ToDict() for n in self.nics]
+ ial = IAllocator(self,
mode=constants.IALLOCATOR_MODE_ALLOC,
name=self.op.instance_name,
disk_template=self.op.disk_template,
tags=[],
os=self.op.os_type,
- vcpus=self.op.vcpus,
- mem_size=self.op.mem_size,
- disks=disks,
+ vcpus=self.be_full[constants.BE_VCPUS],
+ mem_size=self.be_full[constants.BE_MEMORY],
+ disks=self.disks,
nics=nics,
+ hypervisor=self.op.hypervisor,
)
ial.Run(self.op.iallocator)
(self.op.iallocator, len(ial.nodes),
ial.required_nodes))
self.op.pnode = ial.nodes[0]
- logger.ToStdout("Selected nodes for the instance: %s" %
- (", ".join(ial.nodes),))
- logger.Info("Selected nodes for instance %s via iallocator %s: %s" %
- (self.op.instance_name, self.op.iallocator, ial.nodes))
+ self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
+ self.op.instance_name, self.op.iallocator,
+ ", ".join(ial.nodes))
if ial.required_nodes == 2:
self.op.snode = ial.nodes[1]
"""
env = {
"INSTANCE_DISK_TEMPLATE": self.op.disk_template,
- "INSTANCE_DISK_SIZE": self.op.disk_size,
- "INSTANCE_SWAP_SIZE": self.op.swap_size,
+ "INSTANCE_DISK_SIZE": ",".join(str(d["size"]) for d in self.disks),
"INSTANCE_ADD_MODE": self.op.mode,
}
if self.op.mode == constants.INSTANCE_IMPORT:
env["INSTANCE_SRC_NODE"] = self.op.src_node
env["INSTANCE_SRC_PATH"] = self.op.src_path
- env["INSTANCE_SRC_IMAGE"] = self.src_image
+ env["INSTANCE_SRC_IMAGES"] = self.src_images
env.update(_BuildInstanceHookEnv(name=self.op.instance_name,
primary_node=self.op.pnode,
secondary_nodes=self.secondaries,
status=self.instance_status,
os_type=self.op.os_type,
- memory=self.op.mem_size,
- vcpus=self.op.vcpus,
- nics=[(self.inst_ip, self.op.bridge, self.op.mac)],
+ memory=self.be_full[constants.BE_MEMORY],
+ vcpus=self.be_full[constants.BE_VCPUS],
+ nics=[(n.ip, n.bridge, n.mac) for n in self.nics],
))
- nl = ([self.sstore.GetMasterNode(), self.op.pnode] +
+ nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
self.secondaries)
return env, nl, nl
raise errors.OpPrereqError("Cluster does not support lvm-based"
" instances")
+
if self.op.mode == constants.INSTANCE_IMPORT:
src_node = self.op.src_node
src_path = self.op.src_path
- export_info = rpc.call_export_info(src_node, src_path)
-
- if not export_info:
+ if src_node is None:
+ exp_list = self.rpc.call_export_list(
+ self.acquired_locks[locking.LEVEL_NODE])
+ found = False
+ for node in exp_list:
+ if not exp_list[node].failed and src_path in exp_list[node].data:
+ found = True
+ self.op.src_node = src_node = node
+ self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
+ src_path)
+ break
+ if not found:
+ raise errors.OpPrereqError("No export found for relative path %s" %
+ src_path)
+
+ result = self.rpc.call_export_info(src_node, src_path)
+ result.Raise()
+ if not result.data:
raise errors.OpPrereqError("No export found in dir %s" % src_path)
+ export_info = result.data
if not export_info.has_section(constants.INISECT_EXP):
raise errors.ProgrammerError("Corrupted export config")
raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
(ei_version, constants.EXPORT_VERSION))
- if int(export_info.get(constants.INISECT_INS, 'disk_count')) > 1:
- raise errors.OpPrereqError("Can't import instance with more than"
- " one data disk")
+ # Check that the new instance doesn't have less disks than the export
+ instance_disks = len(self.disks)
+ export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
+ if instance_disks < export_disks:
+ raise errors.OpPrereqError("Not enough disks to import."
+ " (instance: %d, export: %d)" %
+ (instance_disks, export_disks))
- # FIXME: are the old os-es, disk sizes, etc. useful?
self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
- diskimage = os.path.join(src_path, export_info.get(constants.INISECT_INS,
- 'disk0_dump'))
- self.src_image = diskimage
+ disk_images = []
+ for idx in range(export_disks):
+ option = 'disk%d_dump' % idx
+ if export_info.has_option(constants.INISECT_INS, option):
+ # FIXME: are the old os-es, disk sizes, etc. useful?
+ export_name = export_info.get(constants.INISECT_INS, option)
+ image = os.path.join(src_path, export_name)
+ disk_images.append(image)
+ else:
+ disk_images.append(False)
- # ip ping checks (we use the same ip that was resolved in ExpandNames)
+ self.src_images = disk_images
+ old_name = export_info.get(constants.INISECT_INS, 'name')
+ # FIXME: int() here could throw a ValueError on broken exports
+ exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
+ if self.op.instance_name == old_name:
+ for idx, nic in enumerate(self.nics):
+ if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
+ nic_mac_ini = 'nic%d_mac' % idx
+ nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
+
+ # ip ping checks (we use the same ip that was resolved in ExpandNames)
if self.op.start and not self.op.ip_check:
raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
" adding an instance in start mode")
if self.op.ip_check:
if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
raise errors.OpPrereqError("IP %s of instance %s already in use" %
- (self.check_ip, instance_name))
-
- # bridge verification
- bridge = getattr(self.op, "bridge", None)
- if bridge is None:
- self.op.bridge = self.cfg.GetDefBridge()
- else:
- self.op.bridge = bridge
+ (self.check_ip, self.op.instance_name))
#### allocator run
" the primary node.")
self.secondaries.append(self.op.snode)
+ nodenames = [pnode.name] + self.secondaries
+
req_size = _ComputeDiskSize(self.op.disk_template,
- self.op.disk_size, self.op.swap_size)
+ self.disks)
# Check lv size requirements
if req_size is not None:
- nodenames = [pnode.name] + self.secondaries
- nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
+ nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
+ self.op.hypervisor)
for node in nodenames:
- info = nodeinfo.get(node, None)
+ info = nodeinfo[node]
+ info.Raise()
+ info = info.data
if not info:
raise errors.OpPrereqError("Cannot get current information"
" from node '%s'" % node)
" %d MB available, %d MB required" %
(node, info['vg_free'], req_size))
+ _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
+
# os verification
- os_obj = rpc.call_os_get(pnode.name, self.op.os_type)
- if not os_obj:
+ result = self.rpc.call_os_get(pnode.name, self.op.os_type)
+ result.Raise()
+ if not isinstance(result.data, objects.OS):
raise errors.OpPrereqError("OS '%s' not in supported os list for"
" primary node" % self.op.os_type)
- if self.op.kernel_path == constants.VALUE_NONE:
- raise errors.OpPrereqError("Can't set instance kernel to none")
-
# bridge check on primary node
- if not rpc.call_bridges_exist(self.pnode.name, [self.op.bridge]):
- raise errors.OpPrereqError("target bridge '%s' does not exist on"
- " destination node '%s'" %
- (self.op.bridge, pnode.name))
+ bridges = [n.bridge for n in self.nics]
+ result = self.rpc.call_bridges_exist(self.pnode.name, bridges)
+ result.Raise()
+ if not result.data:
+ raise errors.OpPrereqError("One of the target bridges '%s' does not"
+ " exist on destination node '%s'" %
+ (",".join(bridges), pnode.name))
# memory check on primary node
if self.op.start:
- _CheckNodeFreeMemory(self.cfg, self.pnode.name,
+ _CheckNodeFreeMemory(self, self.pnode.name,
"creating instance %s" % self.op.instance_name,
- self.op.mem_size)
-
- # hvm_cdrom_image_path verification
- if self.op.hvm_cdrom_image_path is not None:
- # FIXME (als): shouldn't these checks happen on the destination node?
- if not os.path.isabs(self.op.hvm_cdrom_image_path):
- raise errors.OpPrereqError("The path to the HVM CDROM image must"
- " be an absolute path or None, not %s" %
- self.op.hvm_cdrom_image_path)
- if not os.path.isfile(self.op.hvm_cdrom_image_path):
- raise errors.OpPrereqError("The HVM CDROM image must either be a"
- " regular file or a symlink pointing to"
- " an existing regular file, not %s" %
- self.op.hvm_cdrom_image_path)
-
- # vnc_bind_address verification
- if self.op.vnc_bind_address is not None:
- if not utils.IsValidIP(self.op.vnc_bind_address):
- raise errors.OpPrereqError("given VNC bind address '%s' doesn't look"
- " like a valid IP address" %
- self.op.vnc_bind_address)
-
- # Xen HVM device type checks
- if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
- if self.op.hvm_nic_type not in constants.HT_HVM_VALID_NIC_TYPES:
- raise errors.OpPrereqError("Invalid NIC type %s specified for Xen HVM"
- " hypervisor" % self.op.hvm_nic_type)
- if self.op.hvm_disk_type not in constants.HT_HVM_VALID_DISK_TYPES:
- raise errors.OpPrereqError("Invalid disk type %s specified for Xen HVM"
- " hypervisor" % self.op.hvm_disk_type)
+ self.be_full[constants.BE_MEMORY],
+ self.op.hypervisor)
if self.op.start:
self.instance_status = 'up'
instance = self.op.instance_name
pnode_name = self.pnode.name
- if self.op.mac == "auto":
- mac_address = self.cfg.GenerateMAC()
- else:
- mac_address = self.op.mac
-
- nic = objects.NIC(bridge=self.op.bridge, mac=mac_address)
- if self.inst_ip is not None:
- nic.ip = self.inst_ip
+ for nic in self.nics:
+ if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+ nic.mac = self.cfg.GenerateMAC()
- ht_kind = self.sstore.GetHypervisorType()
+ ht_kind = self.op.hypervisor
if ht_kind in constants.HTS_REQ_PORT:
network_port = self.cfg.AllocatePort()
else:
network_port = None
- if self.op.vnc_bind_address is None:
- self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
+ ##if self.op.vnc_bind_address is None:
+ ## self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
# this is needed because os.path.join does not accept None arguments
if self.op.file_storage_dir is None:
# build the full file storage dir path
file_storage_dir = os.path.normpath(os.path.join(
- self.sstore.GetFileStorageDir(),
+ self.cfg.GetFileStorageDir(),
string_file_storage_dir, instance))
- disks = _GenerateDiskTemplate(self.cfg,
+ disks = _GenerateDiskTemplate(self,
self.op.disk_template,
instance, pnode_name,
- self.secondaries, self.op.disk_size,
- self.op.swap_size,
+ self.secondaries,
+ self.disks,
file_storage_dir,
- self.op.file_driver)
+ self.op.file_driver,
+ 0)
iobj = objects.Instance(name=instance, os=self.op.os_type,
primary_node=pnode_name,
- memory=self.op.mem_size,
- vcpus=self.op.vcpus,
- nics=[nic], disks=disks,
+ nics=self.nics, disks=disks,
disk_template=self.op.disk_template,
status=self.instance_status,
network_port=network_port,
- kernel_path=self.op.kernel_path,
- initrd_path=self.op.initrd_path,
- hvm_boot_order=self.op.hvm_boot_order,
- hvm_acpi=self.op.hvm_acpi,
- hvm_pae=self.op.hvm_pae,
- hvm_cdrom_image_path=self.op.hvm_cdrom_image_path,
- vnc_bind_address=self.op.vnc_bind_address,
- hvm_nic_type=self.op.hvm_nic_type,
- hvm_disk_type=self.op.hvm_disk_type,
+ beparams=self.op.beparams,
+ hvparams=self.op.hvparams,
+ hypervisor=self.op.hypervisor,
)
feedback_fn("* creating instance disks...")
- if not _CreateDisks(self.cfg, iobj):
- _RemoveDisks(iobj, self.cfg)
+ if not _CreateDisks(self, iobj):
+ _RemoveDisks(self, iobj)
self.cfg.ReleaseDRBDMinors(instance)
raise errors.OpExecError("Device creation failed, reverting...")
del self.remove_locks[locking.LEVEL_INSTANCE]
# Remove the temp. assignments for the instance's drbds
self.cfg.ReleaseDRBDMinors(instance)
+ # Unlock all the nodes
+ if self.op.mode == constants.INSTANCE_IMPORT:
+ nodes_keep = [self.op.src_node]
+ nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
+ if node != self.op.src_node]
+ self.context.glm.release(locking.LEVEL_NODE, nodes_release)
+ self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
+ else:
+ self.context.glm.release(locking.LEVEL_NODE)
+ del self.acquired_locks[locking.LEVEL_NODE]
if self.op.wait_for_sync:
- disk_abort = not _WaitForSync(self.cfg, iobj, self.proc)
+ disk_abort = not _WaitForSync(self, iobj)
elif iobj.disk_template in constants.DTS_NET_MIRROR:
# make sure the disks are not degraded (still sync-ing is ok)
time.sleep(15)
feedback_fn("* checking mirrors status")
- disk_abort = not _WaitForSync(self.cfg, iobj, self.proc, oneshot=True)
+ disk_abort = not _WaitForSync(self, iobj, oneshot=True)
else:
disk_abort = False
if disk_abort:
- _RemoveDisks(iobj, self.cfg)
+ _RemoveDisks(self, iobj)
self.cfg.RemoveInstance(iobj.name)
# Make sure the instance lock gets removed
self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
if iobj.disk_template != constants.DT_DISKLESS:
if self.op.mode == constants.INSTANCE_CREATE:
feedback_fn("* running the instance OS create scripts...")
- if not rpc.call_instance_os_add(pnode_name, iobj, "sda", "sdb"):
- raise errors.OpExecError("could not add os for instance %s"
+ result = self.rpc.call_instance_os_add(pnode_name, iobj)
+ result.Raise()
+ if not result.data:
+ raise errors.OpExecError("Could not add os for instance %s"
" on node %s" %
(instance, pnode_name))
elif self.op.mode == constants.INSTANCE_IMPORT:
feedback_fn("* running the instance OS import scripts...")
src_node = self.op.src_node
- src_image = self.src_image
- if not rpc.call_instance_os_import(pnode_name, iobj, "sda", "sdb",
- src_node, src_image):
- raise errors.OpExecError("Could not import os for instance"
- " %s on node %s" %
- (instance, pnode_name))
+ src_images = self.src_images
+ cluster_name = self.cfg.GetClusterName()
+ import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
+ src_node, src_images,
+ cluster_name)
+ import_result.Raise()
+ for idx, result in enumerate(import_result.data):
+ if not result:
+ self.LogWarning("Could not import the image %s for instance"
+ " %s, disk %d, on node %s" %
+ (src_images[idx], instance, idx, pnode_name))
else:
# also checked in the prereq part
raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
% self.op.mode)
if self.op.start:
- logger.Info("starting instance %s on node %s" % (instance, pnode_name))
+ logging.info("Starting instance %s on node %s", instance, pnode_name)
feedback_fn("* starting instance...")
- if not rpc.call_instance_start(pnode_name, iobj, None):
+ result = self.rpc.call_instance_start(pnode_name, iobj, None)
+ result.Raise()
+ if not result.data:
raise errors.OpExecError("Could not start instance")
instance = self.instance
node = instance.primary_node
- node_insts = rpc.call_instance_list([node])[node]
- if node_insts is False:
- raise errors.OpExecError("Can't connect to node %s." % node)
+ node_insts = self.rpc.call_instance_list([node],
+ [instance.hypervisor])[node]
+ node_insts.Raise()
- if instance.name not in node_insts:
+ if instance.name not in node_insts.data:
raise errors.OpExecError("Instance %s is not running." % instance.name)
- logger.Debug("connecting to console of %s on %s" % (instance.name, node))
+ logging.debug("Connecting to console of %s on %s", instance.name, node)
- hyper = hypervisor.GetHypervisor(self.cfg)
+ hyper = hypervisor.GetHypervisor(instance.hypervisor)
console_cmd = hyper.GetShellCommandForConsole(instance)
# build ssh cmdline
"""Compute a new secondary node using an IAllocator.
"""
- ial = IAllocator(self.cfg, self.sstore,
+ ial = IAllocator(self,
mode=constants.IALLOCATOR_MODE_RELOC,
name=self.op.instance_name,
relocate_from=[self.sec_node])
" of nodes (%s), required %s" %
(len(ial.nodes), ial.required_nodes))
self.op.remote_node = ial.nodes[0]
- logger.ToStdout("Selected new secondary for the instance: %s" %
- self.op.remote_node)
+ self.LogInfo("Selected new secondary for the instance: %s",
+ self.op.remote_node)
def BuildHooksEnv(self):
"""Build hooks env.
"NEW_SECONDARY": self.op.remote_node,
"OLD_SECONDARY": self.instance.secondary_nodes[0],
}
- env.update(_BuildInstanceHookEnvByObject(self.instance))
+ env.update(_BuildInstanceHookEnvByObject(self, self.instance))
nl = [
- self.sstore.GetMasterNode(),
+ self.cfg.GetMasterNode(),
self.instance.primary_node,
]
if self.op.remote_node is not None:
else:
raise errors.ProgrammerError("Unhandled disk replace mode")
- for name in self.op.disks:
- if instance.FindDisk(name) is None:
- raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
- (name, instance.name))
+ if not self.op.disks:
+ self.op.disks = range(len(instance.disks))
+
+ for disk_idx in self.op.disks:
+ instance.FindDisk(disk_idx)
def _ExecD8DiskOnly(self, feedback_fn):
"""Replace a disk on the primary or secondary for dbrd8.
The algorithm for replace is quite complicated:
- - for each disk to be replaced:
- - create new LVs on the target node with unique names
- - detach old LVs from the drbd device
- - rename old LVs to name_replaced.<time_t>
- - rename new LVs to old LVs
- - attach the new LVs (with the old names now) to the drbd device
- - wait for sync across all devices
- - for each modified disk:
- - remove old LVs (which have the name name_replaces.<time_t>)
+
+ 1. for each disk to be replaced:
+
+ 1. create new LVs on the target node with unique names
+ 1. detach old LVs from the drbd device
+ 1. rename old LVs to name_replaced.<time_t>
+ 1. rename new LVs to old LVs
+ 1. attach the new LVs (with the old names now) to the drbd device
+
+ 1. wait for sync across all devices
+
+ 1. for each modified disk:
+
+ 1. remove old LVs (which have the name name_replaced.<time_t>)
Failures are not very well handled.
self.proc.LogStep(1, steps_total, "check device existence")
info("checking volume groups")
my_vg = cfg.GetVGName()
- results = rpc.call_vg_list([oth_node, tgt_node])
+ results = self.rpc.call_vg_list([oth_node, tgt_node])
if not results:
raise errors.OpExecError("Can't list volume groups on the nodes")
for node in oth_node, tgt_node:
- res = results.get(node, False)
- if not res or my_vg not in res:
+ res = results[node]
+ if res.failed or not res.data or my_vg not in res.data:
raise errors.OpExecError("Volume group '%s' not found on %s" %
(my_vg, node))
- for dev in instance.disks:
- if not dev.iv_name in self.op.disks:
+ for idx, dev in enumerate(instance.disks):
+ if idx not in self.op.disks:
continue
for node in tgt_node, oth_node:
- info("checking %s on %s" % (dev.iv_name, node))
+ info("checking disk/%d on %s" % (idx, node))
cfg.SetDiskID(dev, node)
- if not rpc.call_blockdev_find(node, dev):
- raise errors.OpExecError("Can't find device %s on node %s" %
- (dev.iv_name, node))
+ if not self.rpc.call_blockdev_find(node, dev):
+ raise errors.OpExecError("Can't find disk/%d on node %s" %
+ (idx, node))
# Step: check other node consistency
self.proc.LogStep(2, steps_total, "check peer consistency")
- for dev in instance.disks:
- if not dev.iv_name in self.op.disks:
+ for idx, dev in enumerate(instance.disks):
+ if idx not in self.op.disks:
continue
- info("checking %s consistency on %s" % (dev.iv_name, oth_node))
- if not _CheckDiskConsistency(self.cfg, dev, oth_node,
+ info("checking disk/%d consistency on %s" % (idx, oth_node))
+ if not _CheckDiskConsistency(self, dev, oth_node,
oth_node==instance.primary_node):
raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
" to replace disks on this node (%s)" %
# Step: create new storage
self.proc.LogStep(3, steps_total, "allocate new storage")
- for dev in instance.disks:
- if not dev.iv_name in self.op.disks:
+ for idx, dev in enumerate(instance.disks):
+ if idx not in self.op.disks:
continue
size = dev.size
cfg.SetDiskID(dev, tgt_node)
- lv_names = [".%s_%s" % (dev.iv_name, suf) for suf in ["data", "meta"]]
- names = _GenerateUniqueNames(cfg, lv_names)
+ lv_names = [".disk%d_%s" % (idx, suf)
+ for suf in ["data", "meta"]]
+ names = _GenerateUniqueNames(self, lv_names)
lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
logical_id=(vgname, names[0]))
lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
# _Create...OnPrimary (which forces the creation), even if we
# are talking about the secondary node
for new_lv in new_lvs:
- if not _CreateBlockDevOnPrimary(cfg, tgt_node, instance, new_lv,
+ if not _CreateBlockDevOnPrimary(self, tgt_node, instance, new_lv,
_GetInstanceInfoText(instance)):
raise errors.OpExecError("Failed to create new LV named '%s' on"
" node '%s'" %
self.proc.LogStep(4, steps_total, "change drbd configuration")
for dev, old_lvs, new_lvs in iv_names.itervalues():
info("detaching %s drbd from local storage" % dev.iv_name)
- if not rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs):
+ result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
+ result.Raise()
+ if not result.data:
raise errors.OpExecError("Can't detach drbd from local storage on node"
" %s for device %s" % (tgt_node, dev.iv_name))
#dev.children = []
# build the rename list based on what LVs exist on the node
rlist = []
for to_ren in old_lvs:
- find_res = rpc.call_blockdev_find(tgt_node, to_ren)
- if find_res is not None: # device exists
+ find_res = self.rpc.call_blockdev_find(tgt_node, to_ren)
+ if not find_res.failed and find_res.data is not None: # device exists
rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
info("renaming the old LVs on the target node")
- if not rpc.call_blockdev_rename(tgt_node, rlist):
+ result = self.rpc.call_blockdev_rename(tgt_node, rlist)
+ result.Raise()
+ if not result.data:
raise errors.OpExecError("Can't rename old LVs on node %s" % tgt_node)
# now we rename the new LVs to the old LVs
info("renaming the new LVs on the target node")
rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
- if not rpc.call_blockdev_rename(tgt_node, rlist):
+ result = self.rpc.call_blockdev_rename(tgt_node, rlist)
+ result.Raise()
+ if not result.data:
raise errors.OpExecError("Can't rename new LVs on node %s" % tgt_node)
for old, new in zip(old_lvs, new_lvs):
# now that the new lvs have the old name, we can add them to the device
info("adding new mirror component on %s" % tgt_node)
- if not rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs):
+ result =self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
+ if result.failed or not result.data:
for new_lv in new_lvs:
- if not rpc.call_blockdev_remove(tgt_node, new_lv):
+ result = self.rpc.call_blockdev_remove(tgt_node, new_lv)
+ if result.failed or not result.data:
warning("Can't rollback device %s", hint="manually cleanup unused"
" logical volumes")
raise errors.OpExecError("Can't add local storage to drbd")
# does a combined result over all disks, so we don't check its
# return value
self.proc.LogStep(5, steps_total, "sync devices")
- _WaitForSync(cfg, instance, self.proc, unlock=True)
+ _WaitForSync(self, instance, unlock=True)
# so check manually all the devices
for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
cfg.SetDiskID(dev, instance.primary_node)
- is_degr = rpc.call_blockdev_find(instance.primary_node, dev)[5]
- if is_degr:
+ result = self.rpc.call_blockdev_find(instance.primary_node, dev)
+ if result.failed or result.data[5]:
raise errors.OpExecError("DRBD device %s is degraded!" % name)
# Step: remove old storage
info("remove logical volumes for %s" % name)
for lv in old_lvs:
cfg.SetDiskID(lv, tgt_node)
- if not rpc.call_blockdev_remove(tgt_node, lv):
+ result = self.rpc.call_blockdev_remove(tgt_node, lv)
+ if result.failed or not result.data:
warning("Can't remove old LV", hint="manually remove unused LVs")
continue
self.proc.LogStep(1, steps_total, "check device existence")
info("checking volume groups")
my_vg = cfg.GetVGName()
- results = rpc.call_vg_list([pri_node, new_node])
- if not results:
- raise errors.OpExecError("Can't list volume groups on the nodes")
+ results = self.rpc.call_vg_list([pri_node, new_node])
for node in pri_node, new_node:
- res = results.get(node, False)
- if not res or my_vg not in res:
+ res = results[node]
+ if res.failed or not res.data or my_vg not in res.data:
raise errors.OpExecError("Volume group '%s' not found on %s" %
(my_vg, node))
- for dev in instance.disks:
- if not dev.iv_name in self.op.disks:
+ for idx, dev in enumerate(instance.disks):
+ if idx not in self.op.disks:
continue
- info("checking %s on %s" % (dev.iv_name, pri_node))
+ info("checking disk/%d on %s" % (idx, pri_node))
cfg.SetDiskID(dev, pri_node)
- if not rpc.call_blockdev_find(pri_node, dev):
- raise errors.OpExecError("Can't find device %s on node %s" %
- (dev.iv_name, pri_node))
+ result = self.rpc.call_blockdev_find(pri_node, dev)
+ result.Raise()
+ if not result.data:
+ raise errors.OpExecError("Can't find disk/%d on node %s" %
+ (idx, pri_node))
# Step: check other node consistency
self.proc.LogStep(2, steps_total, "check peer consistency")
- for dev in instance.disks:
- if not dev.iv_name in self.op.disks:
+ for idx, dev in enumerate(instance.disks):
+ if idx not in self.op.disks:
continue
- info("checking %s consistency on %s" % (dev.iv_name, pri_node))
- if not _CheckDiskConsistency(self.cfg, dev, pri_node, True, ldisk=True):
+ info("checking disk/%d consistency on %s" % (idx, pri_node))
+ if not _CheckDiskConsistency(self, dev, pri_node, True, ldisk=True):
raise errors.OpExecError("Primary node (%s) has degraded storage,"
" unsafe to replace the secondary" %
pri_node)
# Step: create new storage
self.proc.LogStep(3, steps_total, "allocate new storage")
- for dev in instance.disks:
+ for idx, dev in enumerate(instance.disks):
size = dev.size
- info("adding new local storage on %s for %s" % (new_node, dev.iv_name))
+ info("adding new local storage on %s for disk/%d" %
+ (new_node, idx))
# since we *always* want to create this LV, we use the
# _Create...OnPrimary (which forces the creation), even if we
# are talking about the secondary node
for new_lv in dev.children:
- if not _CreateBlockDevOnPrimary(cfg, new_node, instance, new_lv,
+ if not _CreateBlockDevOnPrimary(self, new_node, instance, new_lv,
_GetInstanceInfoText(instance)):
raise errors.OpExecError("Failed to create new LV named '%s' on"
" node '%s'" %
(new_lv.logical_id[1], new_node))
-
# Step 4: drbd minors and drbd setup changes
# after this, we must manually remove the drbd minors on both the
# error and the success paths
instance.name)
logging.debug("Allocated minors %s" % (minors,))
self.proc.LogStep(4, steps_total, "changing drbd configuration")
- for dev, new_minor in zip(instance.disks, minors):
+ for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
size = dev.size
- info("activating a new drbd on %s for %s" % (new_node, dev.iv_name))
+ info("activating a new drbd on %s for disk/%d" % (new_node, idx))
# create new devices on new_node
if pri_node == dev.logical_id[0]:
new_logical_id = (pri_node, new_node,
new_logical_id = (new_node, pri_node,
dev.logical_id[2], new_minor, dev.logical_id[4],
dev.logical_id[5])
- iv_names[dev.iv_name] = (dev, dev.children, new_logical_id)
+ iv_names[idx] = (dev, dev.children, new_logical_id)
logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
new_logical_id)
new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
logical_id=new_logical_id,
children=dev.children)
- if not _CreateBlockDevOnSecondary(cfg, new_node, instance,
+ if not _CreateBlockDevOnSecondary(self, new_node, instance,
new_drbd, False,
- _GetInstanceInfoText(instance)):
+ _GetInstanceInfoText(instance)):
self.cfg.ReleaseDRBDMinors(instance.name)
raise errors.OpExecError("Failed to create new DRBD on"
" node '%s'" % new_node)
- for dev in instance.disks:
+ for idx, dev in enumerate(instance.disks):
# we have new devices, shutdown the drbd on the old secondary
- info("shutting down drbd for %s on old node" % dev.iv_name)
+ info("shutting down drbd for disk/%d on old node" % idx)
cfg.SetDiskID(dev, old_node)
- if not rpc.call_blockdev_shutdown(old_node, dev):
- warning("Failed to shutdown drbd for %s on old node" % dev.iv_name,
+ result = self.rpc.call_blockdev_shutdown(old_node, dev)
+ if result.failed or not result.data:
+ warning("Failed to shutdown drbd for disk/%d on old node" % idx,
hint="Please cleanup this device manually as soon as possible")
info("detaching primary drbds from the network (=> standalone)")
done = 0
- for dev in instance.disks:
+ for idx, dev in enumerate(instance.disks):
cfg.SetDiskID(dev, pri_node)
# set the network part of the physical (unique in bdev terms) id
# to None, meaning detach from network
dev.physical_id = (None, None, None, None) + dev.physical_id[4:]
# and 'find' the device, which will 'fix' it to match the
# standalone state
- if rpc.call_blockdev_find(pri_node, dev):
+ result = self.rpc.call_blockdev_find(pri_node, dev)
+ if not result.failed and result.data:
done += 1
else:
- warning("Failed to detach drbd %s from network, unusual case" %
- dev.iv_name)
+ warning("Failed to detach drbd disk/%d from network, unusual case" %
+ idx)
if not done:
# no detaches succeeded (very unlikely)
# and now perform the drbd attach
info("attaching primary drbds to new secondary (standalone => connected)")
failures = []
- for dev in instance.disks:
- info("attaching primary drbd for %s to new secondary node" % dev.iv_name)
+ for idx, dev in enumerate(instance.disks):
+ info("attaching primary drbd for disk/%d to new secondary node" % idx)
# since the attach is smart, it's enough to 'find' the device,
# it will automatically activate the network, if the physical_id
# is correct
cfg.SetDiskID(dev, pri_node)
logging.debug("Disk to attach: %s", dev)
- if not rpc.call_blockdev_find(pri_node, dev):
- warning("can't attach drbd %s to new secondary!" % dev.iv_name,
+ result = self.rpc.call_blockdev_find(pri_node, dev)
+ if result.failed or not result.data:
+ warning("can't attach drbd disk/%d to new secondary!" % idx,
"please do a gnt-instance info to see the status of disks")
# this can fail as the old devices are degraded and _WaitForSync
# does a combined result over all disks, so we don't check its
# return value
self.proc.LogStep(5, steps_total, "sync devices")
- _WaitForSync(cfg, instance, self.proc, unlock=True)
+ _WaitForSync(self, instance, unlock=True)
# so check manually all the devices
- for name, (dev, old_lvs, _) in iv_names.iteritems():
+ for idx, (dev, old_lvs, _) in iv_names.iteritems():
cfg.SetDiskID(dev, pri_node)
- is_degr = rpc.call_blockdev_find(pri_node, dev)[5]
- if is_degr:
- raise errors.OpExecError("DRBD device %s is degraded!" % name)
+ result = self.rpc.call_blockdev_find(pri_node, dev)
+ result.Raise()
+ if result.data[5]:
+ raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
self.proc.LogStep(6, steps_total, "removing old storage")
- for name, (dev, old_lvs, _) in iv_names.iteritems():
- info("remove logical volumes for %s" % name)
+ for idx, (dev, old_lvs, _) in iv_names.iteritems():
+ info("remove logical volumes for disk/%d" % idx)
for lv in old_lvs:
cfg.SetDiskID(lv, old_node)
- if not rpc.call_blockdev_remove(old_node, lv):
+ result = self.rpc.call_blockdev_remove(old_node, lv)
+ if result.failed or not result.data:
warning("Can't remove LV on old secondary",
hint="Cleanup stale volumes by hand")
# Activate the instance disks if we're replacing them on a down instance
if instance.status == "down":
- _StartInstanceDisks(self.cfg, instance, True)
+ _StartInstanceDisks(self, instance, True)
if instance.disk_template == constants.DT_DRBD8:
if self.op.remote_node is None:
# Deactivate the instance disks if we're replacing them on a down instance
if instance.status == "down":
- _SafeShutdownInstanceDisks(instance, self.cfg)
+ _SafeShutdownInstanceDisks(self, instance)
return ret
"""
HPATH = "disk-grow"
HTYPE = constants.HTYPE_INSTANCE
- _OP_REQP = ["instance_name", "disk", "amount"]
+ _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
REQ_BGL = False
def ExpandNames(self):
"DISK": self.op.disk,
"AMOUNT": self.op.amount,
}
- env.update(_BuildInstanceHookEnvByObject(self.instance))
+ env.update(_BuildInstanceHookEnvByObject(self, self.instance))
nl = [
- self.sstore.GetMasterNode(),
+ self.cfg.GetMasterNode(),
self.instance.primary_node,
]
return env, nl, nl
raise errors.OpPrereqError("Instance's disk layout does not support"
" growing.")
- if instance.FindDisk(self.op.disk) is None:
- raise errors.OpPrereqError("Disk '%s' not found for instance '%s'" %
- (self.op.disk, instance.name))
+ self.disk = instance.FindDisk(self.op.disk)
nodenames = [instance.primary_node] + list(instance.secondary_nodes)
- nodeinfo = rpc.call_node_info(nodenames, self.cfg.GetVGName())
+ nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
+ instance.hypervisor)
for node in nodenames:
- info = nodeinfo.get(node, None)
- if not info:
+ info = nodeinfo[node]
+ if info.failed or not info.data:
raise errors.OpPrereqError("Cannot get current information"
" from node '%s'" % node)
- vg_free = info.get('vg_free', None)
+ vg_free = info.data.get('vg_free', None)
if not isinstance(vg_free, int):
raise errors.OpPrereqError("Can't compute free disk space on"
" node %s" % node)
- if self.op.amount > info['vg_free']:
+ if self.op.amount > vg_free:
raise errors.OpPrereqError("Not enough disk space on target node %s:"
" %d MiB available, %d MiB required" %
- (node, info['vg_free'], self.op.amount))
+ (node, vg_free, self.op.amount))
def Exec(self, feedback_fn):
"""Execute disk grow.
"""
instance = self.instance
- disk = instance.FindDisk(self.op.disk)
+ disk = self.disk
+ # grow the device on the secondary nodes first, on the primary node last
for node in (instance.secondary_nodes + (instance.primary_node,)):
self.cfg.SetDiskID(disk, node)
- result = rpc.call_blockdev_grow(node, disk, self.op.amount)
- if not result or not isinstance(result, (list, tuple)) or len(result) != 2:
- raise errors.OpExecError("grow request failed to node %s" % node)
- elif not result[0]:
- raise errors.OpExecError("grow request failed to node %s: %s" %
- (node, result[1]))
+ result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
+ result.Raise()
+ # the payload must be a two-element sequence: a success flag followed by
+ # the text that is reported when the flag is false
+ if (not result.data or not isinstance(result.data, (list, tuple)) or
+ len(result.data) != 2):
+ raise errors.OpExecError("Grow request failed to node %s" % node)
+ elif not result.data[0]:
+ raise errors.OpExecError("Grow request failed to node %s: %s" %
+ (node, result.data[1]))
+ # only reached when no node raised above; presumably RecordGrow stores the
+ # new size on the disk object before the configuration is updated
disk.RecordGrow(self.op.amount)
self.cfg.Update(instance)
- return
+ # a bad sync status is only logged as a warning, it does not raise
+ if self.op.wait_for_sync:
+ disk_abort = not _WaitForSync(self, instance)
+ if disk_abort:
+ self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
+ " status.\nPlease check the instance.")
class LUQueryInstanceData(NoHooksLU):
"""Query runtime instance data.
"""
- _OP_REQP = ["instances"]
+ _OP_REQP = ["instances", "static"]
REQ_BGL = False
def ExpandNames(self):
"""Compute block device status.
"""
- self.cfg.SetDiskID(dev, instance.primary_node)
- dev_pstatus = rpc.call_blockdev_find(instance.primary_node, dev)
+ static = self.op.static
+ if not static:
+ self.cfg.SetDiskID(dev, instance.primary_node)
+ dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
+ dev_pstatus.Raise()
+ dev_pstatus = dev_pstatus.data
+ else:
+ dev_pstatus = None
+
if dev.dev_type in constants.LDS_DRBD:
# we change the snode then (otherwise we use the one passed in)
if dev.logical_id[0] == instance.primary_node:
else:
snode = dev.logical_id[0]
- if snode:
+ if snode and not static:
self.cfg.SetDiskID(dev, snode)
- dev_sstatus = rpc.call_blockdev_find(snode, dev)
+ dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
+ dev_sstatus.Raise()
+ dev_sstatus = dev_sstatus.data
else:
dev_sstatus = None
"pstatus": dev_pstatus,
"sstatus": dev_sstatus,
"children": dev_children,
+ "mode": dev.mode,
}
return data
def Exec(self, feedback_fn):
"""Gather and return data"""
result = {}
+
+ cluster = self.cfg.GetClusterInfo()
+
for instance in self.wanted_instances:
- remote_info = rpc.call_instance_info(instance.primary_node,
- instance.name)
- if remote_info and "state" in remote_info:
- remote_state = "up"
+ if not self.op.static:
+ remote_info = self.rpc.call_instance_info(instance.primary_node,
+ instance.name,
+ instance.hypervisor)
+ remote_info.Raise()
+ remote_info = remote_info.data
+ if remote_info and "state" in remote_info:
+ remote_state = "up"
+ else:
+ remote_state = "down"
else:
- remote_state = "down"
+ remote_state = None
if instance.status == "down":
config_state = "down"
else:
"pnode": instance.primary_node,
"snodes": instance.secondary_nodes,
"os": instance.os,
- "memory": instance.memory,
"nics": [(nic.mac, nic.ip, nic.bridge) for nic in instance.nics],
"disks": disks,
- "vcpus": instance.vcpus,
+ "hypervisor": instance.hypervisor,
+ "network_port": instance.network_port,
+ "hv_instance": instance.hvparams,
+ "hv_actual": cluster.FillHV(instance),
+ "be_instance": instance.beparams,
+ "be_actual": cluster.FillBE(instance),
}
- htkind = self.sstore.GetHypervisorType()
- if htkind == constants.HT_XEN_PVM30:
- idict["kernel_path"] = instance.kernel_path
- idict["initrd_path"] = instance.initrd_path
-
- if htkind == constants.HT_XEN_HVM31:
- idict["hvm_boot_order"] = instance.hvm_boot_order
- idict["hvm_acpi"] = instance.hvm_acpi
- idict["hvm_pae"] = instance.hvm_pae
- idict["hvm_cdrom_image_path"] = instance.hvm_cdrom_image_path
- idict["hvm_nic_type"] = instance.hvm_nic_type
- idict["hvm_disk_type"] = instance.hvm_disk_type
-
- if htkind in constants.HTS_REQ_PORT:
- if instance.vnc_bind_address is None:
- vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
- else:
- vnc_bind_address = instance.vnc_bind_address
- if instance.network_port is None:
- vnc_console_port = None
- elif vnc_bind_address == constants.BIND_ADDRESS_GLOBAL:
- vnc_console_port = "%s:%s" % (instance.primary_node,
- instance.network_port)
- elif vnc_bind_address == constants.LOCALHOST_IP_ADDRESS:
- vnc_console_port = "%s:%s on node %s" % (vnc_bind_address,
- instance.network_port,
- instance.primary_node)
- else:
- vnc_console_port = "%s:%s" % (instance.vnc_bind_address,
- instance.network_port)
- idict["vnc_console_port"] = vnc_console_port
- idict["vnc_bind_address"] = vnc_bind_address
- idict["network_port"] = instance.network_port
-
result[instance.name] = idict
return result
_OP_REQP = ["instance_name"]
REQ_BGL = False
+  def CheckArguments(self):
+    """Check and normalise the opcode arguments.
+
+    Optional opcode attributes that are missing (nics, disks, beparams,
+    hvparams, force) are filled in with empty defaults. Afterwards the
+    disk and NIC change lists are validated and their values normalised
+    in place: the disk size is converted to int, the IP "none" becomes
+    None and NICs being added get the default bridge when none is given.
+
+    @raise errors.OpPrereqError: if no change was submitted or one of
+        the submitted disk/NIC parameters is invalid
+
+    """
+    if not hasattr(self.op, 'nics'):
+      self.op.nics = []
+    if not hasattr(self.op, 'disks'):
+      self.op.disks = []
+    if not hasattr(self.op, 'beparams'):
+      self.op.beparams = {}
+    if not hasattr(self.op, 'hvparams'):
+      self.op.hvparams = {}
+    self.op.force = getattr(self.op, "force", False)
+    if not (self.op.nics or self.op.disks or
+            self.op.hvparams or self.op.beparams):
+      raise errors.OpPrereqError("No changes submitted")
+
+    utils.CheckBEParams(self.op.beparams)
+
+    # Disk validation
+    disk_addremove = 0
+    for disk_op, disk_dict in self.op.disks:
+      if disk_op == constants.DDM_REMOVE:
+        disk_addremove += 1
+        continue
+      elif disk_op == constants.DDM_ADD:
+        disk_addremove += 1
+      else:
+        # anything else must be the index of an existing disk
+        if not isinstance(disk_op, int):
+          raise errors.OpPrereqError("Invalid disk index")
+      if disk_op == constants.DDM_ADD:
+        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
+        if mode not in (constants.DISK_RDONLY, constants.DISK_RDWR):
+          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
+        size = disk_dict.get('size', None)
+        if size is None:
+          raise errors.OpPrereqError("Required disk parameter size missing")
+        try:
+          size = int(size)
+        except ValueError, err:
+          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
+                                     str(err))
+        disk_dict['size'] = size
+      else:
+        # modification of disk
+        if 'size' in disk_dict:
+          raise errors.OpPrereqError("Disk size change not possible, use"
+                                     " grow-disk")
+
+    if disk_addremove > 1:
+      raise errors.OpPrereqError("Only one disk add or remove operation"
+                                 " supported at a time")
+
+    # NIC validation
+    nic_addremove = 0
+    for nic_op, nic_dict in self.op.nics:
+      if nic_op == constants.DDM_REMOVE:
+        nic_addremove += 1
+        continue
+      elif nic_op == constants.DDM_ADD:
+        nic_addremove += 1
+      else:
+        # anything else must be the index of an existing NIC
+        if not isinstance(nic_op, int):
+          raise errors.OpPrereqError("Invalid nic index")
+
+      # nic_dict should be a dict
+      nic_ip = nic_dict.get('ip', None)
+      if nic_ip is not None:
+        if nic_ip.lower() == "none":
+          nic_dict['ip'] = None
+        else:
+          if not utils.IsValidIP(nic_ip):
+            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
+      # bridges cannot be validated here, only defaulted; the default is
+      # assigned solely to NICs being added, because Exec copies every key
+      # present in nic_dict onto an existing NIC and a defaulted bridge
+      # would silently replace the bridge of a NIC that is merely modified
+      if nic_op == constants.DDM_ADD:
+        nic_bridge = nic_dict.get('bridge', None)
+        if nic_bridge is None:
+          nic_dict['bridge'] = self.cfg.GetDefBridge()
+      # but we can validate MACs
+      nic_mac = nic_dict.get('mac', None)
+      if nic_mac is not None:
+        if self.cfg.IsMacInUse(nic_mac):
+          raise errors.OpPrereqError("MAC address %s already in use"
+                                     " in cluster" % nic_mac)
+        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+          if not utils.IsValidMac(nic_mac):
+            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
+    if nic_addremove > 1:
+      raise errors.OpPrereqError("Only one NIC add or remove operation"
+                                 " supported at a time")
+
def ExpandNames(self):
self._ExpandAndLockInstance()
+ # no node locks are named up front; the list is filled in when
+ # DeclareLocks runs for the node level (presumably replacing this
+ # placeholder, given LOCKS_REPLACE -- confirm against the lock helpers)
+ self.needed_locks[locking.LEVEL_NODE] = []
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+
+ def DeclareLocks(self, level):
+ # only the node level needs work here; the node list is delegated to
+ # _LockInstancesNodes (presumably the nodes of the locked instance)
+ if level == locking.LEVEL_NODE:
+ self._LockInstancesNodes()
def BuildHooksEnv(self):
"""Build hooks env.
"""
args = dict()
- if self.mem:
- args['memory'] = self.mem
- if self.vcpus:
- args['vcpus'] = self.vcpus
- if self.do_ip or self.do_bridge or self.mac:
- if self.do_ip:
- ip = self.ip
- else:
- ip = self.instance.nics[0].ip
- if self.bridge:
- bridge = self.bridge
- else:
- bridge = self.instance.nics[0].bridge
- if self.mac:
- mac = self.mac
- else:
- mac = self.instance.nics[0].mac
- args['nics'] = [(ip, bridge, mac)]
- env = _BuildInstanceHookEnvByObject(self.instance, override=args)
- nl = [self.sstore.GetMasterNode(),
+ if constants.BE_MEMORY in self.be_new:
+ args['memory'] = self.be_new[constants.BE_MEMORY]
+ if constants.BE_VCPUS in self.be_new:
+ args['vcpus'] = self.be_new[constants.BE_VCPUS]
+ # FIXME: readd disk/nic changes
+ env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
+ nl = [self.cfg.GetMasterNode(),
self.instance.primary_node] + list(self.instance.secondary_nodes)
return env, nl, nl
This only checks the instance list against the existing names.
"""
- # FIXME: all the parameters could be checked before, in ExpandNames, or in
- # a separate CheckArguments function, if we implement one, so the operation
- # can be aborted without waiting for any lock, should it have an error...
- self.mem = getattr(self.op, "mem", None)
- self.vcpus = getattr(self.op, "vcpus", None)
- self.ip = getattr(self.op, "ip", None)
- self.mac = getattr(self.op, "mac", None)
- self.bridge = getattr(self.op, "bridge", None)
- self.kernel_path = getattr(self.op, "kernel_path", None)
- self.initrd_path = getattr(self.op, "initrd_path", None)
- self.hvm_boot_order = getattr(self.op, "hvm_boot_order", None)
- self.hvm_acpi = getattr(self.op, "hvm_acpi", None)
- self.hvm_pae = getattr(self.op, "hvm_pae", None)
- self.hvm_nic_type = getattr(self.op, "hvm_nic_type", None)
- self.hvm_disk_type = getattr(self.op, "hvm_disk_type", None)
- self.hvm_cdrom_image_path = getattr(self.op, "hvm_cdrom_image_path", None)
- self.vnc_bind_address = getattr(self.op, "vnc_bind_address", None)
- self.force = getattr(self.op, "force", None)
- all_parms = [self.mem, self.vcpus, self.ip, self.bridge, self.mac,
- self.kernel_path, self.initrd_path, self.hvm_boot_order,
- self.hvm_acpi, self.hvm_pae, self.hvm_cdrom_image_path,
- self.vnc_bind_address, self.hvm_nic_type, self.hvm_disk_type]
- if all_parms.count(None) == len(all_parms):
- raise errors.OpPrereqError("No changes submitted")
- if self.mem is not None:
- try:
- self.mem = int(self.mem)
- except ValueError, err:
- raise errors.OpPrereqError("Invalid memory size: %s" % str(err))
- if self.vcpus is not None:
- try:
- self.vcpus = int(self.vcpus)
- except ValueError, err:
- raise errors.OpPrereqError("Invalid vcpus number: %s" % str(err))
- if self.ip is not None:
- self.do_ip = True
- if self.ip.lower() == "none":
- self.ip = None
- else:
- if not utils.IsValidIP(self.ip):
- raise errors.OpPrereqError("Invalid IP address '%s'." % self.ip)
- else:
- self.do_ip = False
- self.do_bridge = (self.bridge is not None)
- if self.mac is not None:
- if self.cfg.IsMacInUse(self.mac):
- raise errors.OpPrereqError('MAC address %s already in use in cluster' %
- self.mac)
- if not utils.IsValidMac(self.mac):
- raise errors.OpPrereqError('Invalid MAC address %s' % self.mac)
-
- if self.kernel_path is not None:
- self.do_kernel_path = True
- if self.kernel_path == constants.VALUE_NONE:
- raise errors.OpPrereqError("Can't set instance to no kernel")
-
- if self.kernel_path != constants.VALUE_DEFAULT:
- if not os.path.isabs(self.kernel_path):
- raise errors.OpPrereqError("The kernel path must be an absolute"
- " filename")
- else:
- self.do_kernel_path = False
-
- if self.initrd_path is not None:
- self.do_initrd_path = True
- if self.initrd_path not in (constants.VALUE_NONE,
- constants.VALUE_DEFAULT):
- if not os.path.isabs(self.initrd_path):
- raise errors.OpPrereqError("The initrd path must be an absolute"
- " filename")
- else:
- self.do_initrd_path = False
-
- # boot order verification
- if self.hvm_boot_order is not None:
- if self.hvm_boot_order != constants.VALUE_DEFAULT:
- if len(self.hvm_boot_order.strip("acdn")) != 0:
- raise errors.OpPrereqError("invalid boot order specified,"
- " must be one or more of [acdn]"
- " or 'default'")
-
- # hvm_cdrom_image_path verification
- if self.op.hvm_cdrom_image_path is not None:
- if not (os.path.isabs(self.op.hvm_cdrom_image_path) or
- self.op.hvm_cdrom_image_path.lower() == "none"):
- raise errors.OpPrereqError("The path to the HVM CDROM image must"
- " be an absolute path or None, not %s" %
- self.op.hvm_cdrom_image_path)
- if not (os.path.isfile(self.op.hvm_cdrom_image_path) or
- self.op.hvm_cdrom_image_path.lower() == "none"):
- raise errors.OpPrereqError("The HVM CDROM image must either be a"
- " regular file or a symlink pointing to"
- " an existing regular file, not %s" %
- self.op.hvm_cdrom_image_path)
-
- # vnc_bind_address verification
- if self.op.vnc_bind_address is not None:
- if not utils.IsValidIP(self.op.vnc_bind_address):
- raise errors.OpPrereqError("given VNC bind address '%s' doesn't look"
- " like a valid IP address" %
- self.op.vnc_bind_address)
+ force = self.force = self.op.force
+
+ # checking the new params on the primary/secondary nodes
instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
assert self.instance is not None, \
"Cannot retrieve locked instance %s" % self.op.instance_name
+ pnode = self.instance.primary_node
+ nodelist = [pnode]
+ nodelist.extend(instance.secondary_nodes)
+
+ # hvparams processing
+ if self.op.hvparams:
+ i_hvdict = copy.deepcopy(instance.hvparams)
+ for key, val in self.op.hvparams.iteritems():
+ if val == constants.VALUE_DEFAULT:
+ try:
+ del i_hvdict[key]
+ except KeyError:
+ pass
+ elif val == constants.VALUE_NONE:
+ i_hvdict[key] = None
+ else:
+ i_hvdict[key] = val
+ cluster = self.cfg.GetClusterInfo()
+ hv_new = cluster.FillDict(cluster.hvparams[instance.hypervisor],
+ i_hvdict)
+ # local check
+ hypervisor.GetHypervisor(
+ instance.hypervisor).CheckParameterSyntax(hv_new)
+ _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
+ self.hv_new = hv_new # the new actual values
+ self.hv_inst = i_hvdict # the new dict (without defaults)
+ else:
+ self.hv_new = self.hv_inst = {}
+
+ # beparams processing
+ if self.op.beparams:
+ i_bedict = copy.deepcopy(instance.beparams)
+ for key, val in self.op.beparams.iteritems():
+ if val == constants.VALUE_DEFAULT:
+ try:
+ del i_bedict[key]
+ except KeyError:
+ pass
+ else:
+ i_bedict[key] = val
+ cluster = self.cfg.GetClusterInfo()
+ be_new = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
+ i_bedict)
+ self.be_new = be_new # the new actual values
+ self.be_inst = i_bedict # the new dict (without defaults)
+ else:
+ self.be_new = self.be_inst = {}
+
self.warn = []
- if self.mem is not None and not self.force:
- pnode = self.instance.primary_node
- nodelist = [pnode]
- nodelist.extend(instance.secondary_nodes)
- instance_info = rpc.call_instance_info(pnode, instance.name)
- nodeinfo = rpc.call_node_info(nodelist, self.cfg.GetVGName())
-
- if pnode not in nodeinfo or not isinstance(nodeinfo[pnode], dict):
+
+ if constants.BE_MEMORY in self.op.beparams and not self.force:
+ mem_check_list = [pnode]
+ if be_new[constants.BE_AUTO_BALANCE]:
+ # either we changed auto_balance to yes or it was from before
+ mem_check_list.extend(instance.secondary_nodes)
+ instance_info = self.rpc.call_instance_info(pnode, instance.name,
+ instance.hypervisor)
+ nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
+ instance.hypervisor)
+ if nodeinfo[pnode].failed or not isinstance(nodeinfo[pnode].data, dict):
# Assume the primary node is unreachable and go ahead
self.warn.append("Can't get info from primary node %s" % pnode)
else:
- if instance_info:
- current_mem = instance_info['memory']
+ if not instance_info.failed and instance_info.data:
+ current_mem = instance_info.data['memory']
else:
# Assume instance not running
# (there is a slight race condition here, but it's not very probable,
# and we have no other way to check)
current_mem = 0
- miss_mem = self.mem - current_mem - nodeinfo[pnode]['memory_free']
+ miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
+ nodeinfo[pnode].data['memory_free'])
if miss_mem > 0:
raise errors.OpPrereqError("This change will prevent the instance"
" from starting, due to %d MB of memory"
" missing on its primary node" % miss_mem)
- for node in instance.secondary_nodes:
- if node not in nodeinfo or not isinstance(nodeinfo[node], dict):
- self.warn.append("Can't get info from secondary node %s" % node)
- elif self.mem > nodeinfo[node]['memory_free']:
- self.warn.append("Not enough memory to failover instance to secondary"
- " node %s" % node)
-
- # Xen HVM device type checks
- if self.sstore.GetHypervisorType() == constants.HT_XEN_HVM31:
- if self.op.hvm_nic_type is not None:
- if self.op.hvm_nic_type not in constants.HT_HVM_VALID_NIC_TYPES:
- raise errors.OpPrereqError("Invalid NIC type %s specified for Xen"
- " HVM hypervisor" % self.op.hvm_nic_type)
- if self.op.hvm_disk_type is not None:
- if self.op.hvm_disk_type not in constants.HT_HVM_VALID_DISK_TYPES:
- raise errors.OpPrereqError("Invalid disk type %s specified for Xen"
- " HVM hypervisor" % self.op.hvm_disk_type)
+      if be_new[constants.BE_AUTO_BALANCE]:
+        # secondary_nodes is a plain sequence of node names (it has no
+        # iteritems); the per-node results live in nodeinfo, which was
+        # queried for the secondaries above when auto_balance is set
+        for node in instance.secondary_nodes:
+          nres = nodeinfo[node]
+          if nres.failed or not isinstance(nres.data, dict):
+            self.warn.append("Can't get info from secondary node %s" % node)
+          elif be_new[constants.BE_MEMORY] > nres.data['memory_free']:
+            self.warn.append("Not enough memory to failover instance to"
+                             " secondary node %s" % node)
+
+    # NIC processing
+    for nic_op, nic_dict in self.op.nics:
+      if nic_op == constants.DDM_REMOVE:
+        if not instance.nics:
+          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
+        continue
+      if nic_op != constants.DDM_ADD:
+        # an existing nic; the highest valid index is len - 1
+        if nic_op < 0 or nic_op >= len(instance.nics):
+          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
+                                     " are 0 to %d" %
+                                     (nic_op, len(instance.nics) - 1))
+      nic_bridge = nic_dict.get('bridge', None)
+      if nic_bridge is not None:
+        # NOTE(review): assumes call_bridges_exist returns the same result
+        # object (failed/data) as the other self.rpc calls in this file; a
+        # bare truth test on that object would never detect a missing bridge
+        br_result = self.rpc.call_bridges_exist(pnode, [nic_bridge])
+        if br_result.failed or not br_result.data:
+          msg = ("Bridge '%s' doesn't exist on one of"
+                 " the instance nodes" % nic_bridge)
+          if self.force:
+            self.warn.append(msg)
+          else:
+            raise errors.OpPrereqError(msg)
+
+    # DISK processing
+    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
+      raise errors.OpPrereqError("Disk operations not supported for"
+                                 " diskless instances")
+    for disk_op, disk_dict in self.op.disks:
+      if disk_op == constants.DDM_REMOVE:
+        if len(instance.disks) == 1:
+          raise errors.OpPrereqError("Cannot remove the last disk of"
+                                     " an instance")
+        # disks may only be removed while the instance is not running
+        # NOTE(review): assumes the per-node entry is the same result
+        # object (failed/data) as for the other multi-node calls in this
+        # file, so the instance list is its data payload
+        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
+        ins_l = ins_l[pnode]
+        if ins_l.failed or not isinstance(ins_l.data, list):
+          raise errors.OpPrereqError("Can't contact node '%s'" % pnode)
+        if instance.name in ins_l.data:
+          raise errors.OpPrereqError("Instance is running, can't remove"
+                                     " disks.")
+
+      # the limit applies to the number of disks, not of NICs
+      if (disk_op == constants.DDM_ADD and
+          len(instance.disks) >= constants.MAX_DISKS):
+        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
+                                   " add more" % constants.MAX_DISKS)
+      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
+        # an existing disk; the highest valid index is len - 1
+        if disk_op < 0 or disk_op >= len(instance.disks):
+          raise errors.OpPrereqError("Invalid disk index %s, valid values"
+                                     " are 0 to %d" %
+                                     (disk_op, len(instance.disks) - 1))
return
"""Modifies an instance.
All parameters take effect only at the next restart of the instance.
+
"""
# Process here the warnings from CheckPrereq, as we don't have a
# feedback_fn there.
result = []
instance = self.instance
- if self.mem:
- instance.memory = self.mem
- result.append(("mem", self.mem))
- if self.vcpus:
- instance.vcpus = self.vcpus
- result.append(("vcpus", self.vcpus))
- if self.do_ip:
- instance.nics[0].ip = self.ip
- result.append(("ip", self.ip))
- if self.bridge:
- instance.nics[0].bridge = self.bridge
- result.append(("bridge", self.bridge))
- if self.mac:
- instance.nics[0].mac = self.mac
- result.append(("mac", self.mac))
- if self.do_kernel_path:
- instance.kernel_path = self.kernel_path
- result.append(("kernel_path", self.kernel_path))
- if self.do_initrd_path:
- instance.initrd_path = self.initrd_path
- result.append(("initrd_path", self.initrd_path))
- if self.hvm_boot_order:
- if self.hvm_boot_order == constants.VALUE_DEFAULT:
- instance.hvm_boot_order = None
+    # disk changes
+    for disk_op, disk_dict in self.op.disks:
+      if disk_op == constants.DDM_REMOVE:
+        # remove the last disk
+        device = instance.disks.pop()
+        device_idx = len(instance.disks)
+        for node, disk in device.ComputeNodeTree(instance.primary_node):
+          self.cfg.SetDiskID(disk, node)
+          # keep the RPC answer in its own name: "result" is the list of
+          # applied changes and must not be overwritten
+          rpc_result = self.rpc.call_blockdev_remove(node, disk)
+          if rpc_result.failed or not rpc_result.data:
+            self.proc.LogWarning("Could not remove disk/%d on node %s,"
+                                 " continuing anyway", device_idx, node)
+        result.append(("disk/%d" % device_idx, "remove"))
+ elif disk_op == constants.DDM_ADD:
+ # add a new disk
+ if instance.disk_template == constants.DT_FILE:
+ file_driver, file_path = instance.disks[0].logical_id
+ file_path = os.path.dirname(file_path)
+ else:
+ file_driver = file_path = None
+ disk_idx_base = len(instance.disks)
+ new_disk = _GenerateDiskTemplate(self,
+ instance.disk_template,
+ instance, instance.primary_node,
+ instance.secondary_nodes,
+ [disk_dict],
+ file_path,
+ file_driver,
+ disk_idx_base)[0]
+ new_disk.mode = disk_dict['mode']
+ instance.disks.append(new_disk)
+ info = _GetInstanceInfoText(instance)
+
+ logging.info("Creating volume %s for instance %s",
+ new_disk.iv_name, instance.name)
+ # Note: this needs to be kept in sync with _CreateDisks
+ #HARDCODE
+ for secondary_node in instance.secondary_nodes:
+ if not _CreateBlockDevOnSecondary(self, secondary_node, instance,
+ new_disk, False, info):
+ self.LogWarning("Failed to create volume %s (%s) on"
+ " secondary node %s!",
+ new_disk.iv_name, new_disk, secondary_node)
+ #HARDCODE
+ if not _CreateBlockDevOnPrimary(self, instance.primary_node,
+ instance, new_disk, info):
+ self.LogWarning("Failed to create volume %s on primary!",
+ new_disk.iv_name)
+ result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
+ (new_disk.size, new_disk.mode)))
else:
- instance.hvm_boot_order = self.hvm_boot_order
- result.append(("hvm_boot_order", self.hvm_boot_order))
- if self.hvm_acpi is not None:
- instance.hvm_acpi = self.hvm_acpi
- result.append(("hvm_acpi", self.hvm_acpi))
- if self.hvm_pae is not None:
- instance.hvm_pae = self.hvm_pae
- result.append(("hvm_pae", self.hvm_pae))
- if self.hvm_nic_type is not None:
- instance.hvm_nic_type = self.hvm_nic_type
- result.append(("hvm_nic_type", self.hvm_nic_type))
- if self.hvm_disk_type is not None:
- instance.hvm_disk_type = self.hvm_disk_type
- result.append(("hvm_disk_type", self.hvm_disk_type))
- if self.hvm_cdrom_image_path:
- if self.hvm_cdrom_image_path == constants.VALUE_NONE:
- instance.hvm_cdrom_image_path = None
+        # change a given disk; the mode is optional for a modification,
+        # so only touch the disk when one was actually submitted
+        if 'mode' in disk_dict:
+          instance.disks[disk_op].mode = disk_dict['mode']
+          result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
+ # NIC changes
+ for nic_op, nic_dict in self.op.nics:
+ if nic_op == constants.DDM_REMOVE:
+ # remove the last nic
+ del instance.nics[-1]
+ result.append(("nic.%d" % len(instance.nics), "remove"))
+ elif nic_op == constants.DDM_ADD:
+ # add a new nic
+ if 'mac' not in nic_dict:
+ mac = constants.VALUE_GENERATE
+ else:
+ mac = nic_dict['mac']
+ if mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
+ mac = self.cfg.GenerateMAC()
+ new_nic = objects.NIC(mac=mac, ip=nic_dict.get('ip', None),
+ bridge=nic_dict.get('bridge', None))
+ instance.nics.append(new_nic)
+ result.append(("nic.%d" % (len(instance.nics) - 1),
+ "add:mac=%s,ip=%s,bridge=%s" %
+ (new_nic.mac, new_nic.ip, new_nic.bridge)))
else:
- instance.hvm_cdrom_image_path = self.hvm_cdrom_image_path
- result.append(("hvm_cdrom_image_path", self.hvm_cdrom_image_path))
- if self.vnc_bind_address:
- instance.vnc_bind_address = self.vnc_bind_address
- result.append(("vnc_bind_address", self.vnc_bind_address))
+ # change a given nic
+ for key in 'mac', 'ip', 'bridge':
+ if key in nic_dict:
+ setattr(instance.nics[nic_op], key, nic_dict[key])
+ result.append(("nic.%s/%d" % (key, nic_op), nic_dict[key]))
+
+    # hvparams changes: store the instance-level dict (without the cluster
+    # defaults filled in), mirroring what is done for beparams; storing the
+    # filled dict would copy the current defaults into the instance
+    if self.op.hvparams:
+      instance.hvparams = self.hv_inst
+      for key, val in self.op.hvparams.iteritems():
+        result.append(("hv/%s" % key, val))
+
+ # beparams changes
+ if self.op.beparams:
+ instance.beparams = self.be_inst
+ for key, val in self.op.beparams.iteritems():
+ result.append(("be/%s" % key, val))
self.cfg.Update(instance)
def Exec(self, feedback_fn):
"""Compute the list of all the exported system images.
- Returns:
- a dictionary with the structure node->(export-list)
- where export-list is a list of the instances exported on
- that node.
+ @rtype: dict
+ @return: a dictionary with the structure node->(export-list)
+ where export-list is a list of the instances exported on
+ that node.
"""
- return rpc.call_export_list(self.nodes)
+ rpcresult = self.rpc.call_export_list(self.nodes)
+ result = {}
+ # unwrap the per-node answers; a node whose call failed is reported as
+ # False instead of its export list
+ for node in rpcresult:
+ if rpcresult[node].failed:
+ result[node] = False
+ else:
+ result[node] = rpcresult[node].data
+
+ return result
class LUExportInstance(LogicalUnit):
"EXPORT_NODE": self.op.target_node,
"EXPORT_DO_SHUTDOWN": self.op.shutdown,
}
- env.update(_BuildInstanceHookEnvByObject(self.instance))
- nl = [self.sstore.GetMasterNode(), self.instance.primary_node,
+ env.update(_BuildInstanceHookEnvByObject(self, self.instance))
+ nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
self.op.target_node]
return env, nl, nl
self.dst_node = self.cfg.GetNodeInfo(
self.cfg.ExpandNodeName(self.op.target_node))
- assert self.dst_node is not None, \
- "Cannot retrieve locked node %s" % self.op.target_node
+ if self.dst_node is None:
+ # This is wrong node name, not a non-locked node
+ raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
# instance disk type verification
for disk in self.instance.disks:
src_node = instance.primary_node
if self.op.shutdown:
# shutdown the instance, but not the disks
- if not rpc.call_instance_shutdown(src_node, instance):
+ result = self.rpc.call_instance_shutdown(src_node, instance)
+ result.Raise()
+ if not result.data:
raise errors.OpExecError("Could not shutdown instance %s on node %s" %
(instance.name, src_node))
try:
for disk in instance.disks:
- if disk.iv_name == "sda":
- # new_dev_name will be a snapshot of an lvm leaf of the one we passed
- new_dev_name = rpc.call_blockdev_snapshot(src_node, disk)
-
- if not new_dev_name:
- logger.Error("could not snapshot block device %s on node %s" %
- (disk.logical_id[1], src_node))
- else:
- new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
- logical_id=(vgname, new_dev_name),
- physical_id=(vgname, new_dev_name),
- iv_name=disk.iv_name)
- snap_disks.append(new_dev)
+ # new_dev_name will be a snapshot of an lvm leaf of the one we passed
+ new_dev_name = self.rpc.call_blockdev_snapshot(src_node, disk)
+ if new_dev_name.failed or not new_dev_name.data:
+ self.LogWarning("Could not snapshot block device %s on node %s",
+ disk.logical_id[1], src_node)
+ snap_disks.append(False)
+ else:
+ new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
+ logical_id=(vgname, new_dev_name.data),
+ physical_id=(vgname, new_dev_name.data),
+ iv_name=disk.iv_name)
+ snap_disks.append(new_dev)
finally:
if self.op.shutdown and instance.status == "up":
- if not rpc.call_instance_start(src_node, instance, None):
- _ShutdownInstanceDisks(instance, self.cfg)
+ result = self.rpc.call_instance_start(src_node, instance, None)
+ if result.failed or not result.data:
+ _ShutdownInstanceDisks(self, instance)
raise errors.OpExecError("Could not start instance")
# TODO: check for size
- for dev in snap_disks:
- if not rpc.call_snapshot_export(src_node, dev, dst_node.name, instance):
- logger.Error("could not export block device %s from node %s to node %s"
- % (dev.logical_id[1], src_node, dst_node.name))
- if not rpc.call_blockdev_remove(src_node, dev):
- logger.Error("could not remove snapshot block device %s from node %s" %
- (dev.logical_id[1], src_node))
-
- if not rpc.call_finalize_export(dst_node.name, instance, snap_disks):
- logger.Error("could not finalize export for instance %s on node %s" %
- (instance.name, dst_node.name))
+ cluster_name = self.cfg.GetClusterName()
+ for idx, dev in enumerate(snap_disks):
+ if dev:
+ result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
+ instance, cluster_name, idx)
+ if result.failed or not result.data:
+ self.LogWarning("Could not export block device %s from node %s to"
+ " node %s", dev.logical_id[1], src_node,
+ dst_node.name)
+ result = self.rpc.call_blockdev_remove(src_node, dev)
+ if result.failed or not result.data:
+ self.LogWarning("Could not remove snapshot block device %s from node"
+ " %s", dev.logical_id[1], src_node)
+
+ result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
+ if result.failed or not result.data:
+ self.LogWarning("Could not finalize export for instance %s on node %s",
+ instance.name, dst_node.name)
nodelist = self.cfg.GetNodeList()
nodelist.remove(dst_node.name)
# if we proceed the backup would be removed because OpQueryExports
# substitutes an empty list with the full cluster node list.
if nodelist:
- exportlist = rpc.call_export_list(nodelist)
+ exportlist = self.rpc.call_export_list(nodelist)
for node in exportlist:
- if instance.name in exportlist[node]:
- if not rpc.call_export_remove(node, instance.name):
- logger.Error("could not remove older export for instance %s"
- " on node %s" % (instance.name, node))
+ if exportlist[node].failed:
+ continue
+ if instance.name in exportlist[node].data:
+ if not self.rpc.call_export_remove(node, instance.name):
+ self.LogWarning("Could not remove older export for instance %s"
+ " on node %s", instance.name, node)
class LURemoveExport(NoHooksLU):
fqdn_warn = True
instance_name = self.op.instance_name
- exportlist = rpc.call_export_list(self.acquired_locks[locking.LEVEL_NODE])
+ exportlist = self.rpc.call_export_list(self.acquired_locks[
+ locking.LEVEL_NODE])
found = False
for node in exportlist:
- if instance_name in exportlist[node]:
+ if exportlist[node].failed:
+ self.LogWarning("Failed to query node %s, continuing" % node)
+ continue
+ if instance_name in exportlist[node].data:
found = True
- if not rpc.call_export_remove(node, instance_name):
- logger.Error("could not remove export for instance %s"
- " on node %s" % (instance_name, node))
+ result = self.rpc.call_export_remove(node, instance_name)
+ if result.failed or not result.data:
+ logging.error("Could not remove export for instance %s"
+ " on node %s", instance_name, node)
if fqdn_warn and not found:
feedback_fn("Export not found. If trying to remove an export belonging"
if not utils.TestDelay(self.op.duration):
raise errors.OpExecError("Error during master delay test")
if self.op.on_nodes:
- result = rpc.call_test_delay(self.op.on_nodes, self.op.duration)
+ result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
if not result:
raise errors.OpExecError("Complete failure from rpc call")
for node, node_result in result.items():
- if not node_result:
+ node_result.Raise()
+ if not node_result.data:
raise errors.OpExecError("Failure during rpc call to node %s,"
- " result: %s" % (node, node_result))
+ " result: %s" % (node, node_result.data))
class IAllocator(object):
"""IAllocator framework.
An IAllocator instance has three sets of attributes:
- - cfg/sstore that are needed to query the cluster
+ - cfg that is needed to query the cluster
- input data (all members of the _KEYS class attribute are required)
- four buffer attributes (in|out_data|text), that represent the
input (to the external script) in text and data structure format,
"""
_ALLO_KEYS = [
"mem_size", "disks", "disk_template",
- "os", "tags", "nics", "vcpus",
+ "os", "tags", "nics", "vcpus", "hypervisor",
]
_RELO_KEYS = [
"relocate_from",
]
- def __init__(self, cfg, sstore, mode, name, **kwargs):
- self.cfg = cfg
- self.sstore = sstore
+ def __init__(self, lu, mode, name, **kwargs):
+ self.lu = lu
# init buffer variables
self.in_text = self.out_text = self.in_data = self.out_data = None
# init all input fields so that pylint is happy
This is the data that is independent of the actual operation.
"""
- cfg = self.cfg
+ cfg = self.lu.cfg
+ cluster_info = cfg.GetClusterInfo()
# cluster data
data = {
"version": 1,
- "cluster_name": self.sstore.GetClusterName(),
- "cluster_tags": list(cfg.GetClusterInfo().GetTags()),
- "hypervisor_type": self.sstore.GetHypervisorType(),
+ "cluster_name": cfg.GetClusterName(),
+ "cluster_tags": list(cluster_info.GetTags()),
+ "enable_hypervisors": list(cluster_info.enabled_hypervisors),
# we don't have job IDs
}
-
- i_list = [cfg.GetInstanceInfo(iname) for iname in cfg.GetInstanceList()]
+ iinfo = cfg.GetAllInstancesInfo().values()
+ i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
# node data
node_results = {}
node_list = cfg.GetNodeList()
- node_data = rpc.call_node_info(node_list, cfg.GetVGName())
+
+ if self.mode == constants.IALLOCATOR_MODE_ALLOC:
+ hypervisor = self.hypervisor
+ elif self.mode == constants.IALLOCATOR_MODE_RELOC:
+ hypervisor = cfg.GetInstanceInfo(self.name).hypervisor
+
+ node_data = self.lu.rpc.call_node_info(node_list, cfg.GetVGName(),
+ hypervisor)
+ node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
+ cluster_info.enabled_hypervisors)
for nname in node_list:
ninfo = cfg.GetNodeInfo(nname)
- if nname not in node_data or not isinstance(node_data[nname], dict):
+ node_data[nname].Raise()
+ if not isinstance(node_data[nname].data, dict):
raise errors.OpExecError("Can't get data for node %s" % nname)
- remote_info = node_data[nname]
+ remote_info = node_data[nname].data
for attr in ['memory_total', 'memory_free', 'memory_dom0',
'vg_size', 'vg_free', 'cpu_total']:
if attr not in remote_info:
" %s" % (nname, attr, str(err)))
# compute memory used by primary instances
i_p_mem = i_p_up_mem = 0
- for iinfo in i_list:
+ for iinfo, beinfo in i_list:
if iinfo.primary_node == nname:
- i_p_mem += iinfo.memory
+ i_p_mem += beinfo[constants.BE_MEMORY]
+ if iinfo.name not in node_iinfo[nname]:
+ i_used_mem = 0
+ else:
+ i_used_mem = int(node_iinfo[nname][iinfo.name]['memory'])
+ i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
+ remote_info['memory_free'] -= max(0, i_mem_diff)
+
if iinfo.status == "up":
- i_p_up_mem += iinfo.memory
+ i_p_up_mem += beinfo[constants.BE_MEMORY]
# compute memory used by instances
pnr = {
# instance data
instance_data = {}
- for iinfo in i_list:
+ for iinfo, beinfo in i_list:
nic_data = [{"mac": n.mac, "ip": n.ip, "bridge": n.bridge}
for n in iinfo.nics]
pir = {
"tags": list(iinfo.GetTags()),
"should_run": iinfo.status == "up",
- "vcpus": iinfo.vcpus,
- "memory": iinfo.memory,
+ "vcpus": beinfo[constants.BE_VCPUS],
+ "memory": beinfo[constants.BE_MEMORY],
"os": iinfo.os,
"nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
"nics": nic_data,
"disks": [{"size": dsk.size, "mode": "w"} for dsk in iinfo.disks],
"disk_template": iinfo.disk_template,
+ "hypervisor": iinfo.hypervisor,
}
instance_data[iinfo.name] = pir
if len(self.disks) != 2:
raise errors.OpExecError("Only two-disk configurations supported")
- disk_space = _ComputeDiskSize(self.disk_template,
- self.disks[0]["size"], self.disks[1]["size"])
+ disk_space = _ComputeDiskSize(self.disk_template, self.disks)
if self.disk_template in constants.DTS_NET_MIRROR:
self.required_nodes = 2
done.
"""
- instance = self.cfg.GetInstanceInfo(self.name)
+ instance = self.lu.cfg.GetInstanceInfo(self.name)
if instance is None:
raise errors.ProgrammerError("Unknown instance '%s' passed to"
" IAllocator" % self.name)
raise errors.OpPrereqError("Instance has not exactly one secondary node")
self.required_nodes = 1
-
- disk_space = _ComputeDiskSize(instance.disk_template,
- instance.disks[0].size,
- instance.disks[1].size)
+ disk_sizes = [{'size': disk.size} for disk in instance.disks]
+ disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
request = {
"type": "relocate",
self.in_text = serializer.Dump(self.in_data)
- def Run(self, name, validate=True, call_fn=rpc.call_iallocator_runner):
+ def Run(self, name, validate=True, call_fn=None):
"""Run an instance allocator and return the results.
"""
+ if call_fn is None:
+ call_fn = self.lu.rpc.call_iallocator_runner
data = self.in_text
- result = call_fn(self.sstore.GetMasterNode(), name, self.in_text)
+ result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
+ result.Raise()
- if not isinstance(result, (list, tuple)) or len(result) != 4:
+ if not isinstance(result.data, (list, tuple)) or len(result.data) != 4:
raise errors.OpExecError("Invalid result from master iallocator runner")
- rcode, stdout, stderr, fail = result
+ rcode, stdout, stderr, fail = result.data
if rcode == constants.IARUN_NOTFOUND:
raise errors.OpExecError("Can't find allocator '%s'" % name)
row["mode"] not in ['r', 'w']):
raise errors.OpPrereqError("Invalid contents of the"
" 'disks' parameter")
+ if self.op.hypervisor is None:
+ self.op.hypervisor = self.cfg.GetHypervisorType()
elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
if not hasattr(self.op, "name"):
raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
"""
if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
- ial = IAllocator(self.cfg, self.sstore,
+ ial = IAllocator(self,
mode=self.op.mode,
name=self.op.name,
mem_size=self.op.mem_size,
tags=self.op.tags,
nics=self.op.nics,
vcpus=self.op.vcpus,
+ hypervisor=self.op.hypervisor,
)
else:
- ial = IAllocator(self.cfg, self.sstore,
+ ial = IAllocator(self,
mode=self.op.mode,
name=self.op.name,
relocate_from=list(self.relocate_from),