@param lu: the LU on behalf of which we make the check
@param node: the node to check
- @raise errors.OpPrereqError: if the nodes is offline
+ @raise errors.OpPrereqError: if the node is offline
"""
if lu.cfg.GetNodeInfo(node).offline:
raise errors.OpPrereqError("Can't use offline node %s" % node)
+def _CheckNodeNotDrained(lu, node):
+ """Ensure that a given node is not drained.
+
+ @param lu: the LU on behalf of which we make the check
+ @param node: the node to check
+ @raise errors.OpPrereqError: if the node is drained
+
+ """
+ if lu.cfg.GetNodeInfo(node).drained:
+ raise errors.OpPrereqError("Can't use drained node %s" % node)
+
+
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
memory, vcpus, nics):
"""Builds instance related env variables for hooks
i_non_redundant = [] # Non redundant instances
i_non_a_balanced = [] # Non auto-balanced instances
n_offline = [] # List of offline nodes
+ n_drained = [] # List of nodes being drained
node_volume = {}
node_instance = {}
node_info = {}
ntype = "master"
elif node_i.master_candidate:
ntype = "master candidate"
+ elif node_i.drained:
+ ntype = "drained"
+ n_drained.append(node)
else:
ntype = "regular"
feedback_fn("* Verifying node %s (%s)" % (node, ntype))
if n_offline:
feedback_fn(" - NOTICE: %d offline node(s) found." % len(n_offline))
+ if n_drained:
+ feedback_fn(" - NOTICE: %d drained node(s) found." % len(n_drained))
+
return not bad
def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
if the given volume group is valid.
"""
- # FIXME: This only works because there is only one parameter that can be
- # changed or removed.
if self.op.vg_name is not None and not self.op.vg_name:
instances = self.cfg.GetAllInstancesInfo().values()
for inst in instances:
self.cluster = cluster = self.cfg.GetClusterInfo()
# validate beparams changes
if self.op.beparams:
- utils.CheckBEParams(self.op.beparams)
+ utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
self.new_beparams = cluster.FillDict(
cluster.beparams[constants.BEGR_DEFAULT], self.op.beparams)
hv_name in self.op.enabled_hypervisors)):
# either this is a new hypervisor, or its parameters have changed
hv_class = hypervisor.GetHypervisor(hv_name)
+ utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
hv_class.CheckParameterSyntax(hv_params)
_CheckHVParams(self, node_list, hv_name, hv_params)
"master_candidate",
"master",
"offline",
+ "drained",
)
def ExpandNames(self):
val = node.name == master_node
elif field == "offline":
val = node.offline
+ elif field == "drained":
+ val = node.drained
elif self._FIELDS_DYNAMIC.Matches(field):
val = live_data[node.name].get(field, None)
else:
primary_ip=primary_ip,
secondary_ip=secondary_ip,
master_candidate=master_candidate,
- offline=False)
+ offline=False, drained=False)
def Exec(self, feedback_fn):
"""Adds the new node to the cluster.
to_copy = []
enabled_hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
- if constants.HTS_USE_VNC.intersection(enabled_hypervisors):
+ if constants.HTS_COPY_VNC_PASSWORD.intersection(enabled_hypervisors):
to_copy.append(constants.VNC_PASSWORD_FILE)
for fname in to_copy:
self.op.node_name = node_name
_CheckBooleanOpField(self.op, 'master_candidate')
_CheckBooleanOpField(self.op, 'offline')
- if self.op.master_candidate is None and self.op.offline is None:
+ _CheckBooleanOpField(self.op, 'drained')
+ all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
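+    # at least one of the flags must be passed, and no more than one of them
+    # may be set to True in the same request, as the resulting states are
+    # mutually exclusive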
+ if all_mods.count(None) == 3:
raise errors.OpPrereqError("Please pass at least one modification")
- if self.op.offline == True and self.op.master_candidate == True:
- raise errors.OpPrereqError("Can't set the node into offline and"
- " master_candidate at the same time")
+ if all_mods.count(True) > 1:
+ raise errors.OpPrereqError("Can't set the node into more than one"
+ " state at the same time")
def ExpandNames(self):
self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
"OP_TARGET": self.op.node_name,
"MASTER_CANDIDATE": str(self.op.master_candidate),
"OFFLINE": str(self.op.offline),
+ "DRAINED": str(self.op.drained),
}
nl = [self.cfg.GetMasterNode(),
self.op.node_name]
"""
node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
- if ((self.op.master_candidate == False or self.op.offline == True)
- and node.master_candidate):
+ if ((self.op.master_candidate == False or self.op.offline == True or
+ self.op.drained == True) and node.master_candidate):
# we will demote the node from master_candidate
if self.op.node_name == self.cfg.GetMasterNode():
raise errors.OpPrereqError("The master node has to be a"
- " master candidate and online")
+ " master candidate, online and not drained")
cp_size = self.cfg.GetClusterInfo().candidate_pool_size
num_candidates, _ = self.cfg.GetMasterCandidateStats()
if num_candidates <= cp_size:
else:
raise errors.OpPrereqError(msg)
- if (self.op.master_candidate == True and node.offline and
- not self.op.offline == False):
- raise errors.OpPrereqError("Can't set an offline node to"
- " master_candidate")
+ if (self.op.master_candidate == True and
+ ((node.offline and not self.op.offline == False) or
+ (node.drained and not self.op.drained == False))):
+      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
+                                 " to master_candidate" % node.name)
return
node = self.node
result = []
+ changed_mc = False
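+    # track master candidate changes: the node only needs to be re-added to
+    # the cluster context (job queue propagation) when this status changes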
if self.op.offline is not None:
node.offline = self.op.offline
result.append(("offline", str(self.op.offline)))
- if self.op.offline == True and node.master_candidate:
- node.master_candidate = False
- result.append(("master_candidate", "auto-demotion due to offline"))
+ if self.op.offline == True:
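+      # taking a node offline also demotes it from master candidate and
+      # clears the drained flag, since the two states exclude each other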
+ if node.master_candidate:
+ node.master_candidate = False
+ changed_mc = True
+ result.append(("master_candidate", "auto-demotion due to offline"))
+ if node.drained:
+ node.drained = False
+ result.append(("drained", "clear drained status due to offline"))
if self.op.master_candidate is not None:
node.master_candidate = self.op.master_candidate
+ changed_mc = True
result.append(("master_candidate", str(self.op.master_candidate)))
if self.op.master_candidate == False:
rrc = self.rpc.call_node_demote_from_mc(node.name)
if msg:
self.LogWarning("Node failed to demote itself: %s" % msg)
+ if self.op.drained is not None:
+ node.drained = self.op.drained
+ result.append(("drained", str(self.op.drained)))
+ if self.op.drained == True:
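+        # draining a node likewise demotes it from master candidate and
+        # clears the offline flag, mirroring the offline handling above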
+ if node.master_candidate:
+ node.master_candidate = False
+ changed_mc = True
+ result.append(("master_candidate", "auto-demotion due to drain"))
+ if node.offline:
+ node.offline = False
+ result.append(("offline", "clear offline status due to drain"))
+
# this will trigger configuration file update, if needed
self.cfg.Update(node)
# this will trigger job queue propagation or cleanup
- if self.op.node_name != self.cfg.GetMasterNode():
+ if changed_mc:
self.context.ReaddNode(node)
return result
ignored.
"""
- result = True
+ all_result = True
for disk in instance.disks:
for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
lu.cfg.SetDiskID(top_disk, node)
result = lu.rpc.call_blockdev_shutdown(node, top_disk)
- if result.failed or not result.data:
- logging.error("Could not shutdown block device %s on node %s",
- disk.iv_name, node)
+ msg = result.RemoteFailMsg()
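+      # a non-empty RemoteFailMsg() means the remote shutdown failed; this
+      # is only tolerated for the primary node when ignore_primary is set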
+ if msg:
+ lu.LogWarning("Could not shutdown block device %s on node %s: %s",
+ disk.iv_name, node, msg)
if not ignore_primary or node != instance.primary_node:
- result = False
- return result
+ all_result = False
+ return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
constants.INSTANCE_REBOOT_HARD]:
result = self.rpc.call_instance_reboot(node_current, instance,
reboot_type, extra_args)
- if result.failed or not result.data:
- raise errors.OpExecError("Could not reboot instance")
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Could not reboot instance: %s" % msg)
else:
- if not self.rpc.call_instance_shutdown(node_current, instance):
- raise errors.OpExecError("could not shutdown instance for full reboot")
+ result = self.rpc.call_instance_shutdown(node_current, instance)
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Could not shutdown instance for"
+ " full reboot: %s" % msg)
_ShutdownInstanceDisks(self, instance)
_StartInstanceDisks(self, instance, ignore_secondaries)
result = self.rpc.call_instance_start(node_current, instance, extra_args)
node_current = instance.primary_node
self.cfg.MarkInstanceDown(instance.name)
result = self.rpc.call_instance_shutdown(node_current, instance)
- if result.failed or not result.data:
- self.proc.LogWarning("Could not shutdown instance")
+ msg = result.RemoteFailMsg()
+ if msg:
+ self.proc.LogWarning("Could not shutdown instance: %s" % msg)
_ShutdownInstanceDisks(self, instance)
instance.name, instance.primary_node)
result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
- if result.failed or not result.data:
+ msg = result.RemoteFailMsg()
+ if msg:
if self.op.ignore_failures:
- feedback_fn("Warning: can't shutdown instance")
+ feedback_fn("Warning: can't shutdown instance: %s" % msg)
else:
- raise errors.OpExecError("Could not shutdown instance %s on node %s" %
- (instance.name, instance.primary_node))
+ raise errors.OpExecError("Could not shutdown instance %s on"
+ " node %s: %s" %
+ (instance.name, instance.primary_node, msg))
logging.info("Removing block devices for instance %s", instance.name)
target_node = secondary_nodes[0]
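+    # the target node will run the instance after the failover, so it must
+    # be online and not drained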
_CheckNodeOnline(self, target_node)
+ _CheckNodeNotDrained(self, target_node)
# check memory requirements on the secondary node
_CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
instance.name, bep[constants.BE_MEMORY],
instance.name, source_node)
result = self.rpc.call_instance_shutdown(source_node, instance)
- if result.failed or not result.data:
+ msg = result.RemoteFailMsg()
+ if msg:
if self.op.ignore_consistency:
self.proc.LogWarning("Could not shutdown instance %s on node %s."
- " Proceeding"
- " anyway. Please make sure node %s is down",
- instance.name, source_node, source_node)
+ " Proceeding anyway. Please make sure node"
+ " %s is down. Error details: %s",
+ instance.name, source_node, source_node, msg)
else:
- raise errors.OpExecError("Could not shutdown instance %s on node %s" %
- (instance.name, source_node))
+ raise errors.OpExecError("Could not shutdown instance %s on"
+ " node %s: %s" %
+ (instance.name, source_node, msg))
feedback_fn("* deactivating the instance's disks on source node")
if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
secondary_nodes = instance.secondary_nodes
if not secondary_nodes:
- raise errors.ProgrammerError("no secondary node but using "
- "drbd8 disk template")
+ raise errors.ConfigurationError("No secondary node but using"
+ " drbd8 disk template")
i_be = self.cfg.GetClusterInfo().FillBE(instance)
(brlist, target_node))
if not self.op.cleanup:
+ _CheckNodeNotDrained(self, target_node)
result = self.rpc.call_instance_migratable(instance.primary_node,
instance)
msg = result.RemoteFailMsg()
iv_name="disk/%d" % disk_index,
logical_id=(file_driver,
"%s/disk%d" % (file_storage_dir,
- idx)),
+ disk_index)),
mode=disk["mode"])
disks.append(disk_dev)
else:
"""
logging.info("Removing block devices for instance %s", instance.name)
- result = True
+ all_result = True
for device in instance.disks:
for node, disk in device.ComputeNodeTree(instance.primary_node):
lu.cfg.SetDiskID(disk, node)
- result = lu.rpc.call_blockdev_remove(node, disk)
- if result.failed or not result.data:
- lu.proc.LogWarning("Could not remove block device %s on node %s,"
- " continuing anyway", device.iv_name, node)
- result = False
+ msg = lu.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
+ if msg:
+ lu.LogWarning("Could not remove block device %s on node %s,"
+ " continuing anyway: %s", device.iv_name, node, msg)
+ all_result = False
if instance.disk_template == constants.DT_FILE:
file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
file_storage_dir)
if result.failed or not result.data:
logging.error("Could not remove directory '%s'", file_storage_dir)
- result = False
+ all_result = False
- return result
+ return all_result
def _ComputeDiskSize(disk_template, disks):
",".join(enabled_hvs)))
# check hypervisor parameter syntax (locally)
-
+ utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
filled_hvp = cluster.FillDict(cluster.hvparams[self.op.hypervisor],
self.op.hvparams)
hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
hv_type.CheckParameterSyntax(filled_hvp)
# fill and remember the beparams dict
- utils.CheckBEParams(self.op.beparams)
+ utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
self.be_full = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
self.op.beparams)
if pnode.offline:
raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
pnode.name)
+ if pnode.drained:
+ raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
+ pnode.name)
self.secondaries = []
if self.op.snode == pnode.name:
raise errors.OpPrereqError("The secondary node cannot be"
" the primary node.")
- self.secondaries.append(self.op.snode)
_CheckNodeOnline(self, self.op.snode)
+ _CheckNodeNotDrained(self, self.op.snode)
+ self.secondaries.append(self.op.snode)
nodenames = [pnode.name] + self.secondaries
n1 = self.new_node = remote_node
n2 = self.oth_node = instance.primary_node
self.tgt_node = self.sec_node
+ _CheckNodeNotDrained(self, remote_node)
else:
raise errors.ProgrammerError("Unhandled disk replace mode")
result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
if result.failed or not result.data:
for new_lv in new_lvs:
- result = self.rpc.call_blockdev_remove(tgt_node, new_lv)
- if result.failed or not result.data:
- warning("Can't rollback device %s", hint="manually cleanup unused"
- " logical volumes")
+ msg = self.rpc.call_blockdev_remove(tgt_node, new_lv).RemoteFailMsg()
+ if msg:
+ warning("Can't rollback device %s: %s", dev, msg,
+                    hint="manually clean up the unused logical volumes")
raise errors.OpExecError("Can't add local storage to drbd")
dev.children = new_lvs
info("remove logical volumes for %s" % name)
for lv in old_lvs:
cfg.SetDiskID(lv, tgt_node)
- result = self.rpc.call_blockdev_remove(tgt_node, lv)
- if result.failed or not result.data:
- warning("Can't remove old LV", hint="manually remove unused LVs")
+ msg = self.rpc.call_blockdev_remove(tgt_node, lv).RemoteFailMsg()
+ if msg:
+ warning("Can't remove old LV: %s" % msg,
+ hint="manually remove unused LVs")
continue
def _ExecD8Secondary(self, feedback_fn):
# we have new devices, shutdown the drbd on the old secondary
info("shutting down drbd for disk/%d on old node" % idx)
cfg.SetDiskID(dev, old_node)
- result = self.rpc.call_blockdev_shutdown(old_node, dev)
- if result.failed or not result.data:
- warning("Failed to shutdown drbd for disk/%d on old node" % idx,
+ msg = self.rpc.call_blockdev_shutdown(old_node, dev).RemoteFailMsg()
+ if msg:
+ warning("Failed to shutdown drbd for disk/%d on old node: %s" %
+ (idx, msg),
hint="Please cleanup this device manually as soon as possible")
info("detaching primary drbds from the network (=> standalone)")
info("remove logical volumes for disk/%d" % idx)
for lv in old_lvs:
cfg.SetDiskID(lv, old_node)
- result = self.rpc.call_blockdev_remove(old_node, lv)
- if result.failed or not result.data:
- warning("Can't remove LV on old secondary",
+ msg = self.rpc.call_blockdev_remove(old_node, lv).RemoteFailMsg()
+ if msg:
+ warning("Can't remove LV on old secondary: %s", msg,
hint="Cleanup stale volumes by hand")
def Exec(self, feedback_fn):
self.op.hvparams or self.op.beparams):
raise errors.OpPrereqError("No changes submitted")
- utils.CheckBEParams(self.op.beparams)
-
# Disk validation
disk_addremove = 0
for disk_op, disk_dict in self.op.disks:
del i_hvdict[key]
except KeyError:
pass
- elif val == constants.VALUE_NONE:
- i_hvdict[key] = None
else:
i_hvdict[key] = val
cluster = self.cfg.GetClusterInfo()
+ utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
hv_new = cluster.FillDict(cluster.hvparams[instance.hypervisor],
i_hvdict)
# local check
else:
i_bedict[key] = val
cluster = self.cfg.GetClusterInfo()
+ utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
be_new = cluster.FillDict(cluster.beparams[constants.BEGR_DEFAULT],
i_bedict)
self.be_new = be_new # the new actual values
device_idx = len(instance.disks)
for node, disk in device.ComputeNodeTree(instance.primary_node):
self.cfg.SetDiskID(disk, node)
- rpc_result = self.rpc.call_blockdev_remove(node, disk)
- if rpc_result.failed or not rpc_result.data:
- self.proc.LogWarning("Could not remove disk/%d on node %s,"
- " continuing anyway", device_idx, node)
+ msg = self.rpc.call_blockdev_remove(node, disk).RemoteFailMsg()
+ if msg:
+ self.LogWarning("Could not remove disk/%d on node %s: %s,"
+ " continuing anyway", device_idx, node, msg)
result.append(("disk/%d" % device_idx, "remove"))
elif disk_op == constants.DDM_ADD:
# add a new disk
# This is wrong node name, not a non-locked node
raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
_CheckNodeOnline(self, self.dst_node.name)
+ _CheckNodeNotDrained(self, self.dst_node.name)
# instance disk type verification
for disk in self.instance.disks:
if self.op.shutdown:
# shutdown the instance, but not the disks
result = self.rpc.call_instance_shutdown(src_node, instance)
- result.Raise()
- if not result.data:
- raise errors.OpExecError("Could not shutdown instance %s on node %s" %
- (instance.name, src_node))
+ msg = result.RemoteFailMsg()
+ if msg:
+ raise errors.OpExecError("Could not shutdown instance %s on"
+ " node %s: %s" %
+ (instance.name, src_node, msg))
vgname = self.cfg.GetVGName()
self.LogWarning("Could not export block device %s from node %s to"
" node %s", dev.logical_id[1], src_node,
dst_node.name)
- result = self.rpc.call_blockdev_remove(src_node, dev)
- if result.failed or not result.data:
+ msg = self.rpc.call_blockdev_remove(src_node, dev).RemoteFailMsg()
+ if msg:
self.LogWarning("Could not remove snapshot block device %s from node"
- " %s", dev.logical_id[1], src_node)
+ " %s: %s", dev.logical_id[1], src_node, msg)
result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
if result.failed or not result.data:
"primary_ip": ninfo.primary_ip,
"secondary_ip": ninfo.secondary_ip,
"offline": ninfo.offline,
+ "drained": ninfo.drained,
"master_candidate": ninfo.master_candidate,
}