return _BuildInstanceHookEnv(**args)
-def _AdjustCandidatePool(lu):
+def _AdjustCandidatePool(lu, exceptions):
"""Adjust the candidate pool after node operations.
"""
- mod_list = lu.cfg.MaintainCandidatePool()
+ mod_list = lu.cfg.MaintainCandidatePool(exceptions)
if mod_list:
lu.LogInfo("Promoted nodes to master candidate role: %s",
", ".join(node.name for node in mod_list))
for name in mod_list:
lu.context.ReaddNode(name)
- mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
+ mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
if mc_now > mc_max:
lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
(mc_now, mc_max))
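
Note for reviewers: the new exceptions argument keeps specific nodes out
of the candidate-pool accounting. Both calling patterns appear later in
this patch; a minimal sketch of the convention:

    # Node removal: the departing node must not count towards the stats,
    # so it is listed as an exception (see the LURemoveNode hunk below):
    _AdjustCandidatePool(self, exceptions=[node.name])
    # Pool-size changes exclude nothing and pass an empty list:
    _AdjustCandidatePool(self, [])
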
+def _DecideSelfPromotion(lu, exceptions=None):
+ """Decide whether I should promote myself as a master candidate.
+
+ """
+ cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
+ mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
+ # the new node will increase mc_should by one, so:
+ mc_should = min(mc_should + 1, cp_size)
+ return mc_now < mc_should
+
+
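
A worked example of the arithmetic above, with invented numbers: if
candidate_pool_size is 10 and GetMasterCandidateStats reports mc_now = 5
and mc_should = 5, the node being added raises the target to
min(5 + 1, 10) = 6, and 5 < 6 means the node should promote itself:

    # Illustrative values only; these stand in for the lu.cfg lookups
    cp_size = 10                             # candidate_pool_size
    mc_now, mc_should = 5, 5                 # current / desired candidates
    mc_should = min(mc_should + 1, cp_size)  # the new node raises the target
    assert mc_now < mc_should                # -> promote myself
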
def _CheckNicsBridgesExist(lu, target_nics, target_node,
profile=constants.PP_DEFAULT):
"""Check that the brigdes needed by a list of nics exist.
_CheckNicsBridgesExist(lu, instance.nics, node)
+def _CheckOSVariant(os, name):
+ """Check whether an OS name conforms to the os variants specification.
+
+ @type os: L{objects.OS}
+ @param os: OS object to check
+ @type name: string
+ @param name: OS name passed by the user, to check for validity
+
+ """
+ if not os.supported_variants:
+ return
+ try:
+ variant = name.split("+", 1)[1]
+ except IndexError:
+ raise errors.OpPrereqError("OS name must include a variant")
+
+ if variant not in os.supported_variants:
+ raise errors.OpPrereqError("Unsupported OS variant")
+
+
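
For context: variants are encoded in the OS name after a "+" separator,
so a name has the form "<os>+<variant>". A minimal sketch with made-up
values:

    # Hypothetical OS with supported_variants = ["lenny", "squeeze"]:
    name = "debootstrap+lenny"
    variant = name.split("+", 1)[1]   # -> "lenny", accepted
    # A bare "debootstrap" has no "+", so split(...)[1] raises
    # IndexError, which _CheckOSVariant maps to an OpPrereqError.
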
def _GetNodeInstancesInner(cfg, fn):
return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
if full_name is None:
raise errors.OpPrereqError("Instance '%s' not known" % name)
self.wanted_names.append(full_name)
- self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
self.needed_locks = {
locking.LEVEL_NODE: [],
locking.LEVEL_INSTANCE: self.wanted_names,
self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
in self.wanted_names]
+ def _EnsureChildSizes(self, disk):
+ """Ensure children of the disk have the needed disk size.
+
+ This is valid mainly for DRBD8 and fixes an issue where the
+ children have a smaller disk size than the parent.
+
+ @param disk: an L{ganeti.objects.Disk} object
+
+ """
+ if disk.dev_type == constants.LD_DRBD8:
+ assert disk.children, "Empty children for DRBD8?"
+ fchild = disk.children[0]
+ mismatch = fchild.size < disk.size
+ if mismatch:
+ self.LogInfo("Child disk has size %d, parent %d, fixing",
+ fchild.size, disk.size)
+ fchild.size = disk.size
+
+ # and we recurse on this child only, not on the metadev
+ return self._EnsureChildSizes(fchild) or mismatch
+ else:
+ return False
+
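
The helper returns True when any size was fixed, so the caller knows to
write the instance back; it recurses only into children[0] (the data
device) and leaves the DRBD8 metadata device alone. The call site in the
Exec hunk below follows this shape:

    # Mirrors the hunk further down; disk and instance as defined there
    if self._EnsureChildSizes(disk):
      self.cfg.Update(instance)
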
def Exec(self, feedback_fn):
"""Verify the size of cluster disks.
changed = []
for node, dskl in per_node_disks.items():
- result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
- if result.RemoteFailMsg():
+ newl = [v[2].Copy() for v in dskl]
+ for dsk in newl:
+ self.cfg.SetDiskID(dsk, node)
+ result = self.rpc.call_blockdev_getsizes(node, newl)
+ if result.fail_msg:
self.LogWarning("Failure in blockdev_getsizes call to node"
" %s, ignoring", node)
continue
disk.size = size
self.cfg.Update(instance)
changed.append((instance.name, idx, size))
+ if self._EnsureChildSizes(disk):
+ self.cfg.Update(instance)
+ changed.append((instance.name, idx, disk.size))
return changed
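
For reference, Exec keeps returning a list of (instance_name, disk_index,
new_size) tuples; the new child-size pass appends entries of the same
shape, e.g. (values invented):

    changed = [("instance1.example.com", 0, 1024)]
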
invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
if invalid_hvs:
raise errors.OpPrereqError("Enabled hypervisors contains invalid"
- " entries: %s" %
- utils.CommaJoin(invalid_hvs))
+ " entries: %s" % " ,".join(invalid_hvs))
else:
self.hv_list = cluster.enabled_hypervisors
if self.op.candidate_pool_size is not None:
self.cluster.candidate_pool_size = self.op.candidate_pool_size
# we need to update the pool size here, otherwise the save will fail
- _AdjustCandidatePool(self)
+ _AdjustCandidatePool(self, [])
self.cfg.Update(self.cluster)
_OP_REQP = ["output_fields", "names"]
REQ_BGL = False
_FIELDS_STATIC = utils.FieldSet()
- _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
+ _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
+ # Fields that need calculation of global OS validity
+ _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
def ExpandNames(self):
if self.op.names:
for node_name, nr in rlist.items():
if nr.fail_msg or not nr.payload:
continue
- for name, path, status, diagnose in nr.payload:
+ for name, path, status, diagnose, variants in nr.payload:
if name not in all_os:
# build a list of nodes for this os containing empty lists
# for each node in node_list
all_os[name] = {}
for nname in good_nodes:
all_os[name][nname] = []
- all_os[name][node_name].append((path, status, diagnose))
+ all_os[name][node_name].append((path, status, diagnose, variants))
return all_os
def Exec(self, feedback_fn):
node_data = self.rpc.call_os_diagnose(valid_nodes)
pol = self._DiagnoseByOS(valid_nodes, node_data)
output = []
+ calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
+ calc_variants = "variants" in self.op.output_fields
+
for os_name, os_data in pol.items():
row = []
+ if calc_valid:
+ valid = True
+ variants = None
+ for osl in os_data.values():
+ # use bool() so an empty node result yields False, not []
+ valid = bool(valid and osl and osl[0][1])
+ if not valid:
+ variants = None
+ break
+ if calc_variants:
+ node_variants = osl[0][3]
+ if variants is None:
+ variants = node_variants
+ else:
+ variants = [v for v in variants if v in node_variants]
+
for field in self.op.output_fields:
if field == "name":
val = os_name
elif field == "valid":
- val = utils.all([osl and osl[0][1] for osl in os_data.values()])
+ val = valid
elif field == "node_status":
# this is just a copy of the dict
val = {}
for node_name, nos_list in os_data.items():
val[node_name] = nos_list
+ elif field == "variants":
+ val = variants
else:
raise errors.ParameterError(field)
row.append(val)
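
The variants value computed above is a running intersection: the first
node seeds the list and every further node filters it, so only variants
available on all nodes survive. With invented per-node lists:

    variants = None
    for node_variants in (["lenny", "squeeze"], ["squeeze", "sid"]):
      if variants is None:
        variants = node_variants   # first node seeds the list
      else:
        variants = [v for v in variants if v in node_variants]
    # variants == ["squeeze"]
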
logging.info("Stopping the node daemon and removing configs from node %s",
node.name)
+ # Promote nodes to master candidate as needed
+ _AdjustCandidatePool(self, exceptions=[node.name])
self.context.RemoveNode(node.name)
# Run post hooks on the node before it's removed
self.LogWarning("Errors encountered on the remote node while leaving"
" the cluster: %s", msg)
- # Promote nodes to master candidate as needed
- _AdjustCandidatePool(self)
-
class LUQueryNodes(NoHooksLU):
"""Logical unit for querying nodes.
"""
_OP_REQP = ["output_fields", "names", "use_locking"]
REQ_BGL = False
+
+ _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
+ "master_candidate", "offline", "drained"]
+
_FIELDS_DYNAMIC = utils.FieldSet(
"dtotal", "dfree",
"mtotal", "mnode", "mfree",
"ctotal", "cnodes", "csockets",
)
- _FIELDS_STATIC = utils.FieldSet(
- "name", "pinst_cnt", "sinst_cnt",
+ _FIELDS_STATIC = utils.FieldSet(*[
+ "pinst_cnt", "sinst_cnt",
"pinst_list", "sinst_list",
"pip", "sip", "tags",
- "serial_no", "ctime", "mtime",
- "master_candidate",
"master",
- "offline",
- "drained",
- "role",
+ "role"] + _SIMPLE_FIELDS
)
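
The _SIMPLE_FIELDS refactoring works because these field names match the
node object's attribute names one-to-one, so a single getattr branch can
replace a string of elifs. A self-contained sketch (NodeStub is invented
for illustration):

    class NodeStub(object):
      name = "node1.example.com"
      serial_no = 7
      offline = False

    for field in ("name", "serial_no", "offline"):
      val = getattr(NodeStub, field)   # one branch instead of three
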
def ExpandNames(self):
for node in nodelist:
node_output = []
for field in self.op.output_fields:
- if field == "name":
- val = node.name
+ if field in self._SIMPLE_FIELDS:
+ val = getattr(node, field)
elif field == "pinst_list":
val = list(node_to_primary[node.name])
elif field == "sinst_list":
val = node.secondary_ip
elif field == "tags":
val = list(node.GetTags())
- elif field == "serial_no":
- val = node.serial_no
- elif field == "ctime":
- val = node.ctime
- elif field == "mtime":
- val = node.mtime
- elif field == "master_candidate":
- val = node.master_candidate
elif field == "master":
val = node.name == master_node
- elif field == "offline":
- val = node.offline
- elif field == "drained":
- val = node.drained
elif self._FIELDS_DYNAMIC.Matches(field):
val = live_data[node.name].get(field, None)
elif field == "role":
raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
" based ping to noded port")
- cp_size = self.cfg.GetClusterInfo().candidate_pool_size
if self.op.readd:
exceptions = [node]
else:
exceptions = []
- mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
- # the new node will increase mc_max with one, so:
- mc_max = min(mc_max + 1, cp_size)
- self.master_candidate = mc_now < mc_max
+
+ self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
if self.op.readd:
self.new_node = self.cfg.GetNodeInfo(node)
nl_payload = result[verifier].payload[constants.NV_NODELIST]
if nl_payload:
for failed in nl_payload:
- feedback_fn("ssh/hostname verification failed %s -> %s" %
+ feedback_fn("ssh/hostname verification failed"
+ " (checking from %s): %s" %
(verifier, nl_payload[failed]))
raise errors.OpExecError("ssh/hostname verification failed.")
# and make sure the new node will not have old files around
if not new_node.master_candidate:
result = self.rpc.call_node_demote_from_mc(new_node.name)
- msg = result.RemoteFailMsg()
+ msg = result.fail_msg
if msg:
self.LogWarning("Node failed to demote itself from master"
" candidate status: %s" % msg)
raise errors.OpPrereqError("The master role can be changed"
" only via masterfailover")
- if ((self.op.master_candidate == False or self.op.offline == True or
- self.op.drained == True) and node.master_candidate):
+ # Booleans telling us whether we're offlining/draining the node or the reverse
+ offline_or_drain = self.op.offline == True or self.op.drained == True
+ deoffline_or_drain = self.op.offline == False or self.op.drained == False
+
+ if (node.master_candidate and
+ (self.op.master_candidate == False or offline_or_drain)):
cp_size = self.cfg.GetClusterInfo().candidate_pool_size
- num_candidates, _ = self.cfg.GetMasterCandidateStats()
- if num_candidates <= cp_size:
+ mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
+ if mc_now <= cp_size:
msg = ("Not enough master candidates (desired"
- " %d, new value will be %d)" % (cp_size, num_candidates-1))
- if self.op.force:
+ " %d, new value will be %d)" % (cp_size, mc_now-1))
+ # Only allow forcing the operation if it's an offline/drain operation,
+ # and we could not possibly promote more nodes.
+ # FIXME: this can still lead to issues if in any way another node which
+ # could be promoted appears in the meantime.
+ if self.op.force and offline_or_drain and mc_should == mc_max:
self.LogWarning(msg)
else:
raise errors.OpPrereqError(msg)
raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
" to master_candidate" % node.name)
+ # If we're being deofflined/drained, we'll MC ourself if needed
+ if (deoffline_or_drain and not offline_or_drain and
+     self.op.master_candidate != True):
+ self.op.master_candidate = _DecideSelfPromotion(self)
+ if self.op.master_candidate:
+ self.LogInfo("Autopromoting node to master candidate")
+
return
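
Tracing the two flags above with invented op values makes the control
flow easier to review: an offline request may demote the node, while the
self-promotion path only triggers on the way back:

    offline, drained = True, None                              # offlining
    offline_or_drain = offline == True or drained == True      # True
    deoffline_or_drain = offline == False or drained == False  # False
    # -> demotion checks run; the auto-promotion branch is skipped
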
def Exec(self, feedback_fn):
changed_mc = True
result.append(("master_candidate", "auto-demotion due to drain"))
rrc = self.rpc.call_node_demote_from_mc(node.name)
- msg = rrc.RemoteFailMsg()
+ msg = rrc.fail_msg
if msg:
self.LogWarning("Node failed to demote itself: %s" % msg)
if node.offline:
"file_storage_dir": cluster.file_storage_dir,
"ctime": cluster.ctime,
"mtime": cluster.mtime,
+ "uuid": cluster.uuid,
"tags": list(cluster.GetTags()),
}
instance.primary_node))
self.op.os_type = getattr(self.op, "os_type", None)
+ self.op.force_variant = getattr(self.op, "force_variant", False)
if self.op.os_type is not None:
# OS verification
pnode = self.cfg.GetNodeInfo(
result = self.rpc.call_os_get(pnode.name, self.op.os_type)
result.Raise("OS '%s' not in supported OS list for primary node %s" %
(self.op.os_type, pnode.name), prereq=True)
+ if not self.op.force_variant:
+ _CheckOSVariant(result.payload, self.op.os_type)
self.instance = instance
"""
_OP_REQP = ["output_fields", "names", "use_locking"]
REQ_BGL = False
+ _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
+ "serial_no", "ctime", "mtime", "uuid"]
_FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
"admin_state",
"disk_template", "ip", "mac", "bridge",
r"(nic)\.(bridge)/([0-9]+)",
r"(nic)\.(macs|ips|modes|links|bridges)",
r"(disk|nic)\.(count)",
- "serial_no", "hypervisor", "hvparams",
- "ctime", "mtime",
- ] +
+ "hvparams",
+ ] + _SIMPLE_FIELDS +
["hv/%s" % name
for name in constants.HVS_PARAMETERS] +
["be/%s" % name
if result.offline:
# offline nodes will be in both lists
off_nodes.append(name)
- if result.RemoteFailMsg():
+ if result.fail_msg:
bad_nodes.append(name)
else:
if result.payload:
nic.nicparams) for nic in instance.nics]
for field in self.op.output_fields:
st_match = self._FIELDS_STATIC.Matches(field)
- if field == "name":
- val = instance.name
- elif field == "os":
- val = instance.os
+ if field in self._SIMPLE_FIELDS:
+ val = getattr(instance, field)
elif field == "pnode":
val = instance.primary_node
elif field == "snodes":
val = _ComputeDiskSize(instance.disk_template, disk_sizes)
elif field == "tags":
val = list(instance.GetTags())
- elif field == "serial_no":
- val = instance.serial_no
- elif field == "ctime":
- val = instance.ctime
- elif field == "mtime":
- val = instance.mtime
- elif field == "network_port":
- val = instance.network_port
- elif field == "hypervisor":
- val = instance.hypervisor
elif field == "hvparams":
val = i_hv
elif (field.startswith(HVPREFIX) and
result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
result.Raise("Failed to create directory '%s' on"
- " node %s: %s" % (file_storage_dir, pnode))
+ " node %s" % (file_storage_dir, pnode))
# Note: this needs to be kept in sync with adding of disks in
# LUSetInstanceParams
if instance.disk_template == constants.DT_FILE:
file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
- if target_node is node:
- tgt = instance.primary_node
+ if target_node:
+ tgt = target_node
else:
- tgt = instance.target_node
+ tgt = instance.primary_node
result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
if result.fail_msg:
lu.LogWarning("Could not remove directory '%s' on node %s: %s",
self.op.src_path = src_path = \
os.path.join(constants.EXPORT_DIR, src_path)
+ # On import, force_variant must be True, because if we forced the
+ # variant at the initial install, our only chance of a working import
+ # is to accept it again
+ self.op.force_variant = True
+
else: # INSTANCE_CREATE
if getattr(self.op, "os_type", None) is None:
raise errors.OpPrereqError("No guest OS specified")
+ self.op.force_variant = getattr(self.op, "force_variant", False)
def _RunAllocator(self):
"""Run the allocator based on input opcode.
result = self.rpc.call_os_get(pnode.name, self.op.os_type)
result.Raise("OS '%s' not in supported os list for primary node %s" %
(self.op.os_type, pnode.name), prereq=True)
+ if not self.op.force_variant:
+ _CheckOSVariant(result.payload, self.op.os_type)
_CheckNicsBridgesExist(self, self.nics, self.pnode.name)
"serial_no": instance.serial_no,
"mtime": instance.mtime,
"ctime": instance.ctime,
+ "uuid": instance.uuid,
}
result[instance.name] = idict