X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/a0c9776a0019a37e6533633875a50204827c3947..88cd08aaaba8ad68c59228551c75424fb86c1904:/lib/cmdlib.py?ds=sidebyside

diff --git a/lib/cmdlib.py b/lib/cmdlib.py
index 4c8584c..0023cac 100644
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -670,22 +670,33 @@ def _BuildInstanceHookEnvByObject(lu, instance, override=None):
   return _BuildInstanceHookEnv(**args)


-def _AdjustCandidatePool(lu):
+def _AdjustCandidatePool(lu, exceptions):
   """Adjust the candidate pool after node operations.

   """
-  mod_list = lu.cfg.MaintainCandidatePool()
+  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
   if mod_list:
     lu.LogInfo("Promoted nodes to master candidate role: %s",
                ", ".join(node.name for node in mod_list))
     for name in mod_list:
       lu.context.ReaddNode(name)
-  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
+  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
   if mc_now > mc_max:
     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
                (mc_now, mc_max))


+def _DecideSelfPromotion(lu, exceptions=None):
+  """Decide whether I should promote myself as a master candidate.
+
+  """
+  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
+  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
+  # the new node will increase mc_max with one, so:
+  mc_should = min(mc_should + 1, cp_size)
+  return mc_now < mc_should
+
+
 def _CheckNicsBridgesExist(lu, target_nics, target_node,
                            profile=constants.PP_DEFAULT):
   """Check that the brigdes needed by a list of nics exist.
@@ -711,6 +722,26 @@ def _CheckInstanceBridgesExist(lu, instance, node=None):
   _CheckNicsBridgesExist(lu, instance.nics, node)


+def _CheckOSVariant(os, name):
+  """Check whether an OS name conforms to the os variants specification.
+
+  @type os: L{objects.OS}
+  @param os: OS object to check
+  @type name: string
+  @param name: OS name passed by the user, to check for validity
+
+  """
+  if not os.supported_variants:
+    return
+  try:
+    variant = name.split("+", 1)[1]
+  except IndexError:
+    raise errors.OpPrereqError("OS name must include a variant")
+
+  if variant not in os.supported_variants:
+    raise errors.OpPrereqError("Unsupported OS variant")
+
+
 def _GetNodeInstancesInner(cfg, fn):
   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]

@@ -1594,7 +1625,6 @@ class LURepairDiskSizes(NoHooksLU):
         if full_name is None:
           raise errors.OpPrereqError("Instance '%s' not known" % name)
         self.wanted_names.append(full_name)
-      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
       self.needed_locks = {
         locking.LEVEL_NODE: [],
         locking.LEVEL_INSTANCE: self.wanted_names,
@@ -1624,6 +1654,29 @@ class LURepairDiskSizes(NoHooksLU):
     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                              in self.wanted_names]

+  def _EnsureChildSizes(self, disk):
+    """Ensure children of the disk have the needed disk size.
+
+    This is valid mainly for DRBD8 and fixes an issue where the
+    children have smaller disk size.
+
+    @param disk: an L{ganeti.objects.Disk} object
+
+    """
+    if disk.dev_type == constants.LD_DRBD8:
+      assert disk.children, "Empty children for DRBD8?"
+      fchild = disk.children[0]
+      mismatch = fchild.size < disk.size
+      if mismatch:
+        self.LogInfo("Child disk has size %d, parent %d, fixing",
+                     fchild.size, disk.size)
+        fchild.size = disk.size
+
+      # and we recurse on this child only, not on the metadev
+      return self._EnsureChildSizes(fchild) or mismatch
+    else:
+      return False
+
   def Exec(self, feedback_fn):
     """Verify the size of cluster disks.

@@ -1640,8 +1693,11 @@ class LURepairDiskSizes(NoHooksLU):

     changed = []
     for node, dskl in per_node_disks.items():
-      result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
-      if result.RemoteFailMsg():
+      newl = [v[2].Copy() for v in dskl]
+      for dsk in newl:
+        self.cfg.SetDiskID(dsk, node)
+      result = self.rpc.call_blockdev_getsizes(node, newl)
+      if result.fail_msg:
         self.LogWarning("Failure in blockdev_getsizes call to node"
                         " %s, ignoring", node)
         continue
@@ -1666,6 +1722,9 @@ class LURepairDiskSizes(NoHooksLU):
           disk.size = size
           self.cfg.Update(instance)
          changed.append((instance.name, idx, size))
+        if self._EnsureChildSizes(disk):
+          self.cfg.Update(instance)
+          changed.append((instance.name, idx, disk.size))

     return changed

@@ -1876,8 +1935,7 @@ class LUSetClusterParams(LogicalUnit):
       invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
       if invalid_hvs:
         raise errors.OpPrereqError("Enabled hypervisors contains invalid"
-                                   " entries: %s" %
-                                   utils.CommaJoin(invalid_hvs))
+                                   " entries: %s" % " ,".join(invalid_hvs))
     else:
       self.hv_list = cluster.enabled_hypervisors

@@ -1918,7 +1976,7 @@ class LUSetClusterParams(LogicalUnit):
     if self.op.candidate_pool_size is not None:
       self.cluster.candidate_pool_size = self.op.candidate_pool_size
       # we need to update the pool size here, otherwise the save will fail
-      _AdjustCandidatePool(self)
+      _AdjustCandidatePool(self, [])

     self.cfg.Update(self.cluster)

@@ -2106,7 +2164,9 @@ class LUDiagnoseOS(NoHooksLU):
   _OP_REQP = ["output_fields", "names"]
   REQ_BGL = False
   _FIELDS_STATIC = utils.FieldSet()
-  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
+  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
+  # Fields that need calculation of global os validity
+  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

   def ExpandNames(self):
     if self.op.names:
@@ -2154,14 +2214,14 @@ class LUDiagnoseOS(NoHooksLU):
     for node_name, nr in rlist.items():
       if nr.fail_msg or not nr.payload:
         continue
-      for name, path, status, diagnose in nr.payload:
+      for name, path, status, diagnose, variants in nr.payload:
         if name not in all_os:
           # build a list of nodes for this os containing empty lists
           # for each node in node_list
           all_os[name] = {}
           for nname in good_nodes:
             all_os[name][nname] = []
-        all_os[name][node_name].append((path, status, diagnose))
+        all_os[name][node_name].append((path, status, diagnose, variants))
     return all_os

@@ -2172,18 +2232,38 @@ class LUDiagnoseOS(NoHooksLU):
     node_data = self.rpc.call_os_diagnose(valid_nodes)
     pol = self._DiagnoseByOS(valid_nodes, node_data)
     output = []
+    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
+    calc_variants = "variants" in self.op.output_fields
+
     for os_name, os_data in pol.items():
       row = []
+      if calc_valid:
+        valid = True
+        variants = None
+        for osl in os_data.values():
+          valid = valid and osl and osl[0][1]
+          if not valid:
+            variants = None
+            break
+          if calc_variants:
+            node_variants = osl[0][3]
+            if variants is None:
+              variants = node_variants
+            else:
+              variants = [v for v in variants if v in node_variants]
+
       for field in self.op.output_fields:
         if field == "name":
           val = os_name
         elif field == "valid":
-          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
+          val = valid
         elif field == "node_status":
           # this is just a copy of the dict
           val = {}
           for node_name, nos_list in os_data.items():
             val[node_name] = nos_list
+        elif field == "variants":
+          val = variants
         else:
           raise errors.ParameterError(field)
         row.append(val)
@@ -2254,6 +2334,8 @@ class LURemoveNode(LogicalUnit):
     logging.info("Stopping the node daemon and removing configs from node %s",
                  node.name)

+    # Promote nodes to master candidate as needed
+    _AdjustCandidatePool(self, exceptions=[node.name])
     self.context.RemoveNode(node.name)

     # Run post hooks on the node before it's removed
@@ -2269,9 +2351,6 @@ class LURemoveNode(LogicalUnit):
       self.LogWarning("Errors encountered on the remote node while leaving"
                       " the cluster: %s", msg)

-    # Promote nodes to master candidate as needed
-    _AdjustCandidatePool(self)
-

 class LUQueryNodes(NoHooksLU):
   """Logical unit for querying nodes.
@@ -2279,6 +2358,10 @@ class LUQueryNodes(NoHooksLU):
   """
   _OP_REQP = ["output_fields", "names", "use_locking"]
   REQ_BGL = False
+
+  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
+                    "master_candidate", "offline", "drained"]
+
   _FIELDS_DYNAMIC = utils.FieldSet(
     "dtotal", "dfree",
     "mtotal", "mnode", "mfree",
@@ -2286,16 +2369,12 @@ class LUQueryNodes(NoHooksLU):
     "ctotal", "cnodes", "csockets",
     )

-  _FIELDS_STATIC = utils.FieldSet(
-    "name", "pinst_cnt", "sinst_cnt",
+  _FIELDS_STATIC = utils.FieldSet(*[
+    "pinst_cnt", "sinst_cnt",
     "pinst_list", "sinst_list",
     "pip", "sip", "tags",
-    "serial_no", "ctime", "mtime",
-    "master_candidate",
     "master",
-    "offline",
-    "drained",
-    "role",
+    "role"] + _SIMPLE_FIELDS
     )

   def ExpandNames(self):
@@ -2396,8 +2475,8 @@ class LUQueryNodes(NoHooksLU):
     for node in nodelist:
       node_output = []
       for field in self.op.output_fields:
-        if field == "name":
-          val = node.name
+        if field in self._SIMPLE_FIELDS:
+          val = getattr(node, field)
         elif field == "pinst_list":
           val = list(node_to_primary[node.name])
         elif field == "sinst_list":
@@ -2412,20 +2491,8 @@ class LUQueryNodes(NoHooksLU):
           val = node.secondary_ip
         elif field == "tags":
           val = list(node.GetTags())
-        elif field == "serial_no":
-          val = node.serial_no
-        elif field == "ctime":
-          val = node.ctime
-        elif field == "mtime":
-          val = node.mtime
-        elif field == "master_candidate":
-          val = node.master_candidate
         elif field == "master":
           val = node.name == master_node
-        elif field == "offline":
-          val = node.offline
-        elif field == "drained":
-          val = node.drained
         elif self._FIELDS_DYNAMIC.Matches(field):
           val = live_data[node.name].get(field, None)
         elif field == "role":
@@ -2778,15 +2845,12 @@ class LUAddNode(LogicalUnit):
       raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                  " based ping to noded port")

-    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
     if self.op.readd:
       exceptions = [node]
     else:
       exceptions = []
-    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
-    # the new node will increase mc_max with one, so:
-    mc_max = min(mc_max + 1, cp_size)
-    self.master_candidate = mc_now < mc_max
+
+    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

     if self.op.readd:
       self.new_node = self.cfg.GetNodeInfo(node)
@@ -2873,7 +2937,8 @@ class LUAddNode(LogicalUnit):
       nl_payload = result[verifier].payload[constants.NV_NODELIST]
       if nl_payload:
         for failed in nl_payload:
-          feedback_fn("ssh/hostname verification failed %s -> %s" %
+          feedback_fn("ssh/hostname verification failed"
+                      " (checking from %s): %s" %
                       (verifier, nl_payload[failed]))
         raise errors.OpExecError("ssh/hostname verification failed.")

@@ -2885,7 +2950,7 @@ class LUAddNode(LogicalUnit):
       # and make sure the new node will not have old files around
       if not new_node.master_candidate:
         result = self.rpc.call_node_demote_from_mc(new_node.name)
-        msg = result.RemoteFailMsg()
+        msg = result.fail_msg
         if msg:
           self.LogWarning("Node failed to demote itself from master"
                           " candidate status: %s" % msg)
@@ -2953,14 +3018,22 @@ class LUSetNodeParams(LogicalUnit):
       raise errors.OpPrereqError("The master role can be changed"
                                  " only via masterfailover")

-    if ((self.op.master_candidate == False or self.op.offline == True or
-         self.op.drained == True) and node.master_candidate):
+    # Boolean value that tells us whether we're offlining or draining the node
+    offline_or_drain = self.op.offline == True or self.op.drained == True
+    deoffline_or_drain = self.op.offline == False or self.op.drained == False
+
+    if (node.master_candidate and
+        (self.op.master_candidate == False or offline_or_drain)):
       cp_size = self.cfg.GetClusterInfo().candidate_pool_size
-      num_candidates, _ = self.cfg.GetMasterCandidateStats()
-      if num_candidates <= cp_size:
+      mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
+      if mc_now <= cp_size:
         msg = ("Not enough master candidates (desired"
-               " %d, new value will be %d)" % (cp_size, num_candidates-1))
-        if self.op.force:
+               " %d, new value will be %d)" % (cp_size, mc_now-1))
+        # Only allow forcing the operation if it's an offline/drain operation,
+        # and we could not possibly promote more nodes.
+        # FIXME: this can still lead to issues if in any way another node which
+        # could be promoted appears in the meantime.
+        if self.op.force and offline_or_drain and mc_should == mc_max:
           self.LogWarning(msg)
         else:
           raise errors.OpPrereqError(msg)
@@ -2971,6 +3044,13 @@ class LUSetNodeParams(LogicalUnit):
       raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                  " to master_candidate" % node.name)

+    # If we're being deofflined/drained, we'll MC ourself if needed
+    if (deoffline_or_drain and not offline_or_drain and not
+        self.op.master_candidate == True):
+      self.op.master_candidate = _DecideSelfPromotion(self)
+      if self.op.master_candidate:
+        self.LogInfo("Autopromoting node to master candidate")
+
     return

   def Exec(self, feedback_fn):
@@ -3013,7 +3093,7 @@ class LUSetNodeParams(LogicalUnit):
           changed_mc = True
           result.append(("master_candidate", "auto-demotion due to drain"))
           rrc = self.rpc.call_node_demote_from_mc(node.name)
-          msg = rrc.RemoteFailMsg()
+          msg = rrc.fail_msg
           if msg:
             self.LogWarning("Node failed to demote itself: %s" % msg)
         if node.offline:
@@ -3114,6 +3194,7 @@ class LUQueryClusterInfo(NoHooksLU):
       "file_storage_dir": cluster.file_storage_dir,
       "ctime": cluster.ctime,
       "mtime": cluster.mtime,
+      "uuid": cluster.uuid,
       "tags": list(cluster.GetTags()),
       }

@@ -3687,6 +3768,7 @@ class LUReinstallInstance(LogicalUnit):
                                  instance.primary_node))

     self.op.os_type = getattr(self.op, "os_type", None)
+    self.op.force_variant = getattr(self.op, "force_variant", False)
     if self.op.os_type is not None:
       # OS verification
       pnode = self.cfg.GetNodeInfo(
@@ -3697,6 +3779,8 @@ class LUReinstallInstance(LogicalUnit):
       result = self.rpc.call_os_get(pnode.name, self.op.os_type)
       result.Raise("OS '%s' not in supported OS list for primary node %s" %
                    (self.op.os_type, pnode.name), prereq=True)
+      if not self.op.force_variant:
+        _CheckOSVariant(result.payload, self.op.os_type)

     self.instance = instance

@@ -3983,6 +4067,8 @@ class LUQueryInstances(NoHooksLU):
   """
   _OP_REQP = ["output_fields", "names", "use_locking"]
   REQ_BGL = False
+  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
+                    "serial_no", "ctime", "mtime", "uuid"]
   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                     "admin_state",
                                     "disk_template", "ip", "mac", "bridge",
@@ -3995,9 +4081,8 @@ class LUQueryInstances(NoHooksLU):
                                     r"(nic)\.(bridge)/([0-9]+)",
                                     r"(nic)\.(macs|ips|modes|links|bridges)",
                                     r"(disk|nic)\.(count)",
-                                    "serial_no", "hypervisor", "hvparams",
-                                    "ctime", "mtime",
-                                    ] +
+                                    "hvparams",
+                                    ] + _SIMPLE_FIELDS +
                                   ["hv/%s" % name
                                    for name in constants.HVS_PARAMETERS] +
                                   ["be/%s" % name
@@ -4077,7 +4162,7 @@ class LUQueryInstances(NoHooksLU):
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
-        if result.RemoteFailMsg():
+        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
@@ -4100,10 +4185,8 @@ class LUQueryInstances(NoHooksLU):
                                  nic.nicparams) for nic in instance.nics]
       for field in self.op.output_fields:
         st_match = self._FIELDS_STATIC.Matches(field)
-        if field == "name":
-          val = instance.name
-        elif field == "os":
-          val = instance.os
+        if field in self._SIMPLE_FIELDS:
+          val = getattr(instance, field)
         elif field == "pnode":
           val = instance.primary_node
         elif field == "snodes":
@@ -4180,16 +4263,6 @@ class LUQueryInstances(NoHooksLU):
           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
         elif field == "tags":
           val = list(instance.GetTags())
-        elif field == "serial_no":
-          val = instance.serial_no
-        elif field == "ctime":
-          val = instance.ctime
-        elif field == "mtime":
-          val = instance.mtime
-        elif field == "network_port":
-          val = instance.network_port
-        elif field == "hypervisor":
-          val = instance.hypervisor
         elif field == "hvparams":
           val = i_hv
         elif (field.startswith(HVPREFIX) and
@@ -5207,7 +5280,7 @@ def _CreateDisks(lu, instance, to_skip=None, target_node=None):
     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
     result.Raise("Failed to create directory '%s' on"
-                 " node %s: %s" % (file_storage_dir, pnode))
+                 " node %s" % (file_storage_dir, pnode))

   # Note: this needs to be kept in sync with adding of disks in
   # LUSetInstanceParams
@@ -5258,10 +5331,10 @@ def _RemoveDisks(lu, instance, target_node=None):

   if instance.disk_template == constants.DT_FILE:
     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
-    if target_node is node:
-      tgt = instance.primary_node
+    if target_node:
+      tgt = target_node
     else:
-      tgt = instance.target_node
+      tgt = instance.primary_node
     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
     if result.fail_msg:
       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
@@ -5530,9 +5603,15 @@ class LUCreateInstance(LogicalUnit):
       self.op.src_path = src_path = \
         os.path.join(constants.EXPORT_DIR, src_path)

+      # On import force_variant must be True, because if we forced it at
+      # initial install, our only chance when importing it back is that it
+      # works again!
+      self.op.force_variant = True
+
     else: # INSTANCE_CREATE
       if getattr(self.op, "os_type", None) is None:
         raise errors.OpPrereqError("No guest OS specified")
+      self.op.force_variant = getattr(self.op, "force_variant", False)

   def _RunAllocator(self):
     """Run the allocator based on input opcode.
@@ -5762,6 +5841,8 @@ class LUCreateInstance(LogicalUnit):
     result = self.rpc.call_os_get(pnode.name, self.op.os_type)
     result.Raise("OS '%s' not in supported os list for primary node %s" %
                  (self.op.os_type, pnode.name), prereq=True)
+    if not self.op.force_variant:
+      _CheckOSVariant(result.payload, self.op.os_type)

     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

@@ -7030,6 +7111,7 @@ class LUQueryInstanceData(NoHooksLU):
         "serial_no": instance.serial_no,
         "mtime": instance.mtime,
         "ctime": instance.ctime,
+        "uuid": instance.uuid,
         }

      result[instance.name] = idict