diff --git a/lib/cmdlib.py b/lib/cmdlib.py index d29c2b5..5a7e3fe 100644 --- a/lib/cmdlib.py +++ b/lib/cmdlib.py @@ -1,7 +1,7 @@ # # -# Copyright (C) 2006, 2007, 2008 Google Inc. +# Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -36,6 +36,9 @@ import platform import logging import copy import OpenSSL +import socket +import tempfile +import shutil from ganeti import ssh from ganeti import utils @@ -49,184 +52,39 @@ from ganeti import ssconf from ganeti import uidpool from ganeti import compat from ganeti import masterd +from ganeti import netutils +from ganeti import ht import ganeti.masterd.instance # pylint: disable-msg=W0611 - -# Modifiable default values; need to define these here before the -# actual LUs - -def _EmptyList(): - """Returns an empty list. - - """ - return [] - - -def _EmptyDict(): - """Returns an empty dict. - - """ - return {} - - -#: The without-default default value -_NoDefault = object() - - -#: The no-type (value to complex to check it in the type system) -_NoType = object() - - -# Some basic types -def _TNotNone(val): - """Checks if the given value is not None. - - """ - return val is not None - - -def _TNone(val): - """Checks if the given value is None. - - """ - return val is None - - -def _TBool(val): - """Checks if the given value is a boolean. - - """ - return isinstance(val, bool) - - -def _TInt(val): - """Checks if the given value is an integer. - - """ - return isinstance(val, int) - - -def _TFloat(val): - """Checks if the given value is a float. - - """ - return isinstance(val, float) - - -def _TString(val): - """Checks if the given value is a string. - - """ - return isinstance(val, basestring) - - -def _TTrue(val): - """Checks if a given value evaluates to a boolean True value. - - """ - return bool(val) - - -def _TElemOf(target_list): - """Builds a function that checks if a given value is a member of a list. - - """ - return lambda val: val in target_list - - -# Container types -def _TList(val): - """Checks if the given value is a list. - - """ - return isinstance(val, list) - - -def _TDict(val): - """Checks if the given value is a dictionary. - - """ - return isinstance(val, dict) - - -# Combinator types -def _TAnd(*args): - """Combine multiple functions using an AND operation. - - """ - def fn(val): - return compat.all(t(val) for t in args) - return fn - - -def _TOr(*args): - """Combine multiple functions using an AND operation. - - """ - def fn(val): - return compat.any(t(val) for t in args) - return fn - - -# Type aliases - -#: a non-empty string -_TNonEmptyString = _TAnd(_TString, _TTrue) - - -#: a maybe non-empty string -_TMaybeString = _TOr(_TNonEmptyString, _TNone) - - -#: a maybe boolean (bool or none) -_TMaybeBool = _TOr(_TBool, _TNone) - - -#: a positive integer -_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0) - -#: a strictly positive integer -_TStrictPositiveInt = _TAnd(_TInt, lambda v: v > 0) - - -def _TListOf(my_type): - """Checks if a given value is a list with all elements of the same type. - - """ - return _TAnd(_TList, - lambda lst: compat.all(my_type(v) for v in lst)) - - -def _TDictOf(key_type, val_type): - """Checks a dict type for the type of its key/values.
- - """ - return _TAnd(_TDict, - lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys()) - and compat.all(val_type(v) - for v in my_dict.values()))) - - # Common opcode attributes #: output fields for a query operation -_POutputFields = ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)) +_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)) #: the shutdown timeout _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, - _TPositiveInt) + ht.TPositiveInt) #: the force parameter -_PForce = ("force", False, _TBool) +_PForce = ("force", False, ht.TBool) #: a required instance name (for single-instance LUs) -_PInstanceName = ("instance_name", _NoDefault, _TNonEmptyString) +_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString) +#: Whether to ignore offline nodes +_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool) #: a required node name (for single-node LUs) -_PNodeName = ("node_name", _NoDefault, _TNonEmptyString) +_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString) + +#: the migration type (live/non-live) +_PMigrationMode = ("mode", None, + ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES))) + +#: the obsolete 'live' mode (boolean) +_PMigrationLive = ("live", None, ht.TMaybeBool) # End types @@ -277,6 +135,7 @@ class LogicalUnit(object): self.recalculate_locks = {} self.__ssh = None # logging + self.Log = processor.Log # pylint: disable-msg=C0103 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103 self.LogStep = processor.LogStep # pylint: disable-msg=C0103 @@ -294,7 +153,7 @@ class LogicalUnit(object): op_id = self.op.OP_ID for attr_name, aval, test in self._OP_PARAMS: if not hasattr(op, attr_name): - if aval == _NoDefault: + if aval == ht.NoDefault: raise errors.OpPrereqError("Required parameter '%s.%s' missing" % (op_id, attr_name), errors.ECODE_INVAL) else: @@ -304,7 +163,7 @@ class LogicalUnit(object): dval = aval setattr(self.op, attr_name, dval) attr_val = getattr(op, attr_name) - if test == _NoType: + if test == ht.NoType: # no tests here continue if not callable(test): @@ -377,11 +236,11 @@ class LogicalUnit(object): # Acquire all nodes and one instance self.needed_locks = { locking.LEVEL_NODE: locking.ALL_SET, - locking.LEVEL_INSTANCE: ['instance1.example.tld'], + locking.LEVEL_INSTANCE: ['instance1.example.com'], } # Acquire just two nodes self.needed_locks = { - locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'], + locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'], } # Acquire no locks self.needed_locks = {} # No, you can't leave it to the default value None @@ -1076,9 +935,8 @@ def _CheckOSVariant(os_obj, name): """ if not os_obj.supported_variants: return - try: - variant = name.split("+", 1)[1] - except IndexError: + variant = objects.OS.GetVariant(name) + if not variant: raise errors.OpPrereqError("OS name must include a variant", errors.ECODE_INVAL) @@ -1237,7 +1095,6 @@ class LUDestroyCluster(LogicalUnit): """ master = self.cfg.GetMasterNode() - modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup # Run post hooks on master node before it's removed hm = self.proc.hmclass(self.rpc.call_hooks_runner, self) @@ -1250,11 +1107,6 @@ class LUDestroyCluster(LogicalUnit): result = self.rpc.call_node_stop_master(master, False) result.Raise("Could not disable the master role") - if modify_ssh_setup: - priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS) - 
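The _OP_PARAMS machinery above pairs each opcode attribute with a default value and a callable check, and LogicalUnit.CheckArguments walks those triples; callable defaults (the reason ht.EmptyList/ht.EmptyDict exist) are invoked so each opcode gets a fresh container. A minimal, self-contained sketch of how the combinators compose and how the loop consumes them, standing in for the real ganeti.ht module rather than reproducing it:

NoDefault = object()  # marker object: the parameter is required

def TBool(val):
  return isinstance(val, bool)

def TAnd(*checks):
  return lambda val: all(c(val) for c in checks)

def TListOf(check):
  return TAnd(lambda v: isinstance(v, list),
              lambda v: all(check(i) for i in v))

TNonEmptyString = TAnd(lambda v: isinstance(v, str), bool)

_OP_PARAMS = [
  ("output_fields", NoDefault, TListOf(TNonEmptyString)),
  ("use_locking", False, TBool),
]

def check_arguments(op_attrs):
  """Mirrors the validation loop in LogicalUnit.CheckArguments."""
  for attr_name, default, test in _OP_PARAMS:
    if attr_name not in op_attrs:
      if default is NoDefault:
        raise ValueError("Required parameter %r missing" % attr_name)
      if callable(default):
        default = default()  # e.g. ht.EmptyList: a fresh [] per opcode
      op_attrs[attr_name] = default
    if not test(op_attrs[attr_name]):
      raise ValueError("Parameter %r fails its check" % attr_name)
  return op_attrs

print(check_arguments({"output_fields": ["name", "status"]}))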
utils.CreateBackup(priv_key) - utils.CreateBackup(pub_key) - return master @@ -1298,11 +1150,11 @@ class LUVerifyCluster(LogicalUnit): HPATH = "cluster-verify" HTYPE = constants.HTYPE_CLUSTER _OP_PARAMS = [ - ("skip_checks", _EmptyList, - _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))), - ("verbose", False, _TBool), - ("error_codes", False, _TBool), - ("debug_simulate_errors", False, _TBool), + ("skip_checks", ht.EmptyList, + ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))), + ("verbose", False, ht.TBool), + ("error_codes", False, ht.TBool), + ("debug_simulate_errors", False, ht.TBool), ] REQ_BGL = False @@ -1431,14 +1283,11 @@ class LUVerifyCluster(LogicalUnit): self.bad = self.bad or cond def _VerifyNode(self, ninfo, nresult): - """Run multiple tests against a node. - - Test list: + """Perform some basic validation on data returned from a node. - - compares ganeti version - - checks vg existence and size > 20G - - checks config file checksum - - checks ssh to other nodes + - check the result data structure is well formed and has all the + mandatory fields + - check ganeti version @type ninfo: L{objects.Node} @param ninfo: the node to check @@ -1648,20 +1497,24 @@ class LUVerifyCluster(LogicalUnit): _ErrorIf(test, self.EINSTANCEWRONGNODE, instance, "instance should not run on node %s", node) - def _VerifyOrphanVolumes(self, node_vol_should, node_image): + def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved): """Verify if there are any unknown volumes in the cluster. The .os, .swap and backup volumes are ignored. All other volumes are reported as unknown. + @type reserved: L{ganeti.utils.FieldSet} + @param reserved: a FieldSet of reserved volume names + """ for node, n_img in node_image.items(): if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: # skip non-healthy nodes continue for volume in n_img.volumes: - test = (node not in node_vol_should or - volume not in node_vol_should[node]) + test = ((node not in node_vol_should or + volume not in node_vol_should[node]) and + not reserved.Matches(volume)) self._ErrorIf(test, self.ENODEORPHANLV, node, "volume %s is unknown", volume) @@ -2226,7 +2079,8 @@ class LUVerifyCluster(LogicalUnit): "instance lives on ghost node %s", node) feedback_fn("* Verifying orphan volumes") - self._VerifyOrphanVolumes(node_vol_should, node_image) + reserved = utils.FieldSet(*cluster.reserved_lvs) + self._VerifyOrphanVolumes(node_vol_should, node_image, reserved) feedback_fn("* Verifying orphan instances") self._VerifyOrphanInstances(instancelist, node_image) @@ -2377,7 +2231,7 @@ class LURepairDiskSizes(NoHooksLU): """Verifies the cluster disks sizes. """ - _OP_PARAMS = [("instances", _EmptyList, _TListOf(_TNonEmptyString))] + _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))] REQ_BGL = False def ExpandNames(self): @@ -2495,7 +2349,7 @@ class LURenameCluster(LogicalUnit): """ HPATH = "cluster-rename" HTYPE = constants.HTYPE_CLUSTER - _OP_PARAMS = [("name", _NoDefault, _TNonEmptyString)] + _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)] def BuildHooksEnv(self): """Build hooks env. @@ -2513,7 +2367,8 @@ class LURenameCluster(LogicalUnit): """Verify that the passed name is a valid one. 
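A small stand-in for utils.FieldSet as used with reserved_lvs above, assuming the class matches names against anchored regular expressions (its definition is not part of this patch):

import re

class FieldSet(object):
  """Reduced utils.FieldSet: entries are anchored regexp patterns."""
  def __init__(self, *items):
    self.items = [re.compile("^%s$" % value) for value in items]

  def Matches(self, field):
    """Returns True if any pattern matches the given name."""
    return any(m.match(field) for m in self.items)

reserved = FieldSet("xenvg/root", "xenvg/swap-.*")
print(reserved.Matches("xenvg/swap-sda1"))   # True: skipped by the check
print(reserved.Matches("xenvg/unknown-lv"))  # False: reported as orphan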
""" - hostname = utils.GetHostInfo(self.op.name) + hostname = netutils.GetHostname(name=self.op.name, + family=self.cfg.GetPrimaryIPFamily()) new_name = hostname.name self.ip = new_ip = hostname.ip @@ -2524,9 +2379,9 @@ class LURenameCluster(LogicalUnit): " cluster has changed", errors.ECODE_INVAL) if new_ip != old_ip: - if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT): + if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT): raise errors.OpPrereqError("The given cluster IP address (%s) is" - " reachable on the network. Aborting." % + " reachable on the network" % new_ip, errors.ECODE_NOTUNIQUE) self.op.name = new_name @@ -2572,6 +2427,8 @@ class LURenameCluster(LogicalUnit): self.LogWarning("Could not re-enable the master role on" " the master, please restart manually: %s", msg) + return clustername + class LUSetClusterParams(LogicalUnit): """Change the parameters of the cluster. @@ -2580,21 +2437,38 @@ class LUSetClusterParams(LogicalUnit): HPATH = "cluster-modify" HTYPE = constants.HTYPE_CLUSTER _OP_PARAMS = [ - ("vg_name", None, _TMaybeString), + ("vg_name", None, ht.TMaybeString), ("enabled_hypervisors", None, - _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)), - ("hvparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)), - ("beparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)), - ("os_hvp", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)), - ("osparams", None, _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)), - ("candidate_pool_size", None, _TOr(_TStrictPositiveInt, _TNone)), - ("uid_pool", None, _NoType), - ("add_uids", None, _NoType), - ("remove_uids", None, _NoType), - ("maintain_node_health", None, _TMaybeBool), - ("nicparams", None, _TOr(_TDict, _TNone)), - ("drbd_helper", None, _TOr(_TString, _TNone)), - ("default_iallocator", None, _TMaybeString), + ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue), + ht.TNone)), + ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict), + ht.TNone)), + ("beparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict), + ht.TNone)), + ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict), + ht.TNone)), + ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict), + ht.TNone)), + ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)), + ("uid_pool", None, ht.NoType), + ("add_uids", None, ht.NoType), + ("remove_uids", None, ht.NoType), + ("maintain_node_health", None, ht.TMaybeBool), + ("prealloc_wipe_disks", None, ht.TMaybeBool), + ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)), + ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)), + ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)), + ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)), + ("hidden_os", None, ht.TOr(ht.TListOf(\ + ht.TAnd(ht.TList, + ht.TIsLength(2), + ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))), + ht.TNone)), + ("blacklisted_os", None, ht.TOr(ht.TListOf(\ + ht.TAnd(ht.TList, + ht.TIsLength(2), + ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))), + ht.TNone)), ] REQ_BGL = False @@ -2853,6 +2727,9 @@ class LUSetClusterParams(LogicalUnit): if self.op.maintain_node_health is not None: self.cluster.maintain_node_health = self.op.maintain_node_health + if self.op.prealloc_wipe_disks is not None: + self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks + if self.op.add_uids is not None: uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids) @@ -2865,6 +2742,32 @@ class 
LUSetClusterParams(LogicalUnit): if self.op.default_iallocator is not None: self.cluster.default_iallocator = self.op.default_iallocator + if self.op.reserved_lvs is not None: + self.cluster.reserved_lvs = self.op.reserved_lvs + + def helper_os(aname, mods, desc): + desc += " OS list" + lst = getattr(self.cluster, aname) + for key, val in mods: + if key == constants.DDM_ADD: + if val in lst: + feedback_fn("OS %s already in %s, ignoring", val, desc) + else: + lst.append(val) + elif key == constants.DDM_REMOVE: + if val in lst: + lst.remove(val) + else: + feedback_fn("OS %s not found in %s, ignoring", val, desc) + else: + raise errors.ProgrammerError("Invalid modification '%s'" % key) + + if self.op.hidden_os: + helper_os("hidden_os", self.op.hidden_os, "hidden") + + if self.op.blacklisted_os: + helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted") + self.cfg.Update(self.cluster, feedback_fn) @@ -3050,12 +2953,15 @@ class LUDiagnoseOS(NoHooksLU): """ _OP_PARAMS = [ _POutputFields, - ("names", _EmptyList, _TListOf(_TNonEmptyString)), + ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), ] REQ_BGL = False + _HID = "hidden" + _BLK = "blacklisted" + _VLD = "valid" _FIELDS_STATIC = utils.FieldSet() - _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants", - "parameters", "api_versions") + _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants", + "parameters", "api_versions", _HID, _BLK) def CheckArguments(self): if self.op.names: @@ -3122,8 +3028,10 @@ class LUDiagnoseOS(NoHooksLU): node_data = self.rpc.call_os_diagnose(valid_nodes) pol = self._DiagnoseByOS(node_data) output = [] + cluster = self.cfg.GetClusterInfo() - for os_name, os_data in pol.items(): + for os_name in utils.NiceSort(pol.keys()): + os_data = pol[os_name] row = [] valid = True (variants, params, api_versions) = null_state = (set(), set(), set()) @@ -3142,10 +3050,17 @@ class LUDiagnoseOS(NoHooksLU): params.intersection_update(node_params) api_versions.intersection_update(node_api) + is_hid = os_name in cluster.hidden_os + is_blk = os_name in cluster.blacklisted_os + if ((self._HID not in self.op.output_fields and is_hid) or + (self._BLK not in self.op.output_fields and is_blk) or + (self._VLD not in self.op.output_fields and not valid)): + continue + for field in self.op.output_fields: if field == "name": val = os_name - elif field == "valid": + elif field == self._VLD: val = valid elif field == "node_status": # this is just a copy of the dict @@ -3153,11 +3068,15 @@ class LUDiagnoseOS(NoHooksLU): for node_name, nos_list in os_data.items(): val[node_name] = nos_list elif field == "variants": - val = list(variants) + val = utils.NiceSort(list(variants)) elif field == "parameters": val = list(params) elif field == "api_versions": val = list(api_versions) + elif field == self._HID: + val = is_hid + elif field == self._BLK: + val = is_blk else: raise errors.ParameterError(field) row.append(val) @@ -3257,8 +3176,11 @@ class LURemoveNode(LogicalUnit): # Remove node from our /etc/hosts if self.cfg.GetClusterInfo().modify_etc_hosts: - # FIXME: this should be done via an rpc call to node daemon - utils.RemoveHostFromEtcHosts(node.name) + master_node = self.cfg.GetMasterNode() + result = self.rpc.call_etc_hosts_modify(master_node, + constants.ETC_HOSTS_REMOVE, + node.name, None) + result.Raise("Can't update hosts file with new host data") _RedistributeAncillaryFiles(self) @@ -3269,13 +3191,14 @@ class LUQueryNodes(NoHooksLU): # pylint: disable-msg=W0142 _OP_PARAMS = [ 
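helper_os applies such (action, OS name) pairs to the stored lists, warning on no-ops instead of failing. The same logic in standalone form, with feedback_fn replaced by any logging callable and assumed values for the DDM constants:

from __future__ import print_function

DDM_ADD = "add"        # assumed value of constants.DDM_ADD
DDM_REMOVE = "remove"  # assumed value of constants.DDM_REMOVE

def apply_os_mods(lst, mods, log):
  """Mirrors helper_os: mutates lst in place, logging ignored no-ops."""
  for key, val in mods:
    if key == DDM_ADD:
      if val in lst:
        log("OS %s already in the list, ignoring" % val)
      else:
        lst.append(val)
    elif key == DDM_REMOVE:
      if val in lst:
        lst.remove(val)
      else:
        log("OS %s not found in the list, ignoring" % val)
    else:
      raise AssertionError("Invalid modification %r" % key)

hidden = ["debootstrap+default"]
apply_os_mods(hidden, [(DDM_ADD, "lenny-image"),
                       (DDM_REMOVE, "debootstrap+default")], print)
print(hidden)  # ['lenny-image']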
_POutputFields, - ("names", _EmptyList, _TListOf(_TNonEmptyString)), - ("use_locking", False, _TBool), + ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), + ("use_locking", False, ht.TBool), ] REQ_BGL = False _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid", - "master_candidate", "offline", "drained"] + "master_candidate", "offline", "drained", + "master_capable", "vm_capable"] _FIELDS_DYNAMIC = utils.FieldSet( "dtotal", "dfree", @@ -3425,8 +3348,8 @@ class LUQueryNodeVolumes(NoHooksLU): """ _OP_PARAMS = [ - ("nodes", _EmptyList, _TListOf(_TNonEmptyString)), - ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)), + ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), + ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)), ] REQ_BGL = False _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance") @@ -3508,10 +3431,10 @@ class LUQueryNodeStorage(NoHooksLU): """ _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE) _OP_PARAMS = [ - ("nodes", _EmptyList, _TListOf(_TNonEmptyString)), - ("storage_type", _NoDefault, _CheckStorageType), - ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)), - ("name", None, _TMaybeString), + ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), + ("storage_type", ht.NoDefault, _CheckStorageType), + ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)), + ("name", None, ht.TMaybeString), ] REQ_BGL = False @@ -3597,9 +3520,9 @@ class LUModifyNodeStorage(NoHooksLU): """ _OP_PARAMS = [ _PNodeName, - ("storage_type", _NoDefault, _CheckStorageType), - ("name", _NoDefault, _TNonEmptyString), - ("changes", _NoDefault, _TDict), + ("storage_type", ht.NoDefault, _CheckStorageType), + ("name", ht.NoDefault, ht.TNonEmptyString), + ("changes", ht.NoDefault, ht.TDict), ] REQ_BGL = False @@ -3647,14 +3570,21 @@ class LUAddNode(LogicalUnit): HTYPE = constants.HTYPE_NODE _OP_PARAMS = [ _PNodeName, - ("primary_ip", None, _NoType), - ("secondary_ip", None, _TMaybeString), - ("readd", False, _TBool), + ("primary_ip", None, ht.NoType), + ("secondary_ip", None, ht.TMaybeString), + ("readd", False, ht.TBool), + ("group", None, ht.TMaybeString) ] def CheckArguments(self): + self.primary_ip_family = self.cfg.GetPrimaryIPFamily() # validate/normalize the node name - self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name) + self.hostname = netutils.GetHostname(name=self.op.node_name, + family=self.primary_ip_family) + self.op.node_name = self.hostname.name + if self.op.readd and self.op.group: + raise errors.OpPrereqError("Cannot pass a node group when a node is" + " being readded", errors.ECODE_INVAL) def BuildHooksEnv(self): """Build hooks env. @@ -3683,19 +3613,21 @@ class LUAddNode(LogicalUnit): Any errors are signaled by raising errors.OpPrereqError. 
""" - node_name = self.op.node_name cfg = self.cfg - - dns_data = utils.GetHostInfo(node_name) - - node = dns_data.name - primary_ip = self.op.primary_ip = dns_data.ip + hostname = self.hostname + node = hostname.name + primary_ip = self.op.primary_ip = hostname.ip if self.op.secondary_ip is None: + if self.primary_ip_family == netutils.IP6Address.family: + raise errors.OpPrereqError("When using a IPv6 primary address, a valid" + " IPv4 address must be given as secondary", + errors.ECODE_INVAL) self.op.secondary_ip = primary_ip - if not utils.IsValidIP4(self.op.secondary_ip): - raise errors.OpPrereqError("Invalid secondary IP given", - errors.ECODE_INVAL) + secondary_ip = self.op.secondary_ip + if not netutils.IP4Address.IsValid(secondary_ip): + raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4" + " address" % secondary_ip, errors.ECODE_INVAL) node_list = cfg.GetNodeList() if not self.op.readd and node in node_list: @@ -3744,13 +3676,13 @@ class LUAddNode(LogicalUnit): errors.ECODE_INVAL) # checks reachability - if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT): + if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT): raise errors.OpPrereqError("Node not reachable by ping", errors.ECODE_ENVIRON) if not newbie_singlehomed: # check reachability from my secondary ip to newbie's secondary ip - if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT, + if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT, source=myself.secondary_ip): raise errors.OpPrereqError("Node secondary ip not reachable by TCP" " based ping to noded port", @@ -3767,11 +3699,15 @@ class LUAddNode(LogicalUnit): self.new_node = self.cfg.GetNodeInfo(node) assert self.new_node is not None, "Can't retrieve locked node %s" % node else: + node_group = cfg.LookupNodeGroup(self.op.group) self.new_node = objects.Node(name=node, primary_ip=primary_ip, secondary_ip=secondary_ip, master_candidate=self.master_candidate, - offline=False, drained=False) + master_capable=True, + vm_capable=True, + offline=False, drained=False, + group=node_group) def Exec(self, feedback_fn): """Adds the new node to the cluster. @@ -3807,27 +3743,14 @@ class LUAddNode(LogicalUnit): " node version %s" % (constants.PROTOCOL_VERSION, result.payload)) - # setup ssh on node - if self.cfg.GetClusterInfo().modify_ssh_setup: - logging.info("Copy ssh key to node %s", node) - priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS) - keyarray = [] - keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB, - constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB, - priv_key, pub_key] - - for i in keyfiles: - keyarray.append(utils.ReadFile(i)) - - result = self.rpc.call_node_add(node, keyarray[0], keyarray[1], - keyarray[2], keyarray[3], keyarray[4], - keyarray[5]) - result.Raise("Cannot transfer ssh keys to the new node") - # Add node to our /etc/hosts, and add key to known_hosts if self.cfg.GetClusterInfo().modify_etc_hosts: - # FIXME: this should be done via an rpc call to node daemon - utils.AddHostToEtcHosts(new_node.name) + master_node = self.cfg.GetMasterNode() + result = self.rpc.call_etc_hosts_modify(master_node, + constants.ETC_HOSTS_ADD, + self.hostname.name, + self.hostname.ip) + result.Raise("Can't update hosts file with new host data") if new_node.secondary_ip != new_node.primary_ip: result = self.rpc.call_node_has_ip_address(new_node.name, @@ -3877,23 +3800,39 @@ class LUAddNode(LogicalUnit): class LUSetNodeParams(LogicalUnit): """Modifies the parameters of a node. 
+ @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline) + to the node role (as _ROLE_*) + @cvar _R2F: a dictionary from node role to tuples of flags + @cvar _FLAGS: a list of attribute names corresponding to the flags + """ HPATH = "node-modify" HTYPE = constants.HTYPE_NODE _OP_PARAMS = [ _PNodeName, - ("master_candidate", None, _TMaybeBool), - ("offline", None, _TMaybeBool), - ("drained", None, _TMaybeBool), - ("auto_promote", False, _TBool), + ("master_candidate", None, ht.TMaybeBool), + ("offline", None, ht.TMaybeBool), + ("drained", None, ht.TMaybeBool), + ("auto_promote", False, ht.TBool), + ("master_capable", None, ht.TMaybeBool), _PForce, ] REQ_BGL = False + (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4) + _F2R = { + (True, False, False): _ROLE_CANDIDATE, + (False, True, False): _ROLE_DRAINED, + (False, False, True): _ROLE_OFFLINE, + (False, False, False): _ROLE_REGULAR, + } + _R2F = dict((v, k) for k, v in _F2R.items()) + _FLAGS = ["master_candidate", "drained", "offline"] def CheckArguments(self): self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name) - all_mods = [self.op.offline, self.op.master_candidate, self.op.drained] - if all_mods.count(None) == 3: + all_mods = [self.op.offline, self.op.master_candidate, self.op.drained, + self.op.master_capable] + if all_mods.count(None) == len(all_mods): raise errors.OpPrereqError("Please pass at least one modification", errors.ECODE_INVAL) if all_mods.count(True) > 1: @@ -3901,17 +3840,14 @@ class LUSetNodeParams(LogicalUnit): " state at the same time", errors.ECODE_INVAL) - # Boolean value that tells us whether we're offlining or draining the node - self.offline_or_drain = (self.op.offline == True or - self.op.drained == True) - self.deoffline_or_drain = (self.op.offline == False or - self.op.drained == False) + # Boolean value that tells us whether we might be demoting from MC self.might_demote = (self.op.master_candidate == False or - self.offline_or_drain) + self.op.offline == True or + self.op.drained == True or + self.op.master_capable == False) self.lock_all = self.op.auto_promote and self.might_demote - def ExpandNames(self): if self.lock_all: self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET} @@ -3929,6 +3865,7 @@ class LUSetNodeParams(LogicalUnit): "MASTER_CANDIDATE": str(self.op.master_candidate), "OFFLINE": str(self.op.offline), "DRAINED": str(self.op.drained), + "MASTER_CAPABLE": str(self.op.master_capable), } nl = [self.cfg.GetMasterNode(), self.op.node_name] @@ -3948,9 +3885,13 @@ class LUSetNodeParams(LogicalUnit): # we can't change the master's node flags if self.op.node_name == self.cfg.GetMasterNode(): raise errors.OpPrereqError("The master role can be changed" - " only via masterfailover", + " only via master-failover", errors.ECODE_INVAL) + if self.op.master_candidate and not node.master_capable: + raise errors.OpPrereqError("Node %s is not master capable, cannot make" + " it a master candidate" % node.name, + errors.ECODE_STATE) if node.master_candidate and self.might_demote and not self.lock_all: assert not self.op.auto_promote, "auto-promote set but lock_all not" @@ -3961,70 +3902,76 @@ class LUSetNodeParams(LogicalUnit): if mc_remaining < mc_should: raise errors.OpPrereqError("Not enough master candidates, please" " pass auto_promote to allow promotion", - errors.ECODE_INVAL) + errors.ECODE_STATE) - if (self.op.master_candidate == True and - ((node.offline and not self.op.offline == False) or - (node.drained and not self.op.drained == False))): - raise 
errors.OpPrereqError("Node '%s' is offline or drained, can't set" - " to master_candidate" % node.name, - errors.ECODE_INVAL) + self.old_flags = old_flags = (node.master_candidate, + node.drained, node.offline) + assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags) + self.old_role = self._F2R[old_flags] + + # Check for ineffective changes + for attr in self._FLAGS: + if (getattr(self.op, attr) == False and getattr(node, attr) == False): + self.LogInfo("Ignoring request to unset flag %s, already unset", attr) + setattr(self.op, attr, None) + + # Past this point, any flag change to False means a transition + # away from the respective state, as only real changes are kept # If we're being deofflined/drained, we'll MC ourself if needed - if (self.deoffline_or_drain and not self.offline_or_drain and not - self.op.master_candidate == True and not node.master_candidate): - self.op.master_candidate = _DecideSelfPromotion(self) - if self.op.master_candidate: - self.LogInfo("Autopromoting node to master candidate") + if (self.op.drained == False or self.op.offline == False or + (self.op.master_capable and not node.master_capable)): + if _DecideSelfPromotion(self): + self.op.master_candidate = True + self.LogInfo("Auto-promoting node to master candidate") - return + # If we're no longer master capable, we'll demote ourselves from MC + if self.op.master_capable == False and node.master_candidate: + self.LogInfo("Demoting from master candidate") + self.op.master_candidate = False def Exec(self, feedback_fn): """Modifies a node. """ node = self.node + old_role = self.old_role + + assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1 + + # compute new flags + if self.op.master_candidate: + new_role = self._ROLE_CANDIDATE + elif self.op.drained: + new_role = self._ROLE_DRAINED + elif self.op.offline: + new_role = self._ROLE_OFFLINE + elif False in [self.op.master_candidate, self.op.drained, self.op.offline]: + # False is still in new flags, which means we're un-setting (the + # only) True flag + new_role = self._ROLE_REGULAR + else: # no new flags, nothing, keep old role + new_role = old_role result = [] - changed_mc = False - - if self.op.offline is not None: - node.offline = self.op.offline - result.append(("offline", str(self.op.offline))) - if self.op.offline == True: - if node.master_candidate: - node.master_candidate = False - changed_mc = True - result.append(("master_candidate", "auto-demotion due to offline")) - if node.drained: - node.drained = False - result.append(("drained", "clear drained status due to offline")) - - if self.op.master_candidate is not None: - node.master_candidate = self.op.master_candidate - changed_mc = True - result.append(("master_candidate", str(self.op.master_candidate))) - if self.op.master_candidate == False: - rrc = self.rpc.call_node_demote_from_mc(node.name) - msg = rrc.fail_msg - if msg: - self.LogWarning("Node failed to demote itself: %s" % msg) - - if self.op.drained is not None: - node.drained = self.op.drained - result.append(("drained", str(self.op.drained))) - if self.op.drained == True: - if node.master_candidate: - node.master_candidate = False - changed_mc = True - result.append(("master_candidate", "auto-demotion due to drain")) - rrc = self.rpc.call_node_demote_from_mc(node.name) - msg = rrc.fail_msg - if msg: - self.LogWarning("Node failed to demote itself: %s" % msg) - if node.offline: - node.offline = False - result.append(("offline", "clear offline status due to drain")) + changed_mc = [old_role, 
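The three node flags collapse into a single role through the _F2R table, and Exec derives the new role from whichever flag the opcode actually sets. The mapping and the decision chain in self-contained form:

# Flag tuples are (master_candidate, drained, offline), as in _F2R.
(ROLE_CANDIDATE, ROLE_DRAINED, ROLE_OFFLINE, ROLE_REGULAR) = range(4)

F2R = {
  (True, False, False): ROLE_CANDIDATE,
  (False, True, False): ROLE_DRAINED,
  (False, False, True): ROLE_OFFLINE,
  (False, False, False): ROLE_REGULAR,
}
R2F = dict((v, k) for k, v in F2R.items())

def compute_new_role(old_role, master_candidate, drained, offline):
  """Mirrors the role computation at the top of Exec; each argument is
  True, False or None (None meaning "not requested by the opcode")."""
  if master_candidate:
    return ROLE_CANDIDATE
  elif drained:
    return ROLE_DRAINED
  elif offline:
    return ROLE_OFFLINE
  elif False in (master_candidate, drained, offline):
    return ROLE_REGULAR  # un-setting the only True flag
  else:
    return old_role      # nothing requested: keep the old role

role = compute_new_role(ROLE_CANDIDATE, None, None, True)
print(role == ROLE_OFFLINE)  # True
print(R2F[role])             # (False, False, True) written back to the node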
new_role].count(self._ROLE_CANDIDATE) == 1 + + if self.op.master_capable is not None: + node.master_capable = self.op.master_capable + result.append(("master_capable", str(self.op.master_capable))) + + # Tell the node to demote itself, if no longer MC and not offline + if (old_role == self._ROLE_CANDIDATE and + new_role != self._ROLE_OFFLINE and new_role != old_role): + msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg + if msg: + self.LogWarning("Node failed to demote itself: %s", msg) + + new_flags = self._R2F[new_role] + for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS): + if of != nf: + result.append((desc, str(nf))) + (node.master_candidate, node.drained, node.offline) = new_flags # we locked all nodes, we adjust the CP before updating this node if self.lock_all: @@ -4099,6 +4046,11 @@ class LUQueryClusterInfo(NoHooksLU): if hv_name in cluster.enabled_hypervisors: os_hvp[os_name][hv_name] = hv_params + # Convert ip_family to ip_version + primary_ip_version = constants.IP4_VERSION + if cluster.primary_ip_family == netutils.IP6Address.family: + primary_ip_version = constants.IP6_VERSION + result = { "software_version": constants.RELEASE_VERSION, "protocol_version": constants.PROTOCOL_VERSION, @@ -4128,6 +4080,9 @@ class LUQueryClusterInfo(NoHooksLU): "tags": list(cluster.GetTags()), "uid_pool": cluster.uid_pool, "default_iallocator": cluster.default_iallocator, + "reserved_lvs": cluster.reserved_lvs, + "primary_ip_version": primary_ip_version, + "prealloc_wipe_disks": cluster.prealloc_wipe_disks, } return result @@ -4141,7 +4096,7 @@ class LUQueryConfigValues(NoHooksLU): REQ_BGL = False _FIELDS_DYNAMIC = utils.FieldSet() _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag", - "watcher_pause") + "watcher_pause", "volume_group_name") def CheckArguments(self): _CheckOutputFields(static=self._FIELDS_STATIC, @@ -4165,6 +4120,8 @@ class LUQueryConfigValues(NoHooksLU): entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE) elif field == "watcher_pause": entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE) + elif field == "volume_group_name": + entry = self.cfg.GetVGName() else: raise errors.ParameterError(field) values.append(entry) @@ -4177,7 +4134,7 @@ class LUActivateInstanceDisks(NoHooksLU): """ _OP_PARAMS = [ _PInstanceName, - ("ignore_size", False, _TBool), + ("ignore_size", False, ht.TBool), ] REQ_BGL = False @@ -4489,8 +4446,9 @@ class LUStartupInstance(LogicalUnit): _OP_PARAMS = [ _PInstanceName, _PForce, - ("hvparams", _EmptyDict, _TDict), - ("beparams", _EmptyDict, _TDict), + _PIgnoreOfflineNodes, + ("hvparams", ht.EmptyDict, ht.TDict), + ("beparams", ht.EmptyDict, ht.TDict), ] REQ_BGL = False @@ -4537,21 +4495,30 @@ class LUStartupInstance(LogicalUnit): hv_type.CheckParameterSyntax(filled_hvp) _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp) - _CheckNodeOnline(self, instance.primary_node) + self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline - bep = self.cfg.GetClusterInfo().FillBE(instance) - # check bridges existence - _CheckInstanceBridgesExist(self, instance) + if self.primary_offline and self.op.ignore_offline_nodes: + self.proc.LogWarning("Ignoring offline primary node") + + if self.op.hvparams or self.op.beparams: + self.proc.LogWarning("Overridden parameters are ignored") + else: + _CheckNodeOnline(self, instance.primary_node) + + bep = self.cfg.GetClusterInfo().FillBE(instance) - remote_info = self.rpc.call_instance_info(instance.primary_node, - instance.name, - 
instance.hypervisor) - remote_info.Raise("Error checking node %s" % instance.primary_node, - prereq=True, ecode=errors.ECODE_ENVIRON) - if not remote_info.payload: # not running already - _CheckNodeFreeMemory(self, instance.primary_node, - "starting instance %s" % instance.name, - bep[constants.BE_MEMORY], instance.hypervisor) + # check bridges existence + _CheckInstanceBridgesExist(self, instance) + + remote_info = self.rpc.call_instance_info(instance.primary_node, + instance.name, + instance.hypervisor) + remote_info.Raise("Error checking node %s" % instance.primary_node, + prereq=True, ecode=errors.ECODE_ENVIRON) + if not remote_info.payload: # not running already + _CheckNodeFreeMemory(self, instance.primary_node, + "starting instance %s" % instance.name, + bep[constants.BE_MEMORY], instance.hypervisor) def Exec(self, feedback_fn): """Start the instance. @@ -4562,16 +4529,20 @@ class LUStartupInstance(LogicalUnit): self.cfg.MarkInstanceUp(instance.name) - node_current = instance.primary_node + if self.primary_offline: + assert self.op.ignore_offline_nodes + self.proc.LogInfo("Primary node offline, marked instance as started") + else: + node_current = instance.primary_node - _StartInstanceDisks(self, instance, force) + _StartInstanceDisks(self, instance, force) - result = self.rpc.call_instance_start(node_current, instance, - self.op.hvparams, self.op.beparams) - msg = result.fail_msg - if msg: - _ShutdownInstanceDisks(self, instance) - raise errors.OpExecError("Could not start instance: %s" % msg) + result = self.rpc.call_instance_start(node_current, instance, + self.op.hvparams, self.op.beparams) + msg = result.fail_msg + if msg: + _ShutdownInstanceDisks(self, instance) + raise errors.OpExecError("Could not start instance: %s" % msg) class LURebootInstance(LogicalUnit): @@ -4582,8 +4553,8 @@ class LURebootInstance(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("ignore_secondaries", False, _TBool), - ("reboot_type", _NoDefault, _TElemOf(constants.REBOOT_TYPES)), + ("ignore_secondaries", False, ht.TBool), + ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)), _PShutdownTimeout, ] REQ_BGL = False @@ -4663,7 +4634,8 @@ class LUShutdownInstance(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, _TPositiveInt), + _PIgnoreOfflineNodes, + ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt), ] REQ_BGL = False @@ -4690,7 +4662,14 @@ class LUShutdownInstance(LogicalUnit): self.instance = self.cfg.GetInstanceInfo(self.op.instance_name) assert self.instance is not None, \ "Cannot retrieve locked instance %s" % self.op.instance_name - _CheckNodeOnline(self, self.instance.primary_node) + + self.primary_offline = \ + self.cfg.GetNodeInfo(self.instance.primary_node).offline + + if self.primary_offline and self.op.ignore_offline_nodes: + self.proc.LogWarning("Ignoring offline primary node") + else: + _CheckNodeOnline(self, self.instance.primary_node) def Exec(self, feedback_fn): """Shutdown the instance. 
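With the new ignore_offline_nodes flag, starting or stopping an instance whose primary node is offline degrades to a pure configuration change: the instance is marked up or down and no RPC is attempted. A condensed sketch of the shutdown side, with callables standing in for cfg.MarkInstanceDown and the shutdown RPC:

from __future__ import print_function

def shutdown_instance(primary_offline, ignore_offline_nodes,
                      mark_down, do_shutdown, warn, info):
  """Condensed LUShutdownInstance flow; mark_down/do_shutdown stand for
  cfg.MarkInstanceDown and the instance_shutdown RPC."""
  if primary_offline and not ignore_offline_nodes:
    raise RuntimeError("primary node is offline")
  mark_down()  # the recorded state changes regardless
  if primary_offline:
    info("Primary node offline, marked instance as stopped")
  else:
    msg = do_shutdown()  # returns an error message or None
    if msg:
      warn("Could not shutdown instance: %s" % msg)

shutdown_instance(True, True,
                  lambda: None, lambda: None,
                  print, print)  # only the configuration is touched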
@@ -4699,13 +4678,19 @@ class LUShutdownInstance(LogicalUnit): instance = self.instance node_current = instance.primary_node timeout = self.op.timeout + self.cfg.MarkInstanceDown(instance.name) - result = self.rpc.call_instance_shutdown(node_current, instance, timeout) - msg = result.fail_msg - if msg: - self.proc.LogWarning("Could not shutdown instance: %s" % msg) - _ShutdownInstanceDisks(self, instance) + if self.primary_offline: + assert self.op.ignore_offline_nodes + self.proc.LogInfo("Primary node offline, marked instance as stopped") + else: + result = self.rpc.call_instance_shutdown(node_current, instance, timeout) + msg = result.fail_msg + if msg: + self.proc.LogWarning("Could not shutdown instance: %s" % msg) + + _ShutdownInstanceDisks(self, instance) class LUReinstallInstance(LogicalUnit): @@ -4716,8 +4701,9 @@ class LUReinstallInstance(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("os_type", None, _TMaybeString), - ("force_variant", False, _TBool), + ("os_type", None, ht.TMaybeString), + ("force_variant", False, ht.TBool), + ("osparams", None, ht.TOr(ht.TDict, ht.TNone)), ] REQ_BGL = False @@ -4755,6 +4741,18 @@ class LUReinstallInstance(LogicalUnit): # OS verification pnode = _ExpandNodeName(self.cfg, instance.primary_node) _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant) + instance_os = self.op.os_type + else: + instance_os = instance.os + + nodelist = list(instance.all_nodes) + + if self.op.osparams: + i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams) + _CheckOSParams(self, True, nodelist, instance_os, i_osdict) + self.os_inst = i_osdict # the new dict (without defaults) + else: + self.os_inst = None self.instance = instance @@ -4767,6 +4765,7 @@ class LUReinstallInstance(LogicalUnit): if self.op.os_type is not None: feedback_fn("Changing OS to '%s'..." % self.op.os_type) inst.os = self.op.os_type + # Write to configuration self.cfg.Update(inst, feedback_fn) _StartInstanceDisks(self, inst, None) @@ -4774,7 +4773,8 @@ class LUReinstallInstance(LogicalUnit): feedback_fn("Running the instance OS create scripts...") # FIXME: pass debug option from opcode to backend result = self.rpc.call_instance_os_add(inst.primary_node, inst, True, - self.op.debug_level) + self.op.debug_level, + osparams=self.os_inst) result.Raise("Could not install OS for instance %s on node %s" % (inst.name, inst.primary_node)) finally: @@ -4789,7 +4789,7 @@ class LURecreateInstanceDisks(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("disks", _EmptyList, _TListOf(_TPositiveInt)), + ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)), ] REQ_BGL = False @@ -4853,11 +4853,20 @@ class LURenameInstance(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("new_name", _NoDefault, _TNonEmptyString), - ("ignore_ip", False, _TBool), - ("check_name", True, _TBool), + ("new_name", ht.NoDefault, ht.TNonEmptyString), + ("ip_check", False, ht.TBool), + ("name_check", True, ht.TBool), ] + def CheckArguments(self): + """Check arguments. + + """ + if self.op.ip_check and not self.op.name_check: + # TODO: make the ip check more flexible and not depend on the name check + raise errors.OpPrereqError("Cannot do ip check without a name check", + errors.ECODE_INVAL) + def BuildHooksEnv(self): """Build hooks env. 
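The rename and create paths now share the same argument dependency: the address pinged by ip_check only exists once name_check has resolved the name. A sketch of that ordering, with a crude substitute for netutils.TcpPing and 1811 assumed to be constants.DEFAULT_NODED_PORT:

import socket

def tcp_ping(ip, port, timeout=2.0):
  """Crude netutils.TcpPing: True if something accepts on ip:port."""
  try:
    sock = socket.create_connection((ip, port), timeout)
  except socket.error:
    return False
  sock.close()
  return True

def check_new_name(new_name, ip_check, name_check):
  """Sketch of the LURenameInstance argument/prereq ordering."""
  if ip_check and not name_check:
    raise ValueError("Cannot do ip check without a name check")
  if not name_check:
    return new_name
  ip = socket.gethostbyname(new_name)  # stands in for netutils.GetHostname
  if ip_check and tcp_ping(ip, 1811):
    raise ValueError("IP %s of instance %s already in use" % (ip, new_name))
  return new_name

print(check_new_name("localhost", False, True))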
@@ -4883,24 +4892,21 @@ class LURenameInstance(LogicalUnit): _CheckInstanceDown(self, instance, "cannot rename") self.instance = instance - # new name verification - if self.op.check_name: - name_info = utils.GetHostInfo(self.op.new_name) - self.op.new_name = name_info.name - new_name = self.op.new_name + if self.op.name_check: + hostname = netutils.GetHostname(name=new_name) + new_name = self.op.new_name = hostname.name + if (self.op.ip_check and + netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)): + raise errors.OpPrereqError("IP %s of instance %s already in use" % + (hostname.ip, new_name), + errors.ECODE_NOTUNIQUE) instance_list = self.cfg.GetInstanceList() if new_name in instance_list: raise errors.OpPrereqError("Instance '%s' is already in the cluster" % new_name, errors.ECODE_EXISTS) - if not self.op.ignore_ip: - if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT): - raise errors.OpPrereqError("IP %s of instance %s already in use" % - (name_info.ip, new_name), - errors.ECODE_NOTUNIQUE) - def Exec(self, feedback_fn): """Reinstall the instance. @@ -4942,6 +4948,8 @@ class LURenameInstance(LogicalUnit): finally: _ShutdownInstanceDisks(self, inst) + return inst.name + class LURemoveInstance(LogicalUnit): """Remove an instance. @@ -4951,7 +4959,7 @@ class LURemoveInstance(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("ignore_failures", False, _TBool), + ("ignore_failures", False, ht.TBool), _PShutdownTimeout, ] REQ_BGL = False @@ -5037,9 +5045,9 @@ class LUQueryInstances(NoHooksLU): """ # pylint: disable-msg=W0142 _OP_PARAMS = [ - ("output_fields", _NoDefault, _TListOf(_TNonEmptyString)), - ("names", _EmptyList, _TListOf(_TNonEmptyString)), - ("use_locking", False, _TBool), + ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)), + ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), + ("use_locking", False, ht.TBool), ] REQ_BGL = False _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor", @@ -5056,14 +5064,18 @@ class LUQueryInstances(NoHooksLU): r"(nic)\.(bridge)/([0-9]+)", r"(nic)\.(macs|ips|modes|links|bridges)", r"(disk|nic)\.(count)", - "hvparams", + "hvparams", "custom_hvparams", + "custom_beparams", "custom_nicparams", ] + _SIMPLE_FIELDS + ["hv/%s" % name for name in constants.HVS_PARAMETERS if name not in constants.HVC_GLOBALS] + ["be/%s" % name for name in constants.BES_PARAMETERS]) - _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status") + _FIELDS_DYNAMIC = utils.FieldSet("oper_state", + "oper_ram", + "oper_vcpus", + "status") def CheckArguments(self): @@ -5194,6 +5206,13 @@ class LUQueryInstances(NoHooksLU): val = live_data[instance.name].get("memory", "?") else: val = "-" + elif field == "oper_vcpus": + if instance.primary_node in bad_nodes: + val = None + elif instance.name in live_data: + val = live_data[instance.name].get("vcpus", "?") + else: + val = "-" elif field == "vcpus": val = i_be[constants.BE_VCPUS] elif field == "disk_template": @@ -5224,6 +5243,8 @@ class LUQueryInstances(NoHooksLU): val = instance.nics[0].mac else: val = None + elif field == "custom_nicparams": + val = [nic.nicparams for nic in instance.nics] elif field == "sda_size" or field == "sdb_size": idx = ord(field[2]) - ord('a') try: @@ -5235,12 +5256,16 @@ class LUQueryInstances(NoHooksLU): val = _ComputeDiskSize(instance.disk_template, disk_sizes) elif field == "tags": val = list(instance.GetTags()) + elif field == "custom_hvparams": + val = instance.hvparams # not filled! 
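The new custom_* query fields return only the per-instance overrides, while the existing fields keep returning the filled result (cluster defaults with the overrides applied). Conceptually the filling is a dictionary update, shown here with made-up hypervisor parameters:

cluster_hvparams = {"kernel_path": "/boot/vmlinuz-2.6-xenU",
                    "root_path": "/dev/sda1"}
instance_hvparams = {"root_path": "/dev/xvda1"}  # what custom_hvparams shows

filled = cluster_hvparams.copy()  # roughly what cluster.FillHV computes
filled.update(instance_hvparams)  # what the plain hvparams field shows

print(instance_hvparams)  # only the override
print(filled)             # defaults plus the override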
elif field == "hvparams": val = i_hv elif (field.startswith(HVPREFIX) and field[len(HVPREFIX):] in constants.HVS_PARAMETERS and field[len(HVPREFIX):] not in constants.HVC_GLOBALS): val = i_hv.get(field[len(HVPREFIX):], None) + elif field == "custom_beparams": + val = instance.beparams elif field == "beparams": val = i_be elif (field.startswith(BEPREFIX) and @@ -5320,7 +5345,7 @@ class LUFailoverInstance(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("ignore_consistency", False, _TBool), + ("ignore_consistency", False, ht.TBool), _PShutdownTimeout, ] REQ_BGL = False @@ -5401,6 +5426,7 @@ class LUFailoverInstance(LogicalUnit): """ instance = self.instance + primary_node = self.cfg.GetNodeInfo(instance.primary_node) source_node = instance.primary_node target_node = instance.secondary_nodes[0] @@ -5424,7 +5450,7 @@ class LUFailoverInstance(LogicalUnit): self.op.shutdown_timeout) msg = result.fail_msg if msg: - if self.op.ignore_consistency: + if self.op.ignore_consistency or primary_node.offline: self.proc.LogWarning("Could not shutdown instance %s on node %s." " Proceeding anyway. Please make sure node" " %s is down. Error details: %s", @@ -5474,8 +5500,9 @@ class LUMigrateInstance(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("live", True, _TBool), - ("cleanup", False, _TBool), + _PMigrationMode, + _PMigrationLive, + ("cleanup", False, ht.TBool), ] REQ_BGL = False @@ -5487,7 +5514,7 @@ class LUMigrateInstance(LogicalUnit): self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE self._migrater = TLMigrateInstance(self, self.op.instance_name, - self.op.live, self.op.cleanup) + self.op.cleanup) self.tasklets = [self._migrater] def DeclareLocks(self, level): @@ -5504,7 +5531,7 @@ class LUMigrateInstance(LogicalUnit): source_node = instance.primary_node target_node = instance.secondary_nodes[0] env = _BuildInstanceHookEnvByObject(self, instance) - env["MIGRATE_LIVE"] = self.op.live + env["MIGRATE_LIVE"] = self._migrater.live env["MIGRATE_CLEANUP"] = self.op.cleanup env.update({ "OLD_PRIMARY": source_node, @@ -5526,7 +5553,7 @@ class LUMoveInstance(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("target_node", _NoDefault, _TNonEmptyString), + ("target_node", ht.NoDefault, ht.TNonEmptyString), _PShutdownTimeout, ] REQ_BGL = False @@ -5705,7 +5732,8 @@ class LUMigrateNode(LogicalUnit): HTYPE = constants.HTYPE_NODE _OP_PARAMS = [ _PNodeName, - ("live", False, _TBool), + _PMigrationMode, + _PMigrationLive, ] REQ_BGL = False @@ -5726,7 +5754,7 @@ class LUMigrateNode(LogicalUnit): logging.debug("Migrating instance %s", inst.name) names.append(inst.name) - tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False)) + tasklets.append(TLMigrateInstance(self, inst.name, False)) self.tasklets = tasklets @@ -5753,7 +5781,14 @@ class LUMigrateNode(LogicalUnit): class TLMigrateInstance(Tasklet): - def __init__(self, lu, instance_name, live, cleanup): + """Tasklet class for instance migration. + + @type live: boolean + @ivar live: whether the migration will be done live or non-live; + this variable is initialized only after CheckPrereq has run + + """ + def __init__(self, lu, instance_name, cleanup): """Initializes this class. """ @@ -5761,8 +5796,8 @@ class TLMigrateInstance(Tasklet): # Parameters self.instance_name = instance_name - self.live = live self.cleanup = cleanup + self.live = False # will be overridden later def CheckPrereq(self): """Check prerequisites.
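TLMigrateInstance.CheckPrereq (next hunk) folds the obsolete boolean live parameter into the new mode parameter and falls back to the hypervisor's HV_MIGRATION_MODE when neither is given. That decision in isolation, with string values assumed to match constants.HT_MIGRATION_*:

HT_MIGRATION_LIVE = "live"         # assumed constant values
HT_MIGRATION_NONLIVE = "non-live"

def resolve_migration_mode(live, mode, hv_default):
  """Returns the effective migration mode; live and mode are mutually
  exclusive, mirroring TLMigrateInstance.CheckPrereq."""
  if live is not None and mode is not None:
    raise ValueError("Only one of the 'live' and 'mode'"
                     " parameters are accepted")
  if live is not None:
    if live:
      mode = HT_MIGRATION_LIVE
    else:
      mode = HT_MIGRATION_NONLIVE
  elif mode is None:
    mode = hv_default  # the hypervisor's HV_MIGRATION_MODE
  return mode

print(resolve_migration_mode(True, None, HT_MIGRATION_NONLIVE))  # live
print(resolve_migration_mode(None, None, HT_MIGRATION_NONLIVE))  # non-live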
@@ -5803,6 +5838,25 @@ class TLMigrateInstance(Tasklet): self.instance = instance + if self.lu.op.live is not None and self.lu.op.mode is not None: + raise errors.OpPrereqError("Only one of the 'live' and 'mode'" + " parameters are accepted", + errors.ECODE_INVAL) + if self.lu.op.live is not None: + if self.lu.op.live: + self.lu.op.mode = constants.HT_MIGRATION_LIVE + else: + self.lu.op.mode = constants.HT_MIGRATION_NONLIVE + # reset the 'live' parameter to None so that repeated + # invocations of CheckPrereq do not raise an exception + self.lu.op.live = None + elif self.lu.op.mode is None: + # read the default value from the hypervisor + i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False) + self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE] + + self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE + def _WaitUntilSync(self): """Poll with custom rpc for disk sync. @@ -6264,6 +6318,58 @@ def _GetInstanceInfoText(instance): return "originstname+%s" % instance.name +def _CalcEta(time_taken, written, total_size): + """Calculates the ETA based on size written and total size. + + @param time_taken: The time taken so far + @param written: amount written so far + @param total_size: The total size of data to be written + @return: The remaining time in seconds + + """ + avg_time = time_taken / float(written) + return (total_size - written) * avg_time + + +def _WipeDisks(lu, instance): + """Wipes instance disks. + + @type lu: L{LogicalUnit} + @param lu: the logical unit on whose behalf we execute + @type instance: L{objects.Instance} + @param instance: the instance whose disks we should wipe + @return: the success of the wipe + + """ + node = instance.primary_node + for idx, device in enumerate(instance.disks): + lu.LogInfo("* Wiping disk %d", idx) + logging.info("Wiping disk %d for instance %s", idx, instance.name) + + # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but + # MAX_WIPE_CHUNK at max + wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 * + constants.MIN_WIPE_CHUNK_PERCENT) + + offset = 0 + size = device.size + last_output = 0 + start_time = time.time() + + while offset < size: + wipe_size = min(wipe_chunk_size, size - offset) + result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size) + result.Raise("Could not wipe disk %d at offset %d for size %d" % + (idx, offset, wipe_size)) + now = time.time() + offset += wipe_size + if now - last_output >= 60: + eta = _CalcEta(now - start_time, offset, size) + lu.LogInfo(" - done: %.1f%% ETA: %s" % + (offset / float(size) * 100, utils.FormatSeconds(eta))) + last_output = now + + def _CreateDisks(lu, instance, to_skip=None, target_node=None): """Create all disks for an instance.
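The wipe walks each disk in chunks of at most MAX_WIPE_CHUNK (but no smaller than MIN_WIPE_CHUNK_PERCENT of the disk) and logs an ETA at most once a minute. The loop and the _CalcEta arithmetic in runnable form, with illustrative constant values and the blockdev_wipe RPC reduced to a comment:

from __future__ import print_function
import time

MAX_WIPE_CHUNK = 1024        # illustrative, in MiB
MIN_WIPE_CHUNK_PERCENT = 10  # illustrative

def calc_eta(time_taken, written, total_size):
  """Same arithmetic as _CalcEta: extrapolate the average write rate."""
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time

def wipe_device(size, wipe_chunk, log):
  offset = 0
  last_output = 0
  start_time = time.time()
  while offset < size:
    wipe_size = min(wipe_chunk, size - offset)
    # lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size) goes here
    offset += wipe_size
    now = time.time()
    if now - last_output >= 60:
      eta = calc_eta(now - start_time, offset, size)
      log(" - done: %.1f%% ETA: %.0fs" % (offset / float(size) * 100, eta))
      last_output = now

chunk = min(MAX_WIPE_CHUNK, 10240 / 100.0 * MIN_WIPE_CHUNK_PERCENT)
wipe_device(10240, chunk, print)  # one progress line after the first chunk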
@@ -6442,33 +6548,32 @@ class LUCreateInstance(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("mode", _NoDefault, _TElemOf(constants.INSTANCE_CREATE_MODES)), - ("start", True, _TBool), - ("wait_for_sync", True, _TBool), - ("ip_check", True, _TBool), - ("name_check", True, _TBool), - ("disks", _NoDefault, _TListOf(_TDict)), - ("nics", _NoDefault, _TListOf(_TDict)), - ("hvparams", _EmptyDict, _TDict), - ("beparams", _EmptyDict, _TDict), - ("osparams", _EmptyDict, _TDict), - ("no_install", None, _TMaybeBool), - ("os_type", None, _TMaybeString), - ("force_variant", False, _TBool), - ("source_handshake", None, _TOr(_TList, _TNone)), - ("source_x509_ca", None, _TOr(_TList, _TNone)), - ("source_instance_name", None, _TMaybeString), - ("src_node", None, _TMaybeString), - ("src_path", None, _TMaybeString), - ("pnode", None, _TMaybeString), - ("snode", None, _TMaybeString), - ("iallocator", None, _TMaybeString), - ("hypervisor", None, _TMaybeString), - ("disk_template", _NoDefault, _CheckDiskTemplate), - ("identify_defaults", False, _TBool), - ("file_driver", None, _TOr(_TNone, _TElemOf(constants.FILE_DRIVER))), - ("file_storage_dir", None, _TMaybeString), - ("dry_run", False, _TBool), + ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)), + ("start", True, ht.TBool), + ("wait_for_sync", True, ht.TBool), + ("ip_check", True, ht.TBool), + ("name_check", True, ht.TBool), + ("disks", ht.NoDefault, ht.TListOf(ht.TDict)), + ("nics", ht.NoDefault, ht.TListOf(ht.TDict)), + ("hvparams", ht.EmptyDict, ht.TDict), + ("beparams", ht.EmptyDict, ht.TDict), + ("osparams", ht.EmptyDict, ht.TDict), + ("no_install", None, ht.TMaybeBool), + ("os_type", None, ht.TMaybeString), + ("force_variant", False, ht.TBool), + ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)), + ("source_x509_ca", None, ht.TMaybeString), + ("source_instance_name", None, ht.TMaybeString), + ("src_node", None, ht.TMaybeString), + ("src_path", None, ht.TMaybeString), + ("pnode", None, ht.TMaybeString), + ("snode", None, ht.TMaybeString), + ("iallocator", None, ht.TMaybeString), + ("hypervisor", None, ht.TMaybeString), + ("disk_template", ht.NoDefault, _CheckDiskTemplate), + ("identify_defaults", False, ht.TBool), + ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))), + ("file_storage_dir", None, ht.TMaybeString), ] REQ_BGL = False @@ -6482,10 +6587,12 @@ class LUCreateInstance(LogicalUnit): self.LogInfo("No-installation mode selected, disabling startup") self.op.start = False # validate/normalize the instance name - self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name) + self.op.instance_name = \ + netutils.Hostname.GetNormalizedName(self.op.instance_name) + if self.op.ip_check and not self.op.name_check: # TODO: make the ip check more flexible and not depend on the name check - raise errors.OpPrereqError("Cannot do ip checks without a name check", + raise errors.OpPrereqError("Cannot do ip check without a name check", errors.ECODE_INVAL) # check nics' parameter names @@ -6520,13 +6627,10 @@ class LUCreateInstance(LogicalUnit): # instance name verification if self.op.name_check: - self.hostname1 = utils.GetHostInfo(self.op.instance_name) + self.hostname1 = netutils.GetHostname(name=self.op.instance_name) self.op.instance_name = self.hostname1.name # used in CheckPrereq for ip ping check self.check_ip = self.hostname1.ip - elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT: - raise errors.OpPrereqError("Remote imports require names to be checked" % 
- errors.ECODE_INVAL) else: self.check_ip = None @@ -6541,10 +6645,17 @@ class LUCreateInstance(LogicalUnit): errors.ECODE_INVAL) ### Node/iallocator related checks - if [self.op.iallocator, self.op.pnode].count(None) != 1: - raise errors.OpPrereqError("One and only one of iallocator and primary" - " node must be given", - errors.ECODE_INVAL) + _CheckIAllocatorOrNode(self, "iallocator", "pnode") + + if self.op.pnode is not None: + if self.op.disk_template in constants.DTS_NET_MIRROR: + if self.op.snode is None: + raise errors.OpPrereqError("The networked disk templates need" + " a mirror node", errors.ECODE_INVAL) + elif self.op.snode: + self.LogWarning("Secondary node will be ignored on non-mirrored disk" + " template") + self.op.snode = None self._cds = _GetClusterDomainSecret() @@ -6561,6 +6672,10 @@ class LUCreateInstance(LogicalUnit): if self.op.os_type is None: raise errors.OpPrereqError("No guest OS specified", errors.ECODE_INVAL) + if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os: + raise errors.OpPrereqError("Guest OS '%s' is not allowed for" + " installation" % self.op.os_type, + errors.ECODE_STATE) if self.op.disk_template is None: raise errors.OpPrereqError("No disk template specified", errors.ECODE_INVAL) @@ -6604,7 +6719,7 @@ class LUCreateInstance(LogicalUnit): errors.ECODE_INVAL) self.source_instance_name = \ - utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name + netutils.GetHostname(name=src_instance_name).name else: raise errors.OpPrereqError("Invalid instance creation mode %r" % @@ -6944,13 +7059,12 @@ class LUCreateInstance(LogicalUnit): elif ip.lower() == constants.VALUE_AUTO: if not self.op.name_check: raise errors.OpPrereqError("IP address set to auto but name checks" - " have been skipped. Aborting.", + " have been skipped", errors.ECODE_INVAL) nic_ip = self.hostname1.ip else: - if not utils.IsValidIP4(ip): - raise errors.OpPrereqError("Given IP address '%s' doesn't look" - " like a valid IP" % ip, + if not netutils.IPAddress.IsValid(ip): + raise errors.OpPrereqError("Invalid IP address '%s'" % ip, errors.ECODE_INVAL) nic_ip = ip @@ -7054,7 +7168,7 @@ class LUCreateInstance(LogicalUnit): # ip ping checks (we use the same ip that was resolved in ExpandNames) if self.op.ip_check: - if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT): + if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT): raise errors.OpPrereqError("IP %s of instance %s already in use" % (self.check_ip, self.op.instance_name), errors.ECODE_NOTUNIQUE) @@ -7093,9 +7207,6 @@ class LUCreateInstance(LogicalUnit): # mirror node verification if self.op.disk_template in constants.DTS_NET_MIRROR: - if self.op.snode is None: - raise errors.OpPrereqError("The networked disk templates need" - " a mirror node", errors.ECODE_INVAL) if self.op.snode == pnode.name: raise errors.OpPrereqError("The secondary node cannot be the" " primary node.", errors.ECODE_INVAL) @@ -7230,6 +7341,18 @@ class LUCreateInstance(LogicalUnit): self.cfg.ReleaseDRBDMinors(instance) raise + if self.cfg.GetClusterInfo().prealloc_wipe_disks: + feedback_fn("* wiping instance disks...") + try: + _WipeDisks(self, iobj) + except errors.OpExecError: + self.LogWarning("Device wiping failed, reverting...") + try: + _RemoveDisks(self, iobj) + finally: + self.cfg.ReleaseDRBDMinors(instance) + raise + feedback_fn("adding instance %s to cluster config" % instance) self.cfg.AddInstance(iobj, self.proc.GetECId()) @@ -7381,7 +7504,12 @@ class LUConnectConsole(NoHooksLU): node_insts.Raise("Can't get 
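When prealloc_wipe_disks is set, instance creation gains a second failure window; both failure paths release the DRBD minors, and a failed wipe additionally removes the just-created disks before re-raising. The rollback structure in isolation, with callables standing in for _CreateDisks, _WipeDisks, _RemoveDisks and cfg.ReleaseDRBDMinors:

from __future__ import print_function

class OpExecError(Exception):
  pass

def create_and_wipe(create, wipe, remove, release_minors, wipe_disks, warn):
  """Rollback skeleton from LUCreateInstance.Exec."""
  try:
    create()
  except OpExecError:
    release_minors()
    raise
  if wipe_disks:
    try:
      wipe()
    except OpExecError:
      warn("Device wiping failed, reverting...")
      try:
        remove()          # undo the disk allocation
      finally:
        release_minors()  # free the minors even if removal fails too
      raise

def failing_wipe():
  raise OpExecError("wipe failed")

try:
  create_and_wipe(lambda: None, failing_wipe,
                  lambda: print("disks removed"),
                  lambda: print("minors released"), True, print)
except OpExecError:
  print("creation aborted, cluster left clean")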
node information from %s" % node) if instance.name not in node_insts.payload: - raise errors.OpExecError("Instance %s is not running." % instance.name) + if instance.admin_up: + state = "ERROR_down" + else: + state = "ADMIN_down" + raise errors.OpExecError("Instance %s is not running (state %s)" % + (instance.name, state)) logging.debug("Connecting to console of %s on %s", instance.name, node) @@ -7405,11 +7533,11 @@ class LUReplaceDisks(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("mode", _NoDefault, _TElemOf(constants.REPLACE_MODES)), - ("disks", _EmptyList, _TListOf(_TPositiveInt)), - ("remote_node", None, _TMaybeString), - ("iallocator", None, _TMaybeString), - ("early_release", False, _TBool), + ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)), + ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)), + ("remote_node", None, ht.TMaybeString), + ("iallocator", None, ht.TMaybeString), + ("early_release", False, ht.TBool), ] REQ_BGL = False @@ -8148,9 +8276,9 @@ class LURepairNodeStorage(NoHooksLU): """ _OP_PARAMS = [ _PNodeName, - ("storage_type", _NoDefault, _CheckStorageType), - ("name", _NoDefault, _TNonEmptyString), - ("ignore_consistency", False, _TBool), + ("storage_type", ht.NoDefault, _CheckStorageType), + ("name", ht.NoDefault, ht.TNonEmptyString), + ("ignore_consistency", False, ht.TBool), ] REQ_BGL = False @@ -8215,16 +8343,14 @@ class LUNodeEvacuationStrategy(NoHooksLU): """ _OP_PARAMS = [ - ("nodes", _NoDefault, _TListOf(_TNonEmptyString)), - ("remote_node", None, _TMaybeString), - ("iallocator", None, _TMaybeString), + ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)), + ("remote_node", None, ht.TMaybeString), + ("iallocator", None, ht.TMaybeString), ] REQ_BGL = False def CheckArguments(self): - if self.op.remote_node is not None and self.op.iallocator is not None: - raise errors.OpPrereqError("Give either the iallocator or the new" - " secondary, not both", errors.ECODE_INVAL) + _CheckIAllocatorOrNode(self, "iallocator", "remote_node") def ExpandNames(self): self.op.nodes = _GetWantedNodes(self, self.op.nodes) @@ -8269,9 +8395,9 @@ class LUGrowDisk(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("disk", _NoDefault, _TInt), - ("amount", _NoDefault, _TInt), - ("wait_for_sync", True, _TBool), + ("disk", ht.NoDefault, ht.TInt), + ("amount", ht.NoDefault, ht.TInt), + ("wait_for_sync", True, ht.TBool), ] REQ_BGL = False @@ -8367,8 +8493,8 @@ class LUQueryInstanceData(NoHooksLU): """ _OP_PARAMS = [ - ("instances", _EmptyList, _TListOf(_TNonEmptyString)), - ("static", False, _TBool), + ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), + ("static", False, ht.TBool), ] REQ_BGL = False @@ -8528,15 +8654,15 @@ class LUSetInstanceParams(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("nics", _EmptyList, _TList), - ("disks", _EmptyList, _TList), - ("beparams", _EmptyDict, _TDict), - ("hvparams", _EmptyDict, _TDict), - ("disk_template", None, _TMaybeString), - ("remote_node", None, _TMaybeString), - ("os_name", None, _TMaybeString), - ("force_variant", False, _TBool), - ("osparams", None, _TOr(_TDict, _TNone)), + ("nics", ht.EmptyList, ht.TList), + ("disks", ht.EmptyList, ht.TList), + ("beparams", ht.EmptyDict, ht.TDict), + ("hvparams", ht.EmptyDict, ht.TDict), + ("disk_template", None, ht.TMaybeString), + ("remote_node", None, ht.TMaybeString), + ("os_name", None, ht.TMaybeString), + ("force_variant", False, ht.TBool), + ("osparams", None, ht.TOr(ht.TDict, 
ht.TNone)), _PForce, ] REQ_BGL = False @@ -8625,7 +8751,7 @@ class LUSetInstanceParams(LogicalUnit): if nic_ip.lower() == constants.VALUE_NONE: nic_dict['ip'] = None else: - if not utils.IsValidIP4(nic_ip): + if not netutils.IPAddress.IsValid(nic_ip): raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip, errors.ECODE_INVAL) @@ -8758,6 +8884,10 @@ class LUSetInstanceParams(LogicalUnit): errors.ECODE_INVAL) _CheckInstanceDown(self, instance, "cannot change disk template") if self.op.disk_template in constants.DTS_NET_MIRROR: + if self.op.remote_node == pnode: + raise errors.OpPrereqError("Given new secondary node %s is the same" + " as the primary node of the instance" % + self.op.remote_node, errors.ECODE_STATE) _CheckNodeOnline(self, self.op.remote_node) _CheckNodeNotDrained(self, self.op.remote_node) disks = [{"size": d.size} for d in instance.disks] @@ -8794,10 +8924,9 @@ class LUSetInstanceParams(LogicalUnit): if self.op.osparams: i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams) _CheckOSParams(self, True, nodelist, instance_os, i_osdict) - self.os_new = cluster.SimpleFillOS(instance_os, i_osdict) self.os_inst = i_osdict # the new dict (without defaults) else: - self.os_new = self.os_inst = {} + self.os_inst = {} self.warn = [] @@ -9189,8 +9318,8 @@ class LUQueryExports(NoHooksLU): """ _OP_PARAMS = [ - ("nodes", _EmptyList, _TListOf(_TNonEmptyString)), - ("use_locking", False, _TBool), + ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), + ("use_locking", False, ht.TBool), ] REQ_BGL = False @@ -9230,7 +9359,7 @@ class LUPrepareExport(NoHooksLU): """ _OP_PARAMS = [ _PInstanceName, - ("mode", _NoDefault, _TElemOf(constants.EXPORT_MODES)), + ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)), ] REQ_BGL = False @@ -9287,14 +9416,14 @@ class LUExportInstance(LogicalUnit): HTYPE = constants.HTYPE_INSTANCE _OP_PARAMS = [ _PInstanceName, - ("target_node", _NoDefault, _TOr(_TNonEmptyString, _TList)), - ("shutdown", True, _TBool), + ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)), + ("shutdown", True, ht.TBool), _PShutdownTimeout, - ("remove_instance", False, _TBool), - ("ignore_remove_failures", False, _TBool), - ("mode", constants.EXPORT_MODE_LOCAL, _TElemOf(constants.EXPORT_MODES)), - ("x509_key_name", None, _TOr(_TList, _TNone)), - ("destination_x509_ca", None, _TMaybeString), + ("remove_instance", False, ht.TBool), + ("ignore_remove_failures", False, ht.TBool), + ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)), + ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)), + ("destination_x509_ca", None, ht.TMaybeString), ] REQ_BGL = False @@ -9643,6 +9772,9 @@ class TagsLU(NoHooksLU): # pylint: disable-msg=W0223 self.op.name = _ExpandInstanceName(self.cfg, self.op.name) self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name + # FIXME: Acquire BGL for cluster tag operations (as of this writing it's + # not possible to acquire the BGL based on opcode parameters) + def CheckPrereq(self): """Check prerequisites. 
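
The hunks above standardize address checking on netutils.IPAddress.IsValid
(the old utils.IsValidIP4 helper was, as the name says, IPv4-only). A rough
standalone stand-in using only the standard library, assuming the new helper
accepts both IPv4 and IPv6 addresses; this is an illustration, not the actual
netutils code:

import socket

def is_valid_ip(address):
  # Approximation of netutils.IPAddress.IsValid: accept anything that
  # parses as an IPv4 or IPv6 address.  The real class hierarchy in
  # ganeti's netutils module does considerably more than this sketch.
  for family in (socket.AF_INET, socket.AF_INET6):
    try:
      socket.inet_pton(family, address)
      return True
    except socket.error:
      pass
  return False

assert is_valid_ip("192.0.2.1") and not is_valid_ip("999.0.0.1")
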
@@ -9663,11 +9795,18 @@ class LUGetTags(TagsLU): """ _OP_PARAMS = [ - ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)), - ("name", _NoDefault, _TNonEmptyString), + ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)), + # Name is only meaningful for nodes and instances + ("name", ht.NoDefault, ht.TMaybeString), ] REQ_BGL = False + def ExpandNames(self): + TagsLU.ExpandNames(self) + + # Share locks as this is only a read operation + self.share_locks = dict.fromkeys(locking.LEVELS, 1) + def Exec(self, feedback_fn): """Returns the tag list. @@ -9680,7 +9819,7 @@ class LUSearchTags(NoHooksLU): """ _OP_PARAMS = [ - ("pattern", _NoDefault, _TNonEmptyString), + ("pattern", ht.NoDefault, ht.TNonEmptyString), ] REQ_BGL = False @@ -9722,9 +9861,10 @@ class LUAddTags(TagsLU): """ _OP_PARAMS = [ - ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)), - ("name", _NoDefault, _TNonEmptyString), - ("tags", _NoDefault, _TListOf(_TNonEmptyString)), + ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)), + # Name is only meaningful for nodes and instances + ("name", ht.NoDefault, ht.TMaybeString), + ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)), ] REQ_BGL = False @@ -9755,9 +9895,10 @@ class LUDelTags(TagsLU): """ _OP_PARAMS = [ - ("kind", _NoDefault, _TElemOf(constants.VALID_TAG_TYPES)), - ("name", _NoDefault, _TNonEmptyString), - ("tags", _NoDefault, _TListOf(_TNonEmptyString)), + ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)), + # Name is only meaningful for nodes and instances + ("name", ht.NoDefault, ht.TMaybeString), + ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)), ] REQ_BGL = False @@ -9772,12 +9913,13 @@ class LUDelTags(TagsLU): objects.TaggableObject.ValidateTag(tag) del_tags = frozenset(self.op.tags) cur_tags = self.target.GetTags() - if not del_tags <= cur_tags: - diff_tags = del_tags - cur_tags - diff_names = ["'%s'" % tag for tag in diff_tags] - diff_names.sort() + + diff_tags = del_tags - cur_tags + if diff_tags: + diff_names = ("'%s'" % i for i in sorted(diff_tags)) raise errors.OpPrereqError("Tag(s) %s not found" % - (",".join(diff_names)), errors.ECODE_NOENT) + (utils.CommaJoin(diff_names), ), + errors.ECODE_NOENT) def Exec(self, feedback_fn): """Remove the tag from the object. @@ -9796,10 +9938,10 @@ class LUTestDelay(NoHooksLU): """ _OP_PARAMS = [ - ("duration", _NoDefault, _TFloat), - ("on_master", True, _TBool), - ("on_nodes", _EmptyList, _TListOf(_TNonEmptyString)), - ("repeat", 0, _TPositiveInt) + ("duration", ht.NoDefault, ht.TFloat), + ("on_master", True, ht.TBool), + ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), + ("repeat", 0, ht.TPositiveInt) ] REQ_BGL = False @@ -9842,6 +9984,148 @@ class LUTestDelay(NoHooksLU): self._TestDelay() +class LUTestJobqueue(NoHooksLU): + """Utility LU to test some aspects of the job queue. + + """ + _OP_PARAMS = [ + ("notify_waitlock", False, ht.TBool), + ("notify_exec", False, ht.TBool), + ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)), + ("fail", False, ht.TBool), + ] + REQ_BGL = False + + # Must be lower than default timeout for WaitForJobChange to see whether it + # notices changed jobs + _CLIENT_CONNECT_TIMEOUT = 20.0 + _CLIENT_CONFIRM_TIMEOUT = 60.0 + + @classmethod + def _NotifyUsingSocket(cls, cb, errcls): + """Opens a Unix socket and waits for another program to connect. 
+ + @type cb: callable + @param cb: Callback to send socket name to client + @type errcls: class + @param errcls: Exception class to use for errors + + """ + # Using a temporary directory as there's no easy way to create temporary + # sockets without writing a custom loop around tempfile.mktemp and + # socket.bind + tmpdir = tempfile.mkdtemp() + try: + tmpsock = utils.PathJoin(tmpdir, "sock") + + logging.debug("Creating temporary socket at %s", tmpsock) + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + try: + sock.bind(tmpsock) + sock.listen(1) + + # Send details to client + cb(tmpsock) + + # Wait for client to connect before continuing + sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT) + try: + (conn, _) = sock.accept() + except socket.error, err: + raise errcls("Client didn't connect in time (%s)" % err) + finally: + sock.close() + finally: + # Remove as soon as client is connected + shutil.rmtree(tmpdir) + + # Wait for client to close + try: + try: + # pylint: disable-msg=E1101 + # Instance of '_socketobject' has no ... member + conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT) + conn.recv(1) + except socket.error, err: + raise errcls("Client failed to confirm notification (%s)" % err) + finally: + conn.close() + + def _SendNotification(self, test, arg, sockname): + """Sends a notification to the client. + + @type test: string + @param test: Test name + @param arg: Test argument (depends on test) + @type sockname: string + @param sockname: Socket path + + """ + self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg)) + + def _Notify(self, prereq, test, arg): + """Notifies the client of a test. + + @type prereq: bool + @param prereq: Whether this is a prereq-phase test + @type test: string + @param test: Test name + @param arg: Test argument (depends on test) + + """ + if prereq: + errcls = errors.OpPrereqError + else: + errcls = errors.OpExecError + + return self._NotifyUsingSocket(compat.partial(self._SendNotification, + test, arg), + errcls) + + def CheckArguments(self): + self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1 + self.expandnames_calls = 0 + + def ExpandNames(self): + checkargs_calls = getattr(self, "checkargs_calls", 0) + if checkargs_calls < 1: + raise errors.ProgrammerError("CheckArguments was not called") + + self.expandnames_calls += 1 + + if self.op.notify_waitlock: + self._Notify(True, constants.JQT_EXPANDNAMES, None) + + self.LogInfo("Expanding names") + + # Get lock on master node (just to get a lock, not for a particular reason) + self.needed_locks = { + locking.LEVEL_NODE: self.cfg.GetMasterNode(), + } + + def Exec(self, feedback_fn): + if self.expandnames_calls < 1: + raise errors.ProgrammerError("ExpandNames was not called") + + if self.op.notify_exec: + self._Notify(False, constants.JQT_EXEC, None) + + self.LogInfo("Executing") + + if self.op.log_messages: + self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages)) + for idx, msg in enumerate(self.op.log_messages): + self.LogInfo("Sending log message %s", idx + 1) + feedback_fn(constants.JQT_MSGPREFIX + msg) + # Report how many test messages have been sent + self._Notify(False, constants.JQT_LOGMSG, idx + 1) + + if self.op.fail: + raise errors.OpExecError("Opcode failure was requested") + + return True + + class IAllocator(object): """IAllocator framework. 
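
LUTestJobqueue above publishes the temporary socket path through an
ELOG_JQUEUE_TEST log entry and then blocks, first in accept() and later in
conn.recv(1). The matching client is not part of this patch; a minimal
sketch of its half of the handshake (a hypothetical helper, derived only
from the server-side code shown above):

import socket

def _AnswerJobqueueTest(sockname, timeout=10.0):
  # Hypothetical client for _NotifyUsingSocket: connecting unblocks the
  # LU's accept() call, and sending a single byte satisfies the later
  # conn.recv(1) that waits for the confirmation.
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  sock.settimeout(timeout)
  try:
    sock.connect(sockname)
    sock.send("x")  # any byte counts as confirmation
  finally:
    sock.close()

Connecting and confirming are deliberately two steps, matching the two
timeouts (_CLIENT_CONNECT_TIMEOUT and _CLIENT_CONFIRM_TIMEOUT) on the
server side.
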
@@ -9929,7 +10213,6 @@ class IAllocator(object): i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo] # node data - node_results = {} node_list = cfg.GetNodeList() if self.mode == constants.IALLOCATOR_MODE_ALLOC: @@ -9944,6 +10227,31 @@ class IAllocator(object): node_iinfo = \ self.rpc.call_all_instances_info(node_list, cluster_info.enabled_hypervisors) + + data["nodegroups"] = self._ComputeNodeGroupData(cfg) + + data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list) + + data["instances"] = self._ComputeInstanceData(cluster_info, i_list) + + self.in_data = data + + @staticmethod + def _ComputeNodeGroupData(cfg): + """Compute node groups data. + + """ + ng = {} + for guuid, gdata in cfg.GetAllNodeGroupsInfo().items(): + ng[guuid] = { "name": gdata.name } + return ng + + @staticmethod + def _ComputeNodeData(cfg, node_data, node_iinfo, i_list): + """Compute global node data. + + """ + node_results = {} for nname, nresult in node_data.items(): # first fill in static (config-based) values ninfo = cfg.GetNodeInfo(nname) @@ -9954,6 +10262,9 @@ class IAllocator(object): "offline": ninfo.offline, "drained": ninfo.drained, "master_candidate": ninfo.master_candidate, + "group": ninfo.group, + "master_capable": ninfo.master_capable, + "vm_capable": ninfo.vm_capable, } if not (ninfo.offline or ninfo.drained): @@ -10000,9 +10311,14 @@ class IAllocator(object): pnr.update(pnr_dyn) node_results[nname] = pnr - data["nodes"] = node_results - # instance data + return node_results + + @staticmethod + def _ComputeInstanceData(cluster_info, i_list): + """Compute global instance data. + + """ instance_data = {} for iinfo, beinfo in i_list: nic_data = [] @@ -10032,9 +10348,7 @@ class IAllocator(object): pir["disks"]) instance_data[iinfo.name] = pir - data["instances"] = instance_data - - self.in_data = data + return instance_data def _AddNewInstance(self): """Add new instance data to allocator structure. 
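
After this refactoring the allocator input is assembled from three
independent, statically computable dictionaries. An invented example of the
result, restricted to the fields visible in the hunks above (all names and
values are made up):

# Invented sample of the IAllocator input after this change; only keys
# that appear in the hunks above are shown, everything else is omitted.
in_data = {
  "nodegroups": {                      # _ComputeNodeGroupData
    "uuid-1": {"name": "default"},
  },
  "nodes": {                           # _ComputeNodeData
    "node1.example.com": {
      "offline": False,
      "drained": False,
      "master_candidate": True,
      "group": "uuid-1",               # owning node group's UUID
      "master_capable": True,
      "vm_capable": True,
      # dynamic usage data (pnr_dyn) is merged in for online,
      # non-drained nodes
    },
  },
  "instances": {},                     # _ComputeInstanceData, one entry
                                       # per instance
}
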
@@ -10175,21 +10489,22 @@ class LUTestAllocator(NoHooksLU): """ _OP_PARAMS = [ - ("direction", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)), - ("mode", _NoDefault, _TElemOf(constants.VALID_IALLOCATOR_MODES)), - ("name", _NoDefault, _TNonEmptyString), - ("nics", _NoDefault, _TOr(_TNone, _TListOf( - _TDictOf(_TElemOf(["mac", "ip", "bridge"]), - _TOr(_TNone, _TNonEmptyString))))), - ("disks", _NoDefault, _TOr(_TNone, _TList)), - ("hypervisor", None, _TMaybeString), - ("allocator", None, _TMaybeString), - ("tags", _EmptyList, _TListOf(_TNonEmptyString)), - ("mem_size", None, _TOr(_TNone, _TPositiveInt)), - ("vcpus", None, _TOr(_TNone, _TPositiveInt)), - ("os", None, _TMaybeString), - ("disk_template", None, _TMaybeString), - ("evac_nodes", None, _TOr(_TNone, _TListOf(_TNonEmptyString))), + ("direction", ht.NoDefault, + ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)), + ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)), + ("name", ht.NoDefault, ht.TNonEmptyString), + ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf( + ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]), + ht.TOr(ht.TNone, ht.TNonEmptyString))))), + ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)), + ("hypervisor", None, ht.TMaybeString), + ("allocator", None, ht.TMaybeString), + ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)), + ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)), + ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)), + ("os", None, ht.TMaybeString), + ("disk_template", None, ht.TMaybeString), + ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))), ] def CheckPrereq(self):
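
The "nics" entry above composes several ht checkers into a single callable.
Toy versions of the combinators involved, showing how such a composed check
evaluates; these are illustrative stand-ins, not the implementations from
ganeti's ht module:

# Toy reimplementation of the ht combinators used for "nics" above.
def TNone(v):
  return v is None

def TNonEmptyString(v):
  return isinstance(v, str) and bool(v)

def TElemOf(items):
  return lambda v: v in items

def TOr(*checks):
  return lambda v: any(check(v) for check in checks)

def TListOf(check):
  return lambda v: isinstance(v, list) and all(check(e) for e in v)

def TDictOf(kcheck, vcheck):
  return lambda v: (isinstance(v, dict)
                    and all(kcheck(k) for k in v.keys())
                    and all(vcheck(x) for x in v.values()))

nics_check = TOr(TNone, TListOf(
  TDictOf(TElemOf(["mac", "ip", "bridge"]),
          TOr(TNone, TNonEmptyString))))

assert nics_check(None)
assert nics_check([{"mac": "aa:00:00:11:22:33", "ip": None}])
assert not nics_check([{"model": "virtio"}])  # unknown key is rejected

Because every checker is just a boolean callable, composed checks like this
one stay cheap to build at class-definition time and need no separate schema
framework.
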