from ganeti import qlang
from ganeti import opcodes
from ganeti import ht
+from ganeti import rpc
import ganeti.masterd.instance # pylint: disable=W0611
+#: Size of DRBD meta block device
+DRBD_META_SIZE = 128
+
+
class ResultWithJobs:
"""Data container for LU results with jobs.
HTYPE = None
REQ_BGL = True
- def __init__(self, processor, op, context, rpc):
+ def __init__(self, processor, op, context, rpc_runner):
"""Constructor for LogicalUnit.
This needs to be overridden in derived classes in order to check op
# readability alias
self.owned_locks = context.glm.list_owned
self.context = context
- self.rpc = rpc
+ self.rpc = rpc_runner
# Dicts used to declare locking needs to mcpu
self.needed_locks = None
self.share_locks = dict.fromkeys(locking.LEVELS, 0)
self.op.instance_name)
self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
- def _LockInstancesNodes(self, primary_only=False):
+ def _LockInstancesNodes(self, primary_only=False,
+ level=locking.LEVEL_NODE):
"""Helper function to declare instances' nodes for locking.
This function should be called after locking one or more instances to lock
@type primary_only: boolean
@param primary_only: only lock primary nodes of locked instances
+ @param level: Which lock level to use for locking nodes
"""
- assert locking.LEVEL_NODE in self.recalculate_locks, \
+ assert level in self.recalculate_locks, \
"_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we're really been called with the instance locks held
if not primary_only:
wanted_nodes.extend(instance.secondary_nodes)
- if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
- self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
- elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
- self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
+ if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
+ self.needed_locks[level] = wanted_nodes
+ elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
+ self.needed_locks[level].extend(wanted_nodes)
+ else:
+ raise errors.ProgrammerError("Unknown recalculation mode")
- del self.recalculate_locks[locking.LEVEL_NODE]
+ del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
#: Attribute holding field definitions
FIELDS = None
- def __init__(self, filter_, fields, use_locking):
+ def __init__(self, qfilter, fields, use_locking):
"""Initializes this class.
"""
self.use_locking = use_locking
- self.query = query.Query(self.FIELDS, fields, filter_=filter_,
+ self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
namefield="name")
self.requested_data = self.query.RequestedData()
self.names = self.query.RequestedNames()
"""Runs the post-hook for an opcode on a single node.
"""
- hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
+ hm = lu.proc.BuildHooksManager(lu)
try:
hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
except:
return []
-def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
+def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
faulty = []
for dev in instance.disks:
cfg.SetDiskID(dev, node_name)
- result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
+ result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
result.Raise("Failed to get disk status from node %s" % node_name,
prereq=prereq, ecode=errors.ECODE_ENVIRON)
"""Destroys the cluster.
"""
- master = self.cfg.GetMasterNode()
+ master_params = self.cfg.GetMasterNetworkParameters()
# Run post hooks on master node before it's removed
- _RunPostHook(self, master)
+ _RunPostHook(self, master_params.name)
- result = self.rpc.call_node_deactivate_master_ip(master)
+ ems = self.cfg.GetUseExternalMipScript()
+ result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+ master_params, ems)
result.Raise("Could not disable the master role")
- return master
+ return master_params.name
def _VerifyCertificate(filename):
self.op and self._feedback_fn to be available.)
"""
- TCLUSTER = "cluster"
- TNODE = "node"
- TINSTANCE = "instance"
-
- ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
- ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
- ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
- ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
- ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
- EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
- EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
- EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
- EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
- EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
- EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
- EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
- ENODEDRBD = (TNODE, "ENODEDRBD")
- ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
- ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
- ENODEHOOKS = (TNODE, "ENODEHOOKS")
- ENODEHV = (TNODE, "ENODEHV")
- ENODELVM = (TNODE, "ENODELVM")
- ENODEN1 = (TNODE, "ENODEN1")
- ENODENET = (TNODE, "ENODENET")
- ENODEOS = (TNODE, "ENODEOS")
- ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
- ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
- ENODERPC = (TNODE, "ENODERPC")
- ENODESSH = (TNODE, "ENODESSH")
- ENODEVERSION = (TNODE, "ENODEVERSION")
- ENODESETUP = (TNODE, "ENODESETUP")
- ENODETIME = (TNODE, "ENODETIME")
- ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
ETYPE_FIELD = "code"
ETYPE_ERROR = "ERROR"
"""
ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
- itype, etxt = ecode
+ itype, etxt, _ = ecode
# first complete the msg
if args:
msg = msg % args
# and finally report it via the feedback_fn
self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
- def _ErrorIf(self, cond, *args, **kwargs):
+ def _ErrorIf(self, cond, ecode, *args, **kwargs):
"""Log an error message if the passed condition is True.
"""
cond = (bool(cond)
or self.op.debug_simulate_errors) # pylint: disable=E1101
+
+ # If the error code is in the list of ignored errors, demote the error to a
+ # warning
+ (_, etxt, _) = ecode
+ if etxt in self.op.ignore_errors: # pylint: disable=E1101
+ kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
+
if cond:
- self._Error(*args, **kwargs)
+ self._Error(ecode, *args, **kwargs)
+
# do not mark the operation as failed for WARN cases only
if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
self.bad = self.bad or cond
groups = self.cfg.GetNodeGroupList()
# Verify global configuration
- jobs.append([opcodes.OpClusterVerifyConfig()])
+ jobs.append([
+ opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
+ ])
# Always depend on global verification
depends_fn = lambda: [(-len(jobs), [])]
jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
- depends=depends_fn())]
+ ignore_errors=self.op.ignore_errors,
+ depends=depends_fn())]
for group in groups)
# Fix up all parameters
utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
hv_class.CheckParameterSyntax(hv_params)
except errors.GenericError, err:
- self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
+ self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
def ExpandNames(self):
# Information can be safely retrieved as the BGL is acquired in exclusive
feedback_fn("* Verifying cluster config")
for msg in self.cfg.VerifyConfig():
- self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
+ self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
feedback_fn("* Verifying cluster certificate files")
for cert_filename in constants.ALL_CERT_FILES:
(errcode, msg) = _VerifyCertificate(cert_filename)
- self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
+ self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
feedback_fn("* Verifying hypervisor parameters")
["no instances"])))
for node in dangling_nodes]
- self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
+ self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
+ None,
"the following nodes (and their instances) belong to a non"
" existing group: %s", utils.CommaJoin(pretty_dangling))
- self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
+ self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
+ None,
"the following instances have a non-existing primary-node:"
" %s", utils.CommaJoin(no_node_instances))
# main result, nresult should be a non-empty dict
test = not nresult or not isinstance(nresult, dict)
- _ErrorIf(test, self.ENODERPC, node,
+ _ErrorIf(test, constants.CV_ENODERPC, node,
"unable to verify node: no data returned")
if test:
return False
test = not (remote_version and
isinstance(remote_version, (list, tuple)) and
len(remote_version) == 2)
- _ErrorIf(test, self.ENODERPC, node,
+ _ErrorIf(test, constants.CV_ENODERPC, node,
"connection to node returned invalid data")
if test:
return False
test = local_version != remote_version[0]
- _ErrorIf(test, self.ENODEVERSION, node,
+ _ErrorIf(test, constants.CV_ENODEVERSION, node,
"incompatible protocol versions: master %s,"
" node %s", local_version, remote_version[0])
if test:
# full package version
self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
- self.ENODEVERSION, node,
+ constants.CV_ENODEVERSION, node,
"software version mismatch: master %s, node %s",
constants.RELEASE_VERSION, remote_version[1],
code=self.ETYPE_WARNING)
if ninfo.vm_capable and isinstance(hyp_result, dict):
for hv_name, hv_result in hyp_result.iteritems():
test = hv_result is not None
- _ErrorIf(test, self.ENODEHV, node,
+ _ErrorIf(test, constants.CV_ENODEHV, node,
"hypervisor %s verify failure: '%s'", hv_name, hv_result)
hvp_result = nresult.get(constants.NV_HVPARAMS, None)
if ninfo.vm_capable and isinstance(hvp_result, list):
for item, hv_name, hv_result in hvp_result:
- _ErrorIf(True, self.ENODEHV, node,
+ _ErrorIf(True, constants.CV_ENODEHV, node,
"hypervisor %s parameter verify failure (source %s): %s",
hv_name, item, hv_result)
test = nresult.get(constants.NV_NODESETUP,
["Missing NODESETUP results"])
- _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
+ _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
"; ".join(test))
return True
try:
ntime_merged = utils.MergeTime(ntime)
except (ValueError, TypeError):
- _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
+ _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
return
if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
else:
ntime_diff = None
- _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
+ _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
"Node time diverges by at least %s from master node time",
ntime_diff)
# checks vg existence and size > 20G
vglist = nresult.get(constants.NV_VGLIST, None)
test = not vglist
- _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
+ _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
if not test:
vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
constants.MIN_VG_SIZE)
- _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
+ _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
# check pv names
pvlist = nresult.get(constants.NV_PVLIST, None)
test = pvlist is None
- _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
+ _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
if not test:
# check that ':' is not present in PV names, since it's a
# special character for lvcreate (denotes the range of PEs to
# use on the PV)
for _, pvname, owner_vg in pvlist:
test = ":" in pvname
- _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
- " '%s' of VG '%s'", pvname, owner_vg)
+ _ErrorIf(test, constants.CV_ENODELVM, node,
+ "Invalid character ':' in PV '%s' of VG '%s'",
+ pvname, owner_vg)
def _VerifyNodeBridges(self, ninfo, nresult, bridges):
"""Check the node bridges.
missing = nresult.get(constants.NV_BRIDGES, None)
test = not isinstance(missing, list)
- _ErrorIf(test, self.ENODENET, node,
+ _ErrorIf(test, constants.CV_ENODENET, node,
"did not return valid bridge information")
if not test:
- _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
- utils.CommaJoin(sorted(missing)))
+ _ErrorIf(bool(missing), constants.CV_ENODENET, node,
+ "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
+
+ def _VerifyNodeUserScripts(self, ninfo, nresult):
+ """Check the results of user scripts presence and executability on the node
+
+ @type ninfo: L{objects.Node}
+ @param ninfo: the node to check
+ @param nresult: the remote results for the node
+
+ """
+ node = ninfo.name
+
+ test = not constants.NV_USERSCRIPTS in nresult
+ self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
+ "did not return user scripts information")
+
+ broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
+ if not test:
+ self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
+ "user scripts not present or not executable: %s" %
+ utils.CommaJoin(sorted(broken_scripts)))
def _VerifyNodeNetwork(self, ninfo, nresult):
"""Check the node network connectivity results.
_ErrorIf = self._ErrorIf # pylint: disable=C0103
test = constants.NV_NODELIST not in nresult
- _ErrorIf(test, self.ENODESSH, node,
+ _ErrorIf(test, constants.CV_ENODESSH, node,
"node hasn't returned node ssh connectivity data")
if not test:
if nresult[constants.NV_NODELIST]:
for a_node, a_msg in nresult[constants.NV_NODELIST].items():
- _ErrorIf(True, self.ENODESSH, node,
+ _ErrorIf(True, constants.CV_ENODESSH, node,
"ssh communication with node '%s': %s", a_node, a_msg)
test = constants.NV_NODENETTEST not in nresult
- _ErrorIf(test, self.ENODENET, node,
+ _ErrorIf(test, constants.CV_ENODENET, node,
"node hasn't returned node tcp connectivity data")
if not test:
if nresult[constants.NV_NODENETTEST]:
nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
for anode in nlist:
- _ErrorIf(True, self.ENODENET, node,
+ _ErrorIf(True, constants.CV_ENODENET, node,
"tcp communication with node '%s': %s",
anode, nresult[constants.NV_NODENETTEST][anode])
test = constants.NV_MASTERIP not in nresult
- _ErrorIf(test, self.ENODENET, node,
+ _ErrorIf(test, constants.CV_ENODENET, node,
"node hasn't returned node master IP reachability data")
if not test:
if not nresult[constants.NV_MASTERIP]:
msg = "the master node cannot reach the master IP (not configured?)"
else:
msg = "cannot reach the master IP"
- _ErrorIf(True, self.ENODENET, node, msg)
+ _ErrorIf(True, constants.CV_ENODENET, node, msg)
def _VerifyInstance(self, instance, instanceconfig, node_image,
diskstatus):
continue
for volume in node_vol_should[node]:
test = volume not in n_img.volumes
- _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
+ _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
"volume %s missing on node %s", volume, node)
if instanceconfig.admin_up:
pri_img = node_image[node_current]
test = instance not in pri_img.instances and not pri_img.offline
- _ErrorIf(test, self.EINSTANCEDOWN, instance,
+ _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
"instance not running on its primary node %s",
node_current)
snode = node_image[nname]
bad_snode = snode.ghost or snode.offline
_ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
- self.EINSTANCEFAULTYDISK, instance,
+ constants.CV_EINSTANCEFAULTYDISK, instance,
"couldn't retrieve status for disk/%s on %s: %s",
idx, nname, bdev_status)
_ErrorIf((instanceconfig.admin_up and success and
bdev_status.ldisk_status == constants.LDS_FAULTY),
- self.EINSTANCEFAULTYDISK, instance,
+ constants.CV_EINSTANCEFAULTYDISK, instance,
"disk/%s on %s is faulty", idx, nname)
def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
test = ((node not in node_vol_should or
volume not in node_vol_should[node]) and
not reserved.Matches(volume))
- self._ErrorIf(test, self.ENODEORPHANLV, node,
+ self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
"volume %s is unknown", volume)
def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
if bep[constants.BE_AUTO_BALANCE]:
needed_mem += bep[constants.BE_MEMORY]
test = n_img.mfree < needed_mem
- self._ErrorIf(test, self.ENODEN1, node,
+ self._ErrorIf(test, constants.CV_ENODEN1, node,
"not enough memory to accomodate instance failovers"
" should node %s fail (%dMiB needed, %dMiB available)",
prinode, needed_mem, n_img.mfree)
@classmethod
def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
- (files_all, files_all_opt, files_mc, files_vm)):
+ (files_all, files_opt, files_mc, files_vm)):
"""Verifies file checksums collected from all nodes.
@param errorif: Callback for reporting errors
@param all_nvinfo: RPC results
"""
- assert (len(files_all | files_all_opt | files_mc | files_vm) ==
- sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
- "Found file listed in more than one file list"
-
# Define functions determining which nodes to consider for a file
files2nodefn = [
(files_all, None),
- (files_all_opt, None),
(files_mc, lambda node: (node.master_candidate or
node.name == master_node)),
(files_vm, lambda node: node.vm_capable),
frozenset(map(operator.attrgetter("name"), filenodes)))
for filename in files)
- assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
+ assert set(nodefiles) == (files_all | files_mc | files_vm)
fileinfo = dict((filename, {}) for filename in nodefiles)
ignore_nodes = set()
node_files = nresult.payload.get(constants.NV_FILELIST, None)
test = not (node_files and isinstance(node_files, dict))
- errorif(test, cls.ENODEFILECHECK, node.name,
+ errorif(test, constants.CV_ENODEFILECHECK, node.name,
"Node did not return file checksum data")
if test:
ignore_nodes.add(node.name)
# Nodes missing file
missing_file = expected_nodes - with_file
- if filename in files_all_opt:
+ if filename in files_opt:
# All or no nodes
errorif(missing_file and missing_file != expected_nodes,
- cls.ECLUSTERFILECHECK, None,
+ constants.CV_ECLUSTERFILECHECK, None,
"File %s is optional, but it must exist on all or no"
" nodes (not found on %s)",
filename, utils.CommaJoin(utils.NiceSort(missing_file)))
else:
- # Non-optional files
- errorif(missing_file, cls.ECLUSTERFILECHECK, None,
+ errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
"File %s is missing from node(s) %s", filename,
utils.CommaJoin(utils.NiceSort(missing_file)))
# Warn if a node has a file it shouldn't
unexpected = with_file - expected_nodes
errorif(unexpected,
- cls.ECLUSTERFILECHECK, None,
+ constants.CV_ECLUSTERFILECHECK, None,
"File %s should not exist on node(s) %s",
filename, utils.CommaJoin(utils.NiceSort(unexpected)))
else:
variants = []
- errorif(test, cls.ECLUSTERFILECHECK, None,
+ errorif(test, constants.CV_ECLUSTERFILECHECK, None,
"File %s found with %s different checksums (%s)",
filename, len(checksums), "; ".join(variants))
if drbd_helper:
helper_result = nresult.get(constants.NV_DRBDHELPER, None)
test = (helper_result == None)
- _ErrorIf(test, self.ENODEDRBDHELPER, node,
+ _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
"no drbd usermode helper returned")
if helper_result:
status, payload = helper_result
test = not status
- _ErrorIf(test, self.ENODEDRBDHELPER, node,
+ _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
"drbd usermode helper check unsuccessful: %s", payload)
test = status and (payload != drbd_helper)
- _ErrorIf(test, self.ENODEDRBDHELPER, node,
+ _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
"wrong drbd usermode helper: %s", payload)
# compute the DRBD minors
node_drbd = {}
for minor, instance in drbd_map[node].items():
test = instance not in instanceinfo
- _ErrorIf(test, self.ECLUSTERCFG, None,
+ _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
"ghost instance '%s' in temporary DRBD map", instance)
# ghost instance should not be running, but otherwise we
# don't give double warnings (both ghost instance and
# and now check them
used_minors = nresult.get(constants.NV_DRBDLIST, [])
test = not isinstance(used_minors, (tuple, list))
- _ErrorIf(test, self.ENODEDRBD, node,
+ _ErrorIf(test, constants.CV_ENODEDRBD, node,
"cannot parse drbd status file: %s", str(used_minors))
if test:
# we cannot check drbd status
for minor, (iname, must_exist) in node_drbd.items():
test = minor not in used_minors and must_exist
- _ErrorIf(test, self.ENODEDRBD, node,
+ _ErrorIf(test, constants.CV_ENODEDRBD, node,
"drbd minor %d of instance %s is not active", minor, iname)
for minor in used_minors:
test = minor not in node_drbd
- _ErrorIf(test, self.ENODEDRBD, node,
+ _ErrorIf(test, constants.CV_ENODEDRBD, node,
"unallocated drbd minor %d is in use", minor)
def _UpdateNodeOS(self, ninfo, nresult, nimg):
not compat.all(isinstance(v, list) and len(v) == 7
for v in remote_os))
- _ErrorIf(test, self.ENODEOS, node,
+ _ErrorIf(test, constants.CV_ENODEOS, node,
"node hasn't returned valid OS data")
nimg.os_fail = test
for os_name, os_data in nimg.oslist.items():
assert os_data, "Empty OS status for OS %s?!" % os_name
f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
- _ErrorIf(not f_status, self.ENODEOS, node,
+ _ErrorIf(not f_status, constants.CV_ENODEOS, node,
"Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
- _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
+ _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
"OS '%s' has multiple entries (first one shadows the rest): %s",
os_name, utils.CommaJoin([v[0] for v in os_data]))
# comparisons with the 'base' image
test = os_name not in base.oslist
- _ErrorIf(test, self.ENODEOS, node,
+ _ErrorIf(test, constants.CV_ENODEOS, node,
"Extra OS %s not present on reference node (%s)",
os_name, base.name)
if test:
("variants list", f_var, b_var),
("parameters", beautify_params(f_param),
beautify_params(b_param))]:
- _ErrorIf(a != b, self.ENODEOS, node,
+ _ErrorIf(a != b, constants.CV_ENODEOS, node,
"OS %s for %s differs from reference node %s: [%s] vs. [%s]",
kind, os_name, base.name,
utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
# check any missing OSes
missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
- _ErrorIf(missing, self.ENODEOS, node,
+ _ErrorIf(missing, constants.CV_ENODEOS, node,
"OSes present on reference node %s but missing on this node: %s",
base.name, utils.CommaJoin(missing))
if ((ninfo.master_candidate or ninfo.master_capable) and
constants.NV_OOB_PATHS in nresult):
for path_result in nresult[constants.NV_OOB_PATHS]:
- self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
+ self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
"""Verifies and updates the node volume data.
if vg_name is None:
pass
elif isinstance(lvdata, basestring):
- _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
+ _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
utils.SafeEncode(lvdata))
elif not isinstance(lvdata, dict):
- _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
+ _ErrorIf(True, constants.CV_ENODELVM, node,
+ "rpc call to node failed (lvlist)")
else:
nimg.volumes = lvdata
nimg.lvm_fail = False
"""
idata = nresult.get(constants.NV_INSTANCELIST, None)
test = not isinstance(idata, list)
- self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
- " (instancelist): %s", utils.SafeEncode(str(idata)))
+ self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
+ "rpc call to node failed (instancelist): %s",
+ utils.SafeEncode(str(idata)))
if test:
nimg.hyp_fail = True
else:
# try to read free memory (from the hypervisor)
hv_info = nresult.get(constants.NV_HVINFO, None)
test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
- _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
+ _ErrorIf(test, constants.CV_ENODEHV, node,
+ "rpc call to node failed (hvinfo)")
if not test:
try:
nimg.mfree = int(hv_info["memory_free"])
except (ValueError, TypeError):
- _ErrorIf(True, self.ENODERPC, node,
+ _ErrorIf(True, constants.CV_ENODERPC, node,
"node returned invalid nodeinfo, check hypervisor")
# FIXME: devise a free space model for file based instances as well
if vg_name is not None:
test = (constants.NV_VGLIST not in nresult or
vg_name not in nresult[constants.NV_VGLIST])
- _ErrorIf(test, self.ENODELVM, node,
+ _ErrorIf(test, constants.CV_ENODELVM, node,
"node didn't return data for the volume group '%s'"
" - it is either missing or broken", vg_name)
if not test:
try:
nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
except (ValueError, TypeError):
- _ErrorIf(True, self.ENODERPC, node,
+ _ErrorIf(True, constants.CV_ENODERPC, node,
"node returned invalid LVM info, check LVM status")
def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
data = len(disks) * [(False, "node offline")]
else:
msg = nres.fail_msg
- _ErrorIf(msg, self.ENODERPC, nname,
+ _ErrorIf(msg, constants.CV_ENODERPC, nname,
"while getting disk information: %s", msg)
if msg:
# No data from this node
feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
+ user_scripts = []
+ if self.cfg.GetUseExternalMipScript():
+ user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
+
node_verify_param = {
constants.NV_FILELIST:
utils.UniqueSequence(filename
constants.NV_MASTERIP: (master_node, master_ip),
constants.NV_OSLIST: None,
constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
+ constants.NV_USERSCRIPTS: user_scripts,
}
if vg_name is not None:
feedback_fn("* Verifying node %s (%s)" % (node, ntype))
msg = all_nvinfo[node].fail_msg
- _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
+ _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
+ msg)
if msg:
nimg.rpc_fail = True
continue
nimg.call_ok = self._VerifyNode(node_i, nresult)
self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
self._VerifyNodeNetwork(node_i, nresult)
+ self._VerifyNodeUserScripts(node_i, nresult)
self._VerifyOob(node_i, nresult)
if nimg.vm_capable:
for inst in non_primary_inst:
test = inst in self.all_inst_info
- _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
+ _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
"instance should not run on node %s", node_i.name)
- _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
+ _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
"node is running unknown instance %s", inst)
for node, result in extra_lv_nvinfo.items():
pnode = inst_config.primary_node
pnode_img = node_image[pnode]
_ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
- self.ENODERPC, pnode, "instance %s, connection to"
+ constants.CV_ENODERPC, pnode, "instance %s, connection to"
" primary node failed", instance)
_ErrorIf(inst_config.admin_up and pnode_img.offline,
- self.EINSTANCEBADNODE, instance,
+ constants.CV_EINSTANCEBADNODE, instance,
"instance is marked as running and lives on offline node %s",
inst_config.primary_node)
if not inst_config.secondary_nodes:
i_non_redundant.append(instance)
- _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
+ _ErrorIf(len(inst_config.secondary_nodes) > 1,
+ constants.CV_EINSTANCELAYOUT,
instance, "instance has multiple secondary nodes: %s",
utils.CommaJoin(inst_config.secondary_nodes),
code=self.ETYPE_WARNING)
key=lambda (_, nodes): pnode in nodes,
reverse=True)]
- self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
+ self._ErrorIf(len(instance_groups) > 1,
+ constants.CV_EINSTANCESPLITGROUPS,
instance, "instance has primary and secondary nodes in"
" different groups: %s", utils.CommaJoin(pretty_list),
code=self.ETYPE_WARNING)
for snode in inst_config.secondary_nodes:
s_img = node_image[snode]
- _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
- "instance %s, connection to secondary node failed", instance)
+ _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
+ snode, "instance %s, connection to secondary node failed",
+ instance)
if s_img.offline:
inst_nodes_offline.append(snode)
# warn that the instance lives on offline nodes
- _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
+ _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
"instance has offline secondary node(s) %s",
utils.CommaJoin(inst_nodes_offline))
# ... or ghost/non-vm_capable nodes
for node in inst_config.all_nodes:
- _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
- "instance lives on ghost node %s", node)
- _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
+ _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
+ instance, "instance lives on ghost node %s", node)
+ _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
instance, "instance lives on non-vm_capable node %s", node)
feedback_fn("* Verifying orphan volumes")
res = hooks_results[node_name]
msg = res.fail_msg
test = msg and not res.offline
- self._ErrorIf(test, self.ENODEHOOKS, node_name,
+ self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
"Communication failure in hooks execution: %s", msg)
if res.offline or msg:
# No need to investigate payload if node is offline or gave
continue
for script, hkr, output in res.payload:
test = hkr == constants.HKR_FAIL
- self._ErrorIf(test, self.ENODEHOOKS, node_name,
+ self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
"Script %s failed, output:", script)
if test:
output = self._HOOKS_INDENT_RE.sub(" ", output)
# any leftover items in nv_dict are missing LVs, let's arrange the data
# better
for key, inst in nv_dict.iteritems():
- res_missing.setdefault(inst, []).append(key)
+ res_missing.setdefault(inst, []).append(list(key))
return (res_nodes, list(res_instances), res_missing)
if self.op.instances:
self.wanted_names = _GetWantedInstances(self, self.op.instances)
self.needed_locks = {
- locking.LEVEL_NODE: [],
+ locking.LEVEL_NODE_RES: [],
locking.LEVEL_INSTANCE: self.wanted_names,
}
- self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+ self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
else:
self.wanted_names = None
self.needed_locks = {
- locking.LEVEL_NODE: locking.ALL_SET,
+ locking.LEVEL_NODE_RES: locking.ALL_SET,
locking.LEVEL_INSTANCE: locking.ALL_SET,
}
self.share_locks = _ShareAll()
def DeclareLocks(self, level):
- if level == locking.LEVEL_NODE and self.wanted_names is not None:
- self._LockInstancesNodes(primary_only=True)
+ if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
+ self._LockInstancesNodes(primary_only=True, level=level)
def CheckPrereq(self):
"""Check prerequisites.
for idx, disk in enumerate(instance.disks):
per_node_disks[pnode].append((instance, idx, disk))
+ assert not (frozenset(per_node_disks.keys()) -
+ self.owned_locks(locking.LEVEL_NODE_RES)), \
+ "Not owning correct locks"
+ assert not self.owned_locks(locking.LEVEL_NODE)
+
changed = []
for node, dskl in per_node_disks.items():
newl = [v[2].Copy() for v in dskl]
"""
clustername = self.op.name
- ip = self.ip
+ new_ip = self.ip
# shutdown the master IP
- master = self.cfg.GetMasterNode()
- result = self.rpc.call_node_deactivate_master_ip(master)
+ master_params = self.cfg.GetMasterNetworkParameters()
+ ems = self.cfg.GetUseExternalMipScript()
+ result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+ master_params, ems)
result.Raise("Could not disable the master role")
try:
cluster = self.cfg.GetClusterInfo()
cluster.cluster_name = clustername
- cluster.master_ip = ip
+ cluster.master_ip = new_ip
self.cfg.Update(cluster, feedback_fn)
# update the known hosts file
ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
node_list = self.cfg.GetOnlineNodeList()
try:
- node_list.remove(master)
+ node_list.remove(master_params.name)
except ValueError:
pass
_UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
finally:
- result = self.rpc.call_node_activate_master_ip(master)
+ master_params.ip = new_ip
+ result = self.rpc.call_node_activate_master_ip(master_params.name,
+ master_params, ems)
msg = result.fail_msg
if msg:
self.LogWarning("Could not re-enable the master role on"
if self.op.reserved_lvs is not None:
self.cluster.reserved_lvs = self.op.reserved_lvs
+ if self.op.use_external_mip_script is not None:
+ self.cluster.use_external_mip_script = self.op.use_external_mip_script
+
def helper_os(aname, mods, desc):
desc += " OS list"
lst = getattr(self.cluster, aname)
helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
if self.op.master_netdev:
- master = self.cfg.GetMasterNode()
+ master_params = self.cfg.GetMasterNetworkParameters()
+ ems = self.cfg.GetUseExternalMipScript()
feedback_fn("Shutting down master ip on the current netdev (%s)" %
self.cluster.master_netdev)
- result = self.rpc.call_node_deactivate_master_ip(master)
+ result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+ master_params, ems)
result.Raise("Could not disable the master ip")
feedback_fn("Changing master_netdev from %s to %s" %
- (self.cluster.master_netdev, self.op.master_netdev))
+ (master_params.netdev, self.op.master_netdev))
self.cluster.master_netdev = self.op.master_netdev
if self.op.master_netmask:
- master = self.cfg.GetMasterNode()
+ master_params = self.cfg.GetMasterNetworkParameters()
feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
- result = self.rpc.call_node_change_master_netmask(master,
- self.op.master_netmask)
+ result = self.rpc.call_node_change_master_netmask(master_params.name,
+ master_params.netmask,
+ self.op.master_netmask,
+ master_params.ip,
+ master_params.netdev)
if result.fail_msg:
msg = "Could not change the master IP netmask: %s" % result.fail_msg
- self.LogWarning(msg)
feedback_fn(msg)
- else:
- self.cluster.master_netmask = self.op.master_netmask
+
+ self.cluster.master_netmask = self.op.master_netmask
self.cfg.Update(self.cluster, feedback_fn)
if self.op.master_netdev:
+ master_params = self.cfg.GetMasterNetworkParameters()
feedback_fn("Starting the master ip on the new master netdev (%s)" %
self.op.master_netdev)
- result = self.rpc.call_node_activate_master_ip(master)
+ ems = self.cfg.GetUseExternalMipScript()
+ result = self.rpc.call_node_activate_master_ip(master_params.name,
+ master_params, ems)
if result.fail_msg:
self.LogWarning("Could not re-enable the master ip on"
" the master, please restart manually: %s",
constants.SSH_KNOWN_HOSTS_FILE,
constants.CONFD_HMAC_KEY,
constants.CLUSTER_DOMAIN_SECRET_FILE,
+ constants.SPICE_CERT_FILE,
+ constants.SPICE_CACERT_FILE,
+ constants.RAPI_USERS_FILE,
])
if not redist:
if cluster.modify_etc_hosts:
files_all.add(constants.ETC_HOSTS)
- # Files which must either exist on all nodes or on none
- files_all_opt = set([
+ # Files which are optional, these must:
+ # - be present in one other category as well
+ # - either exist or not exist on all nodes of that category (mc, vm all)
+ files_opt = set([
constants.RAPI_USERS_FILE,
])
# Files which should only be on master candidates
files_mc = set()
+
if not redist:
files_mc.add(constants.CLUSTER_CONF_FILE)
+ # FIXME: this should also be replicated but Ganeti doesn't support files_mc
+ # replication
+ files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
+
# Files which should only be on VM-capable nodes
files_vm = set(filename
for hv_name in cluster.enabled_hypervisors
- for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
+ for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
+
+ files_opt |= set(filename
+ for hv_name in cluster.enabled_hypervisors
+ for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
- # Filenames must be unique
- assert (len(files_all | files_all_opt | files_mc | files_vm) ==
- sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
+ # Filenames in each category must be unique
+ all_files_set = files_all | files_mc | files_vm
+ assert (len(all_files_set) ==
+ sum(map(len, [files_all, files_mc, files_vm]))), \
"Found file listed in more than one file list"
- return (files_all, files_all_opt, files_mc, files_vm)
+ # Optional files must be present in one other category
+ assert all_files_set.issuperset(files_opt), \
+ "Optional file not in a different required list"
+
+ return (files_all, files_opt, files_mc, files_vm)
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
nodelist.remove(master_info.name)
# Gather file lists
- (files_all, files_all_opt, files_mc, files_vm) = \
+ (files_all, _, files_mc, files_vm) = \
_ComputeAncillaryFiles(cluster, True)
# Never re-distribute configuration file from here
filemap = [
(online_nodes, files_all),
- (online_nodes, files_all_opt),
(vm_nodes, files_vm),
]
"""Activate the master IP.
"""
- master = self.cfg.GetMasterNode()
- self.rpc.call_node_activate_master_ip(master)
+ master_params = self.cfg.GetMasterNetworkParameters()
+ ems = self.cfg.GetUseExternalMipScript()
+ self.rpc.call_node_activate_master_ip(master_params.name,
+ master_params, ems)
class LUClusterDeactivateMasterIp(NoHooksLU):
"""Deactivate the master IP.
"""
- master = self.cfg.GetMasterNode()
- self.rpc.call_node_deactivate_master_ip(master)
+ master_params = self.cfg.GetMasterNetworkParameters()
+ ems = self.cfg.GetUseExternalMipScript()
+ self.rpc.call_node_deactivate_master_ip(master_params.name, master_params,
+ ems)
def _WaitForSync(lu, instance, disks=None, oneshot=False):
modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
+ assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
+ "Not owning BGL"
+
# Promote nodes to master candidate as needed
_AdjustCandidatePool(self, exceptions=[node.name])
self.context.RemoveNode(node.name)
def ExpandNames(self):
self.nq.ExpandNames(self)
+ def DeclareLocks(self, level):
+ self.nq.DeclareLocks(self, level)
+
def Exec(self, feedback_fn):
return self.nq.OldStyleQuery(self)
selected=self.op.output_fields)
def ExpandNames(self):
+ self.share_locks = _ShareAll()
self.needed_locks = {}
- self.share_locks[locking.LEVEL_NODE] = 1
+
if not self.op.nodes:
self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
else:
selected=self.op.output_fields)
def ExpandNames(self):
+ self.share_locks = _ShareAll()
self.needed_locks = {}
- self.share_locks[locking.LEVEL_NODE] = 1
if self.op.nodes:
self.needed_locks[locking.LEVEL_NODE] = \
def CheckArguments(self):
qcls = _GetQueryImplementation(self.op.what)
- self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
+ self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
def ExpandNames(self):
self.impl.ExpandNames(self)
new_node = self.new_node
node = new_node.name
+ assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
+ "Not owning BGL"
+
# We adding a new node so we assume it's powered
new_node.powered = True
self.lock_all = self.op.auto_promote and self.might_demote
self.lock_instances = self.op.secondary_ip is not None
+ def _InstanceFilter(self, instance):
+ """Filter for getting affected instances.
+
+ """
+ return (instance.disk_template in constants.DTS_INT_MIRROR and
+ self.op.node_name in instance.all_nodes)
+
def ExpandNames(self):
if self.lock_all:
self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
if self.lock_instances:
- self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
-
- def DeclareLocks(self, level):
- # If we have locked all instances, before waiting to lock nodes, release
- # all the ones living on nodes unrelated to the current operation.
- if level == locking.LEVEL_NODE and self.lock_instances:
- self.affected_instances = []
- if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
- instances_keep = []
-
- # Build list of instances to release
- locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
- for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
- if (instance.disk_template in constants.DTS_INT_MIRROR and
- self.op.node_name in instance.all_nodes):
- instances_keep.append(instance_name)
- self.affected_instances.append(instance)
-
- _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
-
- assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
- set(instances_keep))
+ self.needed_locks[locking.LEVEL_INSTANCE] = \
+ frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
def BuildHooksEnv(self):
"""Build hooks env.
"""
node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
+ if self.lock_instances:
+ affected_instances = \
+ self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
+
+ # Verify instance locks
+ owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
+ wanted_instances = frozenset(affected_instances.keys())
+ if wanted_instances - owned_instances:
+ raise errors.OpPrereqError("Instances affected by changing node %s's"
+ " secondary IP address have changed since"
+ " locks were acquired, wanted '%s', have"
+ " '%s'; retry the operation" %
+ (self.op.node_name,
+ utils.CommaJoin(wanted_instances),
+ utils.CommaJoin(owned_instances)),
+ errors.ECODE_STATE)
+ else:
+ affected_instances = None
+
if (self.op.master_candidate is not None or
self.op.drained is not None or
self.op.offline is not None):
if old_role == self._ROLE_OFFLINE and new_role != old_role:
# Trying to transition out of offline status
- result = self.rpc.call_version([node.name])[node.name]
+ # TODO: Use standard RPC runner, but make sure it works when the node is
+ # still marked offline
+ result = rpc.BootstrapRunner().call_version([node.name])[node.name]
if result.fail_msg:
raise errors.OpPrereqError("Node %s is being de-offlined but fails"
" to report its version: %s" %
raise errors.OpPrereqError("Cannot change the secondary ip on a single"
" homed cluster", errors.ECODE_INVAL)
+ assert not (frozenset(affected_instances) -
+ self.owned_locks(locking.LEVEL_INSTANCE))
+
if node.offline:
- if self.affected_instances:
- raise errors.OpPrereqError("Cannot change secondary ip: offline"
- " node has instances (%s) configured"
- " to use it" % self.affected_instances)
+ if affected_instances:
+ raise errors.OpPrereqError("Cannot change secondary IP address:"
+ " offline node has instances (%s)"
+ " configured to use it" %
+ utils.CommaJoin(affected_instances.keys()))
else:
# On online nodes, check that no instances are running, and that
# the node has the new ip and we can reach it.
- for instance in self.affected_instances:
+ for instance in affected_instances.values():
_CheckInstanceDown(self, instance, "cannot change secondary ip")
_CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
"candidate_pool_size": cluster.candidate_pool_size,
"master_netdev": cluster.master_netdev,
"master_netmask": cluster.master_netmask,
+ "use_external_mip_script": cluster.use_external_mip_script,
"volume_group_name": cluster.volume_group_name,
"drbd_usermode_helper": cluster.drbd_usermode_helper,
"file_storage_dir": cluster.file_storage_dir,
_StartInstanceDisks(self, instance, force)
- result = self.rpc.call_instance_start(node_current, instance,
- self.op.hvparams, self.op.beparams,
- self.op.startup_paused)
+ result = \
+ self.rpc.call_instance_start(node_current,
+ (instance, self.op.hvparams,
+ self.op.beparams),
+ self.op.startup_paused)
msg = result.fail_msg
if msg:
_ShutdownInstanceDisks(self, instance)
self.LogInfo("Instance %s was already stopped, starting now",
instance.name)
_StartInstanceDisks(self, instance, ignore_secondaries)
- result = self.rpc.call_instance_start(node_current, instance,
- None, None, False)
+ result = self.rpc.call_instance_start(node_current,
+ (instance, None, None), False)
msg = result.fail_msg
if msg:
_ShutdownInstanceDisks(self, instance)
try:
feedback_fn("Running the instance OS create scripts...")
# FIXME: pass debug option from opcode to backend
- result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
- self.op.debug_level,
- osparams=self.os_inst)
+ result = self.rpc.call_instance_os_add(inst.primary_node,
+ (inst, self.os_inst), True,
+ self.op.debug_level)
result.Raise("Could not install OS for instance %s on node %s" %
(inst.name, inst.primary_node))
finally:
# otherwise we need to lock all nodes for disk re-creation
primary_only = bool(self.op.nodes)
self._LockInstancesNodes(primary_only=primary_only)
+ elif level == locking.LEVEL_NODE_RES:
+ # Copy node locks
+ self.needed_locks[locking.LEVEL_NODE_RES] = \
+ self.needed_locks[locking.LEVEL_NODE][:]
def BuildHooksEnv(self):
"""Build hooks env.
self.op.instance_name, errors.ECODE_INVAL)
# if we replace nodes *and* the old primary is offline, we don't
# check
- assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
+ assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
+ assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
if not (self.op.nodes and old_pnode.offline):
_CheckInstanceDown(self, instance, "cannot recreate disks")
"""
instance = self.instance
+ assert (self.owned_locks(locking.LEVEL_NODE) ==
+ self.owned_locks(locking.LEVEL_NODE_RES))
+
to_skip = []
mods = [] # keeps track of needed logical_id changes
def ExpandNames(self):
self._ExpandAndLockInstance()
self.needed_locks[locking.LEVEL_NODE] = []
+ self.needed_locks[locking.LEVEL_NODE_RES] = []
self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
def DeclareLocks(self, level):
if level == locking.LEVEL_NODE:
self._LockInstancesNodes()
+ elif level == locking.LEVEL_NODE_RES:
+ # Copy node locks
+ self.needed_locks[locking.LEVEL_NODE_RES] = \
+ self.needed_locks[locking.LEVEL_NODE][:]
def BuildHooksEnv(self):
"""Build hooks env.
" node %s: %s" %
(instance.name, instance.primary_node, msg))
+ assert (self.owned_locks(locking.LEVEL_NODE) ==
+ self.owned_locks(locking.LEVEL_NODE_RES))
+ assert not (set(instance.all_nodes) -
+ self.owned_locks(locking.LEVEL_NODE)), \
+ "Not owning correct locks"
+
_RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
target_node = _ExpandNodeName(self.cfg, self.op.target_node)
self.op.target_node = target_node
self.needed_locks[locking.LEVEL_NODE] = [target_node]
+ self.needed_locks[locking.LEVEL_NODE_RES] = []
self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
def DeclareLocks(self, level):
if level == locking.LEVEL_NODE:
self._LockInstancesNodes(primary_only=True)
+ elif level == locking.LEVEL_NODE_RES:
+ # Copy node locks
+ self.needed_locks[locking.LEVEL_NODE_RES] = \
+ self.needed_locks[locking.LEVEL_NODE][:]
def BuildHooksEnv(self):
"""Build hooks env.
self.LogInfo("Shutting down instance %s on source node %s",
instance.name, source_node)
+ assert (self.owned_locks(locking.LEVEL_NODE) ==
+ self.owned_locks(locking.LEVEL_NODE_RES))
+
result = self.rpc.call_instance_shutdown(source_node, instance,
self.op.shutdown_timeout)
msg = result.fail_msg
_ShutdownInstanceDisks(self, instance)
raise errors.OpExecError("Can't activate the instance's disks")
- result = self.rpc.call_instance_start(target_node, instance,
- None, None, False)
+ result = self.rpc.call_instance_start(target_node,
+ (instance, None, None), False)
msg = result.fail_msg
if msg:
_ShutdownInstanceDisks(self, instance)
self.feedback_fn("* starting the instance on the target node %s" %
target_node)
- result = self.rpc.call_instance_start(target_node, instance, None, None,
+ result = self.rpc.call_instance_start(target_node, (instance, None, None),
False)
msg = result.fail_msg
if msg:
shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
logical_id=(vgnames[0], names[0]))
- dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
+ dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
logical_id=(vgnames[1], names[1]))
drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
logical_id=(primary, secondary, port,
constants.DT_DISKLESS: {},
constants.DT_PLAIN: _compute(disks, 0),
# 128 MB are added for drbd metadata for each disk
- constants.DT_DRBD8: _compute(disks, 128),
+ constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
constants.DT_FILE: {},
constants.DT_SHARED_FILE: {},
}
constants.DT_DISKLESS: None,
constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
# 128 MB are added for drbd metadata for each disk
- constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
+ constants.DT_DRBD8:
+ sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
constants.DT_FILE: None,
constants.DT_SHARED_FILE: 0,
constants.DT_BLOCK: 0,
"""
nodenames = _FilterVmNodes(lu, nodenames)
- hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
- hvname,
- hvparams)
+
+ cluster = lu.cfg.GetClusterInfo()
+ hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
+
+ hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
for node in nodenames:
info = hvinfo[node]
if info.offline:
"""
nodenames = _FilterVmNodes(lu, nodenames)
- result = lu.rpc.call_os_validate(required, nodenames, osname,
+ result = lu.rpc.call_os_validate(nodenames, required, osname,
[constants.OS_VALIDATE_PARAMETERS],
osparams)
for node, nres in result.items():
self.add_locks[locking.LEVEL_INSTANCE] = instance_name
if self.op.iallocator:
+ # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
+ # specifying a group on instance creation and then selecting nodes from
+ # that group
self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+ self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
else:
self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
nodelist = [self.op.pnode]
self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
nodelist.append(self.op.snode)
self.needed_locks[locking.LEVEL_NODE] = nodelist
+ # Lock resources of instance's primary and secondary nodes (copy to
+ # prevent accidential modification)
+ self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
# in case of import lock the source node too
if self.op.mode == constants.INSTANCE_IMPORT:
instance = self.op.instance_name
pnode_name = self.pnode.name
+ assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
+ self.owned_locks(locking.LEVEL_NODE)), \
+ "Node locks differ from node resource locks"
+
ht_kind = self.op.hypervisor
if ht_kind in constants.HTS_REQ_PORT:
network_port = self.cfg.AllocatePort()
raise errors.OpExecError("There are some degraded disks for"
" this instance")
+ # Release all node resource locks
+ _ReleaseLocks(self, locking.LEVEL_NODE_RES)
+
if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
if self.op.mode == constants.INSTANCE_CREATE:
if not self.op.no_install:
feedback_fn("* running the instance OS create scripts...")
# FIXME: pass debug option from opcode to backend
os_add_result = \
- self.rpc.call_instance_os_add(pnode_name, iobj, False,
+ self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
self.op.debug_level)
if pause_sync:
feedback_fn("* resuming disk sync")
raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
% self.op.mode)
+ assert not self.owned_locks(locking.LEVEL_NODE_RES)
+
if self.op.start:
iobj.admin_up = True
self.cfg.Update(iobj, feedback_fn)
logging.info("Starting instance %s on node %s", instance, pnode_name)
feedback_fn("* starting instance...")
- result = self.rpc.call_instance_start(pnode_name, iobj,
- None, None, False)
+ result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
+ False)
result.Raise("Could not start instance")
return list(iobj.all_nodes)
REQ_BGL = False
def ExpandNames(self):
+ self.share_locks = _ShareAll()
self._ExpandAndLockInstance()
def CheckPrereq(self):
lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
logical_id=(vg_data, names[0]))
vg_meta = dev.children[1].logical_id[0]
- lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
+ lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
logical_id=(vg_meta, names[1]))
new_lvs = [lv_data, lv_meta]
def ExpandNames(self):
self._ExpandAndLockInstance()
self.needed_locks[locking.LEVEL_NODE] = []
- self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
+ self.needed_locks[locking.LEVEL_NODE_RES] = []
+ self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
def DeclareLocks(self, level):
if level == locking.LEVEL_NODE:
self._LockInstancesNodes()
+ elif level == locking.LEVEL_NODE_RES:
+ # Copy node locks
+ self.needed_locks[locking.LEVEL_NODE_RES] = \
+ self.needed_locks[locking.LEVEL_NODE][:]
def BuildHooksEnv(self):
"""Build hooks env.
instance = self.instance
disk = self.disk
+ assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
+ assert (self.owned_locks(locking.LEVEL_NODE) ==
+ self.owned_locks(locking.LEVEL_NODE_RES))
+
disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
if not disks_ok:
raise errors.OpExecError("Cannot activate block device to grow")
+ feedback_fn("Growing disk %s of instance '%s' by %s" %
+ (self.op.disk, instance.name,
+ utils.FormatUnit(self.op.amount, "h")))
+
# First run all grow ops in dry-run mode
for node in instance.all_nodes:
self.cfg.SetDiskID(disk, node)
disk.RecordGrow(self.op.amount)
self.cfg.Update(instance, feedback_fn)
+
+ # Changes have been recorded, release node lock
+ _ReleaseLocks(self, locking.LEVEL_NODE)
+
+ # Downgrade lock while waiting for sync
+ self.glm.downgrade(locking.LEVEL_INSTANCE)
+
if self.op.wait_for_sync:
disk_abort = not _WaitForSync(self, instance, disks=[disk])
if disk_abort:
" not supposed to be running because no wait for"
" sync mode was requested")
+ assert self.owned_locks(locking.LEVEL_NODE_RES)
+ assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
+
class LUInstanceQueryData(NoHooksLU):
"""Query runtime instance data.
not self.op.remove_instance):
assert not activate_disks
feedback_fn("Starting instance %s" % instance.name)
- result = self.rpc.call_instance_start(src_node, instance,
- None, None, False)
+ result = self.rpc.call_instance_start(src_node,
+ (instance, None, None), False)
msg = result.fail_msg
if msg:
feedback_fn("Failed to start instance: %s" % msg)
# pylint: disable=R0902
# lots of instance attributes
- def __init__(self, cfg, rpc, mode, **kwargs):
+ def __init__(self, cfg, rpc_runner, mode, **kwargs):
self.cfg = cfg
- self.rpc = rpc
+ self.rpc = rpc_runner
# init buffer variables
self.in_text = self.out_text = self.in_data = self.out_data = None
# init all input fields so that pylint is happy