30 |
30 |
|
31 |
31 |
import os
|
32 |
32 |
import time
|
33 |
|
import re
|
34 |
33 |
import logging
|
35 |
34 |
import copy
|
36 |
35 |
import OpenSSL
|
37 |
36 |
import itertools
|
38 |
37 |
import operator
|
39 |
38 |
|
40 |
|
from ganeti import ssh
|
41 |
39 |
from ganeti import utils
|
42 |
40 |
from ganeti import errors
|
43 |
41 |
from ganeti import hypervisor
|
44 |
42 |
from ganeti import locking
|
45 |
43 |
from ganeti import constants
|
46 |
44 |
from ganeti import objects
|
47 |
|
from ganeti import ssconf
|
48 |
|
from ganeti import uidpool
|
49 |
45 |
from ganeti import compat
|
50 |
46 |
from ganeti import masterd
|
51 |
47 |
from ganeti import netutils
|
... | ... | |
54 |
50 |
from ganeti import opcodes
|
55 |
51 |
from ganeti import ht
|
56 |
52 |
from ganeti import rpc
|
57 |
|
from ganeti import runtime
|
58 |
53 |
from ganeti import pathutils
|
59 |
|
from ganeti import vcluster
|
60 |
54 |
from ganeti import network
|
61 |
55 |
from ganeti.masterd import iallocator
|
62 |
56 |
|
... | ... | |
64 |
58 |
Tasklet, _QueryBase
|
65 |
59 |
from ganeti.cmdlib.common import _ExpandInstanceName, _ExpandItemName, \
|
66 |
60 |
_ExpandNodeName, _ShareAll, _CheckNodeGroupInstances, _GetWantedNodes, \
|
67 |
|
_GetWantedInstances
|
|
61 |
_GetWantedInstances, _RunPostHook, _RedistributeAncillaryFiles, \
|
|
62 |
_MergeAndVerifyHvState, _MergeAndVerifyDiskState, _GetUpdatedIPolicy, \
|
|
63 |
_ComputeNewInstanceViolations, _GetUpdatedParams, _CheckOSParams, \
|
|
64 |
_CheckHVParams, _AdjustCandidatePool, _CheckNodePVs, \
|
|
65 |
_ComputeIPolicyInstanceViolation, _AnnotateDiskParams, _SupportsOob, \
|
|
66 |
_ComputeIPolicySpecViolation
|
|
67 |
|
|
68 |
from ganeti.cmdlib.cluster import LUClusterActivateMasterIp, \
|
|
69 |
LUClusterDeactivateMasterIp, LUClusterConfigQuery, LUClusterDestroy, \
|
|
70 |
LUClusterPostInit, _ClusterQuery, LUClusterQuery, LUClusterRedistConf, \
|
|
71 |
LUClusterRename, LUClusterRepairDiskSizes, LUClusterSetParams, \
|
|
72 |
LUClusterVerify, LUClusterVerifyConfig, LUClusterVerifyGroup, \
|
|
73 |
LUClusterVerifyDisks
|
68 |
74 |
from ganeti.cmdlib.tags import LUTagsGet, LUTagsSearch, LUTagsSet, LUTagsDel
|
69 |
75 |
from ganeti.cmdlib.network import LUNetworkAdd, LUNetworkRemove, \
|
70 |
76 |
LUNetworkSetParams, _NetworkQuery, LUNetworkQuery, LUNetworkConnect, \
|
... | ... | |
85 |
91 |
]))
|
86 |
92 |
|
87 |
93 |
|
88 |
|
def _AnnotateDiskParams(instance, devs, cfg):
|
89 |
|
"""Little helper wrapper to the rpc annotation method.
|
90 |
|
|
91 |
|
@param instance: The instance object
|
92 |
|
@type devs: List of L{objects.Disk}
|
93 |
|
@param devs: The root devices (not any of its children!)
|
94 |
|
@param cfg: The config object
|
95 |
|
@returns: The annotated disk copies
|
96 |
|
@see: L{rpc.AnnotateDiskParams}
|
97 |
|
|
98 |
|
"""
|
99 |
|
return rpc.AnnotateDiskParams(instance.disk_template, devs,
|
100 |
|
cfg.GetInstanceDiskParams(instance))
|
101 |
|
|
102 |
|
|
103 |
94 |
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
|
104 |
95 |
cur_group_uuid):
|
105 |
96 |
"""Checks if node groups for locked instances are still correct.
|
... | ... | |
155 |
146 |
return inst_groups
|
156 |
147 |
|
157 |
148 |
|
158 |
|
def _SupportsOob(cfg, node):
|
159 |
|
"""Tells if node supports OOB.
|
160 |
|
|
161 |
|
@type cfg: L{config.ConfigWriter}
|
162 |
|
@param cfg: The cluster configuration
|
163 |
|
@type node: L{objects.Node}
|
164 |
|
@param node: The node
|
165 |
|
@return: The OOB script if supported or an empty string otherwise
|
166 |
|
|
167 |
|
"""
|
168 |
|
return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
|
169 |
|
|
170 |
|
|
171 |
149 |
def _IsExclusiveStorageEnabledNode(cfg, node):
|
172 |
150 |
"""Whether exclusive_storage is in effect for the given node.
|
173 |
151 |
|
... | ... | |
213 |
191 |
return names[:]
|
214 |
192 |
|
215 |
193 |
|
216 |
|
def _GetUpdatedParams(old_params, update_dict,
|
217 |
|
use_default=True, use_none=False):
|
218 |
|
"""Return the new version of a parameter dictionary.
|
219 |
|
|
220 |
|
@type old_params: dict
|
221 |
|
@param old_params: old parameters
|
222 |
|
@type update_dict: dict
|
223 |
|
@param update_dict: dict containing new parameter values, or
|
224 |
|
constants.VALUE_DEFAULT to reset the parameter to its default
|
225 |
|
value
|
226 |
|
@type use_default: boolean
|
227 |
|
@param use_default: whether to recognise L{constants.VALUE_DEFAULT}
|
228 |
|
values as 'to be deleted' values
|
229 |
|
@type use_none: boolean
|
230 |
|
@param use_none: whether to recognise C{None} values as 'to be
|
231 |
|
deleted' values
|
232 |
|
@rtype: dict
|
233 |
|
@return: the new parameter dictionary
|
234 |
|
|
235 |
|
"""
|
236 |
|
params_copy = copy.deepcopy(old_params)
|
237 |
|
for key, val in update_dict.iteritems():
|
238 |
|
if ((use_default and val == constants.VALUE_DEFAULT) or
|
239 |
|
(use_none and val is None)):
|
240 |
|
try:
|
241 |
|
del params_copy[key]
|
242 |
|
except KeyError:
|
243 |
|
pass
|
244 |
|
else:
|
245 |
|
params_copy[key] = val
|
246 |
|
return params_copy
|
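# Illustrative sketch (not part of the original module; parameter names are
# invented): with use_default=True a value of constants.VALUE_DEFAULT removes
# the key, so the built-in default applies again, while other values simply
# overwrite the old ones:
#   old = {"kernel_path": "/vmlinuz", "acpi": True}
#   new = _GetUpdatedParams(old, {"acpi": constants.VALUE_DEFAULT,
#                                 "root_path": "/dev/vda1"})
#   # new == {"kernel_path": "/vmlinuz", "root_path": "/dev/vda1"}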
247 |
|
|
248 |
|
|
249 |
|
def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
|
250 |
|
"""Return the new version of an instance policy.
|
251 |
|
|
252 |
|
@param group_policy: whether this policy applies to a group and thus
|
253 |
|
we should support removal of policy entries
|
254 |
|
|
255 |
|
"""
|
256 |
|
ipolicy = copy.deepcopy(old_ipolicy)
|
257 |
|
for key, value in new_ipolicy.items():
|
258 |
|
if key not in constants.IPOLICY_ALL_KEYS:
|
259 |
|
raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
|
260 |
|
errors.ECODE_INVAL)
|
261 |
|
if (not value or value == [constants.VALUE_DEFAULT] or
|
262 |
|
value == constants.VALUE_DEFAULT):
|
263 |
|
if group_policy:
|
264 |
|
if key in ipolicy:
|
265 |
|
del ipolicy[key]
|
266 |
|
else:
|
267 |
|
raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
|
268 |
|
" on the cluster'" % key,
|
269 |
|
errors.ECODE_INVAL)
|
270 |
|
else:
|
271 |
|
if key in constants.IPOLICY_PARAMETERS:
|
272 |
|
# FIXME: we assume all such values are float
|
273 |
|
try:
|
274 |
|
ipolicy[key] = float(value)
|
275 |
|
except (TypeError, ValueError), err:
|
276 |
|
raise errors.OpPrereqError("Invalid value for attribute"
|
277 |
|
" '%s': '%s', error: %s" %
|
278 |
|
(key, value, err), errors.ECODE_INVAL)
|
279 |
|
elif key == constants.ISPECS_MINMAX:
|
280 |
|
for minmax in value:
|
281 |
|
for k in minmax.keys():
|
282 |
|
utils.ForceDictType(minmax[k], constants.ISPECS_PARAMETER_TYPES)
|
283 |
|
ipolicy[key] = value
|
284 |
|
elif key == constants.ISPECS_STD:
|
285 |
|
if group_policy:
|
286 |
|
msg = "%s cannot appear in group instance specs" % key
|
287 |
|
raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
|
288 |
|
ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
|
289 |
|
use_none=False, use_default=False)
|
290 |
|
utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
|
291 |
|
else:
|
292 |
|
# FIXME: we assume all others are lists; this should be redone
|
293 |
|
# in a nicer way
|
294 |
|
ipolicy[key] = list(value)
|
295 |
|
try:
|
296 |
|
objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
|
297 |
|
except errors.ConfigurationError, err:
|
298 |
|
raise errors.OpPrereqError("Invalid instance policy: %s" % err,
|
299 |
|
errors.ECODE_INVAL)
|
300 |
|
return ipolicy
|
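# Note on the group_policy flag above: only group-level policies may drop keys
# entirely (an empty or "default" value falls back to the cluster policy);
# at cluster level every key must keep a value, and constants.ISPECS_STD is
# rejected for groups because standard specs exist only cluster-wide.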
301 |
|
|
302 |
|
|
303 |
|
def _UpdateAndVerifySubDict(base, updates, type_check):
|
304 |
|
"""Updates and verifies a dict with sub dicts of the same type.
|
305 |
|
|
306 |
|
@param base: The dict with the old data
|
307 |
|
@param updates: The dict with the new data
|
308 |
|
@param type_check: Dict suitable to ForceDictType to verify correct types
|
309 |
|
@returns: A new dict with updated and verified values
|
310 |
|
|
311 |
|
"""
|
312 |
|
def fn(old, value):
|
313 |
|
new = _GetUpdatedParams(old, value)
|
314 |
|
utils.ForceDictType(new, type_check)
|
315 |
|
return new
|
316 |
|
|
317 |
|
ret = copy.deepcopy(base)
|
318 |
|
ret.update(dict((key, fn(base.get(key, {}), value))
|
319 |
|
for key, value in updates.items()))
|
320 |
|
return ret
|
321 |
|
|
322 |
|
|
323 |
|
def _MergeAndVerifyHvState(op_input, obj_input):
|
324 |
|
"""Combines the hv state from an opcode with the one of the object
|
325 |
|
|
326 |
|
@param op_input: The input dict from the opcode
|
327 |
|
@param obj_input: The input dict from the objects
|
328 |
|
@return: The verified and updated dict
|
329 |
|
|
330 |
|
"""
|
331 |
|
if op_input:
|
332 |
|
invalid_hvs = set(op_input) - constants.HYPER_TYPES
|
333 |
|
if invalid_hvs:
|
334 |
|
raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
|
335 |
|
" %s" % utils.CommaJoin(invalid_hvs),
|
336 |
|
errors.ECODE_INVAL)
|
337 |
|
if obj_input is None:
|
338 |
|
obj_input = {}
|
339 |
|
type_check = constants.HVSTS_PARAMETER_TYPES
|
340 |
|
return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
|
341 |
|
|
342 |
|
return None
|
343 |
|
|
344 |
|
|
345 |
|
def _MergeAndVerifyDiskState(op_input, obj_input):
|
346 |
|
"""Combines the disk state from an opcode with the one of the object
|
347 |
|
|
348 |
|
@param op_input: The input dict from the opcode
|
349 |
|
@param obj_input: The input dict from the objects
|
350 |
|
@return: The verified and updated dict
|
351 |
|
"""
|
352 |
|
if op_input:
|
353 |
|
invalid_dst = set(op_input) - constants.DS_VALID_TYPES
|
354 |
|
if invalid_dst:
|
355 |
|
raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
|
356 |
|
utils.CommaJoin(invalid_dst),
|
357 |
|
errors.ECODE_INVAL)
|
358 |
|
type_check = constants.DSS_PARAMETER_TYPES
|
359 |
|
if obj_input is None:
|
360 |
|
obj_input = {}
|
361 |
|
return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
|
362 |
|
type_check))
|
363 |
|
for key, value in op_input.items())
|
364 |
|
|
365 |
|
return None
|
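# Note: unlike the hv state handled above, the disk state is nested one level
# deeper (storage type -> entity name -> parameters), which is why every
# storage-type entry gets its own _UpdateAndVerifySubDict pass instead of a
# single call on the whole dict.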
366 |
|
|
367 |
|
|
368 |
194 |
def _ReleaseLocks(lu, level, names=None, keep=None):
|
369 |
195 |
"""Releases locks owned by an LU.
|
370 |
196 |
|
... | ... | |
428 |
254 |
for vol in vols)
|
429 |
255 |
|
430 |
256 |
|
431 |
|
def _RunPostHook(lu, node_name):
|
432 |
|
"""Runs the post-hook for an opcode on a single node.
|
433 |
|
|
434 |
|
"""
|
435 |
|
hm = lu.proc.BuildHooksManager(lu)
|
436 |
|
try:
|
437 |
|
hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
|
438 |
|
except Exception, err: # pylint: disable=W0703
|
439 |
|
lu.LogWarning("Errors occurred running hooks on %s: %s",
|
440 |
|
node_name, err)
|
441 |
|
|
442 |
|
|
443 |
257 |
def _CheckOutputFields(static, dynamic, selected):
|
444 |
258 |
"""Checks whether all selected fields are valid.
|
445 |
259 |
|
... | ... | |
574 |
388 |
raise errors.OpExecError(msg)
|
575 |
389 |
|
576 |
390 |
|
577 |
|
def _CheckNodePVs(nresult, exclusive_storage):
|
578 |
|
"""Check node PVs.
|
579 |
|
|
580 |
|
"""
|
581 |
|
pvlist_dict = nresult.get(constants.NV_PVLIST, None)
|
582 |
|
if pvlist_dict is None:
|
583 |
|
return (["Can't get PV list from node"], None)
|
584 |
|
pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
|
585 |
|
errlist = []
|
586 |
|
# check that ':' is not present in PV names, since it's a
|
587 |
|
# special character for lvcreate (denotes the range of PEs to
|
588 |
|
# use on the PV)
|
589 |
|
for pv in pvlist:
|
590 |
|
if ":" in pv.name:
|
591 |
|
errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
|
592 |
|
(pv.name, pv.vg_name))
|
593 |
|
es_pvinfo = None
|
594 |
|
if exclusive_storage:
|
595 |
|
(errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
|
596 |
|
errlist.extend(errmsgs)
|
597 |
|
shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
|
598 |
|
if shared_pvs:
|
599 |
|
for (pvname, lvlist) in shared_pvs:
|
600 |
|
# TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
|
601 |
|
errlist.append("PV %s is shared among unrelated LVs (%s)" %
|
602 |
|
(pvname, utils.CommaJoin(lvlist)))
|
603 |
|
return (errlist, es_pvinfo)
|
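# Note: the tuple returned above is (error messages, PV space information);
# the second element is only filled in when exclusive storage is enabled and
# feeds the cross-node PV size comparison done later in _VerifyGroupLVM.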
604 |
|
|
605 |
|
|
606 |
391 |
def _GetClusterDomainSecret():
|
607 |
392 |
"""Reads the cluster domain secret.
|
608 |
393 |
|
... | ... | |
642 |
427 |
" is down")
|
643 |
428 |
|
644 |
429 |
|
645 |
|
def _ComputeMinMaxSpec(name, qualifier, ispecs, value):
|
646 |
|
"""Computes if value is in the desired range.
|
647 |
|
|
648 |
|
@param name: name of the parameter for which we perform the check
|
649 |
|
@param qualifier: a qualifier used in the error message (e.g. 'disk/1',
|
650 |
|
not just 'disk')
|
651 |
|
@param ispecs: dictionary containing min and max values
|
652 |
|
@param value: actual value that we want to use
|
653 |
|
@return: None or an error string
|
654 |
|
|
655 |
|
"""
|
656 |
|
if value in [None, constants.VALUE_AUTO]:
|
657 |
|
return None
|
658 |
|
max_v = ispecs[constants.ISPECS_MAX].get(name, value)
|
659 |
|
min_v = ispecs[constants.ISPECS_MIN].get(name, value)
|
660 |
|
if value > max_v or min_v > value:
|
661 |
|
if qualifier:
|
662 |
|
fqn = "%s/%s" % (name, qualifier)
|
663 |
|
else:
|
664 |
|
fqn = name
|
665 |
|
return ("%s value %s is not in range [%s, %s]" %
|
666 |
|
(fqn, value, min_v, max_v))
|
667 |
|
return None
|
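# Illustrative sketch (values invented, not part of the original module): the
# ispecs argument carries ISPECS_MIN/ISPECS_MAX sub-dicts; a bound missing for
# the given name defaults to the value itself, so it can never be violated.
#   ispecs = {constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
#             constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096}}
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ispecs, 64)
#   # -> an error string mentioning the range [128, 4096]
#   _ComputeMinMaxSpec(constants.ISPEC_MEM_SIZE, None, ispecs, 512)  # -> None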
668 |
|
|
669 |
|
|
670 |
|
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
|
671 |
|
nic_count, disk_sizes, spindle_use,
|
672 |
|
disk_template,
|
673 |
|
_compute_fn=_ComputeMinMaxSpec):
|
674 |
|
"""Verifies ipolicy against provided specs.
|
675 |
|
|
676 |
|
@type ipolicy: dict
|
677 |
|
@param ipolicy: The ipolicy
|
678 |
|
@type mem_size: int
|
679 |
|
@param mem_size: The memory size
|
680 |
|
@type cpu_count: int
|
681 |
|
@param cpu_count: Used cpu cores
|
682 |
|
@type disk_count: int
|
683 |
|
@param disk_count: Number of disks used
|
684 |
|
@type nic_count: int
|
685 |
|
@param nic_count: Number of nics used
|
686 |
|
@type disk_sizes: list of ints
|
687 |
|
@param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
|
688 |
|
@type spindle_use: int
|
689 |
|
@param spindle_use: The number of spindles this instance uses
|
690 |
|
@type disk_template: string
|
691 |
|
@param disk_template: The disk template of the instance
|
692 |
|
@param _compute_fn: The compute function (unittest only)
|
693 |
|
@return: A list of violations, or an empty list if no violations are found
|
694 |
|
|
695 |
|
"""
|
696 |
|
assert disk_count == len(disk_sizes)
|
697 |
|
|
698 |
|
test_settings = [
|
699 |
|
(constants.ISPEC_MEM_SIZE, "", mem_size),
|
700 |
|
(constants.ISPEC_CPU_COUNT, "", cpu_count),
|
701 |
|
(constants.ISPEC_NIC_COUNT, "", nic_count),
|
702 |
|
(constants.ISPEC_SPINDLE_USE, "", spindle_use),
|
703 |
|
] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
|
704 |
|
for idx, d in enumerate(disk_sizes)]
|
705 |
|
if disk_template != constants.DT_DISKLESS:
|
706 |
|
# This check doesn't make sense for diskless instances
|
707 |
|
test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count))
|
708 |
|
ret = []
|
709 |
|
allowed_dts = ipolicy[constants.IPOLICY_DTS]
|
710 |
|
if disk_template not in allowed_dts:
|
711 |
|
ret.append("Disk template %s is not allowed (allowed templates: %s)" %
|
712 |
|
(disk_template, utils.CommaJoin(allowed_dts)))
|
713 |
|
|
714 |
|
min_errs = None
|
715 |
|
for minmax in ipolicy[constants.ISPECS_MINMAX]:
|
716 |
|
errs = filter(None,
|
717 |
|
(_compute_fn(name, qualifier, minmax, value)
|
718 |
|
for (name, qualifier, value) in test_settings))
|
719 |
|
if min_errs is None or len(errs) < len(min_errs):
|
720 |
|
min_errs = errs
|
721 |
|
assert min_errs is not None
|
722 |
|
return ret + min_errs
|
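# Note: an ipolicy may carry several min/max pairs; a spec only has to satisfy
# one of them, so the loop above keeps the error list of the best-matching
# pair (the fewest violations) and reports just that, together with any
# disk-template violation collected beforehand.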
723 |
|
|
724 |
|
|
725 |
|
def _ComputeIPolicyInstanceViolation(ipolicy, instance, cfg,
|
726 |
|
_compute_fn=_ComputeIPolicySpecViolation):
|
727 |
|
"""Compute if instance meets the specs of ipolicy.
|
728 |
|
|
729 |
|
@type ipolicy: dict
|
730 |
|
@param ipolicy: The ipolicy to verify against
|
731 |
|
@type instance: L{objects.Instance}
|
732 |
|
@param instance: The instance to verify
|
733 |
|
@type cfg: L{config.ConfigWriter}
|
734 |
|
@param cfg: Cluster configuration
|
735 |
|
@param _compute_fn: The function to verify ipolicy (unittest only)
|
736 |
|
@see: L{_ComputeIPolicySpecViolation}
|
737 |
|
|
738 |
|
"""
|
739 |
|
be_full = cfg.GetClusterInfo().FillBE(instance)
|
740 |
|
mem_size = be_full[constants.BE_MAXMEM]
|
741 |
|
cpu_count = be_full[constants.BE_VCPUS]
|
742 |
|
spindle_use = be_full[constants.BE_SPINDLE_USE]
|
743 |
|
disk_count = len(instance.disks)
|
744 |
|
disk_sizes = [disk.size for disk in instance.disks]
|
745 |
|
nic_count = len(instance.nics)
|
746 |
|
disk_template = instance.disk_template
|
747 |
|
|
748 |
|
return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
|
749 |
|
disk_sizes, spindle_use, disk_template)
|
750 |
|
|
751 |
|
|
752 |
430 |
def _ComputeIPolicyInstanceSpecViolation(
|
753 |
431 |
ipolicy, instance_spec, disk_template,
|
754 |
432 |
_compute_fn=_ComputeIPolicySpecViolation):
|
... | ... | |
822 |
500 |
raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
|
823 |
501 |
|
824 |
502 |
|
825 |
|
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg):
|
826 |
|
"""Computes a set of any instances that would violate the new ipolicy.
|
827 |
|
|
828 |
|
@param old_ipolicy: The current (still in-place) ipolicy
|
829 |
|
@param new_ipolicy: The new (to become) ipolicy
|
830 |
|
@param instances: List of instances to verify
|
831 |
|
@type cfg: L{config.ConfigWriter}
|
832 |
|
@param cfg: Cluster configuration
|
833 |
|
@return: A set of instance names which violate the new ipolicy but
|
834 |
|
did not before
|
835 |
|
|
836 |
|
"""
|
837 |
|
return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) -
|
838 |
|
_ComputeViolatingInstances(old_ipolicy, instances, cfg))
|
839 |
|
|
840 |
|
|
841 |
503 |
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
|
842 |
504 |
minmem, maxmem, vcpus, nics, disk_template, disks,
|
843 |
505 |
bep, hvp, hypervisor_name, tags):
|
... | ... | |
1022 |
684 |
return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
|
1023 |
685 |
|
1024 |
686 |
|
1025 |
|
def _AdjustCandidatePool(lu, exceptions):
|
1026 |
|
"""Adjust the candidate pool after node operations.
|
1027 |
|
|
1028 |
|
"""
|
1029 |
|
mod_list = lu.cfg.MaintainCandidatePool(exceptions)
|
1030 |
|
if mod_list:
|
1031 |
|
lu.LogInfo("Promoted nodes to master candidate role: %s",
|
1032 |
|
utils.CommaJoin(node.name for node in mod_list))
|
1033 |
|
for name in mod_list:
|
1034 |
|
lu.context.ReaddNode(name)
|
1035 |
|
mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
|
1036 |
|
if mc_now > mc_max:
|
1037 |
|
lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
|
1038 |
|
(mc_now, mc_max))
|
1039 |
|
|
1040 |
|
|
1041 |
687 |
def _DecideSelfPromotion(lu, exceptions=None):
|
1042 |
688 |
"""Decide whether I should promote myself as a master candidate.
|
1043 |
689 |
|
... | ... | |
1049 |
695 |
return mc_now < mc_should
|
1050 |
696 |
|
1051 |
697 |
|
1052 |
|
def _ComputeViolatingInstances(ipolicy, instances, cfg):
|
1053 |
|
"""Computes a set of instances who violates given ipolicy.
|
1054 |
|
|
1055 |
|
@param ipolicy: The ipolicy to verify
|
1056 |
|
@type instances: list of L{objects.Instance}
|
1057 |
|
@param instances: List of instances to verify
|
1058 |
|
@type cfg: L{config.ConfigWriter}
|
1059 |
|
@param cfg: Cluster configuration
|
1060 |
|
@return: A frozenset of instance names violating the ipolicy
|
1061 |
|
|
1062 |
|
"""
|
1063 |
|
return frozenset([inst.name for inst in instances
|
1064 |
|
if _ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)])
|
1065 |
|
|
1066 |
|
|
1067 |
698 |
def _CheckNicsBridgesExist(lu, target_nics, target_node):
|
1068 |
699 |
"""Check that the brigdes needed by a list of nics exist.
|
1069 |
700 |
|
... | ... | |
1173 |
804 |
"""Check the sanity of iallocator and node arguments and use the
|
1174 |
805 |
cluster-wide iallocator if appropriate.
|
1175 |
806 |
|
1176 |
|
Check that at most one of (iallocator, node) is specified. If none is
|
1177 |
|
specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
|
1178 |
|
then the LU's opcode's iallocator slot is filled with the cluster-wide
|
1179 |
|
default iallocator.
|
1180 |
|
|
1181 |
|
@type iallocator_slot: string
|
1182 |
|
@param iallocator_slot: the name of the opcode iallocator slot
|
1183 |
|
@type node_slot: string
|
1184 |
|
@param node_slot: the name of the opcode target node slot
|
1185 |
|
|
1186 |
|
"""
|
1187 |
|
node = getattr(lu.op, node_slot, None)
|
1188 |
|
ialloc = getattr(lu.op, iallocator_slot, None)
|
1189 |
|
if node == []:
|
1190 |
|
node = None
|
1191 |
|
|
1192 |
|
if node is not None and ialloc is not None:
|
1193 |
|
raise errors.OpPrereqError("Do not specify both, iallocator and node",
|
1194 |
|
errors.ECODE_INVAL)
|
1195 |
|
elif ((node is None and ialloc is None) or
|
1196 |
|
ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
|
1197 |
|
default_iallocator = lu.cfg.GetDefaultIAllocator()
|
1198 |
|
if default_iallocator:
|
1199 |
|
setattr(lu.op, iallocator_slot, default_iallocator)
|
1200 |
|
else:
|
1201 |
|
raise errors.OpPrereqError("No iallocator or node given and no"
|
1202 |
|
" cluster-wide default iallocator found;"
|
1203 |
|
" please specify either an iallocator or a"
|
1204 |
|
" node, or set a cluster-wide default"
|
1205 |
|
" iallocator", errors.ECODE_INVAL)
|
1206 |
|
|
1207 |
|
|
1208 |
|
def _GetDefaultIAllocator(cfg, ialloc):
|
1209 |
|
"""Decides on which iallocator to use.
|
1210 |
|
|
1211 |
|
@type cfg: L{config.ConfigWriter}
|
1212 |
|
@param cfg: Cluster configuration object
|
1213 |
|
@type ialloc: string or None
|
1214 |
|
@param ialloc: Iallocator specified in opcode
|
1215 |
|
@rtype: string
|
1216 |
|
@return: Iallocator name
|
1217 |
|
|
1218 |
|
"""
|
1219 |
|
if not ialloc:
|
1220 |
|
# Use default iallocator
|
1221 |
|
ialloc = cfg.GetDefaultIAllocator()
|
1222 |
|
|
1223 |
|
if not ialloc:
|
1224 |
|
raise errors.OpPrereqError("No iallocator was specified, neither in the"
|
1225 |
|
" opcode nor as a cluster-wide default",
|
1226 |
|
errors.ECODE_INVAL)
|
1227 |
|
|
1228 |
|
return ialloc
|
1229 |
|
|
1230 |
|
|
1231 |
|
def _CheckHostnameSane(lu, name):
|
1232 |
|
"""Ensures that a given hostname resolves to a 'sane' name.
|
1233 |
|
|
1234 |
|
The given name is required to be a prefix of the resolved hostname,
|
1235 |
|
to prevent accidental mismatches.
|
1236 |
|
|
1237 |
|
@param lu: the logical unit on behalf of which we're checking
|
1238 |
|
@param name: the name we should resolve and check
|
1239 |
|
@return: the resolved hostname object
|
1240 |
|
|
1241 |
|
"""
|
1242 |
|
hostname = netutils.GetHostname(name=name)
|
1243 |
|
if hostname.name != name:
|
1244 |
|
lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
|
1245 |
|
if not utils.MatchNameComponent(name, [hostname.name]):
|
1246 |
|
raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
|
1247 |
|
" same as given hostname '%s'") %
|
1248 |
|
(hostname.name, name), errors.ECODE_INVAL)
|
1249 |
|
return hostname
|
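# Illustrative sketch (hypothetical names): a short name is accepted as long
# as it matches a component prefix of what the resolver returns, e.g. "node1"
# with a resolved name of "node1.example.com" passes, while a resolver answer
# of "node2.example.com" makes the function raise errors.OpPrereqError.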
1250 |
|
|
1251 |
|
|
1252 |
|
class LUClusterPostInit(LogicalUnit):
|
1253 |
|
"""Logical unit for running hooks after cluster initialization.
|
1254 |
|
|
1255 |
|
"""
|
1256 |
|
HPATH = "cluster-init"
|
1257 |
|
HTYPE = constants.HTYPE_CLUSTER
|
1258 |
|
|
1259 |
|
def BuildHooksEnv(self):
|
1260 |
|
"""Build hooks env.
|
1261 |
|
|
1262 |
|
"""
|
1263 |
|
return {
|
1264 |
|
"OP_TARGET": self.cfg.GetClusterName(),
|
1265 |
|
}
|
1266 |
|
|
1267 |
|
def BuildHooksNodes(self):
|
1268 |
|
"""Build hooks nodes.
|
1269 |
|
|
1270 |
|
"""
|
1271 |
|
return ([], [self.cfg.GetMasterNode()])
|
1272 |
|
|
1273 |
|
def Exec(self, feedback_fn):
|
1274 |
|
"""Nothing to do.
|
1275 |
|
|
1276 |
|
"""
|
1277 |
|
return True
|
1278 |
|
|
1279 |
|
|
1280 |
|
class LUClusterDestroy(LogicalUnit):
|
1281 |
|
"""Logical unit for destroying the cluster.
|
1282 |
|
|
1283 |
|
"""
|
1284 |
|
HPATH = "cluster-destroy"
|
1285 |
|
HTYPE = constants.HTYPE_CLUSTER
|
1286 |
|
|
1287 |
|
def BuildHooksEnv(self):
|
1288 |
|
"""Build hooks env.
|
1289 |
|
|
1290 |
|
"""
|
1291 |
|
return {
|
1292 |
|
"OP_TARGET": self.cfg.GetClusterName(),
|
1293 |
|
}
|
1294 |
|
|
1295 |
|
def BuildHooksNodes(self):
|
1296 |
|
"""Build hooks nodes.
|
1297 |
|
|
1298 |
|
"""
|
1299 |
|
return ([], [])
|
1300 |
|
|
1301 |
|
def CheckPrereq(self):
|
1302 |
|
"""Check prerequisites.
|
1303 |
|
|
1304 |
|
This checks whether the cluster is empty.
|
1305 |
|
|
1306 |
|
Any errors are signaled by raising errors.OpPrereqError.
|
1307 |
|
|
1308 |
|
"""
|
1309 |
|
master = self.cfg.GetMasterNode()
|
1310 |
|
|
1311 |
|
nodelist = self.cfg.GetNodeList()
|
1312 |
|
if len(nodelist) != 1 or nodelist[0] != master:
|
1313 |
|
raise errors.OpPrereqError("There are still %d node(s) in"
|
1314 |
|
" this cluster." % (len(nodelist) - 1),
|
1315 |
|
errors.ECODE_INVAL)
|
1316 |
|
instancelist = self.cfg.GetInstanceList()
|
1317 |
|
if instancelist:
|
1318 |
|
raise errors.OpPrereqError("There are still %d instance(s) in"
|
1319 |
|
" this cluster." % len(instancelist),
|
1320 |
|
errors.ECODE_INVAL)
|
1321 |
|
|
1322 |
|
def Exec(self, feedback_fn):
|
1323 |
|
"""Destroys the cluster.
|
1324 |
|
|
1325 |
|
"""
|
1326 |
|
master_params = self.cfg.GetMasterNetworkParameters()
|
1327 |
|
|
1328 |
|
# Run post hooks on master node before it's removed
|
1329 |
|
_RunPostHook(self, master_params.name)
|
1330 |
|
|
1331 |
|
ems = self.cfg.GetUseExternalMipScript()
|
1332 |
|
result = self.rpc.call_node_deactivate_master_ip(master_params.name,
|
1333 |
|
master_params, ems)
|
1334 |
|
if result.fail_msg:
|
1335 |
|
self.LogWarning("Error disabling the master IP address: %s",
|
1336 |
|
result.fail_msg)
|
1337 |
|
|
1338 |
|
return master_params.name
|
1339 |
|
|
1340 |
|
|
1341 |
|
def _VerifyCertificate(filename):
|
1342 |
|
"""Verifies a certificate for L{LUClusterVerifyConfig}.
|
1343 |
|
|
1344 |
|
@type filename: string
|
1345 |
|
@param filename: Path to PEM file
|
1346 |
|
|
1347 |
|
"""
|
1348 |
|
try:
|
1349 |
|
cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
|
1350 |
|
utils.ReadFile(filename))
|
1351 |
|
except Exception, err: # pylint: disable=W0703
|
1352 |
|
return (LUClusterVerifyConfig.ETYPE_ERROR,
|
1353 |
|
"Failed to load X509 certificate %s: %s" % (filename, err))
|
1354 |
|
|
1355 |
|
(errcode, msg) = \
|
1356 |
|
utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
|
1357 |
|
constants.SSL_CERT_EXPIRATION_ERROR)
|
1358 |
|
|
1359 |
|
if msg:
|
1360 |
|
fnamemsg = "While verifying %s: %s" % (filename, msg)
|
1361 |
|
else:
|
1362 |
|
fnamemsg = None
|
1363 |
|
|
1364 |
|
if errcode is None:
|
1365 |
|
return (None, fnamemsg)
|
1366 |
|
elif errcode == utils.CERT_WARNING:
|
1367 |
|
return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
|
1368 |
|
elif errcode == utils.CERT_ERROR:
|
1369 |
|
return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
|
1370 |
|
|
1371 |
|
raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
|
1372 |
|
|
1373 |
|
|
1374 |
|
def _GetAllHypervisorParameters(cluster, instances):
|
1375 |
|
"""Compute the set of all hypervisor parameters.
|
1376 |
|
|
1377 |
|
@type cluster: L{objects.Cluster}
|
1378 |
|
@param cluster: the cluster object
|
1379 |
|
@type instances: list of L{objects.Instance}
|
1380 |
|
@param instances: additional instances from which to obtain parameters
|
1381 |
|
@rtype: list of (origin, hypervisor, parameters)
|
1382 |
|
@return: a list with all parameters found, indicating the hypervisor they
|
1383 |
|
apply to, and the origin (can be "cluster", "os X", or "instance Y")
|
1384 |
|
|
1385 |
|
"""
|
1386 |
|
hvp_data = []
|
1387 |
|
|
1388 |
|
for hv_name in cluster.enabled_hypervisors:
|
1389 |
|
hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
|
1390 |
|
|
1391 |
|
for os_name, os_hvp in cluster.os_hvp.items():
|
1392 |
|
for hv_name, hv_params in os_hvp.items():
|
1393 |
|
if hv_params:
|
1394 |
|
full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
|
1395 |
|
hvp_data.append(("os %s" % os_name, hv_name, full_params))
|
1396 |
|
|
1397 |
|
# TODO: collapse identical parameter values in a single one
|
1398 |
|
for instance in instances:
|
1399 |
|
if instance.hvparams:
|
1400 |
|
hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
|
1401 |
|
cluster.FillHV(instance)))
|
1402 |
|
|
1403 |
|
return hvp_data
|
1404 |
|
|
1405 |
|
|
1406 |
|
class _VerifyErrors(object):
|
1407 |
|
"""Mix-in for cluster/group verify LUs.
|
1408 |
|
|
1409 |
|
It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
|
1410 |
|
self.op and self._feedback_fn to be available.)
|
1411 |
|
|
1412 |
|
"""
|
1413 |
|
|
1414 |
|
ETYPE_FIELD = "code"
|
1415 |
|
ETYPE_ERROR = "ERROR"
|
1416 |
|
ETYPE_WARNING = "WARNING"
|
1417 |
|
|
1418 |
|
def _Error(self, ecode, item, msg, *args, **kwargs):
|
1419 |
|
"""Format an error message.
|
1420 |
|
|
1421 |
|
Based on the opcode's error_codes parameter, either format a
|
1422 |
|
parseable error code, or a simpler error string.
|
1423 |
|
|
1424 |
|
This must be called only from Exec and functions called from Exec.
|
1425 |
|
|
1426 |
|
"""
|
1427 |
|
ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
|
1428 |
|
itype, etxt, _ = ecode
|
1429 |
|
# If the error code is in the list of ignored errors, demote the error to a
|
1430 |
|
# warning
|
1431 |
|
if etxt in self.op.ignore_errors: # pylint: disable=E1101
|
1432 |
|
ltype = self.ETYPE_WARNING
|
1433 |
|
# first complete the msg
|
1434 |
|
if args:
|
1435 |
|
msg = msg % args
|
1436 |
|
# then format the whole message
|
1437 |
|
if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
|
1438 |
|
msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
|
1439 |
|
else:
|
1440 |
|
if item:
|
1441 |
|
item = " " + item
|
1442 |
|
else:
|
1443 |
|
item = ""
|
1444 |
|
msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
|
1445 |
|
# and finally report it via the feedback_fn
|
1446 |
|
self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
|
1447 |
|
# do not mark the operation as failed for WARN cases only
|
1448 |
|
if ltype == self.ETYPE_ERROR:
|
1449 |
|
self.bad = True
|
1450 |
|
|
1451 |
|
def _ErrorIf(self, cond, *args, **kwargs):
|
1452 |
|
"""Log an error message if the passed condition is True.
|
1453 |
|
|
1454 |
|
"""
|
1455 |
|
if (bool(cond)
|
1456 |
|
or self.op.debug_simulate_errors): # pylint: disable=E1101
|
1457 |
|
self._Error(*args, **kwargs)
|
1458 |
|
|
1459 |
|
|
1460 |
|
class LUClusterVerify(NoHooksLU):
|
1461 |
|
"""Submits all jobs necessary to verify the cluster.
|
1462 |
|
|
1463 |
|
"""
|
1464 |
|
REQ_BGL = False
|
1465 |
|
|
1466 |
|
def ExpandNames(self):
|
1467 |
|
self.needed_locks = {}
|
1468 |
|
|
1469 |
|
def Exec(self, feedback_fn):
|
1470 |
|
jobs = []
|
1471 |
|
|
1472 |
|
if self.op.group_name:
|
1473 |
|
groups = [self.op.group_name]
|
1474 |
|
depends_fn = lambda: None
|
1475 |
|
else:
|
1476 |
|
groups = self.cfg.GetNodeGroupList()
|
1477 |
|
|
1478 |
|
# Verify global configuration
|
1479 |
|
jobs.append([
|
1480 |
|
opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
|
1481 |
|
])
|
1482 |
|
|
1483 |
|
# Always depend on global verification
|
1484 |
|
depends_fn = lambda: [(-len(jobs), [])]
|
1485 |
|
|
1486 |
|
jobs.extend(
|
1487 |
|
[opcodes.OpClusterVerifyGroup(group_name=group,
|
1488 |
|
ignore_errors=self.op.ignore_errors,
|
1489 |
|
depends=depends_fn())]
|
1490 |
|
for group in groups)
|
1491 |
|
|
1492 |
|
# Fix up all parameters
|
1493 |
|
for op in itertools.chain(*jobs): # pylint: disable=W0142
|
1494 |
|
op.debug_simulate_errors = self.op.debug_simulate_errors
|
1495 |
|
op.verbose = self.op.verbose
|
1496 |
|
op.error_codes = self.op.error_codes
|
1497 |
|
try:
|
1498 |
|
op.skip_checks = self.op.skip_checks
|
1499 |
|
except AttributeError:
|
1500 |
|
assert not isinstance(op, opcodes.OpClusterVerifyGroup)
|
1501 |
|
|
1502 |
|
return ResultWithJobs(jobs)
|
1503 |
|
|
1504 |
|
|
1505 |
|
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
|
1506 |
|
"""Verifies the cluster config.
|
1507 |
|
|
1508 |
|
"""
|
1509 |
|
REQ_BGL = False
|
1510 |
|
|
1511 |
|
def _VerifyHVP(self, hvp_data):
|
1512 |
|
"""Verifies locally the syntax of the hypervisor parameters.
|
1513 |
|
|
1514 |
|
"""
|
1515 |
|
for item, hv_name, hv_params in hvp_data:
|
1516 |
|
msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
|
1517 |
|
(hv_name, item))
|
1518 |
|
try:
|
1519 |
|
hv_class = hypervisor.GetHypervisorClass(hv_name)
|
1520 |
|
utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
|
1521 |
|
hv_class.CheckParameterSyntax(hv_params)
|
1522 |
|
except errors.GenericError, err:
|
1523 |
|
self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
|
1524 |
|
|
1525 |
|
def ExpandNames(self):
|
1526 |
|
self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
|
1527 |
|
self.share_locks = _ShareAll()
|
1528 |
|
|
1529 |
|
def CheckPrereq(self):
|
1530 |
|
"""Check prerequisites.
|
1531 |
|
|
1532 |
|
"""
|
1533 |
|
# Retrieve all information
|
1534 |
|
self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
|
1535 |
|
self.all_node_info = self.cfg.GetAllNodesInfo()
|
1536 |
|
self.all_inst_info = self.cfg.GetAllInstancesInfo()
|
1537 |
|
|
1538 |
|
def Exec(self, feedback_fn):
|
1539 |
|
"""Verify integrity of cluster, performing various test on nodes.
|
1540 |
|
|
1541 |
|
"""
|
1542 |
|
self.bad = False
|
1543 |
|
self._feedback_fn = feedback_fn
|
1544 |
|
|
1545 |
|
feedback_fn("* Verifying cluster config")
|
1546 |
|
|
1547 |
|
for msg in self.cfg.VerifyConfig():
|
1548 |
|
self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
|
1549 |
|
|
1550 |
|
feedback_fn("* Verifying cluster certificate files")
|
1551 |
|
|
1552 |
|
for cert_filename in pathutils.ALL_CERT_FILES:
|
1553 |
|
(errcode, msg) = _VerifyCertificate(cert_filename)
|
1554 |
|
self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
|
1555 |
|
|
1556 |
|
feedback_fn("* Verifying hypervisor parameters")
|
1557 |
|
|
1558 |
|
self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
|
1559 |
|
self.all_inst_info.values()))
|
1560 |
|
|
1561 |
|
feedback_fn("* Verifying all nodes belong to an existing group")
|
1562 |
|
|
1563 |
|
# We do this verification here because, should this bogus circumstance
|
1564 |
|
# occur, it would never be caught by VerifyGroup, which only acts on
|
1565 |
|
# nodes/instances reachable from existing node groups.
|
1566 |
|
|
1567 |
|
dangling_nodes = set(node.name for node in self.all_node_info.values()
|
1568 |
|
if node.group not in self.all_group_info)
|
1569 |
|
|
1570 |
|
dangling_instances = {}
|
1571 |
|
no_node_instances = []
|
1572 |
|
|
1573 |
|
for inst in self.all_inst_info.values():
|
1574 |
|
if inst.primary_node in dangling_nodes:
|
1575 |
|
dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
|
1576 |
|
elif inst.primary_node not in self.all_node_info:
|
1577 |
|
no_node_instances.append(inst.name)
|
1578 |
|
|
1579 |
|
pretty_dangling = [
|
1580 |
|
"%s (%s)" %
|
1581 |
|
(node.name,
|
1582 |
|
utils.CommaJoin(dangling_instances.get(node.name,
|
1583 |
|
["no instances"])))
|
1584 |
|
for node in dangling_nodes]
|
1585 |
|
|
1586 |
|
self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
|
1587 |
|
None,
|
1588 |
|
"the following nodes (and their instances) belong to a non"
|
1589 |
|
" existing group: %s", utils.CommaJoin(pretty_dangling))
|
1590 |
|
|
1591 |
|
self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
|
1592 |
|
None,
|
1593 |
|
"the following instances have a non-existing primary-node:"
|
1594 |
|
" %s", utils.CommaJoin(no_node_instances))
|
1595 |
|
|
1596 |
|
return not self.bad
|
1597 |
|
|
1598 |
|
|
1599 |
|
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
|
1600 |
|
"""Verifies the status of a node group.
|
1601 |
|
|
1602 |
|
"""
|
1603 |
|
HPATH = "cluster-verify"
|
1604 |
|
HTYPE = constants.HTYPE_CLUSTER
|
1605 |
|
REQ_BGL = False
|
1606 |
|
|
1607 |
|
_HOOKS_INDENT_RE = re.compile("^", re.M)
|
1608 |
|
|
1609 |
|
class NodeImage(object):
|
1610 |
|
"""A class representing the logical and physical status of a node.
|
1611 |
|
|
1612 |
|
@type name: string
|
1613 |
|
@ivar name: the node name to which this object refers
|
1614 |
|
@ivar volumes: a structure as returned from
|
1615 |
|
L{ganeti.backend.GetVolumeList} (runtime)
|
1616 |
|
@ivar instances: a list of running instances (runtime)
|
1617 |
|
@ivar pinst: list of configured primary instances (config)
|
1618 |
|
@ivar sinst: list of configured secondary instances (config)
|
1619 |
|
@ivar sbp: dictionary of {primary-node: list of instances} for all
|
1620 |
|
instances for which this node is secondary (config)
|
1621 |
|
@ivar mfree: free memory, as reported by hypervisor (runtime)
|
1622 |
|
@ivar dfree: free disk, as reported by the node (runtime)
|
1623 |
|
@ivar offline: the offline status (config)
|
1624 |
|
@type rpc_fail: boolean
|
1625 |
|
@ivar rpc_fail: whether the RPC verify call was successful (overall,
|
1626 |
|
not whether the individual keys were correct) (runtime)
|
1627 |
|
@type lvm_fail: boolean
|
1628 |
|
@ivar lvm_fail: whether the RPC call didn't return valid LVM data
|
1629 |
|
@type hyp_fail: boolean
|
1630 |
|
@ivar hyp_fail: whether the RPC call didn't return the instance list
|
1631 |
|
@type ghost: boolean
|
1632 |
|
@ivar ghost: whether this is a known node or not (config)
|
1633 |
|
@type os_fail: boolean
|
1634 |
|
@ivar os_fail: whether the RPC call didn't return valid OS data
|
1635 |
|
@type oslist: list
|
1636 |
|
@ivar oslist: list of OSes as diagnosed by DiagnoseOS
|
1637 |
|
@type vm_capable: boolean
|
1638 |
|
@ivar vm_capable: whether the node can host instances
|
1639 |
|
@type pv_min: float
|
1640 |
|
@ivar pv_min: size in MiB of the smallest PVs
|
1641 |
|
@type pv_max: float
|
1642 |
|
@ivar pv_max: size in MiB of the biggest PVs
|
1643 |
|
|
1644 |
|
"""
|
1645 |
|
def __init__(self, offline=False, name=None, vm_capable=True):
|
1646 |
|
self.name = name
|
1647 |
|
self.volumes = {}
|
1648 |
|
self.instances = []
|
1649 |
|
self.pinst = []
|
1650 |
|
self.sinst = []
|
1651 |
|
self.sbp = {}
|
1652 |
|
self.mfree = 0
|
1653 |
|
self.dfree = 0
|
1654 |
|
self.offline = offline
|
1655 |
|
self.vm_capable = vm_capable
|
1656 |
|
self.rpc_fail = False
|
1657 |
|
self.lvm_fail = False
|
1658 |
|
self.hyp_fail = False
|
1659 |
|
self.ghost = False
|
1660 |
|
self.os_fail = False
|
1661 |
|
self.oslist = {}
|
1662 |
|
self.pv_min = None
|
1663 |
|
self.pv_max = None
|
1664 |
|
|
1665 |
|
def ExpandNames(self):
|
1666 |
|
# This raises errors.OpPrereqError on its own:
|
1667 |
|
self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
|
1668 |
|
|
1669 |
|
# Get instances in node group; this is unsafe and needs verification later
|
1670 |
|
inst_names = \
|
1671 |
|
self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
|
1672 |
|
|
1673 |
|
self.needed_locks = {
|
1674 |
|
locking.LEVEL_INSTANCE: inst_names,
|
1675 |
|
locking.LEVEL_NODEGROUP: [self.group_uuid],
|
1676 |
|
locking.LEVEL_NODE: [],
|
1677 |
|
|
1678 |
|
# This opcode is run by watcher every five minutes and acquires all nodes
|
1679 |
|
# for a group. It doesn't run for a long time, so it's better to acquire
|
1680 |
|
# the node allocation lock as well.
|
1681 |
|
locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
|
1682 |
|
}
|
1683 |
|
|
1684 |
|
self.share_locks = _ShareAll()
|
1685 |
|
|
1686 |
|
def DeclareLocks(self, level):
|
1687 |
|
if level == locking.LEVEL_NODE:
|
1688 |
|
# Get members of node group; this is unsafe and needs verification later
|
1689 |
|
nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
|
1690 |
|
|
1691 |
|
all_inst_info = self.cfg.GetAllInstancesInfo()
|
1692 |
|
|
1693 |
|
# In Exec(), we warn about mirrored instances that have primary and
|
1694 |
|
# secondary living in separate node groups. To fully verify that
|
1695 |
|
# volumes for these instances are healthy, we will need to do an
|
1696 |
|
# extra call to their secondaries. We ensure here those nodes will
|
1697 |
|
# be locked.
|
1698 |
|
for inst in self.owned_locks(locking.LEVEL_INSTANCE):
|
1699 |
|
# Important: access only the instances whose lock is owned
|
1700 |
|
if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
|
1701 |
|
nodes.update(all_inst_info[inst].secondary_nodes)
|
1702 |
|
|
1703 |
|
self.needed_locks[locking.LEVEL_NODE] = nodes
|
1704 |
|
|
1705 |
|
def CheckPrereq(self):
|
1706 |
|
assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
|
1707 |
|
self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
|
1708 |
|
|
1709 |
|
group_nodes = set(self.group_info.members)
|
1710 |
|
group_instances = \
|
1711 |
|
self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
|
1712 |
|
|
1713 |
|
unlocked_nodes = \
|
1714 |
|
group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
|
1715 |
|
|
1716 |
|
unlocked_instances = \
|
1717 |
|
group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
|
1718 |
|
|
1719 |
|
if unlocked_nodes:
|
1720 |
|
raise errors.OpPrereqError("Missing lock for nodes: %s" %
|
1721 |
|
utils.CommaJoin(unlocked_nodes),
|
1722 |
|
errors.ECODE_STATE)
|
1723 |
|
|
1724 |
|
if unlocked_instances:
|
1725 |
|
raise errors.OpPrereqError("Missing lock for instances: %s" %
|
1726 |
|
utils.CommaJoin(unlocked_instances),
|
1727 |
|
errors.ECODE_STATE)
|
1728 |
|
|
1729 |
|
self.all_node_info = self.cfg.GetAllNodesInfo()
|
1730 |
|
self.all_inst_info = self.cfg.GetAllInstancesInfo()
|
1731 |
|
|
1732 |
|
self.my_node_names = utils.NiceSort(group_nodes)
|
1733 |
|
self.my_inst_names = utils.NiceSort(group_instances)
|
1734 |
|
|
1735 |
|
self.my_node_info = dict((name, self.all_node_info[name])
|
1736 |
|
for name in self.my_node_names)
|
1737 |
|
|
1738 |
|
self.my_inst_info = dict((name, self.all_inst_info[name])
|
1739 |
|
for name in self.my_inst_names)
|
1740 |
|
|
1741 |
|
# We detect here the nodes that will need the extra RPC calls for verifying
|
1742 |
|
# split LV volumes; they should be locked.
|
1743 |
|
extra_lv_nodes = set()
|
1744 |
|
|
1745 |
|
for inst in self.my_inst_info.values():
|
1746 |
|
if inst.disk_template in constants.DTS_INT_MIRROR:
|
1747 |
|
for nname in inst.all_nodes:
|
1748 |
|
if self.all_node_info[nname].group != self.group_uuid:
|
1749 |
|
extra_lv_nodes.add(nname)
|
1750 |
|
|
1751 |
|
unlocked_lv_nodes = \
|
1752 |
|
extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
|
1753 |
|
|
1754 |
|
if unlocked_lv_nodes:
|
1755 |
|
raise errors.OpPrereqError("Missing node locks for LV check: %s" %
|
1756 |
|
utils.CommaJoin(unlocked_lv_nodes),
|
1757 |
|
errors.ECODE_STATE)
|
1758 |
|
self.extra_lv_nodes = list(extra_lv_nodes)
|
1759 |
|
|
1760 |
|
def _VerifyNode(self, ninfo, nresult):
|
1761 |
|
"""Perform some basic validation on data returned from a node.
|
1762 |
|
|
1763 |
|
- check the result data structure is well formed and has all the
|
1764 |
|
mandatory fields
|
1765 |
|
- check ganeti version
|
1766 |
|
|
1767 |
|
@type ninfo: L{objects.Node}
|
1768 |
|
@param ninfo: the node to check
|
1769 |
|
@param nresult: the results from the node
|
1770 |
|
@rtype: boolean
|
1771 |
|
@return: whether overall this call was successful (and we can expect
|
1772 |
|
reasonable values in the response)
|
1773 |
|
|
1774 |
|
"""
|
1775 |
|
node = ninfo.name
|
1776 |
|
_ErrorIf = self._ErrorIf # pylint: disable=C0103
|
1777 |
|
|
1778 |
|
# main result, nresult should be a non-empty dict
|
1779 |
|
test = not nresult or not isinstance(nresult, dict)
|
1780 |
|
_ErrorIf(test, constants.CV_ENODERPC, node,
|
1781 |
|
"unable to verify node: no data returned")
|
1782 |
|
if test:
|
1783 |
|
return False
|
1784 |
|
|
1785 |
|
# compares ganeti version
|
1786 |
|
local_version = constants.PROTOCOL_VERSION
|
1787 |
|
remote_version = nresult.get("version", None)
|
1788 |
|
test = not (remote_version and
|
1789 |
|
isinstance(remote_version, (list, tuple)) and
|
1790 |
|
len(remote_version) == 2)
|
1791 |
|
_ErrorIf(test, constants.CV_ENODERPC, node,
|
1792 |
|
"connection to node returned invalid data")
|
1793 |
|
if test:
|
1794 |
|
return False
|
1795 |
|
|
1796 |
|
test = local_version != remote_version[0]
|
1797 |
|
_ErrorIf(test, constants.CV_ENODEVERSION, node,
|
1798 |
|
"incompatible protocol versions: master %s,"
|
1799 |
|
" node %s", local_version, remote_version[0])
|
1800 |
|
if test:
|
1801 |
|
return False
|
1802 |
|
|
1803 |
|
# node seems compatible, we can actually try to look into its results
|
1804 |
|
|
1805 |
|
# full package version
|
1806 |
|
self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
|
1807 |
|
constants.CV_ENODEVERSION, node,
|
1808 |
|
"software version mismatch: master %s, node %s",
|
1809 |
|
constants.RELEASE_VERSION, remote_version[1],
|
1810 |
|
code=self.ETYPE_WARNING)
|
1811 |
|
|
1812 |
|
hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
|
1813 |
|
if ninfo.vm_capable and isinstance(hyp_result, dict):
|
1814 |
|
for hv_name, hv_result in hyp_result.iteritems():
|
1815 |
|
test = hv_result is not None
|
1816 |
|
_ErrorIf(test, constants.CV_ENODEHV, node,
|
1817 |
|
"hypervisor %s verify failure: '%s'", hv_name, hv_result)
|
1818 |
|
|
1819 |
|
hvp_result = nresult.get(constants.NV_HVPARAMS, None)
|
1820 |
|
if ninfo.vm_capable and isinstance(hvp_result, list):
|
1821 |
|
for item, hv_name, hv_result in hvp_result:
|
1822 |
|
_ErrorIf(True, constants.CV_ENODEHV, node,
|
1823 |
|
"hypervisor %s parameter verify failure (source %s): %s",
|
1824 |
|
hv_name, item, hv_result)
|
1825 |
|
|
1826 |
|
test = nresult.get(constants.NV_NODESETUP,
|
1827 |
|
["Missing NODESETUP results"])
|
1828 |
|
_ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
|
1829 |
|
"; ".join(test))
|
1830 |
|
|
1831 |
|
return True
|
1832 |
|
|
1833 |
|
def _VerifyNodeTime(self, ninfo, nresult,
|
1834 |
|
nvinfo_starttime, nvinfo_endtime):
|
1835 |
|
"""Check the node time.
|
1836 |
|
|
1837 |
|
@type ninfo: L{objects.Node}
|
1838 |
|
@param ninfo: the node to check
|
1839 |
|
@param nresult: the remote results for the node
|
1840 |
|
@param nvinfo_starttime: the start time of the RPC call
|
1841 |
|
@param nvinfo_endtime: the end time of the RPC call
|
1842 |
|
|
1843 |
|
"""
|
1844 |
|
node = ninfo.name
|
1845 |
|
_ErrorIf = self._ErrorIf # pylint: disable=C0103
|
1846 |
|
|
1847 |
|
ntime = nresult.get(constants.NV_TIME, None)
|
1848 |
|
try:
|
1849 |
|
ntime_merged = utils.MergeTime(ntime)
|
1850 |
|
except (ValueError, TypeError):
|
1851 |
|
_ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
|
1852 |
|
return
|
1853 |
|
|
1854 |
|
if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
|
1855 |
|
ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
|
1856 |
|
elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
|
1857 |
|
ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
|
1858 |
|
else:
|
1859 |
|
ntime_diff = None
|
1860 |
|
|
1861 |
|
_ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
|
1862 |
|
"Node time diverges by at least %s from master node time",
|
1863 |
|
ntime_diff)
|
1864 |
|
|
1865 |
|
def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg):
|
1866 |
|
"""Check the node LVM results and update info for cross-node checks.
|
1867 |
|
|
1868 |
|
@type ninfo: L{objects.Node}
|
1869 |
|
@param ninfo: the node to check
|
1870 |
|
@param nresult: the remote results for the node
|
1871 |
|
@param vg_name: the configured VG name
|
1872 |
|
@type nimg: L{NodeImage}
|
1873 |
|
@param nimg: node image
|
1874 |
|
|
1875 |
|
"""
|
1876 |
|
if vg_name is None:
|
1877 |
|
return
|
1878 |
|
|
1879 |
|
node = ninfo.name
|
1880 |
|
_ErrorIf = self._ErrorIf # pylint: disable=C0103
|
1881 |
|
|
1882 |
|
# checks vg existence and size > 20G
|
1883 |
|
vglist = nresult.get(constants.NV_VGLIST, None)
|
1884 |
|
test = not vglist
|
1885 |
|
_ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
|
1886 |
|
if not test:
|
1887 |
|
vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
|
1888 |
|
constants.MIN_VG_SIZE)
|
1889 |
|
_ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
|
1890 |
|
|
1891 |
|
# Check PVs
|
1892 |
|
(errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage)
|
1893 |
|
for em in errmsgs:
|
1894 |
|
self._Error(constants.CV_ENODELVM, node, em)
|
1895 |
|
if pvminmax is not None:
|
1896 |
|
(nimg.pv_min, nimg.pv_max) = pvminmax
|
1897 |
|
|
1898 |
|
def _VerifyGroupLVM(self, node_image, vg_name):
|
1899 |
|
"""Check cross-node consistency in LVM.
|
1900 |
|
|
1901 |
|
@type node_image: dict
|
1902 |
|
@param node_image: info about nodes, mapping node names to
|
1903 |
|
L{NodeImage} objects
|
1904 |
|
@param vg_name: the configured VG name
|
1905 |
|
|
1906 |
|
"""
|
1907 |
|
if vg_name is None:
|
1908 |
|
return
|
1909 |
|
|
1910 |
|
# Only exclusive storage needs this kind of check
|
1911 |
|
if not self._exclusive_storage:
|
1912 |
|
return
|
1913 |
|
|
1914 |
|
# exclusive_storage wants all PVs to have the same size (approximately),
|
1915 |
|
# if the smallest and the biggest ones are okay, everything is fine.
|
1916 |
|
# pv_min is None iff pv_max is None
|
1917 |
|
vals = filter((lambda ni: ni.pv_min is not None), node_image.values())
|
1918 |
|
if not vals:
|
1919 |
|
return
|
1920 |
|
(pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals)
|
1921 |
|
(pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals)
|
1922 |
|
bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax)
|
1923 |
|
self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name,
|
1924 |
|
"PV sizes differ too much in the group; smallest (%s MB) is"
|
1925 |
|
" on %s, biggest (%s MB) is on %s",
|
1926 |
|
pvmin, minnode, pvmax, maxnode)
|
1927 |
|
|
1928 |
|
def _VerifyNodeBridges(self, ninfo, nresult, bridges):
|
1929 |
|
"""Check the node bridges.
|
1930 |
|
|
1931 |
|
@type ninfo: L{objects.Node}
|
1932 |
|
@param ninfo: the node to check
|
1933 |
|
@param nresult: the remote results for the node
|
1934 |
|
@param bridges: the expected list of bridges
|
1935 |
|
|
1936 |
|
"""
|
1937 |
|
if not bridges:
|
1938 |
|
return
|
1939 |
|
|
1940 |
|
node = ninfo.name
|
1941 |
|
_ErrorIf = self._ErrorIf # pylint: disable=C0103
|
1942 |
|
|
1943 |
|
missing = nresult.get(constants.NV_BRIDGES, None)
|
1944 |
|
test = not isinstance(missing, list)
|
1945 |
|
_ErrorIf(test, constants.CV_ENODENET, node,
|
1946 |
|
"did not return valid bridge information")
|
1947 |
|
if not test:
|
1948 |
|
_ErrorIf(bool(missing), constants.CV_ENODENET, node,
|
1949 |
|
"missing bridges: %s" % utils.CommaJoin(sorted(missing)))
|
1950 |
|
|
1951 |
|
def _VerifyNodeUserScripts(self, ninfo, nresult):
|
1952 |
|
"""Check the results of user scripts presence and executability on the node
|
1953 |
|
|
1954 |
|
@type ninfo: L{objects.Node}
|
1955 |
|
@param ninfo: the node to check
|
1956 |
|
@param nresult: the remote results for the node
|
1957 |
|
|
1958 |
|
"""
|
1959 |
|
node = ninfo.name
|
1960 |
|
|
1961 |
|
test = constants.NV_USERSCRIPTS not in nresult
|
1962 |
|
self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
|
1963 |
|
"did not return user scripts information")
|
1964 |
|
|
1965 |
|
broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
|
1966 |
|
if not test:
|
1967 |
|
self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
|
1968 |
|
"user scripts not present or not executable: %s" %
|
1969 |
|
utils.CommaJoin(sorted(broken_scripts)))
|
1970 |
|
|
1971 |
|
def _VerifyNodeNetwork(self, ninfo, nresult):
|
1972 |
|
"""Check the node network connectivity results.
|
1973 |
|
|
1974 |
|
@type ninfo: L{objects.Node}
|
1975 |
|
@param ninfo: the node to check
|
1976 |
|
@param nresult: the remote results for the node
|
1977 |
|
|
1978 |
|
"""
|
1979 |
|
node = ninfo.name
|
1980 |
|
_ErrorIf = self._ErrorIf # pylint: disable=C0103
|
1981 |
|
|
1982 |
|
test = constants.NV_NODELIST not in nresult
|
1983 |
|
_ErrorIf(test, constants.CV_ENODESSH, node,
|
1984 |
|
"node hasn't returned node ssh connectivity data")
|
1985 |
|
if not test:
|
1986 |
|
if nresult[constants.NV_NODELIST]:
|
1987 |
|
for a_node, a_msg in nresult[constants.NV_NODELIST].items():
|
1988 |
|
_ErrorIf(True, constants.CV_ENODESSH, node,
|
1989 |
|
"ssh communication with node '%s': %s", a_node, a_msg)
|
1990 |
|
|
1991 |
|
test = constants.NV_NODENETTEST not in nresult
|
1992 |
|
_ErrorIf(test, constants.CV_ENODENET, node,
|
1993 |
|
"node hasn't returned node tcp connectivity data")
|
1994 |
|
if not test:
|
1995 |
|
if nresult[constants.NV_NODENETTEST]:
|
1996 |
|
nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
|
1997 |
|
for anode in nlist:
|
1998 |
|
_ErrorIf(True, constants.CV_ENODENET, node,
|
1999 |
|
"tcp communication with node '%s': %s",
|
2000 |
|
anode, nresult[constants.NV_NODENETTEST][anode])
|
2001 |
|
|
2002 |
|
test = constants.NV_MASTERIP not in nresult
|
2003 |
|
_ErrorIf(test, constants.CV_ENODENET, node,
|
2004 |
|
"node hasn't returned node master IP reachability data")
|
2005 |
|
if not test:
|
2006 |
|
if not nresult[constants.NV_MASTERIP]:
|
2007 |
|
if node == self.master_node:
|
2008 |
|
msg = "the master node cannot reach the master IP (not configured?)"
|
2009 |
|
else:
|
2010 |
|
msg = "cannot reach the master IP"
|
2011 |
|
_ErrorIf(True, constants.CV_ENODENET, node, msg)
|
2012 |
|
|
2013 |
|
def _VerifyInstance(self, instance, inst_config, node_image,
|
2014 |
|
diskstatus):
|
2015 |
|
"""Verify an instance.
|
2016 |
|
|
2017 |
|
This function checks to see if the required block devices are
|
2018 |
|
available on the instance's node, and that the nodes are in the correct
|
2019 |
|
state.
|
2020 |
|
|
2021 |
|
"""
|
2022 |
|
_ErrorIf = self._ErrorIf # pylint: disable=C0103
|
2023 |
|
pnode = inst_config.primary_node
|
2024 |
|
pnode_img = node_image[pnode]
|
2025 |
|
groupinfo = self.cfg.GetAllNodeGroupsInfo()
|
2026 |
|
|
2027 |
|
node_vol_should = {}
|
2028 |
|
inst_config.MapLVsByNode(node_vol_should)
|
2029 |
|
|
2030 |
|
cluster = self.cfg.GetClusterInfo()
|
2031 |
|
ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
|
2032 |
|
self.group_info)
|
2033 |
|
err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg)
|
2034 |
|
_ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
|
2035 |
|
code=self.ETYPE_WARNING)
|
2036 |
|
|
2037 |
|
for node in node_vol_should:
|
2038 |
|
n_img = node_image[node]
|
2039 |
|
if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
|
2040 |
|
# ignore missing volumes on offline or broken nodes
|
2041 |
|
continue
|
2042 |
|
for volume in node_vol_should[node]:
|
2043 |
|
test = volume not in n_img.volumes
|
2044 |
|
_ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
|
2045 |
|
"volume %s missing on node %s", volume, node)
|
2046 |
|
|
2047 |
|
if inst_config.admin_state == constants.ADMINST_UP:
|
2048 |
|
test = instance not in pnode_img.instances and not pnode_img.offline
|
2049 |
|
_ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
|
2050 |
|
"instance not running on its primary node %s",
|
2051 |
|
pnode)
|
2052 |
|
_ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
|
2053 |
|
"instance is marked as running and lives on offline node %s",
|
2054 |
|
pnode)
|
2055 |
|
|
2056 |
|
diskdata = [(nname, success, status, idx)
|
2057 |
|
for (nname, disks) in diskstatus.items()
|
2058 |
|
for idx, (success, status) in enumerate(disks)]
|
2059 |
|
|
2060 |
|
for nname, success, bdev_status, idx in diskdata:
|
2061 |
|
# the 'ghost node' construction in Exec() ensures that we have a
|
2062 |
|
# node here
|
2063 |
|
snode = node_image[nname]
|
2064 |
|
bad_snode = snode.ghost or snode.offline
|
2065 |
|
_ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
|
2066 |
|
not success and not bad_snode,
|
2067 |
|
constants.CV_EINSTANCEFAULTYDISK, instance,
|
2068 |
|
"couldn't retrieve status for disk/%s on %s: %s",
|
2069 |
|
idx, nname, bdev_status)
|
2070 |
|
_ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
|
2071 |
|
success and bdev_status.ldisk_status == constants.LDS_FAULTY),
|
2072 |
|
constants.CV_EINSTANCEFAULTYDISK, instance,
|
2073 |
|
"disk/%s on %s is faulty", idx, nname)
|
2074 |
|
|
2075 |
|
_ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
|
2076 |
|
constants.CV_ENODERPC, pnode, "instance %s, connection to"
|
2077 |
|
" primary node failed", instance)
|
2078 |
|
|
2079 |
|
_ErrorIf(len(inst_config.secondary_nodes) > 1,
|
2080 |
|
constants.CV_EINSTANCELAYOUT,
|
2081 |
|
instance, "instance has multiple secondary nodes: %s",
|
2082 |
|
utils.CommaJoin(inst_config.secondary_nodes),
|
2083 |
|
code=self.ETYPE_WARNING)
|
2084 |
|
|
2085 |
|
if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
|
2086 |
|
# Disk template not compatible with exclusive_storage: no instance
|
2087 |
|
# node should have the flag set
|
2088 |
|
es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
|
2089 |
|
inst_config.all_nodes)
|
2090 |
|
es_nodes = [n for (n, es) in es_flags.items()
|
2091 |
|
if es]
|
2092 |
|
_ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance,
|
2093 |
|
"instance has template %s, which is not supported on nodes"
|
2094 |
|
" that have exclusive storage set: %s",
|
2095 |
|
inst_config.disk_template, utils.CommaJoin(es_nodes))
|
2096 |
|
|
2097 |
|
if inst_config.disk_template in constants.DTS_INT_MIRROR:
|
2098 |
|
instance_nodes = utils.NiceSort(inst_config.all_nodes)
|
2099 |
|
instance_groups = {}
|
2100 |
|
|
2101 |
|
for node in instance_nodes:
|
2102 |
|
instance_groups.setdefault(self.all_node_info[node].group,
|
2103 |
|
[]).append(node)
|
2104 |
|
|
2105 |
|
pretty_list = [
|
2106 |
|
"%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
|
2107 |
|
# Sort so that we always list the primary node first.
|
2108 |
|
for group, nodes in sorted(instance_groups.items(),
|
2109 |
|
key=lambda (_, nodes): pnode in nodes,
|
2110 |
|
reverse=True)]
|
2111 |
|
|
2112 |
|
self._ErrorIf(len(instance_groups) > 1,
|
2113 |
|
constants.CV_EINSTANCESPLITGROUPS,
|
2114 |
|
instance, "instance has primary and secondary nodes in"
|
2115 |
|
" different groups: %s", utils.CommaJoin(pretty_list),
|
2116 |
|
code=self.ETYPE_WARNING)
|
2117 |
|
|
2118 |
|
inst_nodes_offline = []
|
2119 |
|
for snode in inst_config.secondary_nodes:
|
2120 |
|
s_img = node_image[snode]
|
2121 |
|
_ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
|
2122 |
|
snode, "instance %s, connection to secondary node failed",
|
2123 |
|
instance)
|
2124 |
|
|
2125 |
|
if s_img.offline:
|
2126 |
|
inst_nodes_offline.append(snode)
|
2127 |
|
|
2128 |
|
# warn that the instance lives on offline nodes
|
2129 |
|
_ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
|
2130 |
|
"instance has offline secondary node(s) %s",
|
2131 |
|
utils.CommaJoin(inst_nodes_offline))
|
2132 |
|
# ... or ghost/non-vm_capable nodes
|
2133 |
|
for node in inst_config.all_nodes:
|
2134 |
|
_ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
|
2135 |
|
instance, "instance lives on ghost node %s", node)
|
2136 |
|
_ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
|
2137 |
|
instance, "instance lives on non-vm_capable node %s", node)
|
2138 |
|
|
2139 |
|
def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
|
2140 |
|
"""Verify if there are any unknown volumes in the cluster.
|
2141 |
|
|
2142 |
|
The .os, .swap and backup volumes are ignored. All other volumes are
|
2143 |
|
reported as unknown.
|
2144 |
|
|
2145 |
|
@type reserved: L{ganeti.utils.FieldSet}
|
2146 |
|
@param reserved: a FieldSet of reserved volume names
|
2147 |
|
|
2148 |
|
"""
|
2149 |
|
for node, n_img in node_image.items():
|
2150 |
|
if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
|
2151 |
|
self.all_node_info[node].group != self.group_uuid):
|