Revision 7352d33b: lib/cmdlib/__init__.py

--- a/lib/cmdlib/__init__.py
+++ b/lib/cmdlib/__init__.py
@@ -30,22 +30,18 @@
 
 import os
 import time
-import re
 import logging
 import copy
 import OpenSSL
 import itertools
 import operator
 
-from ganeti import ssh
 from ganeti import utils
 from ganeti import errors
 from ganeti import hypervisor
 from ganeti import locking
 from ganeti import constants
 from ganeti import objects
-from ganeti import ssconf
-from ganeti import uidpool
 from ganeti import compat
 from ganeti import masterd
 from ganeti import netutils
@@ -54,9 +50,7 @@
 from ganeti import opcodes
 from ganeti import ht
 from ganeti import rpc
-from ganeti import runtime
 from ganeti import pathutils
-from ganeti import vcluster
 from ganeti import network
 from ganeti.masterd import iallocator
 
@@ -64,7 +58,19 @@
   Tasklet, _QueryBase
 from ganeti.cmdlib.common import _ExpandInstanceName, _ExpandItemName, \
   _ExpandNodeName, _ShareAll, _CheckNodeGroupInstances, _GetWantedNodes, \
-  _GetWantedInstances
+  _GetWantedInstances, _RunPostHook, _RedistributeAncillaryFiles, \
+  _MergeAndVerifyHvState, _MergeAndVerifyDiskState, _GetUpdatedIPolicy, \
+  _ComputeNewInstanceViolations, _GetUpdatedParams, _CheckOSParams, \
+  _CheckHVParams, _AdjustCandidatePool, _CheckNodePVs, \
+  _ComputeIPolicyInstanceViolation, _AnnotateDiskParams, _SupportsOob, \
+  _ComputeIPolicySpecViolation
+
+from ganeti.cmdlib.cluster import LUClusterActivateMasterIp, \
+  LUClusterDeactivateMasterIp, LUClusterConfigQuery, LUClusterDestroy, \
+  LUClusterPostInit, _ClusterQuery, LUClusterQuery, LUClusterRedistConf, \
+  LUClusterRename, LUClusterRepairDiskSizes, LUClusterSetParams, \
+  LUClusterVerify, LUClusterVerifyConfig, LUClusterVerifyGroup, \
+  LUClusterVerifyDisks
 from ganeti.cmdlib.tags import LUTagsGet, LUTagsSearch, LUTagsSet, LUTagsDel
 from ganeti.cmdlib.network import LUNetworkAdd, LUNetworkRemove, \
   LUNetworkSetParams, _NetworkQuery, LUNetworkQuery, LUNetworkConnect, \
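With this change the shared helpers are imported from ganeti.cmdlib.common instead of being defined in this module. A minimal usage sketch, assuming a Ganeti source tree on the import path and that constants.VALUE_DEFAULT is the literal string "default" (helper names are taken from the import list above):

    from ganeti import constants
    from ganeti.cmdlib.common import _GetUpdatedParams

    # The VALUE_DEFAULT marker asks for a reset, so the override is dropped.
    merged = _GetUpdatedParams({"maxmem": 512}, {"maxmem": constants.VALUE_DEFAULT})
    # merged == {}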
@@ -85,21 +91,6 @@
   ]))
 
 
-def _AnnotateDiskParams(instance, devs, cfg):
-  """Little helper wrapper to the rpc annotation method.
-
-  @param instance: The instance object
-  @type devs: List of L{objects.Disk}
-  @param devs: The root devices (not any of its children!)
-  @param cfg: The config object
-  @returns The annotated disk copies
-  @see L{rpc.AnnotateDiskParams}
-
-  """
-  return rpc.AnnotateDiskParams(instance.disk_template, devs,
-                                cfg.GetInstanceDiskParams(instance))
-
-
 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                               cur_group_uuid):
   """Checks if node groups for locked instances are still correct.
@@ -155,19 +146,6 @@
   return inst_groups
 
 
-def _SupportsOob(cfg, node):
-  """Tells if node supports OOB.
-
-  @type cfg: L{config.ConfigWriter}
-  @param cfg: The cluster configuration
-  @type node: L{objects.Node}
-  @param node: The node
-  @return: The OOB script if supported or an empty string otherwise
-
-  """
-  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
-
-
 def _IsExclusiveStorageEnabledNode(cfg, node):
   """Whether exclusive_storage is in effect for the given node.
 
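For reference, the OOB check removed above reduces to a node-parameter lookup. A standalone sketch; the "oob_program" key is an assumption standing in for constants.ND_OOB_PROGRAM:

    def supports_oob(ndparams):
      # A node supports out-of-band management iff its node parameters carry
      # a non-empty OOB program path; an empty value means "not supported".
      return ndparams.get("oob_program") or ""

    supports_oob({"oob_program": "/usr/sbin/oob-helper"})  # -> "/usr/sbin/oob-helper"
    supports_oob({})                                       # -> ""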
@@ -213,158 +191,6 @@
   return names[:]
 
 
-def _GetUpdatedParams(old_params, update_dict,
-                      use_default=True, use_none=False):
-  """Return the new version of a parameter dictionary.
-
-  @type old_params: dict
-  @param old_params: old parameters
-  @type update_dict: dict
-  @param update_dict: dict containing new parameter values, or
-      constants.VALUE_DEFAULT to reset the parameter to its default
-      value
-  @param use_default: boolean
-  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
-      values as 'to be deleted' values
-  @param use_none: boolean
-  @type use_none: whether to recognise C{None} values as 'to be
-      deleted' values
-  @rtype: dict
-  @return: the new parameter dictionary
-
-  """
-  params_copy = copy.deepcopy(old_params)
-  for key, val in update_dict.iteritems():
-    if ((use_default and val == constants.VALUE_DEFAULT) or
-        (use_none and val is None)):
-      try:
-        del params_copy[key]
-      except KeyError:
-        pass
-    else:
-      params_copy[key] = val
-  return params_copy
-
-
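The update semantics of the removed _GetUpdatedParams are easy to misread, so here is a standalone sketch of the same behaviour (illustrative only, not the Ganeti code; the function itself now lives in ganeti.cmdlib.common, and "default" stands in for constants.VALUE_DEFAULT):

    import copy

    VALUE_DEFAULT = "default"  # stand-in for constants.VALUE_DEFAULT

    def get_updated_params(old_params, update_dict, use_default=True, use_none=False):
      params = copy.deepcopy(old_params)
      for key, val in update_dict.items():
        if (use_default and val == VALUE_DEFAULT) or (use_none and val is None):
          params.pop(key, None)  # "reset to default" simply drops the override
        else:
          params[key] = val
      return params

    get_updated_params({"nic": "bridged", "maxmem": 512},
                       {"maxmem": VALUE_DEFAULT, "vcpus": 2})
    # -> {"nic": "bridged", "vcpus": 2}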
-def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
-  """Return the new version of an instance policy.
-
-  @param group_policy: whether this policy applies to a group and thus
-    we should support removal of policy entries
-
-  """
-  ipolicy = copy.deepcopy(old_ipolicy)
-  for key, value in new_ipolicy.items():
-    if key not in constants.IPOLICY_ALL_KEYS:
-      raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
-                                 errors.ECODE_INVAL)
-    if (not value or value == [constants.VALUE_DEFAULT] or
-        value == constants.VALUE_DEFAULT):
-      if group_policy:
-        if key in ipolicy:
-          del ipolicy[key]
-      else:
-        raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
-                                   " on the cluster'" % key,
-                                   errors.ECODE_INVAL)
-    else:
-      if key in constants.IPOLICY_PARAMETERS:
-        # FIXME: we assume all such values are float
-        try:
-          ipolicy[key] = float(value)
-        except (TypeError, ValueError), err:
-          raise errors.OpPrereqError("Invalid value for attribute"
-                                     " '%s': '%s', error: %s" %
-                                     (key, value, err), errors.ECODE_INVAL)
-      elif key == constants.ISPECS_MINMAX:
-        for minmax in value:
-          for k in minmax.keys():
-            utils.ForceDictType(minmax[k], constants.ISPECS_PARAMETER_TYPES)
-        ipolicy[key] = value
-      elif key == constants.ISPECS_STD:
-        if group_policy:
-          msg = "%s cannot appear in group instance specs" % key
-          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
-        ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
-                                         use_none=False, use_default=False)
-        utils.ForceDictType(ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
-      else:
-        # FIXME: we assume all others are lists; this should be redone
-        # in a nicer way
-        ipolicy[key] = list(value)
-  try:
-    objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
-  except errors.ConfigurationError, err:
-    raise errors.OpPrereqError("Invalid instance policy: %s" % err,
-                               errors.ECODE_INVAL)
-  return ipolicy
-
-
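The key behavioural point in the removed _GetUpdatedIPolicy is that unsetting an entry is only legal for group policies; the cluster-level policy must stay complete. A reduced sketch of just that rule (the key names are illustrative stand-ins for constants.IPOLICY_ALL_KEYS, and the float/list conversions are omitted):

    ALLOWED_KEYS = frozenset(["minmax", "std", "vcpu-ratio",
                              "spindle-ratio", "disk-templates"])

    def update_ipolicy(old, updates, group_policy=False):
      new = dict(old)
      for key, value in updates.items():
        if key not in ALLOWED_KEYS:
          raise ValueError("Invalid key in new ipolicy: %s" % key)
        if not value or value in ("default", ["default"]):
          if not group_policy:
            raise ValueError("Can't unset ipolicy attribute %r on the cluster" % key)
          new.pop(key, None)  # group policies fall back to the cluster value
        else:
          new[key] = value
      return new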
-def _UpdateAndVerifySubDict(base, updates, type_check):
-  """Updates and verifies a dict with sub dicts of the same type.
-
-  @param base: The dict with the old data
-  @param updates: The dict with the new data
-  @param type_check: Dict suitable to ForceDictType to verify correct types
-  @returns: A new dict with updated and verified values
-
-  """
-  def fn(old, value):
-    new = _GetUpdatedParams(old, value)
-    utils.ForceDictType(new, type_check)
-    return new
-
-  ret = copy.deepcopy(base)
-  ret.update(dict((key, fn(base.get(key, {}), value))
-                  for key, value in updates.items()))
-  return ret
-
-
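The removed _UpdateAndVerifySubDict applies the same per-key merge to every sub-dictionary. A minimal sketch of the shape of that operation, with the type check stubbed out and the merge function passed in explicitly (illustrative names):

    import copy

    def update_sub_dicts(base, updates, merge_fn, check_fn=lambda d: d):
      # Merge each updated sub-dict onto the corresponding base sub-dict and
      # run the (stubbed) type check on the result.
      ret = copy.deepcopy(base)
      ret.update(dict((key, check_fn(merge_fn(base.get(key, {}), value)))
                      for key, value in updates.items()))
      return ret

    # update_sub_dicts({"xen-pvm": {"a": 1}}, {"xen-pvm": {"b": 2}},
    #                  merge_fn=lambda old, new: dict(old, **new))
    # -> {"xen-pvm": {"a": 1, "b": 2}}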
-def _MergeAndVerifyHvState(op_input, obj_input):
-  """Combines the hv state from an opcode with the one of the object
-
-  @param op_input: The input dict from the opcode
-  @param obj_input: The input dict from the objects
-  @return: The verified and updated dict
-
-  """
-  if op_input:
-    invalid_hvs = set(op_input) - constants.HYPER_TYPES
-    if invalid_hvs:
-      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
-                                 " %s" % utils.CommaJoin(invalid_hvs),
-                                 errors.ECODE_INVAL)
-    if obj_input is None:
-      obj_input = {}
-    type_check = constants.HVSTS_PARAMETER_TYPES
-    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
-
-  return None
-
-
-def _MergeAndVerifyDiskState(op_input, obj_input):
-  """Combines the disk state from an opcode with the one of the object
-
-  @param op_input: The input dict from the opcode
-  @param obj_input: The input dict from the objects
-  @return: The verified and updated dict
-  """
-  if op_input:
-    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
-    if invalid_dst:
-      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
-                                 utils.CommaJoin(invalid_dst),
-                                 errors.ECODE_INVAL)
-    type_check = constants.DSS_PARAMETER_TYPES
-    if obj_input is None:
-      obj_input = {}
-    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
-                                              type_check))
-                for key, value in op_input.items())
-
-  return None
-
-
 def _ReleaseLocks(lu, level, names=None, keep=None):
   """Releases locks owned by an LU.
 
@@ -428,18 +254,6 @@
               for vol in vols)
 
 
-def _RunPostHook(lu, node_name):
-  """Runs the post-hook for an opcode on a single node.
-
-  """
-  hm = lu.proc.BuildHooksManager(lu)
-  try:
-    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
-  except Exception, err: # pylint: disable=W0703
-    lu.LogWarning("Errors occurred running hooks on %s: %s",
-                  node_name, err)
-
-
 def _CheckOutputFields(static, dynamic, selected):
   """Checks whether all selected fields are valid.
 
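The removed _RunPostHook illustrates the "best effort" hook convention: failures are logged as warnings and never abort the calling operation. A generic sketch of that pattern (names are illustrative, not the Ganeti API):

    import logging

    def run_post_hook(run_phase_fn, node_name):
      try:
        run_phase_fn([node_name])  # run the post-phase hooks on a single node
      except Exception as err:     # pylint: disable=broad-except
        logging.warning("Errors occurred running hooks on %s: %s", node_name, err)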
@@ -574,35 +388,6 @@
     raise errors.OpExecError(msg)
 
 
-def _CheckNodePVs(nresult, exclusive_storage):
-  """Check node PVs.
-
-  """
-  pvlist_dict = nresult.get(constants.NV_PVLIST, None)
-  if pvlist_dict is None:
-    return (["Can't get PV list from node"], None)
-  pvlist = map(objects.LvmPvInfo.FromDict, pvlist_dict)
-  errlist = []
-  # check that ':' is not present in PV names, since it's a
-  # special character for lvcreate (denotes the range of PEs to
-  # use on the PV)
-  for pv in pvlist:
-    if ":" in pv.name:
-      errlist.append("Invalid character ':' in PV '%s' of VG '%s'" %
-                     (pv.name, pv.vg_name))
-  es_pvinfo = None
-  if exclusive_storage:
-    (errmsgs, es_pvinfo) = utils.LvmExclusiveCheckNodePvs(pvlist)
-    errlist.extend(errmsgs)
-    shared_pvs = nresult.get(constants.NV_EXCLUSIVEPVS, None)
-    if shared_pvs:
-      for (pvname, lvlist) in shared_pvs:
-        # TODO: Check that LVs are really unrelated (snapshots, DRBD meta...)
-        errlist.append("PV %s is shared among unrelated LVs (%s)" %
-                       (pvname, utils.CommaJoin(lvlist)))
-  return (errlist, es_pvinfo)
-
-
 def _GetClusterDomainSecret():
   """Reads the cluster domain secret.
 
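The first half of the removed _CheckNodePVs is a plain name check. A standalone sketch, with PVs modelled as (name, vg_name) tuples instead of objects.LvmPvInfo:

    def check_pv_names(pvlist):
      # ':' is special to lvcreate (it delimits the range of physical extents
      # to use on the PV), so it must not appear in a PV name.
      return ["Invalid character ':' in PV '%s' of VG '%s'" % (name, vg)
              for (name, vg) in pvlist if ":" in name]

    check_pv_names([("/dev/sda2", "xenvg"), ("/dev/disk:0", "xenvg")])
    # -> ["Invalid character ':' in PV '/dev/disk:0' of VG 'xenvg'"]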
@@ -642,113 +427,6 @@
                                " is down")
 
 
-def _ComputeMinMaxSpec(name, qualifier, ispecs, value):
-  """Computes if value is in the desired range.
-
-  @param name: name of the parameter for which we perform the check
-  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
-      not just 'disk')
-  @param ispecs: dictionary containing min and max values
-  @param value: actual value that we want to use
-  @return: None or an error string
-
-  """
-  if value in [None, constants.VALUE_AUTO]:
-    return None
-  max_v = ispecs[constants.ISPECS_MAX].get(name, value)
-  min_v = ispecs[constants.ISPECS_MIN].get(name, value)
-  if value > max_v or min_v > value:
-    if qualifier:
-      fqn = "%s/%s" % (name, qualifier)
-    else:
-      fqn = name
-    return ("%s value %s is not in range [%s, %s]" %
-            (fqn, value, min_v, max_v))
-  return None
-
-
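The removed _ComputeMinMaxSpec is the per-value range check used by the ipolicy verification below. An equivalent standalone sketch; "min", "max" and "auto" are stand-ins for the ISPECS_MIN, ISPECS_MAX and VALUE_AUTO constants:

    def compute_min_max_spec(name, qualifier, ispecs, value):
      if value in (None, "auto"):
        return None  # unset or "auto" values are never violations
      max_v = ispecs["max"].get(name, value)
      min_v = ispecs["min"].get(name, value)
      if value > max_v or value < min_v:
        fqn = "%s/%s" % (name, qualifier) if qualifier else name
        return "%s value %s is not in range [%s, %s]" % (fqn, value, min_v, max_v)
      return None

    compute_min_max_spec("disk-size", "0",
                         {"min": {"disk-size": 1024}, "max": {"disk-size": 4096}}, 8192)
    # -> "disk-size/0 value 8192 is not in range [1024, 4096]"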
-def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
-                                 nic_count, disk_sizes, spindle_use,
-                                 disk_template,
-                                 _compute_fn=_ComputeMinMaxSpec):
-  """Verifies ipolicy against provided specs.
-
-  @type ipolicy: dict
-  @param ipolicy: The ipolicy
-  @type mem_size: int
-  @param mem_size: The memory size
-  @type cpu_count: int
-  @param cpu_count: Used cpu cores
-  @type disk_count: int
-  @param disk_count: Number of disks used
-  @type nic_count: int
-  @param nic_count: Number of nics used
-  @type disk_sizes: list of ints
-  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
-  @type spindle_use: int
-  @param spindle_use: The number of spindles this instance uses
-  @type disk_template: string
-  @param disk_template: The disk template of the instance
-  @param _compute_fn: The compute function (unittest only)
-  @return: A list of violations, or an empty list of no violations are found
-
-  """
-  assert disk_count == len(disk_sizes)
-
-  test_settings = [
-    (constants.ISPEC_MEM_SIZE, "", mem_size),
-    (constants.ISPEC_CPU_COUNT, "", cpu_count),
-    (constants.ISPEC_NIC_COUNT, "", nic_count),
-    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
-    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
-         for idx, d in enumerate(disk_sizes)]
-  if disk_template != constants.DT_DISKLESS:
-    # This check doesn't make sense for diskless instances
-    test_settings.append((constants.ISPEC_DISK_COUNT, "", disk_count))
-  ret = []
-  allowed_dts = ipolicy[constants.IPOLICY_DTS]
-  if disk_template not in allowed_dts:
-    ret.append("Disk template %s is not allowed (allowed templates: %s)" %
-               (disk_template, utils.CommaJoin(allowed_dts)))
-
-  min_errs = None
-  for minmax in ipolicy[constants.ISPECS_MINMAX]:
-    errs = filter(None,
-                  (_compute_fn(name, qualifier, minmax, value)
-                   for (name, qualifier, value) in test_settings))
-    if min_errs is None or len(errs) < len(min_errs):
-      min_errs = errs
-  assert min_errs is not None
-  return ret + min_errs
-
-
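The interesting part of the removed _ComputeIPolicySpecViolation is how several min/max sets are combined: each set produces its own violation list and only the shortest one is reported, so a spec passes if it fits any one of the allowed ranges. A condensed sketch of that selection (check_fn is any per-value checker, for instance the one sketched above):

    def best_minmax_violations(minmax_sets, test_settings, check_fn):
      min_errs = None
      for minmax in minmax_sets:
        errs = [e for e in (check_fn(name, qualifier, minmax, value)
                            for (name, qualifier, value) in test_settings) if e]
        if min_errs is None or len(errs) < len(min_errs):
          min_errs = errs  # keep the closest match across all allowed ranges
      return min_errs or []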
-def _ComputeIPolicyInstanceViolation(ipolicy, instance, cfg,
-                                     _compute_fn=_ComputeIPolicySpecViolation):
-  """Compute if instance meets the specs of ipolicy.
-
-  @type ipolicy: dict
-  @param ipolicy: The ipolicy to verify against
-  @type instance: L{objects.Instance}
-  @param instance: The instance to verify
-  @type cfg: L{config.ConfigWriter}
-  @param cfg: Cluster configuration
-  @param _compute_fn: The function to verify ipolicy (unittest only)
-  @see: L{_ComputeIPolicySpecViolation}
-
-  """
-  be_full = cfg.GetClusterInfo().FillBE(instance)
-  mem_size = be_full[constants.BE_MAXMEM]
-  cpu_count = be_full[constants.BE_VCPUS]
-  spindle_use = be_full[constants.BE_SPINDLE_USE]
-  disk_count = len(instance.disks)
-  disk_sizes = [disk.size for disk in instance.disks]
-  nic_count = len(instance.nics)
-  disk_template = instance.disk_template
-
-  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
-                     disk_sizes, spindle_use, disk_template)
-
-
 def _ComputeIPolicyInstanceSpecViolation(
     ipolicy, instance_spec, disk_template,
     _compute_fn=_ComputeIPolicySpecViolation):
@@ -822,22 +500,6 @@
     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 
 
-def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances, cfg):
-  """Computes a set of any instances that would violate the new ipolicy.
-
-  @param old_ipolicy: The current (still in-place) ipolicy
-  @param new_ipolicy: The new (to become) ipolicy
-  @param instances: List of instances to verify
-  @type cfg: L{config.ConfigWriter}
-  @param cfg: Cluster configuration
-  @return: A list of instances which violates the new ipolicy but
-      did not before
-
-  """
-  return (_ComputeViolatingInstances(new_ipolicy, instances, cfg) -
-          _ComputeViolatingInstances(old_ipolicy, instances, cfg))
-
-
 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                           minmem, maxmem, vcpus, nics, disk_template, disks,
                           bep, hvp, hypervisor_name, tags):
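The removed _ComputeNewInstanceViolations is a plain set difference over the two policy evaluations. A one-function sketch:

    def new_violations(violating_under_old, violating_under_new):
      # Instances that break the new policy but were fine under the old one.
      return frozenset(violating_under_new) - frozenset(violating_under_old)

    new_violations({"inst1"}, {"inst1", "inst2"})  # -> frozenset({"inst2"})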
@@ -1022,22 +684,6 @@
   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
 
 
-def _AdjustCandidatePool(lu, exceptions):
-  """Adjust the candidate pool after node operations.
-
-  """
-  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
-  if mod_list:
-    lu.LogInfo("Promoted nodes to master candidate role: %s",
-               utils.CommaJoin(node.name for node in mod_list))
-    for name in mod_list:
-      lu.context.ReaddNode(name)
-  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
-  if mc_now > mc_max:
-    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
-               (mc_now, mc_max))
-
-
 def _DecideSelfPromotion(lu, exceptions=None):
   """Decide whether I should promote myself as a master candidate.
 
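The reporting side of the removed _AdjustCandidatePool can be sketched in isolation (illustrative names; the real function also updates the cluster configuration via MaintainCandidatePool and ReaddNode):

    import logging

    def report_candidate_pool(promoted, mc_now, mc_max):
      # Announce newly promoted master candidates and note when the pool has
      # grown beyond the configured target size.
      if promoted:
        logging.info("Promoted nodes to master candidate role: %s",
                     ", ".join(promoted))
      if mc_now > mc_max:
        logging.info("Note: more nodes are candidates (%d) than desired (%d)",
                     mc_now, mc_max)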
@@ -1049,21 +695,6 @@
   return mc_now < mc_should
 
 
-def _ComputeViolatingInstances(ipolicy, instances, cfg):
-  """Computes a set of instances who violates given ipolicy.
-
-  @param ipolicy: The ipolicy to verify
-  @type instances: L{objects.Instance}
-  @param instances: List of instances to verify
-  @type cfg: L{config.ConfigWriter}
-  @param cfg: Cluster configuration
-  @return: A frozenset of instance names violating the ipolicy
-
-  """
-  return frozenset([inst.name for inst in instances
-                    if _ComputeIPolicyInstanceViolation(ipolicy, inst, cfg)])
-
-
 def _CheckNicsBridgesExist(lu, target_nics, target_node):
   """Check that the brigdes needed by a list of nics exist.
 
... | ... | |
1173 | 804 |
"""Check the sanity of iallocator and node arguments and use the |
1174 | 805 |
cluster-wide iallocator if appropriate. |
1175 | 806 |
|
1176 |
Check that at most one of (iallocator, node) is specified. If none is |
|
1177 |
specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT}, |
|
1178 |
then the LU's opcode's iallocator slot is filled with the cluster-wide |
|
1179 |
default iallocator. |
|
1180 |
|
|
1181 |
@type iallocator_slot: string |
|
1182 |
@param iallocator_slot: the name of the opcode iallocator slot |
|
1183 |
@type node_slot: string |
|
1184 |
@param node_slot: the name of the opcode target node slot |
|
1185 |
|
|
1186 |
""" |
|
1187 |
node = getattr(lu.op, node_slot, None) |
|
1188 |
ialloc = getattr(lu.op, iallocator_slot, None) |
|
1189 |
if node == []: |
|
1190 |
node = None |
|
1191 |
|
|
1192 |
if node is not None and ialloc is not None: |
|
1193 |
raise errors.OpPrereqError("Do not specify both, iallocator and node", |
|
1194 |
errors.ECODE_INVAL) |
|
1195 |
elif ((node is None and ialloc is None) or |
|
1196 |
ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT): |
|
1197 |
default_iallocator = lu.cfg.GetDefaultIAllocator() |
|
1198 |
if default_iallocator: |
|
1199 |
setattr(lu.op, iallocator_slot, default_iallocator) |
|
1200 |
else: |
|
1201 |
raise errors.OpPrereqError("No iallocator or node given and no" |
|
1202 |
" cluster-wide default iallocator found;" |
|
1203 |
" please specify either an iallocator or a" |
|
1204 |
" node, or set a cluster-wide default" |
|
1205 |
" iallocator", errors.ECODE_INVAL) |
|
1206 |
|
|
1207 |
|
|
1208 |
def _GetDefaultIAllocator(cfg, ialloc): |
|
1209 |
"""Decides on which iallocator to use. |
|
1210 |
|
|
1211 |
@type cfg: L{config.ConfigWriter} |
|
1212 |
@param cfg: Cluster configuration object |
|
1213 |
@type ialloc: string or None |
|
1214 |
@param ialloc: Iallocator specified in opcode |
|
1215 |
@rtype: string |
|
1216 |
@return: Iallocator name |
|
1217 |
|
|
1218 |
""" |
|
1219 |
if not ialloc: |
|
1220 |
# Use default iallocator |
|
1221 |
ialloc = cfg.GetDefaultIAllocator() |
|
1222 |
|
|
1223 |
if not ialloc: |
|
1224 |
raise errors.OpPrereqError("No iallocator was specified, neither in the" |
|
1225 |
" opcode nor as a cluster-wide default", |
|
1226 |
errors.ECODE_INVAL) |
|
1227 |
|
|
1228 |
return ialloc |
|
1229 |
|
|
1230 |
|
|
1231 |
def _CheckHostnameSane(lu, name): |
|
1232 |
"""Ensures that a given hostname resolves to a 'sane' name. |
|
1233 |
|
|
1234 |
The given name is required to be a prefix of the resolved hostname, |
|
1235 |
to prevent accidental mismatches. |
|
1236 |
|
|
1237 |
@param lu: the logical unit on behalf of which we're checking |
|
1238 |
@param name: the name we should resolve and check |
|
1239 |
@return: the resolved hostname object |
|
1240 |
|
|
1241 |
""" |
|
1242 |
hostname = netutils.GetHostname(name=name) |
|
1243 |
if hostname.name != name: |
|
1244 |
lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name) |
|
1245 |
if not utils.MatchNameComponent(name, [hostname.name]): |
|
1246 |
raise errors.OpPrereqError(("Resolved hostname '%s' does not look the" |
|
1247 |
" same as given hostname '%s'") % |
|
1248 |
(hostname.name, name), errors.ECODE_INVAL) |
|
1249 |
return hostname |
|
1250 |
|
|
1251 |
|
|
1252 |
class LUClusterPostInit(LogicalUnit): |
|
1253 |
"""Logical unit for running hooks after cluster initialization. |
|
1254 |
|
|
1255 |
""" |
|
1256 |
HPATH = "cluster-init" |
|
1257 |
HTYPE = constants.HTYPE_CLUSTER |
|
1258 |
|
|
1259 |
def BuildHooksEnv(self): |
|
1260 |
"""Build hooks env. |
|
1261 |
|
|
1262 |
""" |
|
1263 |
return { |
|
1264 |
"OP_TARGET": self.cfg.GetClusterName(), |
|
1265 |
} |
|
1266 |
|
|
1267 |
def BuildHooksNodes(self): |
|
1268 |
"""Build hooks nodes. |
|
1269 |
|
|
1270 |
""" |
|
1271 |
return ([], [self.cfg.GetMasterNode()]) |
|
1272 |
|
|
1273 |
def Exec(self, feedback_fn): |
|
1274 |
"""Nothing to do. |
|
1275 |
|
|
1276 |
""" |
|
1277 |
return True |
|
1278 |
|
|
1279 |
|
|
1280 |
class LUClusterDestroy(LogicalUnit): |
|
1281 |
"""Logical unit for destroying the cluster. |
|
1282 |
|
|
1283 |
""" |
|
1284 |
HPATH = "cluster-destroy" |
|
1285 |
HTYPE = constants.HTYPE_CLUSTER |
|
1286 |
|
|
1287 |
def BuildHooksEnv(self): |
|
1288 |
"""Build hooks env. |
|
1289 |
|
|
1290 |
""" |
|
1291 |
return { |
|
1292 |
"OP_TARGET": self.cfg.GetClusterName(), |
|
1293 |
} |
|
1294 |
|
|
1295 |
def BuildHooksNodes(self): |
|
1296 |
"""Build hooks nodes. |
|
1297 |
|
|
1298 |
""" |
|
1299 |
return ([], []) |
|
1300 |
|
|
1301 |
def CheckPrereq(self): |
|
1302 |
"""Check prerequisites. |
|
1303 |
|
|
1304 |
This checks whether the cluster is empty. |
|
1305 |
|
|
1306 |
Any errors are signaled by raising errors.OpPrereqError. |
|
1307 |
|
|
1308 |
""" |
|
1309 |
master = self.cfg.GetMasterNode() |
|
1310 |
|
|
1311 |
nodelist = self.cfg.GetNodeList() |
|
1312 |
if len(nodelist) != 1 or nodelist[0] != master: |
|
1313 |
raise errors.OpPrereqError("There are still %d node(s) in" |
|
1314 |
" this cluster." % (len(nodelist) - 1), |
|
1315 |
errors.ECODE_INVAL) |
|
1316 |
instancelist = self.cfg.GetInstanceList() |
|
1317 |
if instancelist: |
|
1318 |
raise errors.OpPrereqError("There are still %d instance(s) in" |
|
1319 |
" this cluster." % len(instancelist), |
|
1320 |
errors.ECODE_INVAL) |
|
1321 |
|
|
1322 |
def Exec(self, feedback_fn): |
|
1323 |
"""Destroys the cluster. |
|
1324 |
|
|
1325 |
""" |
|
1326 |
master_params = self.cfg.GetMasterNetworkParameters() |
|
1327 |
|
|
1328 |
# Run post hooks on master node before it's removed |
|
1329 |
_RunPostHook(self, master_params.name) |
|
1330 |
|
|
1331 |
ems = self.cfg.GetUseExternalMipScript() |
|
1332 |
result = self.rpc.call_node_deactivate_master_ip(master_params.name, |
|
1333 |
master_params, ems) |
|
1334 |
if result.fail_msg: |
|
1335 |
self.LogWarning("Error disabling the master IP address: %s", |
|
1336 |
result.fail_msg) |
|
1337 |
|
|
1338 |
return master_params.name |
|
1339 |
|
|
1340 |
|
|
1341 |
def _VerifyCertificate(filename): |
|
1342 |
"""Verifies a certificate for L{LUClusterVerifyConfig}. |
|
1343 |
|
|
1344 |
@type filename: string |
|
1345 |
@param filename: Path to PEM file |
|
1346 |
|
|
1347 |
""" |
|
1348 |
try: |
|
1349 |
cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, |
|
1350 |
utils.ReadFile(filename)) |
|
1351 |
except Exception, err: # pylint: disable=W0703 |
|
1352 |
return (LUClusterVerifyConfig.ETYPE_ERROR, |
|
1353 |
"Failed to load X509 certificate %s: %s" % (filename, err)) |
|
1354 |
|
|
1355 |
(errcode, msg) = \ |
|
1356 |
utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN, |
|
1357 |
constants.SSL_CERT_EXPIRATION_ERROR) |
|
1358 |
|
|
1359 |
if msg: |
|
1360 |
fnamemsg = "While verifying %s: %s" % (filename, msg) |
|
1361 |
else: |
|
1362 |
fnamemsg = None |
|
1363 |
|
|
1364 |
if errcode is None: |
|
1365 |
return (None, fnamemsg) |
|
1366 |
elif errcode == utils.CERT_WARNING: |
|
1367 |
return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg) |
|
1368 |
elif errcode == utils.CERT_ERROR: |
|
1369 |
return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg) |
|
1370 |
|
|
1371 |
raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode) |
|
1372 |
|
|
1373 |
|
|
1374 |
def _GetAllHypervisorParameters(cluster, instances): |
|
1375 |
"""Compute the set of all hypervisor parameters. |
|
1376 |
|
|
1377 |
@type cluster: L{objects.Cluster} |
|
1378 |
@param cluster: the cluster object |
|
1379 |
@param instances: list of L{objects.Instance} |
|
1380 |
@param instances: additional instances from which to obtain parameters |
|
1381 |
@rtype: list of (origin, hypervisor, parameters) |
|
1382 |
@return: a list with all parameters found, indicating the hypervisor they |
|
1383 |
apply to, and the origin (can be "cluster", "os X", or "instance Y") |
|
1384 |
|
|
1385 |
""" |
|
1386 |
hvp_data = [] |
|
1387 |
|
|
1388 |
for hv_name in cluster.enabled_hypervisors: |
|
1389 |
hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name))) |
|
1390 |
|
|
1391 |
for os_name, os_hvp in cluster.os_hvp.items(): |
|
1392 |
for hv_name, hv_params in os_hvp.items(): |
|
1393 |
if hv_params: |
|
1394 |
full_params = cluster.GetHVDefaults(hv_name, os_name=os_name) |
|
1395 |
hvp_data.append(("os %s" % os_name, hv_name, full_params)) |
|
1396 |
|
|
1397 |
# TODO: collapse identical parameter values in a single one |
|
1398 |
for instance in instances: |
|
1399 |
if instance.hvparams: |
|
1400 |
hvp_data.append(("instance %s" % instance.name, instance.hypervisor, |
|
1401 |
cluster.FillHV(instance))) |
|
1402 |
|
|
1403 |
return hvp_data |
|
1404 |
|
|
1405 |
|
|
1406 |
class _VerifyErrors(object): |
|
1407 |
"""Mix-in for cluster/group verify LUs. |
|
1408 |
|
|
1409 |
It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects |
|
1410 |
self.op and self._feedback_fn to be available.) |
|
1411 |
|
|
1412 |
""" |
|
1413 |
|
|
1414 |
ETYPE_FIELD = "code" |
|
1415 |
ETYPE_ERROR = "ERROR" |
|
1416 |
ETYPE_WARNING = "WARNING" |
|
1417 |
|
|
1418 |
def _Error(self, ecode, item, msg, *args, **kwargs): |
|
1419 |
"""Format an error message. |
|
1420 |
|
|
1421 |
Based on the opcode's error_codes parameter, either format a |
|
1422 |
parseable error code, or a simpler error string. |
|
1423 |
|
|
1424 |
This must be called only from Exec and functions called from Exec. |
|
1425 |
|
|
1426 |
""" |
|
1427 |
ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) |
|
1428 |
itype, etxt, _ = ecode |
|
1429 |
# If the error code is in the list of ignored errors, demote the error to a |
|
1430 |
# warning |
|
1431 |
if etxt in self.op.ignore_errors: # pylint: disable=E1101 |
|
1432 |
ltype = self.ETYPE_WARNING |
|
1433 |
# first complete the msg |
|
1434 |
if args: |
|
1435 |
msg = msg % args |
|
1436 |
# then format the whole message |
|
1437 |
if self.op.error_codes: # This is a mix-in. pylint: disable=E1101 |
|
1438 |
msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg) |
|
1439 |
else: |
|
1440 |
if item: |
|
1441 |
item = " " + item |
|
1442 |
else: |
|
1443 |
item = "" |
|
1444 |
msg = "%s: %s%s: %s" % (ltype, itype, item, msg) |
|
1445 |
# and finally report it via the feedback_fn |
|
1446 |
self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101 |
|
1447 |
# do not mark the operation as failed for WARN cases only |
|
1448 |
if ltype == self.ETYPE_ERROR: |
|
1449 |
self.bad = True |
|
1450 |
|
|
1451 |
def _ErrorIf(self, cond, *args, **kwargs): |
|
1452 |
"""Log an error message if the passed condition is True. |
|
1453 |
|
|
1454 |
""" |
|
1455 |
if (bool(cond) |
|
1456 |
or self.op.debug_simulate_errors): # pylint: disable=E1101 |
|
1457 |
self._Error(*args, **kwargs) |
|
1458 |
|
|
1459 |
|
|
1460 |
class LUClusterVerify(NoHooksLU): |
|
1461 |
"""Submits all jobs necessary to verify the cluster. |
|
1462 |
|
|
1463 |
""" |
|
1464 |
REQ_BGL = False |
|
1465 |
|
|
1466 |
def ExpandNames(self): |
|
1467 |
self.needed_locks = {} |
|
1468 |
|
|
1469 |
def Exec(self, feedback_fn): |
|
1470 |
jobs = [] |
|
1471 |
|
|
1472 |
if self.op.group_name: |
|
1473 |
groups = [self.op.group_name] |
|
1474 |
depends_fn = lambda: None |
|
1475 |
else: |
|
1476 |
groups = self.cfg.GetNodeGroupList() |
|
1477 |
|
|
1478 |
# Verify global configuration |
|
1479 |
jobs.append([ |
|
1480 |
opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors), |
|
1481 |
]) |
|
1482 |
|
|
1483 |
# Always depend on global verification |
|
1484 |
depends_fn = lambda: [(-len(jobs), [])] |
|
1485 |
|
|
1486 |
jobs.extend( |
|
1487 |
[opcodes.OpClusterVerifyGroup(group_name=group, |
|
1488 |
ignore_errors=self.op.ignore_errors, |
|
1489 |
depends=depends_fn())] |
|
1490 |
for group in groups) |
|
1491 |
|
|
1492 |
# Fix up all parameters |
|
1493 |
for op in itertools.chain(*jobs): # pylint: disable=W0142 |
|
1494 |
op.debug_simulate_errors = self.op.debug_simulate_errors |
|
1495 |
op.verbose = self.op.verbose |
|
1496 |
op.error_codes = self.op.error_codes |
|
1497 |
try: |
|
1498 |
op.skip_checks = self.op.skip_checks |
|
1499 |
except AttributeError: |
|
1500 |
assert not isinstance(op, opcodes.OpClusterVerifyGroup) |
|
1501 |
|
|
1502 |
return ResultWithJobs(jobs) |
|
1503 |
|
|
1504 |
|
|
1505 |
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors): |
|
1506 |
"""Verifies the cluster config. |
|
1507 |
|
|
1508 |
""" |
|
1509 |
REQ_BGL = False |
|
1510 |
|
|
1511 |
def _VerifyHVP(self, hvp_data): |
|
1512 |
"""Verifies locally the syntax of the hypervisor parameters. |
|
1513 |
|
|
1514 |
""" |
|
1515 |
for item, hv_name, hv_params in hvp_data: |
|
1516 |
msg = ("hypervisor %s parameters syntax check (source %s): %%s" % |
|
1517 |
(item, hv_name)) |
|
1518 |
try: |
|
1519 |
hv_class = hypervisor.GetHypervisorClass(hv_name) |
|
1520 |
utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) |
|
1521 |
hv_class.CheckParameterSyntax(hv_params) |
|
1522 |
except errors.GenericError, err: |
|
1523 |
self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err)) |
|
1524 |
|
|
1525 |
def ExpandNames(self): |
|
1526 |
self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET) |
|
1527 |
self.share_locks = _ShareAll() |
|
1528 |
|
|
1529 |
def CheckPrereq(self): |
|
1530 |
"""Check prerequisites. |
|
1531 |
|
|
1532 |
""" |
|
1533 |
# Retrieve all information |
|
1534 |
self.all_group_info = self.cfg.GetAllNodeGroupsInfo() |
|
1535 |
self.all_node_info = self.cfg.GetAllNodesInfo() |
|
1536 |
self.all_inst_info = self.cfg.GetAllInstancesInfo() |
|
1537 |
|
|
1538 |
def Exec(self, feedback_fn): |
|
1539 |
"""Verify integrity of cluster, performing various test on nodes. |
|
1540 |
|
|
1541 |
""" |
|
1542 |
self.bad = False |
|
1543 |
self._feedback_fn = feedback_fn |
|
1544 |
|
|
1545 |
feedback_fn("* Verifying cluster config") |
|
1546 |
|
|
1547 |
for msg in self.cfg.VerifyConfig(): |
|
1548 |
self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg) |
|
1549 |
|
|
1550 |
feedback_fn("* Verifying cluster certificate files") |
|
1551 |
|
|
1552 |
for cert_filename in pathutils.ALL_CERT_FILES: |
|
1553 |
(errcode, msg) = _VerifyCertificate(cert_filename) |
|
1554 |
self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode) |
|
1555 |
|
|
1556 |
feedback_fn("* Verifying hypervisor parameters") |
|
1557 |
|
|
1558 |
self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(), |
|
1559 |
self.all_inst_info.values())) |
|
1560 |
|
|
1561 |
feedback_fn("* Verifying all nodes belong to an existing group") |
|
1562 |
|
|
1563 |
# We do this verification here because, should this bogus circumstance |
|
1564 |
# occur, it would never be caught by VerifyGroup, which only acts on |
|
1565 |
# nodes/instances reachable from existing node groups. |
|
1566 |
|
|
1567 |
dangling_nodes = set(node.name for node in self.all_node_info.values() |
|
1568 |
if node.group not in self.all_group_info) |
|
1569 |
|
|
1570 |
dangling_instances = {} |
|
1571 |
no_node_instances = [] |
|
1572 |
|
|
1573 |
for inst in self.all_inst_info.values(): |
|
1574 |
if inst.primary_node in dangling_nodes: |
|
1575 |
dangling_instances.setdefault(inst.primary_node, []).append(inst.name) |
|
1576 |
elif inst.primary_node not in self.all_node_info: |
|
1577 |
no_node_instances.append(inst.name) |
|
1578 |
|
|
1579 |
pretty_dangling = [ |
|
1580 |
"%s (%s)" % |
|
1581 |
(node.name, |
|
1582 |
utils.CommaJoin(dangling_instances.get(node.name, |
|
1583 |
["no instances"]))) |
|
1584 |
for node in dangling_nodes] |
|
1585 |
|
|
1586 |
self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES, |
|
1587 |
None, |
|
1588 |
"the following nodes (and their instances) belong to a non" |
|
1589 |
" existing group: %s", utils.CommaJoin(pretty_dangling)) |
|
1590 |
|
|
1591 |
self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST, |
|
1592 |
None, |
|
1593 |
"the following instances have a non-existing primary-node:" |
|
1594 |
" %s", utils.CommaJoin(no_node_instances)) |
|
1595 |
|
|
1596 |
return not self.bad |
|
1597 |
|
|
1598 |
|
|
1599 |
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors): |
|
1600 |
"""Verifies the status of a node group. |
|
1601 |
|
|
1602 |
""" |
|
1603 |
HPATH = "cluster-verify" |
|
1604 |
HTYPE = constants.HTYPE_CLUSTER |
|
1605 |
REQ_BGL = False |
|
1606 |
|
|
1607 |
_HOOKS_INDENT_RE = re.compile("^", re.M) |
|
1608 |
|
|
1609 |
class NodeImage(object): |
|
1610 |
"""A class representing the logical and physical status of a node. |
|
1611 |
|
|
1612 |
@type name: string |
|
1613 |
@ivar name: the node name to which this object refers |
|
1614 |
@ivar volumes: a structure as returned from |
|
1615 |
L{ganeti.backend.GetVolumeList} (runtime) |
|
1616 |
@ivar instances: a list of running instances (runtime) |
|
1617 |
@ivar pinst: list of configured primary instances (config) |
|
1618 |
@ivar sinst: list of configured secondary instances (config) |
|
1619 |
@ivar sbp: dictionary of {primary-node: list of instances} for all |
|
1620 |
instances for which this node is secondary (config) |
|
1621 |
@ivar mfree: free memory, as reported by hypervisor (runtime) |
|
1622 |
@ivar dfree: free disk, as reported by the node (runtime) |
|
1623 |
@ivar offline: the offline status (config) |
|
1624 |
@type rpc_fail: boolean |
|
1625 |
@ivar rpc_fail: whether the RPC verify call was successfull (overall, |
|
1626 |
not whether the individual keys were correct) (runtime) |
|
1627 |
@type lvm_fail: boolean |
|
1628 |
@ivar lvm_fail: whether the RPC call didn't return valid LVM data |
|
1629 |
@type hyp_fail: boolean |
|
1630 |
@ivar hyp_fail: whether the RPC call didn't return the instance list |
|
1631 |
@type ghost: boolean |
|
1632 |
@ivar ghost: whether this is a known node or not (config) |
|
1633 |
@type os_fail: boolean |
|
1634 |
@ivar os_fail: whether the RPC call didn't return valid OS data |
|
1635 |
@type oslist: list |
|
1636 |
@ivar oslist: list of OSes as diagnosed by DiagnoseOS |
|
1637 |
@type vm_capable: boolean |
|
1638 |
@ivar vm_capable: whether the node can host instances |
|
1639 |
@type pv_min: float |
|
1640 |
@ivar pv_min: size in MiB of the smallest PVs |
|
1641 |
@type pv_max: float |
|
1642 |
@ivar pv_max: size in MiB of the biggest PVs |
|
1643 |
|
|
1644 |
""" |
|
1645 |
def __init__(self, offline=False, name=None, vm_capable=True): |
|
1646 |
self.name = name |
|
1647 |
self.volumes = {} |
|
1648 |
self.instances = [] |
|
1649 |
self.pinst = [] |
|
1650 |
self.sinst = [] |
|
1651 |
self.sbp = {} |
|
1652 |
self.mfree = 0 |
|
1653 |
self.dfree = 0 |
|
1654 |
self.offline = offline |
|
1655 |
self.vm_capable = vm_capable |
|
1656 |
self.rpc_fail = False |
|
1657 |
self.lvm_fail = False |
|
1658 |
self.hyp_fail = False |
|
1659 |
self.ghost = False |
|
1660 |
self.os_fail = False |
|
1661 |
self.oslist = {} |
|
1662 |
self.pv_min = None |
|
1663 |
self.pv_max = None |
|
1664 |
|
|
1665 |
def ExpandNames(self): |
|
1666 |
# This raises errors.OpPrereqError on its own: |
|
1667 |
self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name) |
|
1668 |
|
|
1669 |
# Get instances in node group; this is unsafe and needs verification later |
|
1670 |
inst_names = \ |
|
1671 |
self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) |
|
1672 |
|
|
1673 |
self.needed_locks = { |
|
1674 |
locking.LEVEL_INSTANCE: inst_names, |
|
1675 |
locking.LEVEL_NODEGROUP: [self.group_uuid], |
|
1676 |
locking.LEVEL_NODE: [], |
|
1677 |
|
|
1678 |
# This opcode is run by watcher every five minutes and acquires all nodes |
|
1679 |
# for a group. It doesn't run for a long time, so it's better to acquire |
|
1680 |
# the node allocation lock as well. |
|
1681 |
locking.LEVEL_NODE_ALLOC: locking.ALL_SET, |
|
1682 |
} |
|
1683 |
|
|
1684 |
self.share_locks = _ShareAll() |
|
1685 |
|
|
1686 |
def DeclareLocks(self, level): |
|
1687 |
if level == locking.LEVEL_NODE: |
|
1688 |
# Get members of node group; this is unsafe and needs verification later |
|
1689 |
nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members) |
|
1690 |
|
|
1691 |
all_inst_info = self.cfg.GetAllInstancesInfo() |
|
1692 |
|
|
1693 |
# In Exec(), we warn about mirrored instances that have primary and |
|
1694 |
# secondary living in separate node groups. To fully verify that |
|
1695 |
# volumes for these instances are healthy, we will need to do an |
|
1696 |
# extra call to their secondaries. We ensure here those nodes will |
|
1697 |
# be locked. |
|
1698 |
for inst in self.owned_locks(locking.LEVEL_INSTANCE): |
|
1699 |
# Important: access only the instances whose lock is owned |
|
1700 |
if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR: |
|
1701 |
nodes.update(all_inst_info[inst].secondary_nodes) |
|
1702 |
|
|
1703 |
self.needed_locks[locking.LEVEL_NODE] = nodes |
|
1704 |
|
|
1705 |
def CheckPrereq(self): |
|
1706 |
assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) |
|
1707 |
self.group_info = self.cfg.GetNodeGroup(self.group_uuid) |
|
1708 |
|
|
1709 |
group_nodes = set(self.group_info.members) |
|
1710 |
group_instances = \ |
|
1711 |
self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) |
|
1712 |
|
|
1713 |
unlocked_nodes = \ |
|
1714 |
group_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) |
|
1715 |
|
|
1716 |
unlocked_instances = \ |
|
1717 |
group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE)) |
|
1718 |
|
|
1719 |
if unlocked_nodes: |
|
1720 |
raise errors.OpPrereqError("Missing lock for nodes: %s" % |
|
1721 |
utils.CommaJoin(unlocked_nodes), |
|
1722 |
errors.ECODE_STATE) |
|
1723 |
|
|
1724 |
if unlocked_instances: |
|
1725 |
raise errors.OpPrereqError("Missing lock for instances: %s" % |
|
1726 |
utils.CommaJoin(unlocked_instances), |
|
1727 |
errors.ECODE_STATE) |
|
1728 |
|
|
1729 |
self.all_node_info = self.cfg.GetAllNodesInfo() |
|
1730 |
self.all_inst_info = self.cfg.GetAllInstancesInfo() |
|
1731 |
|
|
1732 |
self.my_node_names = utils.NiceSort(group_nodes) |
|
1733 |
self.my_inst_names = utils.NiceSort(group_instances) |
|
1734 |
|
|
1735 |
self.my_node_info = dict((name, self.all_node_info[name]) |
|
1736 |
for name in self.my_node_names) |
|
1737 |
|
|
1738 |
self.my_inst_info = dict((name, self.all_inst_info[name]) |
|
1739 |
for name in self.my_inst_names) |
|
1740 |
|
|
1741 |
# We detect here the nodes that will need the extra RPC calls for verifying |
|
1742 |
# split LV volumes; they should be locked. |
|
1743 |
extra_lv_nodes = set() |
|
1744 |
|
|
1745 |
for inst in self.my_inst_info.values(): |
|
1746 |
if inst.disk_template in constants.DTS_INT_MIRROR: |
|
1747 |
for nname in inst.all_nodes: |
|
1748 |
if self.all_node_info[nname].group != self.group_uuid: |
|
1749 |
extra_lv_nodes.add(nname) |
|
1750 |
|
|
1751 |
unlocked_lv_nodes = \ |
|
1752 |
extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) |
|
1753 |
|
|
1754 |
if unlocked_lv_nodes: |
|
1755 |
raise errors.OpPrereqError("Missing node locks for LV check: %s" % |
|
1756 |
utils.CommaJoin(unlocked_lv_nodes), |
|
1757 |
errors.ECODE_STATE) |
|
1758 |
self.extra_lv_nodes = list(extra_lv_nodes) |
|
1759 |
|
|
1760 |
def _VerifyNode(self, ninfo, nresult): |
|
1761 |
"""Perform some basic validation on data returned from a node. |
|
1762 |
|
|
1763 |
- check the result data structure is well formed and has all the |
|
1764 |
mandatory fields |
|
1765 |
- check ganeti version |
|
1766 |
|
|
1767 |
@type ninfo: L{objects.Node} |
|
1768 |
@param ninfo: the node to check |
|
1769 |
@param nresult: the results from the node |
|
1770 |
@rtype: boolean |
|
1771 |
@return: whether overall this call was successful (and we can expect |
|
1772 |
reasonable values in the respose) |
|
1773 |
|
|
1774 |
""" |
|
1775 |
node = ninfo.name |
|
1776 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
|
1777 |
|
|
1778 |
# main result, nresult should be a non-empty dict |
|
1779 |
test = not nresult or not isinstance(nresult, dict) |
|
1780 |
_ErrorIf(test, constants.CV_ENODERPC, node, |
|
1781 |
"unable to verify node: no data returned") |
|
1782 |
if test: |
|
1783 |
return False |
|
1784 |
|
|
1785 |
# compares ganeti version |
|
1786 |
local_version = constants.PROTOCOL_VERSION |
|
1787 |
remote_version = nresult.get("version", None) |
|
1788 |
test = not (remote_version and |
|
1789 |
isinstance(remote_version, (list, tuple)) and |
|
1790 |
len(remote_version) == 2) |
|
1791 |
_ErrorIf(test, constants.CV_ENODERPC, node, |
|
1792 |
"connection to node returned invalid data") |
|
1793 |
if test: |
|
1794 |
return False |
|
1795 |
|
|
1796 |
test = local_version != remote_version[0] |
|
1797 |
_ErrorIf(test, constants.CV_ENODEVERSION, node, |
|
1798 |
"incompatible protocol versions: master %s," |
|
1799 |
" node %s", local_version, remote_version[0]) |
|
1800 |
if test: |
|
1801 |
return False |
|
1802 |
|
|
1803 |
# node seems compatible, we can actually try to look into its results |
|
1804 |
|
|
1805 |
# full package version |
|
1806 |
self._ErrorIf(constants.RELEASE_VERSION != remote_version[1], |
|
1807 |
constants.CV_ENODEVERSION, node, |
|
1808 |
"software version mismatch: master %s, node %s", |
|
1809 |
constants.RELEASE_VERSION, remote_version[1], |
|
1810 |
code=self.ETYPE_WARNING) |
|
1811 |
|
|
1812 |
hyp_result = nresult.get(constants.NV_HYPERVISOR, None) |
|
1813 |
if ninfo.vm_capable and isinstance(hyp_result, dict): |
|
1814 |
for hv_name, hv_result in hyp_result.iteritems(): |
|
1815 |
test = hv_result is not None |
|
1816 |
_ErrorIf(test, constants.CV_ENODEHV, node, |
|
1817 |
"hypervisor %s verify failure: '%s'", hv_name, hv_result) |
|
1818 |
|
|
1819 |
hvp_result = nresult.get(constants.NV_HVPARAMS, None) |
|
1820 |
if ninfo.vm_capable and isinstance(hvp_result, list): |
|
1821 |
for item, hv_name, hv_result in hvp_result: |
|
1822 |
_ErrorIf(True, constants.CV_ENODEHV, node, |
|
1823 |
"hypervisor %s parameter verify failure (source %s): %s", |
|
1824 |
hv_name, item, hv_result) |
|
1825 |
|
|
1826 |
test = nresult.get(constants.NV_NODESETUP, |
|
1827 |
["Missing NODESETUP results"]) |
|
1828 |
_ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s", |
|
1829 |
"; ".join(test)) |
|
1830 |
|
|
1831 |
return True |
|
1832 |
|
|
1833 |
def _VerifyNodeTime(self, ninfo, nresult, |
|
1834 |
nvinfo_starttime, nvinfo_endtime): |
|
1835 |
"""Check the node time. |
|
1836 |
|
|
1837 |
@type ninfo: L{objects.Node} |
|
1838 |
@param ninfo: the node to check |
|
1839 |
@param nresult: the remote results for the node |
|
1840 |
@param nvinfo_starttime: the start time of the RPC call |
|
1841 |
@param nvinfo_endtime: the end time of the RPC call |
|
1842 |
|
|
1843 |
""" |
|
1844 |
node = ninfo.name |
|
1845 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
|
1846 |
|
|
1847 |
ntime = nresult.get(constants.NV_TIME, None) |
|
1848 |
try: |
|
1849 |
ntime_merged = utils.MergeTime(ntime) |
|
1850 |
except (ValueError, TypeError): |
|
1851 |
_ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time") |
|
1852 |
return |
|
1853 |
|
|
1854 |
if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): |
|
1855 |
ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) |
|
1856 |
elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): |
|
1857 |
ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) |
|
1858 |
else: |
|
1859 |
ntime_diff = None |
|
1860 |
|
|
1861 |
_ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node, |
|
1862 |
"Node time diverges by at least %s from master node time", |
|
1863 |
ntime_diff) |
|
1864 |
|
|
1865 |
def _UpdateVerifyNodeLVM(self, ninfo, nresult, vg_name, nimg): |
|
1866 |
"""Check the node LVM results and update info for cross-node checks. |
|
1867 |
|
|
1868 |
@type ninfo: L{objects.Node} |
|
1869 |
@param ninfo: the node to check |
|
1870 |
@param nresult: the remote results for the node |
|
1871 |
@param vg_name: the configured VG name |
|
1872 |
@type nimg: L{NodeImage} |
|
1873 |
@param nimg: node image |
|
1874 |
|
|
1875 |
""" |
|
1876 |
if vg_name is None: |
|
1877 |
return |
|
1878 |
|
|
1879 |
node = ninfo.name |
|
1880 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
|
1881 |
|
|
1882 |
# checks vg existence and size > 20G |
|
1883 |
vglist = nresult.get(constants.NV_VGLIST, None) |
|
1884 |
test = not vglist |
|
1885 |
_ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups") |
|
1886 |
if not test: |
|
1887 |
vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name, |
|
1888 |
constants.MIN_VG_SIZE) |
|
1889 |
_ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus) |
|
1890 |
|
|
1891 |
# Check PVs |
|
1892 |
(errmsgs, pvminmax) = _CheckNodePVs(nresult, self._exclusive_storage) |
|
1893 |
for em in errmsgs: |
|
1894 |
self._Error(constants.CV_ENODELVM, node, em) |
|
1895 |
if pvminmax is not None: |
|
1896 |
(nimg.pv_min, nimg.pv_max) = pvminmax |
|
1897 |
|
|
1898 |
def _VerifyGroupLVM(self, node_image, vg_name): |
|
1899 |
"""Check cross-node consistency in LVM. |
|
1900 |
|
|
1901 |
@type node_image: dict |
|
1902 |
@param node_image: info about nodes, mapping from node to names to |
|
1903 |
L{NodeImage} objects |
|
1904 |
@param vg_name: the configured VG name |
|
1905 |
|
|
1906 |
""" |
|
1907 |
if vg_name is None: |
|
1908 |
return |
|
1909 |
|
|
1910 |
# Only exlcusive storage needs this kind of checks |
|
1911 |
if not self._exclusive_storage: |
|
1912 |
return |
|
1913 |
|
|
1914 |
# exclusive_storage wants all PVs to have the same size (approximately), |
|
1915 |
# if the smallest and the biggest ones are okay, everything is fine. |
|
1916 |
# pv_min is None iff pv_max is None |
|
1917 |
vals = filter((lambda ni: ni.pv_min is not None), node_image.values()) |
|
1918 |
if not vals: |
|
1919 |
return |
|
1920 |
(pvmin, minnode) = min((ni.pv_min, ni.name) for ni in vals) |
|
1921 |
(pvmax, maxnode) = max((ni.pv_max, ni.name) for ni in vals) |
|
1922 |
bad = utils.LvmExclusiveTestBadPvSizes(pvmin, pvmax) |
|
1923 |
self._ErrorIf(bad, constants.CV_EGROUPDIFFERENTPVSIZE, self.group_info.name, |
|
1924 |
"PV sizes differ too much in the group; smallest (%s MB) is" |
|
1925 |
" on %s, biggest (%s MB) is on %s", |
|
1926 |
pvmin, minnode, pvmax, maxnode) |
|
1927 |
|
|
1928 |
def _VerifyNodeBridges(self, ninfo, nresult, bridges): |
|
1929 |
"""Check the node bridges. |
|
1930 |
|
|
1931 |
@type ninfo: L{objects.Node} |
|
1932 |
@param ninfo: the node to check |
|
1933 |
@param nresult: the remote results for the node |
|
1934 |
@param bridges: the expected list of bridges |
|
1935 |
|
|
1936 |
""" |
|
1937 |
if not bridges: |
|
1938 |
return |
|
1939 |
|
|
1940 |
node = ninfo.name |
|
1941 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
|
1942 |
|
|
1943 |
missing = nresult.get(constants.NV_BRIDGES, None) |
|
1944 |
test = not isinstance(missing, list) |
|
1945 |
_ErrorIf(test, constants.CV_ENODENET, node, |
|
1946 |
"did not return valid bridge information") |
|
1947 |
if not test: |
|
1948 |
_ErrorIf(bool(missing), constants.CV_ENODENET, node, |
|
1949 |
"missing bridges: %s" % utils.CommaJoin(sorted(missing))) |
|
1950 |
|
|
1951 |
def _VerifyNodeUserScripts(self, ninfo, nresult): |
|
1952 |
"""Check the results of user scripts presence and executability on the node |
|
1953 |
|
|
1954 |
@type ninfo: L{objects.Node} |
|
1955 |
@param ninfo: the node to check |
|
1956 |
@param nresult: the remote results for the node |
|
1957 |
|
|
1958 |
""" |
|
1959 |
node = ninfo.name |
|
1960 |
|
|
1961 |
test = not constants.NV_USERSCRIPTS in nresult |
|
1962 |
self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node, |
|
1963 |
"did not return user scripts information") |
|
1964 |
|
|
1965 |
broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None) |
|
1966 |
if not test: |
|
1967 |
self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node, |
|
1968 |
"user scripts not present or not executable: %s" % |
|
1969 |
utils.CommaJoin(sorted(broken_scripts))) |
|
1970 |
|
|
1971 |
def _VerifyNodeNetwork(self, ninfo, nresult): |
|
1972 |
"""Check the node network connectivity results. |
|
1973 |
|
|
1974 |
@type ninfo: L{objects.Node} |
|
1975 |
@param ninfo: the node to check |
|
1976 |
@param nresult: the remote results for the node |
|
1977 |
|
|
1978 |
""" |
|
1979 |
node = ninfo.name |
|
1980 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
|
1981 |
|
|
1982 |
test = constants.NV_NODELIST not in nresult |
|
1983 |
_ErrorIf(test, constants.CV_ENODESSH, node, |
|
1984 |
"node hasn't returned node ssh connectivity data") |
|
1985 |
if not test: |
|
1986 |
if nresult[constants.NV_NODELIST]: |
|
1987 |
for a_node, a_msg in nresult[constants.NV_NODELIST].items(): |
|
1988 |
_ErrorIf(True, constants.CV_ENODESSH, node, |
|
1989 |
"ssh communication with node '%s': %s", a_node, a_msg) |
|
1990 |
|
|
1991 |
test = constants.NV_NODENETTEST not in nresult |
|
1992 |
_ErrorIf(test, constants.CV_ENODENET, node, |
|
1993 |
"node hasn't returned node tcp connectivity data") |
|
1994 |
if not test: |
|
1995 |
if nresult[constants.NV_NODENETTEST]: |
|
1996 |
nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys()) |
|
1997 |
for anode in nlist: |
|
1998 |
_ErrorIf(True, constants.CV_ENODENET, node, |
|
1999 |
"tcp communication with node '%s': %s", |
|
2000 |
anode, nresult[constants.NV_NODENETTEST][anode]) |
|
2001 |
|
|
2002 |
test = constants.NV_MASTERIP not in nresult |
|
2003 |
_ErrorIf(test, constants.CV_ENODENET, node, |
|
2004 |
"node hasn't returned node master IP reachability data") |
|
2005 |
if not test: |
|
2006 |
if not nresult[constants.NV_MASTERIP]: |
|
2007 |
if node == self.master_node: |
|
2008 |
msg = "the master node cannot reach the master IP (not configured?)" |
|
2009 |
else: |
|
2010 |
msg = "cannot reach the master IP" |
|
2011 |
_ErrorIf(True, constants.CV_ENODENET, node, msg) |
|
2012 |
|
|
2013 |
def _VerifyInstance(self, instance, inst_config, node_image, |
|
2014 |
diskstatus): |
|
2015 |
"""Verify an instance. |
|
2016 |
|
|
2017 |
This function checks to see if the required block devices are |
|
2018 |
available on the instance's node, and that the nodes are in the correct |
|
2019 |
state. |
|
2020 |
|
|
2021 |
""" |
|
2022 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
|
2023 |
pnode = inst_config.primary_node |
|
2024 |
pnode_img = node_image[pnode] |
|
2025 |
groupinfo = self.cfg.GetAllNodeGroupsInfo() |
|
2026 |
|
|
2027 |
node_vol_should = {} |
|
2028 |
inst_config.MapLVsByNode(node_vol_should) |
|
2029 |
|
|
2030 |
cluster = self.cfg.GetClusterInfo() |
|
2031 |
ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, |
|
2032 |
self.group_info) |
|
2033 |
err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg) |
|
2034 |
_ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err), |
|
2035 |
code=self.ETYPE_WARNING) |
|
2036 |
|
|
2037 |
for node in node_vol_should: |
|
2038 |
n_img = node_image[node] |
|
2039 |
if n_img.offline or n_img.rpc_fail or n_img.lvm_fail: |
|
2040 |
# ignore missing volumes on offline or broken nodes |
|
2041 |
continue |
|
2042 |
for volume in node_vol_should[node]: |
|
2043 |
test = volume not in n_img.volumes |
|
2044 |
_ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance, |
|
2045 |
"volume %s missing on node %s", volume, node) |
|
2046 |
|
|
2047 |
if inst_config.admin_state == constants.ADMINST_UP: |
|
2048 |
test = instance not in pnode_img.instances and not pnode_img.offline |
|
2049 |
_ErrorIf(test, constants.CV_EINSTANCEDOWN, instance, |
|
2050 |
"instance not running on its primary node %s", |
|
2051 |
pnode) |
|
2052 |
_ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance, |
|
2053 |
"instance is marked as running and lives on offline node %s", |
|
2054 |
pnode) |
|
2055 |
|
|
2056 |
diskdata = [(nname, success, status, idx) |
|
2057 |
for (nname, disks) in diskstatus.items() |
|
2058 |
for idx, (success, status) in enumerate(disks)] |
|
2059 |
|
|
2060 |
for nname, success, bdev_status, idx in diskdata: |
|
2061 |
# the 'ghost node' construction in Exec() ensures that we have a |
|
2062 |
# node here |
|
2063 |
snode = node_image[nname] |
|
2064 |
bad_snode = snode.ghost or snode.offline |
|
2065 |
_ErrorIf(inst_config.admin_state == constants.ADMINST_UP and |
|
2066 |
not success and not bad_snode, |
|
2067 |
constants.CV_EINSTANCEFAULTYDISK, instance, |
|
2068 |
"couldn't retrieve status for disk/%s on %s: %s", |
|
2069 |
idx, nname, bdev_status) |
|
2070 |
_ErrorIf((inst_config.admin_state == constants.ADMINST_UP and |
|
2071 |
success and bdev_status.ldisk_status == constants.LDS_FAULTY), |
|
2072 |
constants.CV_EINSTANCEFAULTYDISK, instance, |
|
2073 |
"disk/%s on %s is faulty", idx, nname) |
|
2074 |
|
|
2075 |
_ErrorIf(pnode_img.rpc_fail and not pnode_img.offline, |
|
2076 |
constants.CV_ENODERPC, pnode, "instance %s, connection to" |
|
2077 |
" primary node failed", instance) |
|
2078 |
|
|
2079 |
_ErrorIf(len(inst_config.secondary_nodes) > 1, |
|
2080 |
constants.CV_EINSTANCELAYOUT, |
|
2081 |
instance, "instance has multiple secondary nodes: %s", |
|
2082 |
utils.CommaJoin(inst_config.secondary_nodes), |
|
2083 |
code=self.ETYPE_WARNING) |
|
2084 |
|
|
2085 |
if inst_config.disk_template not in constants.DTS_EXCL_STORAGE: |
|
2086 |
# Disk template not compatible with exclusive_storage: no instance |
|
2087 |
# node should have the flag set |
|
2088 |
es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, |
|
2089 |
inst_config.all_nodes) |
|
2090 |
es_nodes = [n for (n, es) in es_flags.items() |
|
2091 |
if es] |
|
2092 |
_ErrorIf(es_nodes, constants.CV_EINSTANCEUNSUITABLENODE, instance, |
|
2093 |
"instance has template %s, which is not supported on nodes" |
|
2094 |
" that have exclusive storage set: %s", |
|
2095 |
inst_config.disk_template, utils.CommaJoin(es_nodes)) |
|
2096 |
|
|
2097 |
if inst_config.disk_template in constants.DTS_INT_MIRROR: |
|
2098 |
instance_nodes = utils.NiceSort(inst_config.all_nodes) |
|
2099 |
instance_groups = {} |
|
2100 |
|
|
2101 |
for node in instance_nodes: |
|
2102 |
instance_groups.setdefault(self.all_node_info[node].group, |
|
2103 |
[]).append(node) |
|
2104 |
|
|
2105 |
pretty_list = [ |
|
2106 |
"%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name) |
|
2107 |
# Sort so that we always list the primary node first. |
|
2108 |
for group, nodes in sorted(instance_groups.items(), |
|
2109 |
key=lambda (_, nodes): pnode in nodes, |
|
2110 |
reverse=True)] |
|
2111 |
|
|
2112 |
self._ErrorIf(len(instance_groups) > 1, |
|
2113 |
constants.CV_EINSTANCESPLITGROUPS, |
|
2114 |
instance, "instance has primary and secondary nodes in" |
|
2115 |
" different groups: %s", utils.CommaJoin(pretty_list), |
|
2116 |
code=self.ETYPE_WARNING) |
|
2117 |
|
|
2118 |
inst_nodes_offline = [] |
|
2119 |
for snode in inst_config.secondary_nodes: |
|
2120 |
s_img = node_image[snode] |
|
2121 |
_ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC, |
|
2122 |
snode, "instance %s, connection to secondary node failed", |
|
2123 |
instance) |
|
2124 |
|
|
2125 |
if s_img.offline: |
|
2126 |
inst_nodes_offline.append(snode) |
|
2127 |
|
|
2128 |
# warn that the instance lives on offline nodes |
|
2129 |
_ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance, |
|
2130 |
"instance has offline secondary node(s) %s", |
|
2131 |
utils.CommaJoin(inst_nodes_offline)) |
|
2132 |
# ... or ghost/non-vm_capable nodes |
|
2133 |
for node in inst_config.all_nodes: |
|
2134 |
_ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE, |
|
2135 |
instance, "instance lives on ghost node %s", node) |
|
2136 |
_ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE, |
|
2137 |
instance, "instance lives on non-vm_capable node %s", node) |
|
2138 |
|
|
2139 |
def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved): |
|
2140 |
"""Verify if there are any unknown volumes in the cluster. |
|
2141 |
|
|
2142 |
The .os, .swap and backup volumes are ignored. All other volumes are |
|
2143 |
reported as unknown. |
|
2144 |
|
|
2145 |
@type reserved: L{ganeti.utils.FieldSet} |
|
2146 |
@param reserved: a FieldSet of reserved volume names |
|
2147 |
|
|
2148 |
""" |
|
2149 |
for node, n_img in node_image.items(): |
|
2150 |
if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or |
|
2151 |
self.all_node_info[node].group != self.group_uuid): |
|
2152 |
# skip non-healthy nodes |
|
2153 |
continue |
|
2154 |
for volume in n_img.volumes: |
|
2155 |
test = ((node not in node_vol_should or |
|
2156 |
volume not in node_vol_should[node]) and |
|
2157 |
not reserved.Matches(volume)) |
|
2158 |
self._ErrorIf(test, constants.CV_ENODEORPHANLV, node, |
|
2159 |
"volume %s is unknown", volume) |
|
2160 |
|
|
2161 |
def _VerifyNPlusOneMemory(self, node_image, instance_cfg): |
|
2162 |
"""Verify N+1 Memory Resilience. |
|
2163 |
|