Revision 1c3231aa lib/cmdlib/cluster.py
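This revision moves the cluster logical units from addressing nodes by name to
addressing them by UUID: RPC calls and internal dictionaries are keyed by node
UUID, and the human-readable name is resolved from the configuration only when
a message is built. A minimal sketch of the recurring pattern (illustrative
fragment assuming a LogicalUnit context with self.cfg and self.rpc; the
variable names are placeholders, not taken from the file):

    # RPC results are keyed by UUID; the name is looked up only for output.
    results = self.rpc.call_vg_list(node_uuids)
    for node_uuid in node_uuids:
      msg = results[node_uuid].fail_msg
      if msg:
        self.LogWarning("Error on node %s: %s",
                        self.cfg.GetNodeName(node_uuid), msg)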
--- a/lib/cmdlib/cluster.py
+++ b/lib/cmdlib/cluster.py
@@ -71,7 +71,7 @@
     """
     master_params = self.cfg.GetMasterNetworkParameters()
     ems = self.cfg.GetUseExternalMipScript()
-    result = self.rpc.call_node_activate_master_ip(master_params.name,
+    result = self.rpc.call_node_activate_master_ip(master_params.uuid,
                                                    master_params, ems)
     result.Raise("Could not activate the master IP")
 
@@ -86,7 +86,7 @@
     """
     master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
-    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+    result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
                                                      master_params, ems)
     result.Raise("Could not deactivate the master IP")
 
@@ -163,13 +163,13 @@
     master_params = self.cfg.GetMasterNetworkParameters()
 
     # Run post hooks on master node before it's removed
-    RunPostHook(self, master_params.name)
+    RunPostHook(self, self.cfg.GetNodeName(master_params.uuid))
 
     ems = self.cfg.GetUseExternalMipScript()
-    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+    result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
                                                      master_params, ems)
     result.Warn("Error disabling the master IP address", self.LogWarning)
-    return master_params.name
+    return master_params.uuid
 
 
 class LUClusterPostInit(LogicalUnit):
@@ -232,8 +232,10 @@
 
     if query.CQ_CONFIG in self.requested_data:
       cluster = lu.cfg.GetClusterInfo()
+      nodes = lu.cfg.GetAllNodesInfo()
     else:
       cluster = NotImplemented
+      nodes = NotImplemented
 
     if query.CQ_QUEUE_DRAINED in self.requested_data:
       drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
@@ -241,17 +243,17 @@
       drain_flag = NotImplemented
 
     if query.CQ_WATCHER_PAUSE in self.requested_data:
-      master_name = lu.cfg.GetMasterNode()
+      master_node_uuid = lu.cfg.GetMasterNode()
 
-      result = lu.rpc.call_get_watcher_pause(master_name)
+      result = lu.rpc.call_get_watcher_pause(master_node_uuid)
       result.Raise("Can't retrieve watcher pause from master node '%s'" %
-                   master_name)
+                   lu.cfg.GetMasterNodeName())
 
       watcher_pause = result.payload
     else:
       watcher_pause = NotImplemented
 
-    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
+    return query.ClusterQueryData(cluster, nodes, drain_flag, watcher_pause)
 
 
 class LUClusterQuery(NoHooksLU):
@@ -290,7 +292,7 @@
       "export_version": constants.EXPORT_VERSION,
       "architecture": runtime.GetArchInfo(),
       "name": cluster.cluster_name,
-      "master": cluster.master_node,
+      "master": self.cfg.GetMasterNodeName(),
       "default_hypervisor": cluster.primary_hypervisor,
       "enabled_hypervisors": cluster.enabled_hypervisors,
       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
@@ -405,7 +407,7 @@
     # shutdown the master IP
     master_params = self.cfg.GetMasterNetworkParameters()
     ems = self.cfg.GetUseExternalMipScript()
-    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+    result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
                                                      master_params, ems)
     result.Raise("Could not disable the master role")
 
@@ -419,13 +421,13 @@
       ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
       node_list = self.cfg.GetOnlineNodeList()
       try:
-        node_list.remove(master_params.name)
+        node_list.remove(master_params.uuid)
       except ValueError:
         pass
       UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
     finally:
       master_params.ip = new_ip
-      result = self.rpc.call_node_activate_master_ip(master_params.name,
+      result = self.rpc.call_node_activate_master_ip(master_params.uuid,
                                                      master_params, ems)
       result.Warn("Could not re-enable the master role on the master,"
                   " please restart manually", self.LogWarning)
@@ -523,24 +525,26 @@
       "Not owning correct locks"
     assert not self.owned_locks(locking.LEVEL_NODE)
 
-    es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
-                                                   per_node_disks.keys())
+    es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
+                                               per_node_disks.keys())
 
     changed = []
-    for node, dskl in per_node_disks.items():
+    for node_uuid, dskl in per_node_disks.items():
       newl = [v[2].Copy() for v in dskl]
       for dsk in newl:
-        self.cfg.SetDiskID(dsk, node)
-      result = self.rpc.call_blockdev_getdimensions(node, newl)
+        self.cfg.SetDiskID(dsk, node_uuid)
+      node_name = self.cfg.GetNodeName(node_uuid)
+      result = self.rpc.call_blockdev_getdimensions(node_uuid, newl)
      if result.fail_msg:
         self.LogWarning("Failure in blockdev_getdimensions call to node"
-                        " %s, ignoring", node)
+                        " %s, ignoring", node_name)
         continue
       if len(result.payload) != len(dskl):
         logging.warning("Invalid result from node %s: len(dksl)=%d,"
-                        " result.payload=%s", node, len(dskl), result.payload)
+                        " result.payload=%s", node_name, len(dskl),
+                        result.payload)
         self.LogWarning("Invalid result from node %s, ignoring node results",
-                        node)
+                        node_name)
         continue
       for ((instance, idx, disk), dimensions) in zip(dskl, result.payload):
         if dimensions is None:
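In the LUClusterRepairDiskSizes hunk above, the per-node loop now iterates
over UUIDs and resolves the display name once per iteration, so every log
message in the body reuses a single lookup instead of querying the
configuration per message. Condensed sketch of that shape (hypothetical
placeholder body, same assumptions as above):

    for node_uuid in per_node_disks:
      node_name = self.cfg.GetNodeName(node_uuid)  # resolved once per node
      self.LogInfo("Inspecting disks on node %s (uuid %s)",
                   node_name, node_uuid)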
@@ -565,7 +569,7 @@
             disk.size = size
             self.cfg.Update(instance, feedback_fn)
             changed.append((instance.name, idx, "size", size))
-          if es_flags[node]:
+          if es_flags[node_uuid]:
             if spindles is None:
               self.LogWarning("Disk %d of instance %s did not return valid"
                               " spindles information, ignoring", idx,
@@ -666,7 +670,7 @@
     mn = self.cfg.GetMasterNode()
     return ([mn], [mn])
 
-  def _CheckVgName(self, node_list, enabled_disk_templates,
+  def _CheckVgName(self, node_uuids, enabled_disk_templates,
                    new_enabled_disk_templates):
     """Check the consistency of the vg name on all nodes and in case it gets
    unset whether there are instances still using it.
@@ -682,26 +686,28 @@
         (self.cfg.GetVGName() is not None and
          utils.LvmGetsEnabled(enabled_disk_templates,
                               new_enabled_disk_templates)):
-      self._CheckVgNameOnNodes(node_list)
+      self._CheckVgNameOnNodes(node_uuids)
 
-  def _CheckVgNameOnNodes(self, node_list):
+  def _CheckVgNameOnNodes(self, node_uuids):
     """Check the status of the volume group on each node.
 
     """
-    vglist = self.rpc.call_vg_list(node_list)
-    for node in node_list:
-      msg = vglist[node].fail_msg
+    vglist = self.rpc.call_vg_list(node_uuids)
+    for node_uuid in node_uuids:
+      msg = vglist[node_uuid].fail_msg
       if msg:
         # ignoring down node
         self.LogWarning("Error while gathering data on node %s"
-                        " (ignoring node): %s", node, msg)
+                        " (ignoring node): %s",
+                        self.cfg.GetNodeName(node_uuid), msg)
         continue
-      vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
+      vgstatus = utils.CheckVolumeGroupSize(vglist[node_uuid].payload,
                                             self.op.vg_name,
                                             constants.MIN_VG_SIZE)
       if vgstatus:
         raise errors.OpPrereqError("Error on node '%s': %s" %
-                                   (node, vgstatus), errors.ECODE_ENVIRON)
+                                   (self.cfg.GetNodeName(node_uuid), vgstatus),
+                                   errors.ECODE_ENVIRON)
 
   def _GetEnabledDiskTemplates(self, cluster):
     """Determines the enabled disk templates and the subset of disk templates
@@ -732,35 +738,37 @@
                                  " drbd-based instances exist",
                                  errors.ECODE_INVAL)
 
-    node_list = self.owned_locks(locking.LEVEL_NODE)
+    node_uuids = self.owned_locks(locking.LEVEL_NODE)
     self.cluster = cluster = self.cfg.GetClusterInfo()
 
-    vm_capable_nodes = [node.name
-                        for node in self.cfg.GetAllNodesInfo().values()
-                        if node.name in node_list and node.vm_capable]
+    vm_capable_node_uuids = [node.uuid
+                             for node in self.cfg.GetAllNodesInfo().values()
+                             if node.uuid in node_uuids and node.vm_capable]
 
     (enabled_disk_templates, new_enabled_disk_templates) = \
       self._GetEnabledDiskTemplates(cluster)
 
-    self._CheckVgName(vm_capable_nodes, enabled_disk_templates,
+    self._CheckVgName(vm_capable_node_uuids, enabled_disk_templates,
                       new_enabled_disk_templates)
 
     if self.op.drbd_helper:
       # checks given drbd helper on all nodes
-      helpers = self.rpc.call_drbd_helper(node_list)
-      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
+      helpers = self.rpc.call_drbd_helper(node_uuids)
+      for (_, ninfo) in self.cfg.GetMultiNodeInfo(node_uuids):
         if ninfo.offline:
-          self.LogInfo("Not checking drbd helper on offline node %s", node)
+          self.LogInfo("Not checking drbd helper on offline node %s",
+                       ninfo.name)
          continue
-        msg = helpers[node].fail_msg
+        msg = helpers[ninfo.uuid].fail_msg
        if msg:
           raise errors.OpPrereqError("Error checking drbd helper on node"
-                                     " '%s': %s" % (node, msg),
+                                     " '%s': %s" % (ninfo.name, msg),
                                      errors.ECODE_ENVIRON)
-        node_helper = helpers[node].payload
+        node_helper = helpers[ninfo.uuid].payload
         if node_helper != self.op.drbd_helper:
           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
-                                     (node, node_helper), errors.ECODE_ENVIRON)
+                                     (ninfo.name, node_helper),
+                                     errors.ECODE_ENVIRON)
 
     # validate params changes
     if self.op.beparams:
@@ -800,8 +808,8 @@
       violations = set()
       for group in self.cfg.GetAllNodeGroupsInfo().values():
         instances = frozenset([inst for inst in all_instances
-                               if compat.any(node in group.members
-                                             for node in inst.all_nodes)])
+                               if compat.any(nuuid in group.members
+                                             for nuuid in inst.all_nodes)])
         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
         ipol = masterd.instance.CalculateGroupIPolicy(cluster, group)
         new = ComputeNewInstanceViolations(ipol,
@@ -920,7 +928,7 @@
         hv_class = hypervisor.GetHypervisorClass(hv_name)
         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
         hv_class.CheckParameterSyntax(hv_params)
-        CheckHVParams(self, node_list, hv_name, hv_params)
+        CheckHVParams(self, node_uuids, hv_name, hv_params)
 
     self._CheckDiskTemplateConsistency()
 
@@ -935,7 +943,7 @@
           new_osp = objects.FillDict(cluster_defaults, hv_params)
           hv_class = hypervisor.GetHypervisorClass(hv_name)
           hv_class.CheckParameterSyntax(new_osp)
-          CheckHVParams(self, node_list, hv_name, new_osp)
+          CheckHVParams(self, node_uuids, hv_name, new_osp)
 
     if self.op.default_iallocator:
       alloc_script = utils.FindFile(self.op.default_iallocator,
@@ -1095,7 +1103,7 @@
       ems = self.cfg.GetUseExternalMipScript()
       feedback_fn("Shutting down master ip on the current netdev (%s)" %
                   self.cluster.master_netdev)
-      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
+      result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
                                                        master_params, ems)
       result.Raise("Could not disable the master ip")
       feedback_fn("Changing master_netdev from %s to %s" %
@@ -1105,11 +1113,10 @@
     if self.op.master_netmask:
       master_params = self.cfg.GetMasterNetworkParameters()
       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
-      result = self.rpc.call_node_change_master_netmask(master_params.name,
-                                                        master_params.netmask,
-                                                        self.op.master_netmask,
-                                                        master_params.ip,
-                                                        master_params.netdev)
+      result = self.rpc.call_node_change_master_netmask(
+                 master_params.uuid, master_params.netmask,
+                 self.op.master_netmask, master_params.ip,
+                 master_params.netdev)
       result.Warn("Could not change the master IP netmask", feedback_fn)
       self.cluster.master_netmask = self.op.master_netmask
 
@@ -1120,7 +1127,7 @@
       feedback_fn("Starting the master ip on the new master netdev (%s)" %
                   self.op.master_netdev)
       ems = self.cfg.GetUseExternalMipScript()
-      result = self.rpc.call_node_activate_master_ip(master_params.name,
+      result = self.rpc.call_node_activate_master_ip(master_params.uuid,
                                                      master_params, ems)
       result.Warn("Could not re-enable the master ip on the master,"
                   " please restart manually", self.LogWarning)
@@ -1352,14 +1359,14 @@
     # occur, it would never be caught by VerifyGroup, which only acts on
     # nodes/instances reachable from existing node groups.
 
-    dangling_nodes = set(node.name for node in self.all_node_info.values()
+    dangling_nodes = set(node for node in self.all_node_info.values()
                          if node.group not in self.all_group_info)
 
     dangling_instances = {}
     no_node_instances = []
 
     for inst in self.all_inst_info.values():
-      if inst.primary_node in dangling_nodes:
+      if inst.primary_node in [node.uuid for node in dangling_nodes]:
         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
       elif inst.primary_node not in self.all_node_info:
         no_node_instances.append(inst.name)
@@ -1367,7 +1374,7 @@
     pretty_dangling = [
         "%s (%s)" %
         (node.name,
-         utils.CommaJoin(dangling_instances.get(node.name,
+         utils.CommaJoin(dangling_instances.get(node.uuid,
                                                 ["no instances"])))
        for node in dangling_nodes]
 
@@ -1397,8 +1404,8 @@
 class NodeImage(object):
   """A class representing the logical and physical status of a node.
 
-  @type name: string
-  @ivar name: the node name to which this object refers
+  @type uuid: string
+  @ivar uuid: the node UUID to which this object refers
   @ivar volumes: a structure as returned from
       L{ganeti.backend.GetVolumeList} (runtime)
   @ivar instances: a list of running instances (runtime)
@@ -1430,8 +1437,8 @@
   @ivar pv_max: size in MiB of the biggest PVs
 
   """
-  def __init__(self, offline=False, name=None, vm_capable=True):
-    self.name = name
+  def __init__(self, offline=False, uuid=None, vm_capable=True):
+    self.uuid = uuid
    self.volumes = {}
    self.instances = []
    self.pinst = []
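NodeImage now identifies the node it describes through a uuid attribute
instead of name, so any consumer that previously printed nimg.name has to go
through the configuration. A sketch of a hypothetical consumer (node and
feedback_fn are stand-ins):

    nimg = NodeImage(offline=node.offline, uuid=node.uuid,
                     vm_capable=node.vm_capable)
    feedback_fn("* Verifying node %s" % self.cfg.GetNodeName(nimg.uuid))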
@@ -1494,20 +1501,21 @@
     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
 
-    group_nodes = set(self.group_info.members)
+    group_node_uuids = set(self.group_info.members)
     group_instances = \
       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
 
-    unlocked_nodes = \
-      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
+    unlocked_node_uuids = \
+      group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE))
 
     unlocked_instances = \
       group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
 
-    if unlocked_nodes:
-      raise errors.OpPrereqError("Missing lock for nodes: %s" %
-                                 utils.CommaJoin(unlocked_nodes),
-                                 errors.ECODE_STATE)
+    if unlocked_node_uuids:
+      raise errors.OpPrereqError(
+        "Missing lock for nodes: %s" %
+        utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)),
+        errors.ECODE_STATE)
 
     if unlocked_instances:
       raise errors.OpPrereqError("Missing lock for instances: %s" %
@@ -1517,12 +1525,11 @@
     self.all_node_info = self.cfg.GetAllNodesInfo()
     self.all_inst_info = self.cfg.GetAllInstancesInfo()
 
-    self.my_node_names = utils.NiceSort(group_nodes)
-    self.my_inst_names = utils.NiceSort(group_instances)
-
-    self.my_node_info = dict((name, self.all_node_info[name])
-                             for name in self.my_node_names)
+    self.my_node_uuids = group_node_uuids
+    self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid])
+                             for node_uuid in group_node_uuids)
 
+    self.my_inst_names = utils.NiceSort(group_instances)
     self.my_inst_info = dict((name, self.all_inst_info[name])
                              for name in self.my_inst_names)
 
@@ -1532,9 +1539,9 @@
 
     for inst in self.my_inst_info.values():
       if inst.disk_template in constants.DTS_INT_MIRROR:
-        for nname in inst.all_nodes:
-          if self.all_node_info[nname].group != self.group_uuid:
-            extra_lv_nodes.add(nname)
+        for nuuid in inst.all_nodes:
+          if self.all_node_info[nuuid].group != self.group_uuid:
+            extra_lv_nodes.add(nuuid)
 
     unlocked_lv_nodes = \
       extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
@@ -1560,12 +1567,12 @@
         reasonable values in the respose)
 
     """
-    node = ninfo.name
+    node_name = ninfo.name
     _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     # main result, nresult should be a non-empty dict
     test = not nresult or not isinstance(nresult, dict)
-    _ErrorIf(test, constants.CV_ENODERPC, node,
+    _ErrorIf(test, constants.CV_ENODERPC, node_name,
              "unable to verify node: no data returned")
     if test:
       return False
@@ -1576,13 +1583,13 @@
     test = not (remote_version and
                 isinstance(remote_version, (list, tuple)) and
                 len(remote_version) == 2)
-    _ErrorIf(test, constants.CV_ENODERPC, node,
+    _ErrorIf(test, constants.CV_ENODERPC, node_name,
             "connection to node returned invalid data")
     if test:
       return False
 
     test = local_version != remote_version[0]
-    _ErrorIf(test, constants.CV_ENODEVERSION, node,
+    _ErrorIf(test, constants.CV_ENODEVERSION, node_name,
              "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
     if test:
@@ -1592,7 +1599,7 @@
 
     # full package version
     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
-                  constants.CV_ENODEVERSION, node,
+                  constants.CV_ENODEVERSION, node_name,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)
@@ -1601,19 +1608,19 @@
     if ninfo.vm_capable and isinstance(hyp_result, dict):
       for hv_name, hv_result in hyp_result.iteritems():
         test = hv_result is not None
-        _ErrorIf(test, constants.CV_ENODEHV, node,
+        _ErrorIf(test, constants.CV_ENODEHV, node_name,
                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
 
     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
     if ninfo.vm_capable and isinstance(hvp_result, list):
       for item, hv_name, hv_result in hvp_result:
-        _ErrorIf(True, constants.CV_ENODEHV, node,
+        _ErrorIf(True, constants.CV_ENODEHV, node_name,
                  "hypervisor %s parameter verify failure (source %s): %s",
                  hv_name, item, hv_result)
 
     test = nresult.get(constants.NV_NODESETUP,
                        ["Missing NODESETUP results"])
-    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
+    _ErrorIf(test, constants.CV_ENODESETUP, node_name, "node setup error: %s",
             "; ".join(test))
 
     return True
@@ -1629,14 +1636,15 @@
     @param nvinfo_endtime: the end time of the RPC call
 
     """
-    node = ninfo.name
+    node_name = ninfo.name
     _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     ntime = nresult.get(constants.NV_TIME, None)
     try:
       ntime_merged = utils.MergeTime(ntime)
     except (ValueError, TypeError):
-      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
+      _ErrorIf(True, constants.CV_ENODETIME, node_name,
+               "Node returned invalid time")
       return
 
     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
@@ -1646,7 +1654,7 @@
     else:
       ntime_diff = None
 
-    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
+    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node_name,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
 
@@ -1664,22 +1672,23 @@
     if vg_name is None:
       return
 
-    node = ninfo.name
+    node_name = ninfo.name
     _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     # checks vg existence and size > 20G
     vglist = nresult.get(constants.NV_VGLIST, None)
     test = not vglist
-    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
+    _ErrorIf(test, constants.CV_ENODELVM, node_name,
+             "unable to check volume groups")
     if not test:
       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                             constants.MIN_VG_SIZE)
-      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
+      _ErrorIf(vgstatus, constants.CV_ENODELVM, node_name, vgstatus)
 
     # Check PVs
     (errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage)
     for em in errmsgs:
-      self._Error(constants.CV_ENODELVM, node, em)
+      self._Error(constants.CV_ENODELVM, node_name, em)
     if pvminmax is not None:
       (nimg.pv_min, nimg.pv_max) = pvminmax
 
@@ -1692,15 +1701,15 @@
 
     """
     node_versions = {}
-    for node, ndata in node_verify_infos.items():
+    for node_uuid, ndata in node_verify_infos.items():
       nresult = ndata.payload
       version = nresult.get(constants.NV_DRBDVERSION, "Missing DRBD version")
-      node_versions[node] = version
+      node_versions[node_uuid] = version
 
     if len(set(node_versions.values())) > 1:
-      for node, version in sorted(node_versions.items()):
+      for node_uuid, version in sorted(node_versions.items()):
         msg = "DRBD version mismatch: %s" % version
-        self._Error(constants.CV_ENODEDRBDHELPER, node, msg,
+        self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg,
                     code=self.ETYPE_WARNING)
 
   def _VerifyGroupLVM(self, node_image, vg_name):
@@ -1745,15 +1754,15 @@
     if not bridges:
       return
 
-    node = ninfo.name
+    node_name = ninfo.name
     _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     missing = nresult.get(constants.NV_BRIDGES, None)
     test = not isinstance(missing, list)
-    _ErrorIf(test, constants.CV_ENODENET, node,
+    _ErrorIf(test, constants.CV_ENODENET, node_name,
              "did not return valid bridge information")
     if not test:
-      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
+      _ErrorIf(bool(missing), constants.CV_ENODENET, node_name,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
 
   def _VerifyNodeUserScripts(self, ninfo, nresult):
@@ -1764,15 +1773,15 @@
     @param nresult: the remote results for the node
 
     """
-    node = ninfo.name
+    node_name = ninfo.name
 
     test = not constants.NV_USERSCRIPTS in nresult
-    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
+    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node_name,
                  "did not return user scripts information")
 
     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
     if not test:
-      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
+      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node_name,
                     "user scripts not present or not executable: %s" %
                     utils.CommaJoin(sorted(broken_scripts)))
 
@@ -1784,39 +1793,39 @@
     @param nresult: the remote results for the node
 
     """
-    node = ninfo.name
+    node_name = ninfo.name
     _ErrorIf = self._ErrorIf # pylint: disable=C0103
 
     test = constants.NV_NODELIST not in nresult
-    _ErrorIf(test, constants.CV_ENODESSH, node,
+    _ErrorIf(test, constants.CV_ENODESSH, node_name,
             "node hasn't returned node ssh connectivity data")
     if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
-          _ErrorIf(True, constants.CV_ENODESSH, node,
+          _ErrorIf(True, constants.CV_ENODESSH, node_name,
                    "ssh communication with node '%s': %s", a_node, a_msg)
 
     test = constants.NV_NODENETTEST not in nresult
-    _ErrorIf(test, constants.CV_ENODENET, node,
+    _ErrorIf(test, constants.CV_ENODENET, node_name,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
-          _ErrorIf(True, constants.CV_ENODENET, node,
+          _ErrorIf(True, constants.CV_ENODENET, node_name,
                    "tcp communication with node '%s': %s",
                    anode, nresult[constants.NV_NODENETTEST][anode])
 
     test = constants.NV_MASTERIP not in nresult
-    _ErrorIf(test, constants.CV_ENODENET, node,
+    _ErrorIf(test, constants.CV_ENODENET, node_name,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
-        if node == self.master_node:
+        if ninfo.uuid == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
-        _ErrorIf(True, constants.CV_ENODENET, node, msg)
+        _ErrorIf(True, constants.CV_ENODENET, node_name, msg)
 
   def _VerifyInstance(self, instance, inst_config, node_image,
                       diskstatus):
@@ -1850,16 +1859,17 @@
       for volume in node_vol_should[node]:
         test = volume not in n_img.volumes
         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
-                 "volume %s missing on node %s", volume, node)
+                 "volume %s missing on node %s", volume,
+                 self.cfg.GetNodeName(node))
 
     if inst_config.admin_state == constants.ADMINST_UP:
       test = instance not in pnode_img.instances and not pnode_img.offline
       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
-               pnode)
+               self.cfg.GetNodeName(pnode))
      _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
-               pnode)
+               self.cfg.GetNodeName(pnode))
 
     diskdata = [(nname, success, status, idx)
                 for (nname, disks) in diskstatus.items()
@@ -1874,11 +1884,11 @@
                not success and not bad_snode,
                constants.CV_EINSTANCEFAULTYDISK, instance,
                "couldn't retrieve status for disk/%s on %s: %s",
-               idx, nname, bdev_status)
+               idx, self.cfg.GetNodeName(nname), bdev_status)
      _ErrorIf((inst_config.disks_active and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
-               "disk/%s on %s is faulty", idx, nname)
+               "disk/%s on %s is faulty", idx, self.cfg.GetNodeName(nname))
 
     _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
             constants.CV_ENODERPC, pnode, "instance %s, connection to"
@@ -1890,8 +1900,8 @@
             utils.CommaJoin(inst_config.secondary_nodes),
             code=self.ETYPE_WARNING)
 
-    es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
-                                                   inst_config.all_nodes)
+    es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
+                                               inst_config.all_nodes)
    if any(es_flags.values()):
      if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
        # Disk template not compatible with exclusive_storage: no instance
@@ -1902,7 +1912,8 @@
        self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance,
                    "instance has template %s, which is not supported on nodes"
                    " that have exclusive storage set: %s",
-                    inst_config.disk_template, utils.CommaJoin(es_nodes))
+                    inst_config.disk_template,
+                    utils.CommaJoin(self.cfg.GetNodeNames(es_nodes)))
      for (idx, disk) in enumerate(inst_config.disks):
        _ErrorIf(disk.spindles is None,
                 constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance,
@@ -1920,7 +1931,8 @@
                                 []).append(node)
 
      pretty_list = [
-        "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
+        "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)),
+                           groupinfo[group].name)
        # Sort so that we always list the primary node first.
        for group, nodes in sorted(instance_groups.items(),
                                   key=lambda (_, nodes): pnode in nodes,
@@ -1945,13 +1957,15 @@
      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
-               utils.CommaJoin(inst_nodes_offline))
+               utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline)))
    # ... or ghost/non-vm_capable nodes
    for node in inst_config.all_nodes:
      _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
-               instance, "instance lives on ghost node %s", node)
+               instance, "instance lives on ghost node %s",
+               self.cfg.GetNodeName(node))
      _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
-               instance, "instance lives on non-vm_capable node %s", node)
+               instance, "instance lives on non-vm_capable node %s",
+               self.cfg.GetNodeName(node))
 
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.
@@ -1963,15 +1977,16 @@
    @param reserved: a FieldSet of reserved volume names
 
    """
-    for node, n_img in node_image.items():
+    for node_uuid, n_img in node_image.items():
      if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
-          self.all_node_info[node].group != self.group_uuid):
+          self.all_node_info[node_uuid].group != self.group_uuid):
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
-        test = ((node not in node_vol_should or
-                volume not in node_vol_should[node]) and
+        test = ((node_uuid not in node_vol_should or
+                volume not in node_vol_should[node_uuid]) and
                not reserved.Matches(volume))
-        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
+        self._ErrorIf(test, constants.CV_ENODEORPHANLV,
+                      self.cfg.GetNodeName(node_uuid),
                      "volume %s is unknown", volume)
 
@@ -1983,7 +1998,7 @@
 
    """
    cluster_info = self.cfg.GetClusterInfo()
-    for node, n_img in node_image.items():
+    for node_uuid, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
@@ -1992,7 +2007,8 @@
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
-      if n_img.offline or self.all_node_info[node].group != self.group_uuid:
+      if n_img.offline or \
+         self.all_node_info[node_uuid].group != self.group_uuid:
        # we're skipping nodes marked offline and nodes in other groups from
        # the N+1 warning, since most likely we don't have good memory
        # infromation from them; we already list instances living on such
@@ -2006,19 +2022,18 @@
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MINMEM]
        test = n_img.mfree < needed_mem
-        self._ErrorIf(test, constants.CV_ENODEN1, node,
+        self._ErrorIf(test, constants.CV_ENODEN1,
+                      self.cfg.GetNodeName(node_uuid),
                      "not enough memory to accomodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
-                      prinode, needed_mem, n_img.mfree)
+                      self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree)
 
-  @classmethod
-  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
+  def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.
 
-    @param errorif: Callback for reporting errors
-    @param nodeinfo: List of L{objects.Node} objects
-    @param master_node: Name of master node
+    @param nodes: List of L{objects.Node} objects
+    @param master_node_uuid: UUID of master node
    @param all_nvinfo: RPC results
 
    """
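_VerifyFiles is demoted from a classmethod taking an errorif callback, the
node objects and the master node's name to a plain instance method: it can now
use self._ErrorIf and self.cfg.GetNodeName directly, and it receives the
master node's UUID. A hypothetical call-site sketch (the real caller lies
outside the hunks shown here):

    self._VerifyFiles(self.all_node_info.values(), master_node_uuid,
                      all_nvinfo, filemap)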
@@ -2026,7 +2041,7 @@
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
-                               node.name == master_node)),
+                               node.uuid == master_node_uuid)),
      (files_vm, lambda node: node.vm_capable),
      ]
 
@@ -2034,11 +2049,11 @@
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
-        filenodes = nodeinfo
+        filenodes = nodes
      else:
-        filenodes = filter(fn, nodeinfo)
+        filenodes = filter(fn, nodes)
      nodefiles.update((filename,
-                        frozenset(map(operator.attrgetter("name"), filenodes)))
+                        frozenset(map(operator.attrgetter("uuid"), filenodes)))
                       for filename in files)
 
    assert set(nodefiles) == (files_all | files_mc | files_vm)
@@ -2046,12 +2061,12 @@
    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()
 
-    for node in nodeinfo:
+    for node in nodes:
      if node.offline:
-        ignore_nodes.add(node.name)
+        ignore_nodes.add(node.uuid)
        continue
 
-      nresult = all_nvinfo[node.name]
+      nresult = all_nvinfo[node.uuid]
 
      if nresult.fail_msg or not nresult.payload:
        node_files = None
@@ -2062,24 +2077,24 @@
      del fingerprints
 
      test = not (node_files and isinstance(node_files, dict))
-      errorif(test, constants.CV_ENODEFILECHECK, node.name,
-              "Node did not return file checksum data")
+      self._ErrorIf(test, constants.CV_ENODEFILECHECK, node.name,
+                    "Node did not return file checksum data")
      if test:
-        ignore_nodes.add(node.name)
+        ignore_nodes.add(node.uuid)
        continue
 
      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
-        fileinfo[filename].setdefault(checksum, set()).add(node.name)
+        fileinfo[filename].setdefault(checksum, set()).add(node.uuid)
 
    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
 
      # Nodes having the file
-      with_file = frozenset(node_name
-                            for nodes in fileinfo[filename].values()
-                            for node_name in nodes) - ignore_nodes
+      with_file = frozenset(node_uuid
+                            for node_uuids in fileinfo[filename].values()
+                            for node_uuid in node_uuids) - ignore_nodes
 
      expected_nodes = nodefiles[filename] - ignore_nodes
 
@@ -2088,36 +2103,44 @@
 
      if filename in files_opt:
        # All or no nodes
-        errorif(missing_file and missing_file != expected_nodes,
-                constants.CV_ECLUSTERFILECHECK, None,
-                "File %s is optional, but it must exist on all or no"
-                " nodes (not found on %s)",
-                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
+        self._ErrorIf(missing_file and missing_file != expected_nodes,
+                      constants.CV_ECLUSTERFILECHECK, None,
+                      "File %s is optional, but it must exist on all or no"
+                      " nodes (not found on %s)",
+                      filename,
+                      utils.CommaJoin(
+                        utils.NiceSort(
+                          map(self.cfg.GetNodeName, missing_file))))
      else:
-        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
-                "File %s is missing from node(s) %s", filename,
-                utils.CommaJoin(utils.NiceSort(missing_file)))
+        self._ErrorIf(missing_file, constants.CV_ECLUSTERFILECHECK, None,
+                      "File %s is missing from node(s) %s", filename,
+                      utils.CommaJoin(
+                        utils.NiceSort(
+                          map(self.cfg.GetNodeName, missing_file))))
 
      # Warn if a node has a file it shouldn't
      unexpected = with_file - expected_nodes
-      errorif(unexpected,
-              constants.CV_ECLUSTERFILECHECK, None,
-              "File %s should not exist on node(s) %s",
-              filename, utils.CommaJoin(utils.NiceSort(unexpected)))
+      self._ErrorIf(unexpected,
+                    constants.CV_ECLUSTERFILECHECK, None,
+                    "File %s should not exist on node(s) %s",
+                    filename, utils.CommaJoin(
+                      utils.NiceSort(map(self.cfg.GetNodeName, unexpected))))
 
      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
-                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
-                    for (idx, (checksum, nodes)) in
+                    (idx + 1,
+                     utils.CommaJoin(utils.NiceSort(
+                       map(self.cfg.GetNodeName, node_uuids))))
+                    for (idx, (checksum, node_uuids)) in
                    enumerate(sorted(checksums.items()))]
      else:
        variants = []
 
-      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
-              "File %s found with %s different checksums (%s)",
-              filename, len(checksums), "; ".join(variants))
+      self._ErrorIf(test, constants.CV_ECLUSTERFILECHECK, None,
+                    "File %s found with %s different checksums (%s)",
+                    filename, len(checksums), "; ".join(variants))
 
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
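With _VerifyFiles operating on UUID sets, every message in the hunk above
funnels them through the same conversion chain before display. The pattern,
isolated (uuids stands in for e.g. missing_file or unexpected):

    pretty = utils.CommaJoin(utils.NiceSort(map(self.cfg.GetNodeName, uuids)))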
2132 | 2155 |
L{ganeti.config.ConfigWriter.ComputeDRBDMap} |
2133 | 2156 |
|
2134 | 2157 |
""" |
2135 |
node = ninfo.name |
|
2158 |
node_name = ninfo.name
|
|
2136 | 2159 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
2137 | 2160 |
|
2138 | 2161 |
if drbd_helper: |
2139 | 2162 |
helper_result = nresult.get(constants.NV_DRBDHELPER, None) |
2140 | 2163 |
test = (helper_result is None) |
2141 |
_ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, |
|
2164 |
_ErrorIf(test, constants.CV_ENODEDRBDHELPER, node_name,
|
|
2142 | 2165 |
"no drbd usermode helper returned") |
2143 | 2166 |
if helper_result: |
2144 | 2167 |
status, payload = helper_result |
2145 | 2168 |
test = not status |
2146 |
_ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, |
|
2169 |
_ErrorIf(test, constants.CV_ENODEDRBDHELPER, node_name,
|
|
2147 | 2170 |
"drbd usermode helper check unsuccessful: %s", payload) |
2148 | 2171 |
test = status and (payload != drbd_helper) |
2149 |
_ErrorIf(test, constants.CV_ENODEDRBDHELPER, node, |
|
2172 |
_ErrorIf(test, constants.CV_ENODEDRBDHELPER, node_name,
|
|
2150 | 2173 |
"wrong drbd usermode helper: %s", payload) |
2151 | 2174 |
|
2152 | 2175 |
# compute the DRBD minors |
2153 | 2176 |
node_drbd = {} |
2154 |
for minor, instance in drbd_map[node].items():
|
|
2177 |
for minor, instance in drbd_map[ninfo.uuid].items():
|
|
2155 | 2178 |
test = instance not in instanceinfo |
2156 | 2179 |
_ErrorIf(test, constants.CV_ECLUSTERCFG, None, |
2157 | 2180 |
"ghost instance '%s' in temporary DRBD map", instance) |
... | ... | |
2167 | 2190 |
# and now check them |
2168 | 2191 |
used_minors = nresult.get(constants.NV_DRBDLIST, []) |
2169 | 2192 |
test = not isinstance(used_minors, (tuple, list)) |
2170 |
_ErrorIf(test, constants.CV_ENODEDRBD, node, |
|
2193 |
_ErrorIf(test, constants.CV_ENODEDRBD, node_name,
|
|
2171 | 2194 |
"cannot parse drbd status file: %s", str(used_minors)) |
2172 | 2195 |
if test: |
2173 | 2196 |
# we cannot check drbd status |
... | ... | |
2175 | 2198 |
|
2176 | 2199 |
for minor, (iname, must_exist) in node_drbd.items(): |
2177 | 2200 |
test = minor not in used_minors and must_exist |
2178 |
_ErrorIf(test, constants.CV_ENODEDRBD, node, |
|
2201 |
_ErrorIf(test, constants.CV_ENODEDRBD, node_name,
|
|
2179 | 2202 |
"drbd minor %d of instance %s is not active", minor, iname) |
2180 | 2203 |
for minor in used_minors: |
2181 | 2204 |
test = minor not in node_drbd |
2182 |
_ErrorIf(test, constants.CV_ENODEDRBD, node, |
|
2205 |
_ErrorIf(test, constants.CV_ENODEDRBD, node_name,
|
|
2183 | 2206 |
"unallocated drbd minor %d is in use", minor) |
2184 | 2207 |
|
2185 | 2208 |
def _UpdateNodeOS(self, ninfo, nresult, nimg): |
... | ... | |
2191 | 2214 |
@param nimg: the node image object |
2192 | 2215 |
|
2193 | 2216 |
""" |
2194 |
node = ninfo.name |
|
2217 |
node_name = ninfo.name
|
|
2195 | 2218 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
2196 | 2219 |
|
2197 | 2220 |
remote_os = nresult.get(constants.NV_OSLIST, None) |
... | ... | |
2199 | 2222 |
not compat.all(isinstance(v, list) and len(v) == 7 |
2200 | 2223 |
for v in remote_os)) |
2201 | 2224 |
|
2202 |
_ErrorIf(test, constants.CV_ENODEOS, node, |
|
2225 |
_ErrorIf(test, constants.CV_ENODEOS, node_name,
|
|
2203 | 2226 |
"node hasn't returned valid OS data") |
2204 | 2227 |
|
2205 | 2228 |
nimg.os_fail = test |
... | ... | |
2232 | 2255 |
@param base: the 'template' node we match against (e.g. from the master) |
2233 | 2256 |
|
2234 | 2257 |
""" |
2235 |
node = ninfo.name |
|
2258 |
node_name = ninfo.name
|
|
2236 | 2259 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
2237 | 2260 |
|
2238 | 2261 |
assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?" |
... | ... | |
2241 | 2264 |
for os_name, os_data in nimg.oslist.items(): |
2242 | 2265 |
assert os_data, "Empty OS status for OS %s?!" % os_name |
2243 | 2266 |
f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0] |
2244 |
_ErrorIf(not f_status, constants.CV_ENODEOS, node, |
|
2267 |
_ErrorIf(not f_status, constants.CV_ENODEOS, node_name,
|
|
2245 | 2268 |
"Invalid OS %s (located at %s): %s", os_name, f_path, f_diag) |
2246 |
_ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node, |
|
2269 |
_ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node_name,
|
|
2247 | 2270 |
"OS '%s' has multiple entries (first one shadows the rest): %s", |
2248 | 2271 |
os_name, utils.CommaJoin([v[0] for v in os_data])) |
2249 | 2272 |
# comparisons with the 'base' image |
2250 | 2273 |
test = os_name not in base.oslist |
2251 |
_ErrorIf(test, constants.CV_ENODEOS, node, |
|
2274 |
_ErrorIf(test, constants.CV_ENODEOS, node_name,
|
|
2252 | 2275 |
"Extra OS %s not present on reference node (%s)", |
2253 |
os_name, base.name)
|
|
2276 |
os_name, self.cfg.GetNodeName(base.uuid))
|
|
2254 | 2277 |
if test: |
2255 | 2278 |
continue |
2256 | 2279 |
assert base.oslist[os_name], "Base node has empty OS status?" |
... | ... | |
2262 | 2285 |
("variants list", f_var, b_var), |
2263 | 2286 |
("parameters", beautify_params(f_param), |
2264 | 2287 |
beautify_params(b_param))]: |
2265 |
_ErrorIf(a != b, constants.CV_ENODEOS, node, |
|
2288 |
_ErrorIf(a != b, constants.CV_ENODEOS, node_name,
|
|
2266 | 2289 |
"OS %s for %s differs from reference node %s: [%s] vs. [%s]", |
2267 |
kind, os_name, base.name,
|
|
2290 |
kind, os_name, self.cfg.GetNodeName(base.uuid),
|
|
2268 | 2291 |
utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b))) |
2269 | 2292 |
|
2270 | 2293 |
# check any missing OSes |
2271 | 2294 |
missing = set(base.oslist.keys()).difference(nimg.oslist.keys()) |
2272 |
_ErrorIf(missing, constants.CV_ENODEOS, node, |
|
2295 |
_ErrorIf(missing, constants.CV_ENODEOS, node_name,
|
|
2273 | 2296 |
"OSes present on reference node %s but missing on this node: %s", |
2274 |
base.name, utils.CommaJoin(missing))
|
|
2297 |
self.cfg.GetNodeName(base.uuid), utils.CommaJoin(missing))
|
|
2275 | 2298 |
|
2276 | 2299 |
def _VerifyFileStoragePaths(self, ninfo, nresult, is_master): |
2277 | 2300 |
"""Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}. |
... | ... | |
2283 | 2306 |
@param is_master: Whether node is the master node |
2284 | 2307 |
|
2285 | 2308 |
""" |
2286 |
node = ninfo.name |
|
2309 |
node_name = ninfo.name
|
|
2287 | 2310 |
|
2288 | 2311 |
if (is_master and |
2289 | 2312 |
(constants.ENABLE_FILE_STORAGE or |
... | ... | |
2292 | 2315 |
fspaths = nresult[constants.NV_FILE_STORAGE_PATHS] |
2293 | 2316 |
except KeyError: |
2294 | 2317 |
# This should never happen |
2295 |
self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node, |
|
2318 |
self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node_name,
|
|
2296 | 2319 |
"Node did not return forbidden file storage paths") |
2297 | 2320 |
else: |
2298 |
self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node, |
|
2321 |
self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node_name,
|
|
2299 | 2322 |
"Found forbidden file storage paths: %s", |
2300 | 2323 |
utils.CommaJoin(fspaths)) |
2301 | 2324 |
else: |
2302 | 2325 |
self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult, |
2303 |
constants.CV_ENODEFILESTORAGEPATHS, node, |
|
2326 |
constants.CV_ENODEFILESTORAGEPATHS, node_name,
|
|
2304 | 2327 |
"Node should not have returned forbidden file storage" |
2305 | 2328 |
" paths") |
2306 | 2329 |
|
... | ... | |
2312 | 2335 |
@param nresult: the remote results for the node |
2313 | 2336 |
|
2314 | 2337 |
""" |
2315 |
node = ninfo.name |
|
2338 |
node_name = ninfo.name
|
|
2316 | 2339 |
# We just have to verify the paths on master and/or master candidates |
2317 | 2340 |
# as the oob helper is invoked on the master |
2318 | 2341 |
if ((ninfo.master_candidate or ninfo.master_capable) and |
2319 | 2342 |
constants.NV_OOB_PATHS in nresult): |
2320 | 2343 |
for path_result in nresult[constants.NV_OOB_PATHS]: |
2321 |
self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result) |
|
2344 |
self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, |
|
2345 |
node_name, path_result) |
|
2322 | 2346 |
|
2323 | 2347 |
def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name): |
2324 | 2348 |
"""Verifies and updates the node volume data. |
... | ... | |
2333 | 2357 |
@param vg_name: the configured VG name |
2334 | 2358 |
|
2335 | 2359 |
""" |
2336 |
node = ninfo.name |
|
2360 |
node_name = ninfo.name
|
|
2337 | 2361 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
2338 | 2362 |
|
2339 | 2363 |
nimg.lvm_fail = True |
... | ... | |
2341 | 2365 |
if vg_name is None: |
2342 | 2366 |
pass |
2343 | 2367 |
elif isinstance(lvdata, basestring): |
2344 |
_ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
|
|
2345 |
utils.SafeEncode(lvdata)) |
|
2368 |
_ErrorIf(True, constants.CV_ENODELVM, node_name,
|
|
2369 |
"LVM problem on node: %s", utils.SafeEncode(lvdata))
|
|
2346 | 2370 |
elif not isinstance(lvdata, dict): |
2347 |
_ErrorIf(True, constants.CV_ENODELVM, node, |
|
2371 |
_ErrorIf(True, constants.CV_ENODELVM, node_name,
|
|
2348 | 2372 |
"rpc call to node failed (lvlist)") |
2349 | 2373 |
else: |
2350 | 2374 |
nimg.volumes = lvdata |
... | ... | |
2383 | 2407 |
@param vg_name: the configured VG name |
2384 | 2408 |
|
2385 | 2409 |
""" |
2386 |
node = ninfo.name |
|
2410 |
node_name = ninfo.name
|
|
2387 | 2411 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
2388 | 2412 |
|
2389 | 2413 |
# try to read free memory (from the hypervisor) |
2390 | 2414 |
hv_info = nresult.get(constants.NV_HVINFO, None) |
2391 | 2415 |
test = not isinstance(hv_info, dict) or "memory_free" not in hv_info |
2392 |
_ErrorIf(test, constants.CV_ENODEHV, node, |
|
2416 |
_ErrorIf(test, constants.CV_ENODEHV, node_name,
|
|
2393 | 2417 |
"rpc call to node failed (hvinfo)") |
2394 | 2418 |
if not test: |
2395 | 2419 |
try: |
2396 | 2420 |
nimg.mfree = int(hv_info["memory_free"]) |
2397 | 2421 |
except (ValueError, TypeError): |
2398 |
_ErrorIf(True, constants.CV_ENODERPC, node, |
|
2422 |
_ErrorIf(True, constants.CV_ENODERPC, node_name,
|
|
2399 | 2423 |
"node returned invalid nodeinfo, check hypervisor") |
2400 | 2424 |
|
2401 | 2425 |
# FIXME: devise a free space model for file based instances as well |
2402 | 2426 |
if vg_name is not None: |
2403 | 2427 |
test = (constants.NV_VGLIST not in nresult or |
2404 | 2428 |
vg_name not in nresult[constants.NV_VGLIST]) |
2405 |
_ErrorIf(test, constants.CV_ENODELVM, node, |
|
2429 |
_ErrorIf(test, constants.CV_ENODELVM, node_name,
|
|
2406 | 2430 |
"node didn't return data for the volume group '%s'" |
2407 | 2431 |
" - it is either missing or broken", vg_name) |
2408 | 2432 |
if not test: |
2409 | 2433 |
try: |
2410 | 2434 |
nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name]) |
2411 | 2435 |
except (ValueError, TypeError): |
2412 |
_ErrorIf(True, constants.CV_ENODERPC, node, |
|
2436 |
_ErrorIf(True, constants.CV_ENODERPC, node_name,
|
|
2413 | 2437 |
"node returned invalid LVM info, check LVM status") |
2414 | 2438 |
|
2415 |
def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
|
|
2439 |
def _CollectDiskInfo(self, node_uuids, node_image, instanceinfo):
|
|
2416 | 2440 |
"""Gets per-disk status information for all instances. |
2417 | 2441 |
|
2418 |
@type nodelist: list of strings
|
|
2419 |
@param nodelist: Node names
|
|
2442 |
@type node_uuids: list of strings
|
|
2443 |
@param node_uuids: Node UUIDs
|
|
2420 | 2444 |
@type node_image: dict of (name, L{objects.Node}) |
2421 | 2445 |
@param node_image: Node objects |
2422 | 2446 |
@type instanceinfo: dict of (name, L{objects.Instance}) |
... | ... | |
2427 | 2451 |
list of tuples (success, payload) |
2428 | 2452 |
|
2429 | 2453 |
""" |
2430 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
|
2431 |
|
|
2432 | 2454 |
node_disks = {} |
2433 | 2455 |
node_disks_devonly = {} |
2434 | 2456 |
diskless_instances = set() |
2435 | 2457 |
diskless = constants.DT_DISKLESS |
2436 | 2458 |
|
2437 |
for nname in nodelist:
|
|
2438 |
node_instances = list(itertools.chain(node_image[nname].pinst,
|
|
2439 |
node_image[nname].sinst))
|
|
2459 |
for nuuid in node_uuids:
|
|
2460 |
node_instances = list(itertools.chain(node_image[nuuid].pinst,
|
|
2461 |
node_image[nuuid].sinst))
|
|
2440 | 2462 |
diskless_instances.update(inst for inst in node_instances |
2441 | 2463 |
if instanceinfo[inst].disk_template == diskless) |
2442 | 2464 |
disks = [(inst, disk) |
... | ... | |
2447 | 2469 |
# No need to collect data |
2448 | 2470 |
continue |
2449 | 2471 |
|
2450 |
node_disks[nname] = disks
|
|
2472 |
node_disks[nuuid] = disks
|
|
2451 | 2473 |
|
2452 | 2474 |
# _AnnotateDiskParams makes already copies of the disks |
2453 | 2475 |
devonly = [] |
2454 | 2476 |
for (inst, dev) in disks: |
2455 | 2477 |
(anno_disk,) = AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg) |
2456 |
self.cfg.SetDiskID(anno_disk, nname)
|
|
2478 |
self.cfg.SetDiskID(anno_disk, nuuid)
|
|
2457 | 2479 |
devonly.append(anno_disk) |
2458 | 2480 |
|
2459 |
node_disks_devonly[nname] = devonly
|
|
2481 |
node_disks_devonly[nuuid] = devonly
|
|
2460 | 2482 |
|
2461 | 2483 |
assert len(node_disks) == len(node_disks_devonly) |
2462 | 2484 |
|
... | ... | |
2468 | 2490 |
|
2469 | 2491 |
instdisk = {} |
2470 | 2492 |
|
2471 |
for (nname, nres) in result.items(): |
|
2472 |
disks = node_disks[nname] |
|
2493 |
for (nuuid, nres) in result.items(): |
|
2494 |
node = self.cfg.GetNodeInfo(nuuid) |
|
2495 |
disks = node_disks[node.uuid] |
|
2473 | 2496 |
|
2474 | 2497 |
if nres.offline: |
2475 | 2498 |
# No data from this node |
2476 | 2499 |
data = len(disks) * [(False, "node offline")] |
2477 | 2500 |
else: |
2478 | 2501 |
msg = nres.fail_msg |
2479 |
_ErrorIf(msg, constants.CV_ENODERPC, nname,
|
2480 |
"while getting disk information: %s", msg)
|
2502 |
self._ErrorIf(msg, constants.CV_ENODERPC, node.name,
|
2503 |
"while getting disk information: %s", msg)
|
2481 | 2504 |
if msg: |
2482 | 2505 |
# No data from this node |
2483 | 2506 |
data = len(disks) * [(False, msg)] |
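
The branch above guarantees that every queried disk ends up with a (success, payload) tuple, even when the node is offline or the RPC failed. A self-contained sketch of that normalization, assuming the same result shape (this helper is illustrative, not part of Ganeti):

    def normalize_node_result(disks, offline, fail_msg, payload):
        # One (success, payload) tuple per disk, even on failure.
        if offline:
            return len(disks) * [(False, "node offline")]
        if fail_msg:
            return len(disks) * [(False, fail_msg)]
        return payload

    # An unreachable node yields one placeholder per queried disk:
    print(normalize_node_result(["d0", "d1"], False, "connection timed out", None))
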
... | ... | |
2488 | 2511 |
data.append(i) |
2489 | 2512 |
else: |
2490 | 2513 |
logging.warning("Invalid result from node %s, entry %d: %s", |
2491 |
nname, idx, i)
|
2514 |
node.name, idx, i)
|
2492 | 2515 |
data.append((False, "Invalid result from the remote node")) |
2493 | 2516 |
|
2494 | 2517 |
for ((inst, _), status) in zip(disks, data): |
2495 |
instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
|
2518 |
instdisk.setdefault(inst, {}).setdefault(node.uuid, []).append(status)
|
2496 | 2519 |
|
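
After this loop, instdisk is a nested mapping of instance -> node UUID -> list of (success, payload) tuples, which is what the assertion below checks. A sketch of the shape with made-up values:

    instdisk = {}
    results = [("inst1", "node-uuid-a", (True, {"dev": "/dev/drbd0"})),
               ("inst1", "node-uuid-b", (False, "node offline"))]

    for inst, nuuid, status in results:
        instdisk.setdefault(inst, {}).setdefault(nuuid, []).append(status)

    assert instdisk["inst1"]["node-uuid-b"] == [(False, "node offline")]
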
2497 | 2520 |
# Add empty entries for diskless instances. |
2498 | 2521 |
for inst in diskless_instances: |
... | ... | |
2500 | 2523 |
instdisk[inst] = {} |
2501 | 2524 |
|
2502 | 2525 |
assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and |
2503 |
len(nnames) <= len(instanceinfo[inst].all_nodes) and
|
2526 |
len(nuuids) <= len(instanceinfo[inst].all_nodes) and
|
2504 | 2527 |
compat.all(isinstance(s, (tuple, list)) and |
2505 | 2528 |
len(s) == 2 for s in statuses) |
2506 |
for inst, nnames in instdisk.items()
|
2507 |
for nname, statuses in nnames.items())
|
2529 |
for inst, nuuids in instdisk.items()
|
2530 |
for nuuid, statuses in nuuids.items())
|
2508 | 2531 |
if __debug__: |
2509 | 2532 |
instdisk_keys = set(instdisk) |
2510 | 2533 |
instanceinfo_keys = set(instanceinfo) |
... | ... | |
2568 | 2591 |
"""Build hooks nodes. |
2569 | 2592 |
|
2570 | 2593 |
""" |
2571 |
return ([], self.my_node_names)
|
2594 |
return ([], list(self.my_node_info.keys()))
|
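
Since my_node_info is keyed by node UUID, the hook node list can be taken straight from its keys; hooks are now addressed by UUID rather than by name. Sketch with a stand-in dict:

    my_node_info = {"u-1": object(), "u-2": object()}  # keyed by node UUID

    # BuildHooksNodes-style result: no master-only nodes, run on all members.
    hook_nodes = ([], list(my_node_info.keys()))
    assert sorted(hook_nodes[1]) == ["u-1", "u-2"]
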
2572 | 2595 |
|
2573 | 2596 |
def Exec(self, feedback_fn): |
2574 | 2597 |
"""Verify integrity of the node group, performing various test on nodes. |
... | ... | |
2577 | 2600 |
# This method has too many local variables. pylint: disable=R0914 |
2578 | 2601 |
feedback_fn("* Verifying group '%s'" % self.group_info.name) |
2579 | 2602 |
|
2580 |
if not self.my_node_names:
|
2603 |
if not self.my_node_uuids:
|
2581 | 2604 |
# empty node group |
2582 | 2605 |
feedback_fn("* Empty node group, skipping verification") |
2583 | 2606 |
return True |
... | ... | |
2591 | 2614 |
drbd_helper = self.cfg.GetDRBDHelper() |
2592 | 2615 |
cluster = self.cfg.GetClusterInfo() |
2593 | 2616 |
hypervisors = cluster.enabled_hypervisors |
2594 |
node_data_list = [self.my_node_info[name] for name in self.my_node_names]
|
2617 |
node_data_list = self.my_node_info.values()
|
2595 | 2618 |
|
2596 | 2619 |
i_non_redundant = [] # Non redundant instances |
2597 | 2620 |
i_non_a_balanced = [] # Non auto-balanced instances |
... | ... | |
2606 | 2629 |
filemap = ComputeAncillaryFiles(cluster, False) |
2607 | 2630 |
|
2608 | 2631 |
# do local checksums |
2609 |
master_node = self.master_node = self.cfg.GetMasterNode()
|
2632 |
master_node_uuid = self.master_node = self.cfg.GetMasterNode()
|
2610 | 2633 |
master_ip = self.cfg.GetMasterIP() |
2611 | 2634 |
|
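
GetMasterNode() now returns the master's UUID, which is what the comparisons and RPC addressing below use; GetMasterNodeName() is called only where a display string is needed. A toy config illustrating the split (not the real ConfigWriter interface):

    class ToyConfig(object):
        def __init__(self, master_uuid, names):
            self._master = master_uuid
            self._names = names                # uuid -> name
        def GetMasterNode(self):               # UUID, used for lookups
            return self._master
        def GetMasterNodeName(self):           # name, used for messages
            return self._names[self._master]

    cfg = ToyConfig("u-master", {"u-master": "master.example.com"})
    assert cfg.GetMasterNode() == "u-master"
    assert cfg.GetMasterNodeName() == "master.example.com"
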
2612 |
feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
|
2635 |
feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_uuids))
|
2613 | 2636 |
|
2614 | 2637 |
user_scripts = [] |
2615 | 2638 |
if self.cfg.GetUseExternalMipScript(): |
... | ... | |
2635 | 2658 |
constants.NV_HVINFO: self.cfg.GetHypervisorType(), |
2636 | 2659 |
constants.NV_NODESETUP: None, |
2637 | 2660 |
constants.NV_TIME: None, |
2638 |
constants.NV_MASTERIP: (master_node, master_ip),
|
2661 |
constants.NV_MASTERIP: (self.cfg.GetMasterNodeName(), master_ip),
|
2639 | 2662 |
constants.NV_OSLIST: None, |
2640 | 2663 |
constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(), |
2641 | 2664 |
constants.NV_USERSCRIPTS: user_scripts, |
... | ... | |
2653 | 2676 |
|
2654 | 2677 |
if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE: |
2655 | 2678 |
# Load file storage paths only from master node |
2656 |
node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
|
2679 |
node_verify_param[constants.NV_FILE_STORAGE_PATHS] = \
|
2680 |
self.cfg.GetMasterNodeName()
|
2657 | 2681 |
|
2658 | 2682 |
# bridge checks |
2659 | 2683 |
# FIXME: this needs to be changed per node-group, not cluster-wide |
... | ... | |
2671 | 2695 |
node_verify_param[constants.NV_BRIDGES] = list(bridges) |
2672 | 2696 |
|
2673 | 2697 |
# Build our expected cluster state |
2674 |
node_image = dict((node.name, self.NodeImage(offline=node.offline,
|
2675 |
name=node.name,
|
2698 |
node_image = dict((node.uuid, self.NodeImage(offline=node.offline,
|
2699 |
uuid=node.uuid,
|
2676 | 2700 |
vm_capable=node.vm_capable)) |
2677 | 2701 |
for node in node_data_list) |
2678 | 2702 |
|
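
node_image is the expected-state table for the group, and it too is now keyed by UUID, with the UUID stored on each entry instead of the name. Reduced sketch of the construction (stand-in classes, not the real NodeImage):

    from collections import namedtuple

    Node = namedtuple("Node", ["uuid", "offline", "vm_capable"])  # stand-in

    class NodeImageSketch(object):
        def __init__(self, offline=False, uuid=None, vm_capable=True):
            self.offline, self.uuid, self.vm_capable = offline, uuid, vm_capable

    node_data_list = [Node("u-1", False, True), Node("u-2", True, True)]
    node_image = dict((node.uuid, NodeImageSketch(offline=node.offline,
                                                  uuid=node.uuid,
                                                  vm_capable=node.vm_capable))
                      for node in node_data_list)
    assert node_image["u-2"].offline
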
... | ... | |
2691 | 2715 |
if inst_config.admin_state == constants.ADMINST_OFFLINE: |
2692 | 2716 |
i_offline += 1 |
2693 | 2717 |
|
2694 |
for nname in inst_config.all_nodes:
|
2695 |
if nname not in node_image:
|
2696 |
gnode = self.NodeImage(name=nname)
|
2697 |
gnode.ghost = (nname not in self.all_node_info)
|
2698 |
node_image[nname] = gnode
|
2718 |
for nuuid in inst_config.all_nodes:
|
2719 |
if nuuid not in node_image:
|
2720 |
gnode = self.NodeImage(uuid=nuuid)
|
2721 |
gnode.ghost = (nuuid not in self.all_node_info)
|
2722 |
node_image[nuuid] = gnode
|
2699 | 2723 |
|
2700 | 2724 |
inst_config.MapLVsByNode(node_vol_should) |
2701 | 2725 |
|
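
A "ghost" entry is created for any node an instance references that the configuration no longer knows about; with UUID keys this is a plain membership test. Minimal sketch:

    all_node_info = {"u-1": "node1", "u-2": "node2"}   # known node UUIDs
    instance_nodes = ["u-1", "u-stale"]                # UUIDs an instance references

    ghosts = [nuuid for nuuid in instance_nodes if nuuid not in all_node_info]
    assert ghosts == ["u-stale"]
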
... | ... | |
2709 | 2733 |
nimg.sbp[pnode] = [] |
2710 | 2734 |
nimg.sbp[pnode].append(instance) |
2711 | 2735 |
|
2712 |
es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg, self.my_node_names)
|
2736 |
es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
|
2737 |
self.my_node_info.keys())
|
2713 | 2738 |
# The value of exclusive_storage should be the same across the group, so if |
2714 | 2739 |
# it's True for at least a node, we act as if it were set for all the nodes |
2715 | 2740 |
self._exclusive_storage = compat.any(es_flags.values()) |
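
The exclusive-storage flags come back as a per-UUID dict, and the group is treated as exclusive-storage if any member sets the flag; the aggregation is just an any() over the values:

    # Per-node flags keyed by UUID; any(...) mirrors compat.any above.
    es_flags = {"u-1": False, "u-2": True, "u-3": False}
    exclusive_storage = any(es_flags.values())
    assert exclusive_storage  # one node is enough to enable it group-wide
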
... | ... | |
2724 | 2749 |
# time before and after executing the request, we can at least have a time |
2725 | 2750 |
# window. |
2726 | 2751 |
nvinfo_starttime = time.time() |
2727 |
all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
|
2752 |
all_nvinfo = self.rpc.call_node_verify(self.my_node_uuids,
|
2728 | 2753 |
node_verify_param, |
2729 | 2754 |
self.cfg.GetClusterName(), |
2730 | 2755 |
self.cfg.GetClusterInfo().hvparams) |
... | ... | |
2742 | 2767 |
all_drbd_map = self.cfg.ComputeDRBDMap() |
2743 | 2768 |
|
2744 | 2769 |
feedback_fn("* Gathering disk information (%s nodes)" % |
2745 |
len(self.my_node_names))
|
2746 |
instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
|
2770 |
len(self.my_node_uuids))
|
2771 |
instdisk = self._CollectDiskInfo(self.my_node_info.keys(), node_image,
|
2747 | 2772 |
self.my_inst_info) |
2748 | 2773 |
|
2749 | 2774 |
feedback_fn("* Verifying configuration file consistency") |
2750 | 2775 |
|
2751 | 2776 |
# If not all nodes are being checked, we need to make sure the master node |
2752 | 2777 |
# and a non-checked vm_capable node are in the list. |
2753 |
absent_nodes = set(self.all_node_info).difference(self.my_node_info)
|
2754 |
if absent_nodes:
|
2778 |
absent_node_uuids = set(self.all_node_info).difference(self.my_node_info)
|
2779 |
if absent_node_uuids:
|
2755 | 2780 |
vf_nvinfo = all_nvinfo.copy() |
2756 | 2781 |
vf_node_info = list(self.my_node_info.values()) |
2757 |
additional_nodes = []
|
2758 |
if master_node not in self.my_node_info:
|
2759 |
additional_nodes.append(master_node)
|
2760 |
vf_node_info.append(self.all_node_info[master_node])
|
2782 |
additional_node_uuids = []
|
2783 |
if master_node_uuid not in self.my_node_info:
|
2784 |
additional_node_uuids.append(master_node_uuid)
|
2785 |
vf_node_info.append(self.all_node_info[master_node_uuid])
|
2761 | 2786 |
# Add the first vm_capable node we find which is not included, |
2762 | 2787 |
# excluding the master node (which we already have) |
2763 |
for node in absent_nodes:
|
2764 |
nodeinfo = self.all_node_info[node]
|
2788 |
for node_uuid in absent_node_uuids:
|
2789 |
nodeinfo = self.all_node_info[node_uuid]
|
2765 | 2790 |
if (nodeinfo.vm_capable and not nodeinfo.offline and |
2766 |
node != master_node):
|
2767 |
additional_nodes.append(node)
|
2768 |
vf_node_info.append(self.all_node_info[node])
|
2791 |
node_uuid != master_node_uuid):
|
2792 |
additional_node_uuids.append(node_uuid)
|
2793 |
vf_node_info.append(self.all_node_info[node_uuid])
|
2769 | 2794 |
break |
2770 | 2795 |
key = constants.NV_FILELIST |
2771 | 2796 |
vf_nvinfo.update(self.rpc.call_node_verify( |
2772 |
additional_nodes, {key: node_verify_param[key]},
|
2797 |
additional_node_uuids, {key: node_verify_param[key]},
|
2773 | 2798 |
self.cfg.GetClusterName(), self.cfg.GetClusterInfo().hvparams)) |
2774 | 2799 |
else: |
2775 | 2800 |
vf_nvinfo = all_nvinfo |
2776 | 2801 |
vf_node_info = self.my_node_info.values() |
2777 | 2802 |
|
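
The branch above ensures that when only a subset of nodes is verified, the file-consistency check still covers the master plus at least one online vm_capable node that was left out. A standalone sketch of that selection, assuming dict-shaped node records (illustrative helper, not Ganeti code):

    def pick_extra_nodes(all_info, checked, master_uuid):
        # Master first, then the first online vm_capable absentee.
        extra = []
        if master_uuid not in checked:
            extra.append(master_uuid)
        for nuuid in set(all_info) - set(checked):
            info = all_info[nuuid]
            if (info["vm_capable"] and not info["offline"]
                and nuuid != master_uuid):
                extra.append(nuuid)
                break
        return extra

    all_info = {"m": {"vm_capable": True, "offline": False},
                "a": {"vm_capable": True, "offline": False}}
    print(pick_extra_nodes(all_info, ["a"], "m"))  # ['m']
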
2778 |
self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
|
2803 |
self._VerifyFiles(vf_node_info, master_node_uuid, vf_nvinfo, filemap)
|
2779 | 2804 |
|
2780 | 2805 |
feedback_fn("* Verifying node status") |
2781 | 2806 |
|
2782 | 2807 |
refos_img = None |
2783 | 2808 |
|
2784 | 2809 |
for node_i in node_data_list: |
2785 |
node = node_i.name
|
2786 |
nimg = node_image[node]
|
2810 |
nimg = node_image[node_i.uuid]
|
2787 | 2811 |
|
2788 | 2812 |
if node_i.offline: |
2789 | 2813 |
if verbose: |
2790 |
feedback_fn("* Skipping offline node %s" % (node,))
|
2814 |
feedback_fn("* Skipping offline node %s" % (node_i.name,))
|
2791 | 2815 |
n_offline += 1 |
2792 | 2816 |
continue |
2793 | 2817 |
|
2794 |
if node == master_node:
|
2818 |
if node_i.uuid == master_node_uuid:
|
2795 | 2819 |
ntype = "master" |
2796 | 2820 |
elif node_i.master_candidate: |
2797 | 2821 |
ntype = "master candidate" |
... | ... | |
2801 | 2825 |
else: |
2802 | 2826 |
ntype = "regular" |
2803 | 2827 |
if verbose: |
2804 |
feedback_fn("* Verifying node %s (%s)" % (node, ntype))
|
2828 |
feedback_fn("* Verifying node %s (%s)" % (node_i.name, ntype))
|
2805 | 2829 |
|
2806 |
msg = all_nvinfo[node].fail_msg
|
2807 |
_ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
|
2808 |
msg)
|
2830 |
msg = all_nvinfo[node_i.uuid].fail_msg
|
2831 |
_ErrorIf(msg, constants.CV_ENODERPC, node_i.name,
|
2832 |
"while contacting node: %s", msg)
|
2809 | 2833 |
if msg: |
2810 | 2834 |
nimg.rpc_fail = True |
2811 | 2835 |
continue |
2812 | 2836 |
|
2813 |
nresult = all_nvinfo[node].payload
|
2837 |
nresult = all_nvinfo[node_i.uuid].payload
|
2814 | 2838 |
|
2815 | 2839 |
nimg.call_ok = self._VerifyNode(node_i, nresult) |
2816 | 2840 |
self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime) |
... | ... | |
2818 | 2842 |
self._VerifyNodeUserScripts(node_i, nresult) |
2819 | 2843 |
self._VerifyOob(node_i, nresult) |
2820 | 2844 |
self._VerifyFileStoragePaths(node_i, nresult, |
2821 |
node == master_node)
|
2845 |
node_i.uuid == master_node_uuid)
|
2822 | 2846 |
|
2823 | 2847 |
if nimg.vm_capable: |