Revision 1c3231aa
b/Makefile.am | ||
---|---|---|
596 | 596 |
src/Ganeti/OpCodes.hs \ |
597 | 597 |
src/Ganeti/OpParams.hs \ |
598 | 598 |
src/Ganeti/Path.hs \ |
599 |
src/Ganeti/Query/Cluster.hs \ |
|
599 | 600 |
src/Ganeti/Query/Common.hs \ |
600 | 601 |
src/Ganeti/Query/Export.hs \ |
601 | 602 |
src/Ganeti/Query/Filter.hs \ |
b/lib/backend.py | ||
---|---|---|
3777 | 3777 |
shutil.rmtree(status_dir, ignore_errors=True) |
3778 | 3778 |
|
3779 | 3779 |
|
3780 |
def _FindDisks(nodes_ip, disks): |
|
3780 |
def _FindDisks(target_node_uuid, nodes_ip, disks):
|
|
3781 | 3781 |
"""Sets the physical ID on disks and returns the block devices. |
3782 | 3782 |
|
3783 | 3783 |
""" |
3784 | 3784 |
# set the correct physical ID |
3785 |
my_name = netutils.Hostname.GetSysName() |
|
3786 | 3785 |
for cf in disks: |
3787 |
cf.SetPhysicalID(my_name, nodes_ip)
|
|
3786 |
cf.SetPhysicalID(target_node_uuid, nodes_ip)
|
|
3788 | 3787 |
|
3789 | 3788 |
bdevs = [] |
3790 | 3789 |
|
... | ... | |
3796 | 3795 |
return bdevs |
3797 | 3796 |
|
3798 | 3797 |
|
3799 |
def DrbdDisconnectNet(nodes_ip, disks): |
|
3798 |
def DrbdDisconnectNet(target_node_uuid, nodes_ip, disks):
|
|
3800 | 3799 |
"""Disconnects the network on a list of drbd devices. |
3801 | 3800 |
|
3802 | 3801 |
""" |
3803 |
bdevs = _FindDisks(nodes_ip, disks) |
|
3802 |
bdevs = _FindDisks(target_node_uuid, nodes_ip, disks)
|
|
3804 | 3803 |
|
3805 | 3804 |
# disconnect disks |
3806 | 3805 |
for rd in bdevs: |
... | ... | |
3811 | 3810 |
err, exc=True) |
3812 | 3811 |
|
3813 | 3812 |
|
3814 |
def DrbdAttachNet(nodes_ip, disks, instance_name, multimaster): |
|
3813 |
def DrbdAttachNet(target_node_uuid, nodes_ip, disks, instance_name, |
|
3814 |
multimaster): |
|
3815 | 3815 |
"""Attaches the network on a list of drbd devices. |
3816 | 3816 |
|
3817 | 3817 |
""" |
3818 |
bdevs = _FindDisks(nodes_ip, disks) |
|
3818 |
bdevs = _FindDisks(target_node_uuid, nodes_ip, disks)
|
|
3819 | 3819 |
|
3820 | 3820 |
if multimaster: |
3821 | 3821 |
for idx, rd in enumerate(bdevs): |
... | ... | |
3873 | 3873 |
_Fail("Can't change to primary mode: %s", err) |
3874 | 3874 |
|
3875 | 3875 |
|
3876 |
def DrbdWaitSync(nodes_ip, disks): |
|
3876 |
def DrbdWaitSync(target_node_uuid, nodes_ip, disks):
|
|
3877 | 3877 |
"""Wait until DRBDs have synchronized. |
3878 | 3878 |
|
3879 | 3879 |
""" |
... | ... | |
3883 | 3883 |
raise utils.RetryAgain() |
3884 | 3884 |
return stats |
3885 | 3885 |
|
3886 |
bdevs = _FindDisks(nodes_ip, disks) |
|
3886 |
bdevs = _FindDisks(target_node_uuid, nodes_ip, disks)
|
|
3887 | 3887 |
|
3888 | 3888 |
min_resync = 100 |
3889 | 3889 |
alldone = True |
b/lib/bootstrap.py | ||
---|---|---|
609 | 609 |
mac_prefix=mac_prefix, |
610 | 610 |
volume_group_name=vg_name, |
611 | 611 |
tcpudp_port_pool=set(), |
612 |
master_node=hostname.name, |
|
613 | 612 |
master_ip=clustername.ip, |
614 | 613 |
master_netmask=master_netmask, |
615 | 614 |
master_netdev=master_netdev, |
... | ... | |
688 | 687 |
_INITCONF_ECID) |
689 | 688 |
master_node_config.uuid = uuid_generator.Generate([], utils.NewUUID, |
690 | 689 |
_INITCONF_ECID) |
690 |
cluster_config.master_node = master_node_config.uuid |
|
691 | 691 |
nodes = { |
692 |
master_node_config.name: master_node_config,
|
|
692 |
master_node_config.uuid: master_node_config,
|
|
693 | 693 |
} |
694 | 694 |
default_nodegroup = objects.NodeGroup( |
695 | 695 |
uuid=uuid_generator.Generate([], utils.NewUUID, _INITCONF_ECID), |
696 | 696 |
name=constants.INITIAL_NODE_GROUP_NAME, |
697 |
members=[master_node_config.name],
|
|
697 |
members=[master_node_config.uuid],
|
|
698 | 698 |
diskparams={}, |
699 | 699 |
) |
700 | 700 |
nodegroups = { |
... | ... | |
714 | 714 |
mode=0600) |
715 | 715 |
|
716 | 716 |
|
717 |
def FinalizeClusterDestroy(master): |
|
717 |
def FinalizeClusterDestroy(master_uuid):
|
|
718 | 718 |
"""Execute the last steps of cluster destroy |
719 | 719 |
|
720 | 720 |
This function shuts down all the daemons, completing the destroy |
... | ... | |
725 | 725 |
modify_ssh_setup = cfg.GetClusterInfo().modify_ssh_setup |
726 | 726 |
runner = rpc.BootstrapRunner() |
727 | 727 |
|
728 |
master_name = cfg.GetNodeName(master_uuid) |
|
729 |
|
|
728 | 730 |
master_params = cfg.GetMasterNetworkParameters() |
729 |
master_params.name = master
|
|
731 |
master_params.uuid = master_uuid
|
|
730 | 732 |
ems = cfg.GetUseExternalMipScript() |
731 |
result = runner.call_node_deactivate_master_ip(master_params.name,
|
|
732 |
master_params, ems)
|
|
733 |
result = runner.call_node_deactivate_master_ip(master_name, master_params,
|
|
734 |
ems) |
|
733 | 735 |
|
734 | 736 |
msg = result.fail_msg |
735 | 737 |
if msg: |
736 | 738 |
logging.warning("Could not disable the master IP: %s", msg) |
737 | 739 |
|
738 |
result = runner.call_node_stop_master(master) |
|
740 |
result = runner.call_node_stop_master(master_name)
|
|
739 | 741 |
msg = result.fail_msg |
740 | 742 |
if msg: |
741 | 743 |
logging.warning("Could not disable the master role: %s", msg) |
742 | 744 |
|
743 |
result = runner.call_node_leave_cluster(master, modify_ssh_setup) |
|
745 |
result = runner.call_node_leave_cluster(master_name, modify_ssh_setup)
|
|
744 | 746 |
msg = result.fail_msg |
745 | 747 |
if msg: |
746 | 748 |
logging.warning("Could not shutdown the node daemon and cleanup" |
... | ... | |
788 | 790 |
sstore = ssconf.SimpleStore() |
789 | 791 |
|
790 | 792 |
old_master, new_master = ssconf.GetMasterAndMyself(sstore) |
791 |
node_list = sstore.GetNodeList()
|
|
793 |
node_names = sstore.GetNodeList()
|
|
792 | 794 |
mc_list = sstore.GetMasterCandidates() |
793 | 795 |
|
794 | 796 |
if old_master == new_master: |
... | ... | |
807 | 809 |
errors.ECODE_STATE) |
808 | 810 |
|
809 | 811 |
if not no_voting: |
810 |
vote_list = GatherMasterVotes(node_list)
|
|
812 |
vote_list = GatherMasterVotes(node_names)
|
|
811 | 813 |
|
812 | 814 |
if vote_list: |
813 | 815 |
voted_master = vote_list[0][0] |
... | ... | |
832 | 834 |
# configuration data |
833 | 835 |
cfg = config.ConfigWriter(accept_foreign=True) |
834 | 836 |
|
837 |
old_master_node = cfg.GetNodeInfoByName(old_master) |
|
838 |
if old_master_node is None: |
|
839 |
raise errors.OpPrereqError("Could not find old master node '%s' in" |
|
840 |
" cluster configuration." % old_master, |
|
841 |
errors.ECODE_NOENT) |
|
842 |
|
|
835 | 843 |
cluster_info = cfg.GetClusterInfo() |
836 |
cluster_info.master_node = new_master |
|
844 |
new_master_node = cfg.GetNodeInfoByName(new_master) |
|
845 |
if new_master_node is None: |
|
846 |
raise errors.OpPrereqError("Could not find new master node '%s' in" |
|
847 |
" cluster configuration." % new_master, |
|
848 |
errors.ECODE_NOENT) |
|
849 |
|
|
850 |
cluster_info.master_node = new_master_node.uuid |
|
837 | 851 |
# this will also regenerate the ssconf files, since we updated the |
838 | 852 |
# cluster info |
839 | 853 |
cfg.Update(cluster_info, logging.error) |
... | ... | |
851 | 865 |
|
852 | 866 |
runner = rpc.BootstrapRunner() |
853 | 867 |
master_params = cfg.GetMasterNetworkParameters() |
854 |
master_params.name = old_master
|
|
868 |
master_params.uuid = old_master_node.uuid
|
|
855 | 869 |
ems = cfg.GetUseExternalMipScript() |
856 |
result = runner.call_node_deactivate_master_ip(master_params.name,
|
|
870 |
result = runner.call_node_deactivate_master_ip(old_master,
|
|
857 | 871 |
master_params, ems) |
858 | 872 |
|
859 | 873 |
msg = result.fail_msg |
... | ... | |
917 | 931 |
return old_master |
918 | 932 |
|
919 | 933 |
|
920 |
def GatherMasterVotes(node_list):
|
|
934 |
def GatherMasterVotes(node_names):
|
|
921 | 935 |
"""Check the agreement on who is the master. |
922 | 936 |
|
923 | 937 |
This function will return a list of (node, number of votes), ordered |
... | ... | |
931 | 945 |
since we use the same source for configuration information for both |
932 | 946 |
backend and bootstrap, we'll always vote for ourselves. |
933 | 947 |
|
934 |
@type node_list: list
|
|
935 |
@param node_list: the list of nodes to query for master info; the current
|
|
948 |
@type node_names: list
|
|
949 |
@param node_names: the list of nodes to query for master info; the current
|
|
936 | 950 |
node will be removed if it is in the list |
937 | 951 |
@rtype: list |
938 | 952 |
@return: list of (node, votes) |
... | ... | |
940 | 954 |
""" |
941 | 955 |
myself = netutils.Hostname.GetSysName() |
942 | 956 |
try: |
943 |
node_list.remove(myself)
|
|
957 |
node_names.remove(myself)
|
|
944 | 958 |
except ValueError: |
945 | 959 |
pass |
946 |
if not node_list:
|
|
960 |
if not node_names:
|
|
947 | 961 |
# no nodes left (eventually after removing myself) |
948 | 962 |
return [] |
949 |
results = rpc.BootstrapRunner().call_master_info(node_list)
|
|
963 |
results = rpc.BootstrapRunner().call_master_info(node_names)
|
|
950 | 964 |
if not isinstance(results, dict): |
951 | 965 |
# this should not happen (unless internal error in rpc) |
952 | 966 |
logging.critical("Can't complete rpc call, aborting master startup") |
953 |
return [(None, len(node_list))]
|
|
967 |
return [(None, len(node_names))]
|
|
954 | 968 |
votes = {} |
955 |
for node in results: |
|
956 |
nres = results[node] |
|
969 |
for node_name in results:
|
|
970 |
nres = results[node_name]
|
|
957 | 971 |
data = nres.payload |
958 | 972 |
msg = nres.fail_msg |
959 | 973 |
fail = False |
960 | 974 |
if msg: |
961 |
logging.warning("Error contacting node %s: %s", node, msg) |
|
975 |
logging.warning("Error contacting node %s: %s", node_name, msg)
|
|
962 | 976 |
fail = True |
963 | 977 |
# for now we accept both length 3, 4 and 5 (data[3] is primary ip version |
964 | 978 |
# and data[4] is the master netmask) |
965 | 979 |
elif not isinstance(data, (tuple, list)) or len(data) < 3: |
966 |
logging.warning("Invalid data received from node %s: %s", node, data) |
|
980 |
logging.warning("Invalid data received from node %s: %s", |
|
981 |
node_name, data) |
|
967 | 982 |
fail = True |
968 | 983 |
if fail: |
969 | 984 |
if None not in votes: |
b/lib/client/gnt_cluster.py | ||
---|---|---|
275 | 275 |
return 1 |
276 | 276 |
|
277 | 277 |
op = opcodes.OpClusterDestroy() |
278 |
master = SubmitOpCode(op, opts=opts) |
|
278 |
master_uuid = SubmitOpCode(op, opts=opts)
|
|
279 | 279 |
# if we reached this, the opcode didn't fail; we can proceed to |
280 | 280 |
# shutdown all the daemons |
281 |
bootstrap.FinalizeClusterDestroy(master) |
|
281 |
bootstrap.FinalizeClusterDestroy(master_uuid)
|
|
282 | 282 |
return 0 |
283 | 283 |
|
284 | 284 |
|
b/lib/client/gnt_instance.py | ||
---|---|---|
915 | 915 |
return constants.EXIT_SUCCESS |
916 | 916 |
|
917 | 917 |
|
918 |
def _FormatLogicalID(dev_type, logical_id, roman):
|
|
918 |
def _FormatDiskDetails(dev_type, dev, roman):
|
|
919 | 919 |
"""Formats the logical_id of a disk. |
920 | 920 |
|
921 | 921 |
""" |
922 | 922 |
if dev_type == constants.LD_DRBD8: |
923 |
node_a, node_b, port, minor_a, minor_b, key = logical_id
|
|
923 |
drbd_info = dev["drbd_info"]
|
|
924 | 924 |
data = [ |
925 |
("nodeA", "%s, minor=%s" % (node_a, compat.TryToRoman(minor_a, |
|
926 |
convert=roman))), |
|
927 |
("nodeB", "%s, minor=%s" % (node_b, compat.TryToRoman(minor_b, |
|
928 |
convert=roman))), |
|
929 |
("port", str(compat.TryToRoman(port, convert=roman))), |
|
930 |
("auth key", str(key)), |
|
925 |
("nodeA", "%s, minor=%s" % |
|
926 |
(drbd_info["primary_node"], |
|
927 |
compat.TryToRoman(drbd_info["primary_minor"], |
|
928 |
convert=roman))), |
|
929 |
("nodeB", "%s, minor=%s" % |
|
930 |
(drbd_info["secondary_node"], |
|
931 |
compat.TryToRoman(drbd_info["secondary_minor"], |
|
932 |
convert=roman))), |
|
933 |
("port", str(compat.TryToRoman(drbd_info["port"], convert=roman))), |
|
934 |
("auth key", str(drbd_info["secret"])), |
|
931 | 935 |
] |
932 | 936 |
elif dev_type == constants.LD_LV: |
933 |
vg_name, lv_name = logical_id
|
|
937 |
vg_name, lv_name = dev["logical_id"]
|
|
934 | 938 |
data = ["%s/%s" % (vg_name, lv_name)] |
935 | 939 |
else: |
936 |
data = [str(logical_id)]
|
|
940 |
data = [str(dev["logical_id"])]
|
|
937 | 941 |
|
938 | 942 |
return data |
939 | 943 |
|
... | ... | |
1032 | 1036 |
data.append(("access mode", dev["mode"])) |
1033 | 1037 |
if dev["logical_id"] is not None: |
1034 | 1038 |
try: |
1035 |
l_id = _FormatLogicalID(dev["dev_type"], dev["logical_id"], roman)
|
|
1039 |
l_id = _FormatDiskDetails(dev["dev_type"], dev, roman)
|
|
1036 | 1040 |
except ValueError: |
1037 | 1041 |
l_id = [str(dev["logical_id"])] |
1038 | 1042 |
if len(l_id) == 1: |
b/lib/cmdlib/backup.py | ||
---|---|---|
35 | 35 |
|
36 | 36 |
from ganeti.cmdlib.base import QueryBase, NoHooksLU, LogicalUnit |
37 | 37 |
from ganeti.cmdlib.common import GetWantedNodes, ShareAll, CheckNodeOnline, \ |
38 |
ExpandNodeName |
|
38 |
ExpandNodeUuidAndName
|
|
39 | 39 |
from ganeti.cmdlib.instance_storage import StartInstanceDisks, \ |
40 | 40 |
ShutdownInstanceDisks |
41 | 41 |
from ganeti.cmdlib.instance_utils import GetClusterDomainSecret, \ |
... | ... | |
53 | 53 |
|
54 | 54 |
# The following variables interact with _QueryBase._GetNames |
55 | 55 |
if self.names: |
56 |
self.wanted = GetWantedNodes(lu, self.names)
|
|
56 |
(self.wanted, _) = GetWantedNodes(lu, self.names)
|
|
57 | 57 |
else: |
58 | 58 |
self.wanted = locking.ALL_SET |
59 | 59 |
|
... | ... | |
82 | 82 |
if level != locking.LEVEL_CLUSTER) or |
83 | 83 |
self.do_locking or self.use_locking) |
84 | 84 |
|
85 |
nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE) |
|
85 |
node_uuids = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
|
|
86 | 86 |
|
87 | 87 |
result = [] |
88 | 88 |
|
89 |
for (node, nres) in lu.rpc.call_export_list(nodes).items():
|
|
89 |
for (node_uuid, nres) in lu.rpc.call_export_list(node_uuids).items():
|
|
90 | 90 |
if nres.fail_msg: |
91 |
result.append((node, None)) |
|
91 |
result.append((node_uuid, None))
|
|
92 | 92 |
else: |
93 |
result.extend((node, expname) for expname in nres.payload) |
|
93 |
result.extend((node_uuid, expname) for expname in nres.payload)
|
|
94 | 94 |
|
95 | 95 |
return result |
96 | 96 |
|
... | ... | |
154 | 154 |
if self.op.mode == constants.EXPORT_MODE_REMOTE: |
155 | 155 |
salt = utils.GenerateSecret(8) |
156 | 156 |
|
157 |
feedback_fn("Generating X509 certificate on %s" % instance.primary_node) |
|
157 |
feedback_fn("Generating X509 certificate on %s" % |
|
158 |
self.cfg.GetNodeName(instance.primary_node)) |
|
158 | 159 |
result = self.rpc.call_x509_cert_create(instance.primary_node, |
159 | 160 |
constants.RIE_CERT_VALIDITY) |
160 |
result.Raise("Can't create X509 key and certificate on %s" % result.node) |
|
161 |
result.Raise("Can't create X509 key and certificate on %s" % |
|
162 |
self.cfg.GetNodeName(result.node)) |
|
161 | 163 |
|
162 | 164 |
(name, cert_pem) = result.payload |
163 | 165 |
|
... | ... | |
203 | 205 |
|
204 | 206 |
# Lock all nodes for local exports |
205 | 207 |
if self.op.mode == constants.EXPORT_MODE_LOCAL: |
208 |
(self.op.target_node_uuid, self.op.target_node) = \ |
|
209 |
ExpandNodeUuidAndName(self.cfg, self.op.target_node_uuid, |
|
210 |
self.op.target_node) |
|
206 | 211 |
# FIXME: lock only instance primary and destination node |
207 | 212 |
# |
208 | 213 |
# Sad but true, for now we have to lock all nodes, as we don't know where |
... | ... | |
248 | 253 |
nl = [self.cfg.GetMasterNode(), self.instance.primary_node] |
249 | 254 |
|
250 | 255 |
if self.op.mode == constants.EXPORT_MODE_LOCAL: |
251 |
nl.append(self.op.target_node) |
|
256 |
nl.append(self.op.target_node_uuid)
|
|
252 | 257 |
|
253 | 258 |
return (nl, nl) |
254 | 259 |
|
... | ... | |
272 | 277 |
" down before", errors.ECODE_STATE) |
273 | 278 |
|
274 | 279 |
if self.op.mode == constants.EXPORT_MODE_LOCAL: |
275 |
self.op.target_node = ExpandNodeName(self.cfg, self.op.target_node) |
|
276 |
self.dst_node = self.cfg.GetNodeInfo(self.op.target_node) |
|
280 |
self.dst_node = self.cfg.GetNodeInfo(self.op.target_node_uuid) |
|
277 | 281 |
assert self.dst_node is not None |
278 | 282 |
|
279 |
CheckNodeOnline(self, self.dst_node.name)
|
|
280 |
CheckNodeNotDrained(self, self.dst_node.name)
|
|
283 |
CheckNodeOnline(self, self.dst_node.uuid)
|
|
284 |
CheckNodeNotDrained(self, self.dst_node.uuid)
|
|
281 | 285 |
|
282 | 286 |
self._cds = None |
283 | 287 |
self.dest_disk_info = None |
... | ... | |
355 | 359 |
""" |
356 | 360 |
assert self.op.mode != constants.EXPORT_MODE_REMOTE |
357 | 361 |
|
358 |
nodelist = self.cfg.GetNodeList()
|
|
359 |
nodelist.remove(self.dst_node.name)
|
|
362 |
node_uuids = self.cfg.GetNodeList()
|
|
363 |
node_uuids.remove(self.dst_node.uuid)
|
|
360 | 364 |
|
361 | 365 |
# on one-node clusters nodelist will be empty after the removal |
362 | 366 |
# if we proceed the backup would be removed because OpBackupQuery |
363 | 367 |
# substitutes an empty list with the full cluster node list. |
364 | 368 |
iname = self.instance.name |
365 |
if nodelist:
|
|
369 |
if node_uuids:
|
|
366 | 370 |
feedback_fn("Removing old exports for instance %s" % iname) |
367 |
exportlist = self.rpc.call_export_list(nodelist)
|
|
368 |
for node in exportlist: |
|
369 |
if exportlist[node].fail_msg: |
|
371 |
exportlist = self.rpc.call_export_list(node_uuids)
|
|
372 |
for node_uuid in exportlist:
|
|
373 |
if exportlist[node_uuid].fail_msg:
|
|
370 | 374 |
continue |
371 |
if iname in exportlist[node].payload: |
|
372 |
msg = self.rpc.call_export_remove(node, iname).fail_msg |
|
375 |
if iname in exportlist[node_uuid].payload:
|
|
376 |
msg = self.rpc.call_export_remove(node_uuid, iname).fail_msg
|
|
373 | 377 |
if msg: |
374 | 378 |
self.LogWarning("Could not remove older export for instance %s" |
375 |
" on node %s: %s", iname, node, msg) |
|
379 |
" on node %s: %s", iname, |
|
380 |
self.cfg.GetNodeName(node_uuid), msg) |
|
376 | 381 |
|
377 | 382 |
def Exec(self, feedback_fn): |
378 | 383 |
"""Export an instance to an image in the cluster. |
... | ... | |
381 | 386 |
assert self.op.mode in constants.EXPORT_MODES |
382 | 387 |
|
383 | 388 |
instance = self.instance |
384 |
src_node = instance.primary_node |
|
389 |
src_node_uuid = instance.primary_node
|
|
385 | 390 |
|
386 | 391 |
if self.op.shutdown: |
387 | 392 |
# shutdown the instance, but not the disks |
388 | 393 |
feedback_fn("Shutting down instance %s" % instance.name) |
389 |
result = self.rpc.call_instance_shutdown(src_node, instance, |
|
394 |
result = self.rpc.call_instance_shutdown(src_node_uuid, instance,
|
|
390 | 395 |
self.op.shutdown_timeout, |
391 | 396 |
self.op.reason) |
392 | 397 |
# TODO: Maybe ignore failures if ignore_remove_failures is set |
393 | 398 |
result.Raise("Could not shutdown instance %s on" |
394 |
" node %s" % (instance.name, src_node)) |
|
399 |
" node %s" % (instance.name, |
|
400 |
self.cfg.GetNodeName(src_node_uuid))) |
|
395 | 401 |
|
396 | 402 |
# set the disks ID correctly since call_instance_start needs the |
397 | 403 |
# correct drbd minor to create the symlinks |
398 | 404 |
for disk in instance.disks: |
399 |
self.cfg.SetDiskID(disk, src_node) |
|
405 |
self.cfg.SetDiskID(disk, src_node_uuid)
|
|
400 | 406 |
|
401 | 407 |
activate_disks = not instance.disks_active |
402 | 408 |
|
... | ... | |
416 | 422 |
not self.op.remove_instance): |
417 | 423 |
assert not activate_disks |
418 | 424 |
feedback_fn("Starting instance %s" % instance.name) |
419 |
result = self.rpc.call_instance_start(src_node, |
|
425 |
result = self.rpc.call_instance_start(src_node_uuid,
|
|
420 | 426 |
(instance, None, None), False, |
421 | 427 |
self.op.reason) |
422 | 428 |
msg = result.fail_msg |
... | ... | |
515 | 521 |
locked_nodes = self.owned_locks(locking.LEVEL_NODE) |
516 | 522 |
exportlist = self.rpc.call_export_list(locked_nodes) |
517 | 523 |
found = False |
518 |
for node in exportlist: |
|
519 |
msg = exportlist[node].fail_msg |
|
524 |
for node_uuid in exportlist:
|
|
525 |
msg = exportlist[node_uuid].fail_msg
|
|
520 | 526 |
if msg: |
521 |
self.LogWarning("Failed to query node %s (continuing): %s", node, msg) |
|
527 |
self.LogWarning("Failed to query node %s (continuing): %s", |
|
528 |
self.cfg.GetNodeName(node_uuid), msg) |
|
522 | 529 |
continue |
523 |
if instance_name in exportlist[node].payload: |
|
530 |
if instance_name in exportlist[node_uuid].payload:
|
|
524 | 531 |
found = True |
525 |
result = self.rpc.call_export_remove(node, instance_name) |
|
532 |
result = self.rpc.call_export_remove(node_uuid, instance_name)
|
|
526 | 533 |
msg = result.fail_msg |
527 | 534 |
if msg: |
528 | 535 |
logging.error("Could not remove export for instance %s" |
529 |
" on node %s: %s", instance_name, node, msg) |
|
536 |
" on node %s: %s", instance_name, |
|
537 |
self.cfg.GetNodeName(node_uuid), msg) |
|
530 | 538 |
|
531 | 539 |
if fqdn_warn and not found: |
532 | 540 |
feedback_fn("Export not found. If trying to remove an export belonging" |
b/lib/cmdlib/base.py | ||
---|---|---|
181 | 181 |
} |
182 | 182 |
# Acquire just two nodes |
183 | 183 |
self.needed_locks = { |
184 |
locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
|
|
184 |
locking.LEVEL_NODE: ['node1-uuid', 'node2-uuid'],
|
|
185 | 185 |
} |
186 | 186 |
# Acquire no locks |
187 | 187 |
self.needed_locks = {} # No, you can't leave it to the default value None |
... | ... | |
269 | 269 |
def BuildHooksNodes(self): |
270 | 270 |
"""Build list of nodes to run LU's hooks. |
271 | 271 |
|
272 |
@rtype: tuple; (list, list) |
|
273 |
@return: Tuple containing a list of node names on which the hook |
|
274 |
should run before the execution and a list of node names on which the |
|
275 |
hook should run after the execution. No nodes should be returned as an |
|
276 |
empty list (and not None). |
|
272 |
@rtype: tuple; (list, list) or (list, list, list) |
|
273 |
@return: Tuple containing a list of node UUIDs on which the hook |
|
274 |
should run before the execution and a list of node UUIDs on which the |
|
275 |
hook should run after the execution. As it might be possible that the |
|
276 |
node UUID is not known at the time this method is invoked, an optional |
|
277 |
third list can be added which contains node names on which the hook |
|
278 |
should run after the execution (in case of node add, for instance). |
|
279 |
No nodes should be returned as an empty list (and not None). |
|
277 | 280 |
@note: If the C{HPATH} attribute of the LU class is C{None}, this function |
278 | 281 |
will not be called. |
279 | 282 |
|
... | ... | |
356 | 359 |
# For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the |
357 | 360 |
# future we might want to have different behaviors depending on the value |
358 | 361 |
# of self.recalculate_locks[locking.LEVEL_NODE] |
359 |
wanted_nodes = [] |
|
362 |
wanted_node_uuids = []
|
|
360 | 363 |
locked_i = self.owned_locks(locking.LEVEL_INSTANCE) |
361 | 364 |
for _, instance in self.cfg.GetMultiInstanceInfo(locked_i): |
362 |
wanted_nodes.append(instance.primary_node) |
|
365 |
wanted_node_uuids.append(instance.primary_node)
|
|
363 | 366 |
if not primary_only: |
364 |
wanted_nodes.extend(instance.secondary_nodes) |
|
367 |
wanted_node_uuids.extend(instance.secondary_nodes)
|
|
365 | 368 |
|
366 | 369 |
if self.recalculate_locks[level] == constants.LOCKS_REPLACE: |
367 |
self.needed_locks[level] = wanted_nodes |
|
370 |
self.needed_locks[level] = wanted_node_uuids
|
|
368 | 371 |
elif self.recalculate_locks[level] == constants.LOCKS_APPEND: |
369 |
self.needed_locks[level].extend(wanted_nodes) |
|
372 |
self.needed_locks[level].extend(wanted_node_uuids)
|
|
370 | 373 |
else: |
371 | 374 |
raise errors.ProgrammerError("Unknown recalculation mode") |
372 | 375 |
|
b/lib/cmdlib/cluster.py | ||
---|---|---|
71 | 71 |
""" |
72 | 72 |
master_params = self.cfg.GetMasterNetworkParameters() |
73 | 73 |
ems = self.cfg.GetUseExternalMipScript() |
74 |
result = self.rpc.call_node_activate_master_ip(master_params.name,
|
|
74 |
result = self.rpc.call_node_activate_master_ip(master_params.uuid,
|
|
75 | 75 |
master_params, ems) |
76 | 76 |
result.Raise("Could not activate the master IP") |
77 | 77 |
|
... | ... | |
86 | 86 |
""" |
87 | 87 |
master_params = self.cfg.GetMasterNetworkParameters() |
88 | 88 |
ems = self.cfg.GetUseExternalMipScript() |
89 |
result = self.rpc.call_node_deactivate_master_ip(master_params.name,
|
|
89 |
result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
|
|
90 | 90 |
master_params, ems) |
91 | 91 |
result.Raise("Could not deactivate the master IP") |
92 | 92 |
|
... | ... | |
163 | 163 |
master_params = self.cfg.GetMasterNetworkParameters() |
164 | 164 |
|
165 | 165 |
# Run post hooks on master node before it's removed |
166 |
RunPostHook(self, master_params.name)
|
|
166 |
RunPostHook(self, self.cfg.GetNodeName(master_params.uuid))
|
|
167 | 167 |
|
168 | 168 |
ems = self.cfg.GetUseExternalMipScript() |
169 |
result = self.rpc.call_node_deactivate_master_ip(master_params.name,
|
|
169 |
result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
|
|
170 | 170 |
master_params, ems) |
171 | 171 |
result.Warn("Error disabling the master IP address", self.LogWarning) |
172 |
return master_params.name
|
|
172 |
return master_params.uuid
|
|
173 | 173 |
|
174 | 174 |
|
175 | 175 |
class LUClusterPostInit(LogicalUnit): |
... | ... | |
232 | 232 |
|
233 | 233 |
if query.CQ_CONFIG in self.requested_data: |
234 | 234 |
cluster = lu.cfg.GetClusterInfo() |
235 |
nodes = lu.cfg.GetAllNodesInfo() |
|
235 | 236 |
else: |
236 | 237 |
cluster = NotImplemented |
238 |
nodes = NotImplemented |
|
237 | 239 |
|
238 | 240 |
if query.CQ_QUEUE_DRAINED in self.requested_data: |
239 | 241 |
drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE) |
... | ... | |
241 | 243 |
drain_flag = NotImplemented |
242 | 244 |
|
243 | 245 |
if query.CQ_WATCHER_PAUSE in self.requested_data: |
244 |
master_name = lu.cfg.GetMasterNode()
|
|
246 |
master_node_uuid = lu.cfg.GetMasterNode()
|
|
245 | 247 |
|
246 |
result = lu.rpc.call_get_watcher_pause(master_name)
|
|
248 |
result = lu.rpc.call_get_watcher_pause(master_node_uuid)
|
|
247 | 249 |
result.Raise("Can't retrieve watcher pause from master node '%s'" % |
248 |
master_name)
|
|
250 |
lu.cfg.GetMasterNodeName())
|
|
249 | 251 |
|
250 | 252 |
watcher_pause = result.payload |
251 | 253 |
else: |
252 | 254 |
watcher_pause = NotImplemented |
253 | 255 |
|
254 |
return query.ClusterQueryData(cluster, drain_flag, watcher_pause) |
|
256 |
return query.ClusterQueryData(cluster, nodes, drain_flag, watcher_pause)
|
|
255 | 257 |
|
256 | 258 |
|
257 | 259 |
class LUClusterQuery(NoHooksLU): |
... | ... | |
290 | 292 |
"export_version": constants.EXPORT_VERSION, |
291 | 293 |
"architecture": runtime.GetArchInfo(), |
292 | 294 |
"name": cluster.cluster_name, |
293 |
"master": cluster.master_node,
|
|
295 |
"master": self.cfg.GetMasterNodeName(),
|
|
294 | 296 |
"default_hypervisor": cluster.primary_hypervisor, |
295 | 297 |
"enabled_hypervisors": cluster.enabled_hypervisors, |
296 | 298 |
"hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name]) |
... | ... | |
405 | 407 |
# shutdown the master IP |
406 | 408 |
master_params = self.cfg.GetMasterNetworkParameters() |
407 | 409 |
ems = self.cfg.GetUseExternalMipScript() |
408 |
result = self.rpc.call_node_deactivate_master_ip(master_params.name,
|
|
410 |
result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
|
|
409 | 411 |
master_params, ems) |
410 | 412 |
result.Raise("Could not disable the master role") |
411 | 413 |
|
... | ... | |
419 | 421 |
ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE) |
420 | 422 |
node_list = self.cfg.GetOnlineNodeList() |
421 | 423 |
try: |
422 |
node_list.remove(master_params.name)
|
|
424 |
node_list.remove(master_params.uuid)
|
|
423 | 425 |
except ValueError: |
424 | 426 |
pass |
425 | 427 |
UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE) |
426 | 428 |
finally: |
427 | 429 |
master_params.ip = new_ip |
428 |
result = self.rpc.call_node_activate_master_ip(master_params.name,
|
|
430 |
result = self.rpc.call_node_activate_master_ip(master_params.uuid,
|
|
429 | 431 |
master_params, ems) |
430 | 432 |
result.Warn("Could not re-enable the master role on the master," |
431 | 433 |
" please restart manually", self.LogWarning) |
... | ... | |
523 | 525 |
"Not owning correct locks" |
524 | 526 |
assert not self.owned_locks(locking.LEVEL_NODE) |
525 | 527 |
|
526 |
es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
|
|
527 |
per_node_disks.keys())
|
|
528 |
es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, |
|
529 |
per_node_disks.keys()) |
|
528 | 530 |
|
529 | 531 |
changed = [] |
530 |
for node, dskl in per_node_disks.items(): |
|
532 |
for node_uuid, dskl in per_node_disks.items():
|
|
531 | 533 |
newl = [v[2].Copy() for v in dskl] |
532 | 534 |
for dsk in newl: |
533 |
self.cfg.SetDiskID(dsk, node) |
|
534 |
result = self.rpc.call_blockdev_getdimensions(node, newl) |
|
535 |
self.cfg.SetDiskID(dsk, node_uuid) |
|
536 |
node_name = self.cfg.GetNodeName(node_uuid) |
|
537 |
result = self.rpc.call_blockdev_getdimensions(node_uuid, newl) |
|
535 | 538 |
if result.fail_msg: |
536 | 539 |
self.LogWarning("Failure in blockdev_getdimensions call to node" |
537 |
" %s, ignoring", node) |
|
540 |
" %s, ignoring", node_name)
|
|
538 | 541 |
continue |
539 | 542 |
if len(result.payload) != len(dskl): |
540 | 543 |
logging.warning("Invalid result from node %s: len(dksl)=%d," |
541 |
" result.payload=%s", node, len(dskl), result.payload) |
|
544 |
" result.payload=%s", node_name, len(dskl), |
|
545 |
result.payload) |
|
542 | 546 |
self.LogWarning("Invalid result from node %s, ignoring node results", |
543 |
node) |
|
547 |
node_name)
|
|
544 | 548 |
continue |
545 | 549 |
for ((instance, idx, disk), dimensions) in zip(dskl, result.payload): |
546 | 550 |
if dimensions is None: |
... | ... | |
565 | 569 |
disk.size = size |
566 | 570 |
self.cfg.Update(instance, feedback_fn) |
567 | 571 |
changed.append((instance.name, idx, "size", size)) |
568 |
if es_flags[node]: |
|
572 |
if es_flags[node_uuid]:
|
|
569 | 573 |
if spindles is None: |
570 | 574 |
self.LogWarning("Disk %d of instance %s did not return valid" |
571 | 575 |
" spindles information, ignoring", idx, |
... | ... | |
666 | 670 |
mn = self.cfg.GetMasterNode() |
667 | 671 |
return ([mn], [mn]) |
668 | 672 |
|
669 |
def _CheckVgName(self, node_list, enabled_disk_templates,
|
|
673 |
def _CheckVgName(self, node_uuids, enabled_disk_templates,
|
|
670 | 674 |
new_enabled_disk_templates): |
671 | 675 |
"""Check the consistency of the vg name on all nodes and in case it gets |
672 | 676 |
unset whether there are instances still using it. |
... | ... | |
682 | 686 |
(self.cfg.GetVGName() is not None and |
683 | 687 |
utils.LvmGetsEnabled(enabled_disk_templates, |
684 | 688 |
new_enabled_disk_templates)): |
685 |
self._CheckVgNameOnNodes(node_list)
|
|
689 |
self._CheckVgNameOnNodes(node_uuids)
|
|
686 | 690 |
|
687 |
def _CheckVgNameOnNodes(self, node_list):
|
|
691 |
def _CheckVgNameOnNodes(self, node_uuids):
|
|
688 | 692 |
"""Check the status of the volume group on each node. |
689 | 693 |
|
690 | 694 |
""" |
691 |
vglist = self.rpc.call_vg_list(node_list)
|
|
692 |
for node in node_list:
|
|
693 |
msg = vglist[node].fail_msg |
|
695 |
vglist = self.rpc.call_vg_list(node_uuids)
|
|
696 |
for node_uuid in node_uuids:
|
|
697 |
msg = vglist[node_uuid].fail_msg
|
|
694 | 698 |
if msg: |
695 | 699 |
# ignoring down node |
696 | 700 |
self.LogWarning("Error while gathering data on node %s" |
697 |
" (ignoring node): %s", node, msg) |
|
701 |
" (ignoring node): %s", |
|
702 |
self.cfg.GetNodeName(node_uuid), msg) |
|
698 | 703 |
continue |
699 |
vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload, |
|
704 |
vgstatus = utils.CheckVolumeGroupSize(vglist[node_uuid].payload,
|
|
700 | 705 |
self.op.vg_name, |
701 | 706 |
constants.MIN_VG_SIZE) |
702 | 707 |
if vgstatus: |
703 | 708 |
raise errors.OpPrereqError("Error on node '%s': %s" % |
704 |
(node, vgstatus), errors.ECODE_ENVIRON) |
|
709 |
(self.cfg.GetNodeName(node_uuid), vgstatus), |
|
710 |
errors.ECODE_ENVIRON) |
|
705 | 711 |
|
706 | 712 |
def _GetEnabledDiskTemplates(self, cluster): |
707 | 713 |
"""Determines the enabled disk templates and the subset of disk templates |
... | ... | |
732 | 738 |
" drbd-based instances exist", |
733 | 739 |
errors.ECODE_INVAL) |
734 | 740 |
|
735 |
node_list = self.owned_locks(locking.LEVEL_NODE)
|
|
741 |
node_uuids = self.owned_locks(locking.LEVEL_NODE)
|
|
736 | 742 |
self.cluster = cluster = self.cfg.GetClusterInfo() |
737 | 743 |
|
738 |
vm_capable_nodes = [node.name
|
|
739 |
for node in self.cfg.GetAllNodesInfo().values() |
|
740 |
if node.name in node_list and node.vm_capable]
|
|
744 |
vm_capable_node_uuids = [node.uuid
|
|
745 |
for node in self.cfg.GetAllNodesInfo().values()
|
|
746 |
if node.uuid in node_uuids and node.vm_capable]
|
|
741 | 747 |
|
742 | 748 |
(enabled_disk_templates, new_enabled_disk_templates) = \ |
743 | 749 |
self._GetEnabledDiskTemplates(cluster) |
744 | 750 |
|
745 |
self._CheckVgName(vm_capable_nodes, enabled_disk_templates, |
|
751 |
self._CheckVgName(vm_capable_node_uuids, enabled_disk_templates,
|
|
746 | 752 |
new_enabled_disk_templates) |
747 | 753 |
|
748 | 754 |
if self.op.drbd_helper: |
749 | 755 |
# checks given drbd helper on all nodes |
750 |
helpers = self.rpc.call_drbd_helper(node_list)
|
|
751 |
for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
|
|
756 |
helpers = self.rpc.call_drbd_helper(node_uuids)
|
|
757 |
for (_, ninfo) in self.cfg.GetMultiNodeInfo(node_uuids):
|
|
752 | 758 |
if ninfo.offline: |
753 |
self.LogInfo("Not checking drbd helper on offline node %s", node) |
|
759 |
self.LogInfo("Not checking drbd helper on offline node %s", |
|
760 |
ninfo.name) |
|
754 | 761 |
continue |
755 |
msg = helpers[node].fail_msg
|
|
762 |
msg = helpers[ninfo.uuid].fail_msg
|
|
756 | 763 |
if msg: |
757 | 764 |
raise errors.OpPrereqError("Error checking drbd helper on node" |
758 |
" '%s': %s" % (node, msg),
|
|
765 |
" '%s': %s" % (ninfo.name, msg),
|
|
759 | 766 |
errors.ECODE_ENVIRON) |
760 |
node_helper = helpers[node].payload
|
|
767 |
node_helper = helpers[ninfo.uuid].payload
|
|
761 | 768 |
if node_helper != self.op.drbd_helper: |
762 | 769 |
raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" % |
763 |
(node, node_helper), errors.ECODE_ENVIRON) |
|
770 |
(ninfo.name, node_helper), |
|
771 |
errors.ECODE_ENVIRON) |
|
764 | 772 |
|
765 | 773 |
# validate params changes |
766 | 774 |
if self.op.beparams: |
... | ... | |
800 | 808 |
violations = set() |
801 | 809 |
for group in self.cfg.GetAllNodeGroupsInfo().values(): |
802 | 810 |
instances = frozenset([inst for inst in all_instances |
803 |
if compat.any(node in group.members
|
|
804 |
for node in inst.all_nodes)])
|
|
811 |
if compat.any(nuuid in group.members
|
|
812 |
for nuuid in inst.all_nodes)])
|
|
805 | 813 |
new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy) |
806 | 814 |
ipol = masterd.instance.CalculateGroupIPolicy(cluster, group) |
807 | 815 |
new = ComputeNewInstanceViolations(ipol, |
... | ... | |
920 | 928 |
hv_class = hypervisor.GetHypervisorClass(hv_name) |
921 | 929 |
utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES) |
922 | 930 |
hv_class.CheckParameterSyntax(hv_params) |
923 |
CheckHVParams(self, node_list, hv_name, hv_params)
|
|
931 |
CheckHVParams(self, node_uuids, hv_name, hv_params)
|
|
924 | 932 |
|
925 | 933 |
self._CheckDiskTemplateConsistency() |
926 | 934 |
|
... | ... | |
935 | 943 |
new_osp = objects.FillDict(cluster_defaults, hv_params) |
936 | 944 |
hv_class = hypervisor.GetHypervisorClass(hv_name) |
937 | 945 |
hv_class.CheckParameterSyntax(new_osp) |
938 |
CheckHVParams(self, node_list, hv_name, new_osp)
|
|
946 |
CheckHVParams(self, node_uuids, hv_name, new_osp)
|
|
939 | 947 |
|
940 | 948 |
if self.op.default_iallocator: |
941 | 949 |
alloc_script = utils.FindFile(self.op.default_iallocator, |
... | ... | |
1095 | 1103 |
ems = self.cfg.GetUseExternalMipScript() |
1096 | 1104 |
feedback_fn("Shutting down master ip on the current netdev (%s)" % |
1097 | 1105 |
self.cluster.master_netdev) |
1098 |
result = self.rpc.call_node_deactivate_master_ip(master_params.name,
|
|
1106 |
result = self.rpc.call_node_deactivate_master_ip(master_params.uuid,
|
|
1099 | 1107 |
master_params, ems) |
1100 | 1108 |
result.Raise("Could not disable the master ip") |
1101 | 1109 |
feedback_fn("Changing master_netdev from %s to %s" % |
... | ... | |
1105 | 1113 |
if self.op.master_netmask: |
1106 | 1114 |
master_params = self.cfg.GetMasterNetworkParameters() |
1107 | 1115 |
feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask) |
1108 |
result = self.rpc.call_node_change_master_netmask(master_params.name, |
|
1109 |
master_params.netmask, |
|
1110 |
self.op.master_netmask, |
|
1111 |
master_params.ip, |
|
1112 |
master_params.netdev) |
|
1116 |
result = self.rpc.call_node_change_master_netmask( |
|
1117 |
master_params.uuid, master_params.netmask, |
|
1118 |
self.op.master_netmask, master_params.ip, |
|
1119 |
master_params.netdev) |
|
1113 | 1120 |
result.Warn("Could not change the master IP netmask", feedback_fn) |
1114 | 1121 |
self.cluster.master_netmask = self.op.master_netmask |
1115 | 1122 |
|
... | ... | |
1120 | 1127 |
feedback_fn("Starting the master ip on the new master netdev (%s)" % |
1121 | 1128 |
self.op.master_netdev) |
1122 | 1129 |
ems = self.cfg.GetUseExternalMipScript() |
1123 |
result = self.rpc.call_node_activate_master_ip(master_params.name,
|
|
1130 |
result = self.rpc.call_node_activate_master_ip(master_params.uuid,
|
|
1124 | 1131 |
master_params, ems) |
1125 | 1132 |
result.Warn("Could not re-enable the master ip on the master," |
1126 | 1133 |
" please restart manually", self.LogWarning) |
... | ... | |
1352 | 1359 |
# occur, it would never be caught by VerifyGroup, which only acts on |
1353 | 1360 |
# nodes/instances reachable from existing node groups. |
1354 | 1361 |
|
1355 |
dangling_nodes = set(node.name for node in self.all_node_info.values()
|
|
1362 |
dangling_nodes = set(node for node in self.all_node_info.values() |
|
1356 | 1363 |
if node.group not in self.all_group_info) |
1357 | 1364 |
|
1358 | 1365 |
dangling_instances = {} |
1359 | 1366 |
no_node_instances = [] |
1360 | 1367 |
|
1361 | 1368 |
for inst in self.all_inst_info.values(): |
1362 |
if inst.primary_node in dangling_nodes:
|
|
1369 |
if inst.primary_node in [node.uuid for node in dangling_nodes]:
|
|
1363 | 1370 |
dangling_instances.setdefault(inst.primary_node, []).append(inst.name) |
1364 | 1371 |
elif inst.primary_node not in self.all_node_info: |
1365 | 1372 |
no_node_instances.append(inst.name) |
... | ... | |
1367 | 1374 |
pretty_dangling = [ |
1368 | 1375 |
"%s (%s)" % |
1369 | 1376 |
(node.name, |
1370 |
utils.CommaJoin(dangling_instances.get(node.name,
|
|
1377 |
utils.CommaJoin(dangling_instances.get(node.uuid,
|
|
1371 | 1378 |
["no instances"]))) |
1372 | 1379 |
for node in dangling_nodes] |
1373 | 1380 |
|
... | ... | |
1397 | 1404 |
class NodeImage(object): |
1398 | 1405 |
"""A class representing the logical and physical status of a node. |
1399 | 1406 |
|
1400 |
@type name: string
|
|
1401 |
@ivar name: the node name to which this object refers
|
|
1407 |
@type uuid: string
|
|
1408 |
@ivar uuid: the node UUID to which this object refers
|
|
1402 | 1409 |
@ivar volumes: a structure as returned from |
1403 | 1410 |
L{ganeti.backend.GetVolumeList} (runtime) |
1404 | 1411 |
@ivar instances: a list of running instances (runtime) |
... | ... | |
1430 | 1437 |
@ivar pv_max: size in MiB of the biggest PVs |
1431 | 1438 |
|
1432 | 1439 |
""" |
1433 |
def __init__(self, offline=False, name=None, vm_capable=True):
|
|
1434 |
self.name = name
|
|
1440 |
def __init__(self, offline=False, uuid=None, vm_capable=True):
|
|
1441 |
self.uuid = uuid
|
|
1435 | 1442 |
self.volumes = {} |
1436 | 1443 |
self.instances = [] |
1437 | 1444 |
self.pinst = [] |
... | ... | |
1494 | 1501 |
assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP) |
1495 | 1502 |
self.group_info = self.cfg.GetNodeGroup(self.group_uuid) |
1496 | 1503 |
|
1497 |
group_nodes = set(self.group_info.members) |
|
1504 |
group_node_uuids = set(self.group_info.members)
|
|
1498 | 1505 |
group_instances = \ |
1499 | 1506 |
self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True) |
1500 | 1507 |
|
1501 |
unlocked_nodes = \ |
|
1502 |
group_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) |
|
1508 |
unlocked_node_uuids = \
|
|
1509 |
group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE))
|
|
1503 | 1510 |
|
1504 | 1511 |
unlocked_instances = \ |
1505 | 1512 |
group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE)) |
1506 | 1513 |
|
1507 |
if unlocked_nodes: |
|
1508 |
raise errors.OpPrereqError("Missing lock for nodes: %s" % |
|
1509 |
utils.CommaJoin(unlocked_nodes), |
|
1510 |
errors.ECODE_STATE) |
|
1514 |
if unlocked_node_uuids: |
|
1515 |
raise errors.OpPrereqError( |
|
1516 |
"Missing lock for nodes: %s" % |
|
1517 |
utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)), |
|
1518 |
errors.ECODE_STATE) |
|
1511 | 1519 |
|
1512 | 1520 |
if unlocked_instances: |
1513 | 1521 |
raise errors.OpPrereqError("Missing lock for instances: %s" % |
... | ... | |
1517 | 1525 |
self.all_node_info = self.cfg.GetAllNodesInfo() |
1518 | 1526 |
self.all_inst_info = self.cfg.GetAllInstancesInfo() |
1519 | 1527 |
|
1520 |
self.my_node_names = utils.NiceSort(group_nodes) |
|
1521 |
self.my_inst_names = utils.NiceSort(group_instances) |
|
1522 |
|
|
1523 |
self.my_node_info = dict((name, self.all_node_info[name]) |
|
1524 |
for name in self.my_node_names) |
|
1528 |
self.my_node_uuids = group_node_uuids |
|
1529 |
self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid]) |
|
1530 |
for node_uuid in group_node_uuids) |
|
1525 | 1531 |
|
1532 |
self.my_inst_names = utils.NiceSort(group_instances) |
|
1526 | 1533 |
self.my_inst_info = dict((name, self.all_inst_info[name]) |
1527 | 1534 |
for name in self.my_inst_names) |
1528 | 1535 |
|
... | ... | |
1532 | 1539 |
|
1533 | 1540 |
for inst in self.my_inst_info.values(): |
1534 | 1541 |
if inst.disk_template in constants.DTS_INT_MIRROR: |
1535 |
for nname in inst.all_nodes:
|
|
1536 |
if self.all_node_info[nname].group != self.group_uuid:
|
|
1537 |
extra_lv_nodes.add(nname)
|
|
1542 |
for nuuid in inst.all_nodes:
|
|
1543 |
if self.all_node_info[nuuid].group != self.group_uuid:
|
|
1544 |
extra_lv_nodes.add(nuuid)
|
|
1538 | 1545 |
|
1539 | 1546 |
unlocked_lv_nodes = \ |
1540 | 1547 |
extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE)) |
... | ... | |
1560 | 1567 |
reasonable values in the response) |
1561 | 1568 |
|
1562 | 1569 |
""" |
1563 |
node = ninfo.name |
|
1570 |
node_name = ninfo.name
|
|
1564 | 1571 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
1565 | 1572 |
|
1566 | 1573 |
# main result, nresult should be a non-empty dict |
1567 | 1574 |
test = not nresult or not isinstance(nresult, dict) |
1568 |
_ErrorIf(test, constants.CV_ENODERPC, node, |
|
1575 |
_ErrorIf(test, constants.CV_ENODERPC, node_name,
|
|
1569 | 1576 |
"unable to verify node: no data returned") |
1570 | 1577 |
if test: |
1571 | 1578 |
return False |
... | ... | |
1576 | 1583 |
test = not (remote_version and |
1577 | 1584 |
isinstance(remote_version, (list, tuple)) and |
1578 | 1585 |
len(remote_version) == 2) |
1579 |
_ErrorIf(test, constants.CV_ENODERPC, node, |
|
1586 |
_ErrorIf(test, constants.CV_ENODERPC, node_name,
|
|
1580 | 1587 |
"connection to node returned invalid data") |
1581 | 1588 |
if test: |
1582 | 1589 |
return False |
1583 | 1590 |
|
1584 | 1591 |
test = local_version != remote_version[0] |
1585 |
_ErrorIf(test, constants.CV_ENODEVERSION, node, |
|
1592 |
_ErrorIf(test, constants.CV_ENODEVERSION, node_name,
|
|
1586 | 1593 |
"incompatible protocol versions: master %s," |
1587 | 1594 |
" node %s", local_version, remote_version[0]) |
1588 | 1595 |
if test: |
... | ... | |
1592 | 1599 |
|
1593 | 1600 |
# full package version |
1594 | 1601 |
self._ErrorIf(constants.RELEASE_VERSION != remote_version[1], |
1595 |
constants.CV_ENODEVERSION, node, |
|
1602 |
constants.CV_ENODEVERSION, node_name,
|
|
1596 | 1603 |
"software version mismatch: master %s, node %s", |
1597 | 1604 |
constants.RELEASE_VERSION, remote_version[1], |
1598 | 1605 |
code=self.ETYPE_WARNING) |
... | ... | |
1601 | 1608 |
if ninfo.vm_capable and isinstance(hyp_result, dict): |
1602 | 1609 |
for hv_name, hv_result in hyp_result.iteritems(): |
1603 | 1610 |
test = hv_result is not None |
1604 |
_ErrorIf(test, constants.CV_ENODEHV, node, |
|
1611 |
_ErrorIf(test, constants.CV_ENODEHV, node_name,
|
|
1605 | 1612 |
"hypervisor %s verify failure: '%s'", hv_name, hv_result) |
1606 | 1613 |
|
1607 | 1614 |
hvp_result = nresult.get(constants.NV_HVPARAMS, None) |
1608 | 1615 |
if ninfo.vm_capable and isinstance(hvp_result, list): |
1609 | 1616 |
for item, hv_name, hv_result in hvp_result: |
1610 |
_ErrorIf(True, constants.CV_ENODEHV, node, |
|
1617 |
_ErrorIf(True, constants.CV_ENODEHV, node_name,
|
|
1611 | 1618 |
"hypervisor %s parameter verify failure (source %s): %s", |
1612 | 1619 |
hv_name, item, hv_result) |
1613 | 1620 |
|
1614 | 1621 |
test = nresult.get(constants.NV_NODESETUP, |
1615 | 1622 |
["Missing NODESETUP results"]) |
1616 |
_ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s", |
|
1623 |
_ErrorIf(test, constants.CV_ENODESETUP, node_name, "node setup error: %s",
|
|
1617 | 1624 |
"; ".join(test)) |
1618 | 1625 |
|
1619 | 1626 |
return True |
... | ... | |
1629 | 1636 |
@param nvinfo_endtime: the end time of the RPC call |
1630 | 1637 |
|
1631 | 1638 |
""" |
1632 |
node = ninfo.name |
|
1639 |
node_name = ninfo.name
|
|
1633 | 1640 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
1634 | 1641 |
|
1635 | 1642 |
ntime = nresult.get(constants.NV_TIME, None) |
1636 | 1643 |
try: |
1637 | 1644 |
ntime_merged = utils.MergeTime(ntime) |
1638 | 1645 |
except (ValueError, TypeError): |
1639 |
_ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time") |
|
1646 |
_ErrorIf(True, constants.CV_ENODETIME, node_name, |
|
1647 |
"Node returned invalid time") |
|
1640 | 1648 |
return |
1641 | 1649 |
|
1642 | 1650 |
if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): |
... | ... | |
1646 | 1654 |
else: |
1647 | 1655 |
ntime_diff = None |
1648 | 1656 |
|
1649 |
_ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node, |
|
1657 |
_ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node_name,
|
|
1650 | 1658 |
"Node time diverges by at least %s from master node time", |
1651 | 1659 |
ntime_diff) |
1652 | 1660 |
|
... | ... | |
1664 | 1672 |
if vg_name is None: |
1665 | 1673 |
return |
1666 | 1674 |
|
1667 |
node = ninfo.name |
|
1675 |
node_name = ninfo.name
|
|
1668 | 1676 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
1669 | 1677 |
|
1670 | 1678 |
# checks vg existence and size > 20G |
1671 | 1679 |
vglist = nresult.get(constants.NV_VGLIST, None) |
1672 | 1680 |
test = not vglist |
1673 |
_ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups") |
|
1681 |
_ErrorIf(test, constants.CV_ENODELVM, node_name, |
|
1682 |
"unable to check volume groups") |
|
1674 | 1683 |
if not test: |
1675 | 1684 |
vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name, |
1676 | 1685 |
constants.MIN_VG_SIZE) |
1677 |
_ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus) |
|
1686 |
_ErrorIf(vgstatus, constants.CV_ENODELVM, node_name, vgstatus)
|
|
1678 | 1687 |
|
1679 | 1688 |
# Check PVs |
1680 | 1689 |
(errmsgs, pvminmax) = CheckNodePVs(nresult, self._exclusive_storage) |
1681 | 1690 |
for em in errmsgs: |
1682 |
self._Error(constants.CV_ENODELVM, node, em) |
|
1691 |
self._Error(constants.CV_ENODELVM, node_name, em)
|
|
1683 | 1692 |
if pvminmax is not None: |
1684 | 1693 |
(nimg.pv_min, nimg.pv_max) = pvminmax |
1685 | 1694 |
|
... | ... | |
1692 | 1701 |
|
1693 | 1702 |
""" |
1694 | 1703 |
node_versions = {} |
1695 |
for node, ndata in node_verify_infos.items(): |
|
1704 |
for node_uuid, ndata in node_verify_infos.items():
|
|
1696 | 1705 |
nresult = ndata.payload |
1697 | 1706 |
version = nresult.get(constants.NV_DRBDVERSION, "Missing DRBD version") |
1698 |
node_versions[node] = version |
|
1707 |
node_versions[node_uuid] = version
|
|
1699 | 1708 |
|
1700 | 1709 |
if len(set(node_versions.values())) > 1: |
1701 |
for node, version in sorted(node_versions.items()): |
|
1710 |
for node_uuid, version in sorted(node_versions.items()):
|
|
1702 | 1711 |
msg = "DRBD version mismatch: %s" % version |
1703 |
self._Error(constants.CV_ENODEDRBDHELPER, node, msg, |
|
1712 |
self._Error(constants.CV_ENODEDRBDHELPER, node_uuid, msg,
|
|
1704 | 1713 |
code=self.ETYPE_WARNING) |
1705 | 1714 |
|
1706 | 1715 |
def _VerifyGroupLVM(self, node_image, vg_name): |
... | ... | |
1745 | 1754 |
if not bridges: |
1746 | 1755 |
return |
1747 | 1756 |
|
1748 |
node = ninfo.name |
|
1757 |
node_name = ninfo.name
|
|
1749 | 1758 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
1750 | 1759 |
|
1751 | 1760 |
missing = nresult.get(constants.NV_BRIDGES, None) |
1752 | 1761 |
test = not isinstance(missing, list) |
1753 |
_ErrorIf(test, constants.CV_ENODENET, node, |
|
1762 |
_ErrorIf(test, constants.CV_ENODENET, node_name,
|
|
1754 | 1763 |
"did not return valid bridge information") |
1755 | 1764 |
if not test: |
1756 |
_ErrorIf(bool(missing), constants.CV_ENODENET, node, |
|
1765 |
_ErrorIf(bool(missing), constants.CV_ENODENET, node_name,
|
|
1757 | 1766 |
"missing bridges: %s" % utils.CommaJoin(sorted(missing))) |
1758 | 1767 |
|
1759 | 1768 |
def _VerifyNodeUserScripts(self, ninfo, nresult): |
... | ... | |
1764 | 1773 |
@param nresult: the remote results for the node |
1765 | 1774 |
|
1766 | 1775 |
""" |
1767 |
node = ninfo.name |
|
1776 |
node_name = ninfo.name
|
|
1768 | 1777 |
|
1769 | 1778 |
test = not constants.NV_USERSCRIPTS in nresult |
1770 |
self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node, |
|
1779 |
self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node_name,
|
|
1771 | 1780 |
"did not return user scripts information") |
1772 | 1781 |
|
1773 | 1782 |
broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None) |
1774 | 1783 |
if not test: |
1775 |
self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node, |
|
1784 |
self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node_name,
|
|
1776 | 1785 |
"user scripts not present or not executable: %s" % |
1777 | 1786 |
utils.CommaJoin(sorted(broken_scripts))) |
1778 | 1787 |
|
... | ... | |
1784 | 1793 |
@param nresult: the remote results for the node |
1785 | 1794 |
|
1786 | 1795 |
""" |
1787 |
node = ninfo.name |
|
1796 |
node_name = ninfo.name
|
|
1788 | 1797 |
_ErrorIf = self._ErrorIf # pylint: disable=C0103 |
1789 | 1798 |
|
1790 | 1799 |
test = constants.NV_NODELIST not in nresult |
1791 |
_ErrorIf(test, constants.CV_ENODESSH, node, |
|
1800 |
_ErrorIf(test, constants.CV_ENODESSH, node_name,
|
|
1792 | 1801 |
"node hasn't returned node ssh connectivity data") |
1793 | 1802 |
if not test: |
1794 | 1803 |
if nresult[constants.NV_NODELIST]: |
1795 | 1804 |
for a_node, a_msg in nresult[constants.NV_NODELIST].items(): |
1796 |
_ErrorIf(True, constants.CV_ENODESSH, node, |
|
1805 |
_ErrorIf(True, constants.CV_ENODESSH, node_name,
|
|
1797 | 1806 |
"ssh communication with node '%s': %s", a_node, a_msg) |
1798 | 1807 |
|
1799 | 1808 |
test = constants.NV_NODENETTEST not in nresult |
1800 |
_ErrorIf(test, constants.CV_ENODENET, node, |
|
1809 |
_ErrorIf(test, constants.CV_ENODENET, node_name,
|
|
1801 | 1810 |
"node hasn't returned node tcp connectivity data") |
1802 | 1811 |
if not test: |
1803 | 1812 |
if nresult[constants.NV_NODENETTEST]: |
1804 | 1813 |
nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys()) |
1805 | 1814 |
for anode in nlist: |
1806 |
_ErrorIf(True, constants.CV_ENODENET, node, |
|
1815 |
_ErrorIf(True, constants.CV_ENODENET, node_name,
|
|
1807 | 1816 |
"tcp communication with node '%s': %s", |
1808 | 1817 |
anode, nresult[constants.NV_NODENETTEST][anode]) |
1809 | 1818 |
|
1810 | 1819 |
test = constants.NV_MASTERIP not in nresult |
1811 |
_ErrorIf(test, constants.CV_ENODENET, node, |
|
1820 |
_ErrorIf(test, constants.CV_ENODENET, node_name,
|
|
1812 | 1821 |
"node hasn't returned node master IP reachability data") |
1813 | 1822 |
if not test: |
1814 | 1823 |
if not nresult[constants.NV_MASTERIP]: |
1815 |
if node == self.master_node:
|
|
1824 |
if ninfo.uuid == self.master_node:
|
|
1816 | 1825 |
msg = "the master node cannot reach the master IP (not configured?)" |
1817 | 1826 |
else: |
1818 | 1827 |
msg = "cannot reach the master IP" |
1819 |
_ErrorIf(True, constants.CV_ENODENET, node, msg) |
|
1828 |
_ErrorIf(True, constants.CV_ENODENET, node_name, msg)
|
|
1820 | 1829 |
|
1821 | 1830 |
def _VerifyInstance(self, instance, inst_config, node_image, |
1822 | 1831 |
diskstatus): |
... | ... | |
1850 | 1859 |
for volume in node_vol_should[node]: |
1851 | 1860 |
test = volume not in n_img.volumes |
1852 | 1861 |
_ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance, |
1853 |
"volume %s missing on node %s", volume, node) |
|
1862 |
"volume %s missing on node %s", volume, |
|
1863 |
self.cfg.GetNodeName(node)) |
|
1854 | 1864 |
|
1855 | 1865 |
if inst_config.admin_state == constants.ADMINST_UP: |
1856 | 1866 |
test = instance not in pnode_img.instances and not pnode_img.offline |
1857 | 1867 |
_ErrorIf(test, constants.CV_EINSTANCEDOWN, instance, |
1858 | 1868 |
"instance not running on its primary node %s", |
1859 |
pnode)
|
|
1869 |
self.cfg.GetNodeName(pnode))
|
|
1860 | 1870 |
_ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance, |
1861 | 1871 |
"instance is marked as running and lives on offline node %s", |
1862 |
pnode)
|
|
1872 |
self.cfg.GetNodeName(pnode))
|
|
1863 | 1873 |
|
1864 | 1874 |
diskdata = [(nname, success, status, idx) |
1865 | 1875 |
for (nname, disks) in diskstatus.items() |
... | ... | |
1874 | 1884 |
not success and not bad_snode, |
1875 | 1885 |
constants.CV_EINSTANCEFAULTYDISK, instance, |
1876 | 1886 |
"couldn't retrieve status for disk/%s on %s: %s", |
1877 |
idx, nname, bdev_status)
|
|
1887 |
idx, self.cfg.GetNodeName(nname), bdev_status)
|
|
1878 | 1888 |
_ErrorIf((inst_config.disks_active and |
1879 | 1889 |
success and bdev_status.ldisk_status == constants.LDS_FAULTY), |
1880 | 1890 |
constants.CV_EINSTANCEFAULTYDISK, instance, |
1881 |
"disk/%s on %s is faulty", idx, nname)
|
|
1891 |
"disk/%s on %s is faulty", idx, self.cfg.GetNodeName(nname))
|
|
1882 | 1892 |
|
1883 | 1893 |
_ErrorIf(pnode_img.rpc_fail and not pnode_img.offline, |
1884 | 1894 |
constants.CV_ENODERPC, pnode, "instance %s, connection to" |
... | ... | |
1890 | 1900 |
utils.CommaJoin(inst_config.secondary_nodes), |
1891 | 1901 |
code=self.ETYPE_WARNING) |
1892 | 1902 |
|
1893 |
es_flags = rpc.GetExclusiveStorageForNodeNames(self.cfg,
|
|
1894 |
inst_config.all_nodes)
|
|
1903 |
es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, |
|
1904 |
inst_config.all_nodes) |
|
1895 | 1905 |
if any(es_flags.values()): |
1896 | 1906 |
if inst_config.disk_template not in constants.DTS_EXCL_STORAGE: |
1897 | 1907 |
# Disk template not compatible with exclusive_storage: no instance |
... | ... | |
1902 | 1912 |
self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance, |
1903 | 1913 |
"instance has template %s, which is not supported on nodes" |
1904 | 1914 |
" that have exclusive storage set: %s", |
1905 |
inst_config.disk_template, utils.CommaJoin(es_nodes)) |
|
1915 |
inst_config.disk_template, |
|
1916 |
utils.CommaJoin(self.cfg.GetNodeNames(es_nodes))) |
|
1906 | 1917 |
for (idx, disk) in enumerate(inst_config.disks): |
1907 | 1918 |
_ErrorIf(disk.spindles is None, |
1908 | 1919 |
constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance, |
... | ... | |
1920 | 1931 |
[]).append(node) |
1921 | 1932 |
|
1922 | 1933 |
pretty_list = [ |
1923 |
"%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name) |
|
1934 |
"%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)), |
|
1935 |
groupinfo[group].name) |
|
1924 | 1936 |
# Sort so that we always list the primary node first. |
1925 | 1937 |
for group, nodes in sorted(instance_groups.items(), |
1926 | 1938 |
key=lambda (_, nodes): pnode in nodes, |
... | ... | |
1945 | 1957 |
# warn that the instance lives on offline nodes |
1946 | 1958 |
_ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance, |
1947 | 1959 |
"instance has offline secondary node(s) %s", |
1948 |
utils.CommaJoin(inst_nodes_offline))
|
|
1960 |
utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline)))
|
|
1949 | 1961 |
# ... or ghost/non-vm_capable nodes |
1950 | 1962 |
for node in inst_config.all_nodes: |
1951 | 1963 |
_ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE, |
1952 |
instance, "instance lives on ghost node %s", node) |
|
1964 |
instance, "instance lives on ghost node %s", |
|
1965 |
self.cfg.GetNodeName(node)) |
|
1953 | 1966 |
_ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE, |
1954 |
instance, "instance lives on non-vm_capable node %s", node) |
|
1967 |
instance, "instance lives on non-vm_capable node %s", |
|
1968 |
self.cfg.GetNodeName(node)) |
|
1955 | 1969 |
|
1956 | 1970 |
def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved): |
1957 | 1971 |
"""Verify if there are any unknown volumes in the cluster. |
... | ... | |
1963 | 1977 |
@param reserved: a FieldSet of reserved volume names |
1964 | 1978 |
|
1965 | 1979 |
""" |
1966 |
for node, n_img in node_image.items(): |
|
1980 |
for node_uuid, n_img in node_image.items():
|
|
1967 | 1981 |
if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or |
1968 |
self.all_node_info[node].group != self.group_uuid): |
|
1982 |
self.all_node_info[node_uuid].group != self.group_uuid):
|
|
1969 | 1983 |
# skip non-healthy nodes |
1970 | 1984 |
continue |
1971 | 1985 |
for volume in n_img.volumes: |
1972 |
test = ((node not in node_vol_should or |
|
1973 |
volume not in node_vol_should[node]) and |
|
1986 |
test = ((node_uuid not in node_vol_should or
|
|
1987 |
volume not in node_vol_should[node_uuid]) and
|
|
1974 | 1988 |
not reserved.Matches(volume)) |
1975 |
self._ErrorIf(test, constants.CV_ENODEORPHANLV, node, |
|
1989 |
self._ErrorIf(test, constants.CV_ENODEORPHANLV, |
|
1990 |
self.cfg.GetNodeName(node_uuid), |
|
1976 | 1991 |
"volume %s is unknown", volume) |
1977 | 1992 |
|
1978 | 1993 |
def _VerifyNPlusOneMemory(self, node_image, instance_cfg): |
... | ... | |
1983 | 1998 |
|
1984 | 1999 |
""" |
1985 | 2000 |
cluster_info = self.cfg.GetClusterInfo() |
1986 |
for node, n_img in node_image.items(): |
|
2001 |
for node_uuid, n_img in node_image.items():
|
|
1987 | 2002 |
# This code checks that every node which is now listed as |
1988 | 2003 |
# secondary has enough memory to host all instances it is |
1989 | 2004 |
# supposed to, should a single other node in the cluster fail. |
... | ... | |
1992 | 2007 |
# WARNING: we currently take into account down instances as well |
1993 | 2008 |
# as up ones, considering that even if they're down someone |
1994 | 2009 |
# might want to start them even in the event of a node failure. |
1995 |
if n_img.offline or self.all_node_info[node].group != self.group_uuid: |
|
2010 |
if n_img.offline or \ |
|
2011 |
self.all_node_info[node_uuid].group != self.group_uuid: |
|
1996 | 2012 |
# we're skipping nodes marked offline and nodes in other groups from |
1997 | 2013 |
# the N+1 warning, since most likely we don't have good memory |
1998 | 2014 |
# information from them; we already list instances living on such |
... | ... | |
2006 | 2022 |
if bep[constants.BE_AUTO_BALANCE]: |
2007 | 2023 |
needed_mem += bep[constants.BE_MINMEM] |
2008 | 2024 |
test = n_img.mfree < needed_mem |
2009 |
self._ErrorIf(test, constants.CV_ENODEN1, node, |
|
2025 |
self._ErrorIf(test, constants.CV_ENODEN1, |
|
2026 |
self.cfg.GetNodeName(node_uuid), |
|
2010 | 2027 |
"not enough memory to accomodate instance failovers" |
2011 | 2028 |
" should node %s fail (%dMiB needed, %dMiB available)", |
2012 |
prinode, needed_mem, n_img.mfree)
|
|
2029 |
self.cfg.GetNodeName(prinode), needed_mem, n_img.mfree)
|
|
2013 | 2030 |
|
2014 |
@classmethod |
|
2015 |
def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo, |
|
2031 |
def _VerifyFiles(self, nodes, master_node_uuid, all_nvinfo, |
|
2016 | 2032 |
(files_all, files_opt, files_mc, files_vm)): |
2017 | 2033 |
"""Verifies file checksums collected from all nodes. |
2018 | 2034 |
|
2019 |
@param errorif: Callback for reporting errors |
|
2020 |
@param nodeinfo: List of L{objects.Node} objects |
|
2021 |
@param master_node: Name of master node |
|
2035 |
@param nodes: List of L{objects.Node} objects |
|
2036 |
@param master_node_uuid: UUID of master node |
|
2022 | 2037 |
@param all_nvinfo: RPC results |
2023 | 2038 |
|
2024 | 2039 |
""" |
... | ... | |
2026 | 2041 |
files2nodefn = [ |
2027 | 2042 |
(files_all, None), |
2028 | 2043 |
(files_mc, lambda node: (node.master_candidate or |
2029 |
node.name == master_node)),
|
|
2044 |
node.uuid == master_node_uuid)),
|
|
2030 | 2045 |
(files_vm, lambda node: node.vm_capable), |
2031 | 2046 |
] |
2032 | 2047 |
|
... | ... | |
2034 | 2049 |
nodefiles = {} |
2035 | 2050 |
for (files, fn) in files2nodefn: |
2036 | 2051 |
if fn is None: |
2037 |
filenodes = nodeinfo
|
|
2052 |
filenodes = nodes
|
|
2038 | 2053 |
else: |
2039 |
filenodes = filter(fn, nodeinfo)
|
|
2054 |
filenodes = filter(fn, nodes)
|
|
2040 | 2055 |
nodefiles.update((filename, |
2041 |
frozenset(map(operator.attrgetter("name"), filenodes)))
|
|
2056 |
frozenset(map(operator.attrgetter("uuid"), filenodes)))
|
|
2042 | 2057 |
for filename in files) |
2043 | 2058 |
|
2044 | 2059 |
assert set(nodefiles) == (files_all | files_mc | files_vm) |
... | ... | |
2046 | 2061 |
fileinfo = dict((filename, {}) for filename in nodefiles) |
2047 | 2062 |
ignore_nodes = set() |
2048 | 2063 |
|
2049 |
for node in nodeinfo:
|
|
2064 |
for node in nodes:
|
|
2050 | 2065 |
if node.offline: |
2051 |
ignore_nodes.add(node.name)
|
|
2066 |
ignore_nodes.add(node.uuid)
|
|
2052 | 2067 |
continue |
2053 | 2068 |
|
2054 |
nresult = all_nvinfo[node.name]
|
|
2069 |
nresult = all_nvinfo[node.uuid]
|
|
2055 | 2070 |
|
2056 | 2071 |
if nresult.fail_msg or not nresult.payload: |
2057 | 2072 |
node_files = None |
... | ... | |
2062 | 2077 |
del fingerprints |
2063 | 2078 |
|
2064 | 2079 |
test = not (node_files and isinstance(node_files, dict)) |
2065 |
errorif(test, constants.CV_ENODEFILECHECK, node.name,
|
|
2066 |
"Node did not return file checksum data") |
|
2080 |
self._ErrorIf(test, constants.CV_ENODEFILECHECK, node.name,
|
|
2081 |
"Node did not return file checksum data")
|
|
2067 | 2082 |
if test: |
2068 |
ignore_nodes.add(node.name)
|
|
2083 |
ignore_nodes.add(node.uuid)
|
|
2069 | 2084 |
continue |
2070 | 2085 |
|
2071 | 2086 |
# Build per-checksum mapping from filename to nodes having it |
2072 | 2087 |
for (filename, checksum) in node_files.items(): |
2073 | 2088 |
assert filename in nodefiles |
2074 |
fileinfo[filename].setdefault(checksum, set()).add(node.name)
|
|
2089 |
fileinfo[filename].setdefault(checksum, set()).add(node.uuid)
|
|
2075 | 2090 |
|
2076 | 2091 |
for (filename, checksums) in fileinfo.items(): |
2077 | 2092 |
assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum" |
2078 | 2093 |
|
2079 | 2094 |
# Nodes having the file |
2080 |
with_file = frozenset(node_name
|
|
2081 |
for nodes in fileinfo[filename].values() |
|
2082 |
for node_name in nodes) - ignore_nodes
|
|
2095 |
with_file = frozenset(node_uuid
|
|
2096 |
for node_uuids in fileinfo[filename].values()
|
|
2097 |
for node_uuid in node_uuids) - ignore_nodes
|
|
2083 | 2098 |
|
2084 | 2099 |
expected_nodes = nodefiles[filename] - ignore_nodes |
2085 | 2100 |
|
... | ... | |
2088 | 2103 |
|
2089 | 2104 |
if filename in files_opt: |
2090 | 2105 |
# All or no nodes |
2091 |
errorif(missing_file and missing_file != expected_nodes, |
|
2092 |
constants.CV_ECLUSTERFILECHECK, None, |
|
2093 |
"File %s is optional, but it must exist on all or no" |
|
2094 |
" nodes (not found on %s)", |
|
2095 |
filename, utils.CommaJoin(utils.NiceSort(missing_file))) |
|
2106 |
self._ErrorIf(missing_file and missing_file != expected_nodes, |
|
2107 |
constants.CV_ECLUSTERFILECHECK, None, |
|
2108 |
"File %s is optional, but it must exist on all or no" |
|
2109 |
" nodes (not found on %s)", |
|
2110 |
filename, |
|
2111 |
utils.CommaJoin( |
|
2112 |
utils.NiceSort( |
|
2113 |
map(self.cfg.GetNodeName, missing_file)))) |
|
2096 | 2114 |
else: |
2097 |
errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None, |
|
2098 |
"File %s is missing from node(s) %s", filename, |
|
2099 |
utils.CommaJoin(utils.NiceSort(missing_file))) |
|
2115 |
self._ErrorIf(missing_file, constants.CV_ECLUSTERFILECHECK, None, |
|
2116 |
"File %s is missing from node(s) %s", filename, |
|
2117 |
utils.CommaJoin( |
|
2118 |
utils.NiceSort( |
|
2119 |
map(self.cfg.GetNodeName, missing_file)))) |
|
2100 | 2120 |
|
2101 | 2121 |
# Warn if a node has a file it shouldn't |
Also available in: Unified diff