Revision 02c521e4 lib/cmdlib.py
old | new | b/lib/cmdlib.py
---|---|---
1091 | 1091 |   ETYPE_ERROR = "ERROR"
1092 | 1092 |   ETYPE_WARNING = "WARNING"
1093 | 1093 |
 | 1094 |   class NodeImage(object):
 | 1095 |     """A class representing the logical and physical status of a node.
 | 1096 |
 | 1097 |     @ivar volumes: a structure as returned from
 | 1098 |       L{ganeti.utils.GetVolumeList} (runtime)
 | 1099 |     @ivar instances: a list of running instances (runtime)
 | 1100 |     @ivar pinst: list of configured primary instances (config)
 | 1101 |     @ivar sinst: list of configured secondary instances (config)
 | 1102 |     @ivar sbp: diction of {secondary-node: list of instances} of all peers
 | 1103 |       of this node (config)
 | 1104 |     @ivar mfree: free memory, as reported by hypervisor (runtime)
 | 1105 |     @ivar dfree: free disk, as reported by the node (runtime)
 | 1106 |     @ivar offline: the offline status (config)
 | 1107 |     @type rpc_fail: boolean
 | 1108 |     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
 | 1109 |       not whether the individual keys were correct) (runtime)
 | 1110 |     @type lvm_fail: boolean
 | 1111 |     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
 | 1112 |     @type hyp_fail: boolean
 | 1113 |     @ivar hyp_fail: whether the RPC call didn't return the instance list
 | 1114 |     @type ghost: boolean
 | 1115 |     @ivar ghost: whether this is a known node or not (config)
 | 1116 |
 | 1117 |     """
 | 1118 |     def __init__(self, offline=False):
 | 1119 |       self.volumes = {}
 | 1120 |       self.instances = []
 | 1121 |       self.pinst = []
 | 1122 |       self.sinst = []
 | 1123 |       self.sbp = {}
 | 1124 |       self.mfree = 0
 | 1125 |       self.dfree = 0
 | 1126 |       self.offline = offline
 | 1127 |       self.rpc_fail = False
 | 1128 |       self.lvm_fail = False
 | 1129 |       self.hyp_fail = False
 | 1130 |       self.ghost = False
 | 1131 |
1094 | 1132 |   def ExpandNames(self):
1095 | 1133 |     self.needed_locks = {
1096 | 1134 |       locking.LEVEL_NODE: locking.ALL_SET,
... | ... | ...
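
The hunk above introduces a per-node "image" that collects configuration-derived and runtime-derived state in one object. A standalone sketch of that idea, using the field names from the diff (the surrounding logical unit class and its _ErrorIf machinery are omitted, and the example values are invented):

```python
# Standalone sketch of the NodeImage idea from the hunk above; the real class
# is nested in the LU and filled by the _Update*/_Verify* helpers of the diff.
class NodeImage(object):
  """Logical and physical status of one node (config + runtime data)."""

  def __init__(self, offline=False):
    self.volumes = {}       # LV data returned by the node (runtime)
    self.instances = []     # running instances per the hypervisor (runtime)
    self.pinst = []         # configured primary instances (config)
    self.sinst = []         # configured secondary instances (config)
    self.sbp = {}           # peer primary node -> instances we are secondary for
    self.mfree = 0          # free memory reported by the hypervisor
    self.dfree = 0          # free disk in the volume group
    self.offline = offline  # node marked offline in the config
    self.rpc_fail = False   # the verify RPC call itself failed
    self.lvm_fail = False   # LVM data missing or invalid
    self.hyp_fail = False   # instance list missing or invalid
    self.ghost = False      # referenced by instances but unknown to the config


# Example: a node image for a made-up "node1" that is primary for "inst1"
# and secondary for "inst2", whose primary is "node2".
nimg = NodeImage()
nimg.pinst.append("inst1")
nimg.sinst.append("inst2")
nimg.sbp.setdefault("node2", []).append("inst2")
nimg.mfree = 2048
healthy = not (nimg.rpc_fail or nimg.lvm_fail or nimg.hyp_fail)
print(healthy, nimg.sbp)
```
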
1135 | 1173 |     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1136 | 1174 |       self.bad = self.bad or cond
1137 | 1175 |
1138 | |   def _VerifyNode(self, nodeinfo, file_list, local_cksum,
1139 | |                   node_result, master_files, drbd_map, vg_name):
 | 1176 |   def _VerifyNode(self, ninfo, nresult):
1140 | 1177 |     """Run multiple tests against a node.
1141 | 1178 |
1142 | 1179 |     Test list:
... | ... | ...
1146 | 1183 |       - checks config file checksum
1147 | 1184 |       - checks ssh to other nodes
1148 | 1185 |
1149 | |     @type nodeinfo: L{objects.Node}
1150 | |     @param nodeinfo: the node to check
1151 | |     @param file_list: required list of files
1152 | |     @param local_cksum: dictionary of local files and their checksums
1153 | |     @param node_result: the results from the node
1154 | |     @param master_files: list of files that only masters should have
1155 | |     @param drbd_map: the used drbd minors for this node, in
1156 | |       form of minor: (instance, must_exist) which correspond to instances
1157 | |       and their running status
1158 | |     @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
 | 1186 |     @type ninfo: L{objects.Node}
 | 1187 |     @param ninfo: the node to check
 | 1188 |     @param nresult: the results from the node
 | 1189 |     @rtype: boolean
 | 1190 |     @return: whether overall this call was successful (and we can expect
 | 1191 |          reasonable values in the respose)
1159 | 1192 |
1160 | 1193 |     """
1161 | |     node = nodeinfo.name
 | 1194 |     node = ninfo.name
1162 | 1195 |     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1163 | 1196 |
1164 | |     # main result, node_result should be a non-empty dict
1165 | |     test = not node_result or not isinstance(node_result, dict)
 | 1197 |     # main result, nresult should be a non-empty dict
 | 1198 |     test = not nresult or not isinstance(nresult, dict)
1166 | 1199 |     _ErrorIf(test, self.ENODERPC, node,
1167 | 1200 |              "unable to verify node: no data returned")
1168 | 1201 |     if test:
1169 | |       return
 | 1202 |       return False
1170 | 1203 |
1171 | 1204 |     # compares ganeti version
1172 | 1205 |     local_version = constants.PROTOCOL_VERSION
1173 | |     remote_version = node_result.get('version', None)
 | 1206 |     remote_version = nresult.get("version", None)
1174 | 1207 |     test = not (remote_version and
1175 | 1208 |                 isinstance(remote_version, (list, tuple)) and
1176 | 1209 |                 len(remote_version) == 2)
1177 | 1210 |     _ErrorIf(test, self.ENODERPC, node,
1178 | 1211 |              "connection to node returned invalid data")
1179 | 1212 |     if test:
1180 | |       return
 | 1213 |       return False
1181 | 1214 |
1182 | 1215 |     test = local_version != remote_version[0]
1183 | 1216 |     _ErrorIf(test, self.ENODEVERSION, node,
1184 | 1217 |              "incompatible protocol versions: master %s,"
1185 | 1218 |              " node %s", local_version, remote_version[0])
1186 | 1219 |     if test:
1187 | |       return
 | 1220 |       return False
1188 | 1221 |
1189 | 1222 |     # node seems compatible, we can actually try to look into its results
1190 | 1223 |
... | ... | ...
1195 | 1228 |              constants.RELEASE_VERSION, remote_version[1],
1196 | 1229 |              code=self.ETYPE_WARNING)
1197 | 1230 |
1198 | |     # checks vg existence and size > 20G
1199 | |     if vg_name is not None:
1200 | |       vglist = node_result.get(constants.NV_VGLIST, None)
1201 | |       test = not vglist
1202 | |       _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1203 | |       if not test:
1204 | |         vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1205 | |                                               constants.MIN_VG_SIZE)
1206 | |         _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
 | 1231 |     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
 | 1232 |     if isinstance(hyp_result, dict):
 | 1233 |       for hv_name, hv_result in hyp_result.iteritems():
 | 1234 |         test = hv_result is not None
 | 1235 |         _ErrorIf(test, self.ENODEHV, node,
 | 1236 |                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1207 | 1237 |
1208 | |     # checks config file checksum
1209 | 1238 |
1210 | |     remote_cksum = node_result.get(constants.NV_FILELIST, None)
1211 | |     test = not isinstance(remote_cksum, dict)
1212 | |     _ErrorIf(test, self.ENODEFILECHECK, node,
1213 | |              "node hasn't returned file checksum data")
 | 1239 |     test = nresult.get(constants.NV_NODESETUP,
 | 1240 |                        ["Missing NODESETUP results"])
 | 1241 |     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
 | 1242 |              "; ".join(test))
 | 1243 |
 | 1244 |     return True
 | 1245 |
 | 1246 |   def _VerifyNodeTime(self, ninfo, nresult,
 | 1247 |                       nvinfo_starttime, nvinfo_endtime):
 | 1248 |     """Check the node time.
 | 1249 |
 | 1250 |     @type ninfo: L{objects.Node}
 | 1251 |     @param ninfo: the node to check
 | 1252 |     @param nresult: the remote results for the node
 | 1253 |     @param nvinfo_starttime: the start time of the RPC call
 | 1254 |     @param nvinfo_endtime: the end time of the RPC call
 | 1255 |
 | 1256 |     """
 | 1257 |     node = ninfo.name
 | 1258 |     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
 | 1259 |
 | 1260 |     ntime = nresult.get(constants.NV_TIME, None)
 | 1261 |     try:
 | 1262 |       ntime_merged = utils.MergeTime(ntime)
 | 1263 |     except (ValueError, TypeError):
 | 1264 |       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
 | 1265 |       return
 | 1266 |
 | 1267 |     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
 | 1268 |       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
 | 1269 |     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
 | 1270 |       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
 | 1271 |     else:
 | 1272 |       ntime_diff = None
 | 1273 |
 | 1274 |     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
 | 1275 |              "Node time diverges by at least %s from master node time",
 | 1276 |              ntime_diff)
 | 1277 |
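
_VerifyNodeTime only accepts a node clock that falls inside the window spanned by the master's RPC start and end timestamps, widened by the allowed skew; this tolerates slow RPCs without hiding a genuinely drifting clock. A minimal sketch of that window check (the constant value and the function name are illustrative, not Ganeti's):

```python
# Illustrative restatement of the clock-skew window used by _VerifyNodeTime.
MAX_CLOCK_SKEW = 150.0  # seconds; stand-in for constants.NODE_MAX_CLOCK_SKEW

def clock_skew(node_time, rpc_start, rpc_end, max_skew=MAX_CLOCK_SKEW):
  """Return a human-readable divergence, or None if the node time is OK."""
  if node_time < rpc_start - max_skew:
    return "%.01fs" % abs(rpc_start - node_time)   # node clock is behind
  if node_time > rpc_end + max_skew:
    return "%.01fs" % abs(node_time - rpc_end)     # node clock is ahead
  return None  # within the allowed window, accounting for the RPC duration

print(clock_skew(1000.0, 1200.0, 1201.0))  # "200.0s" (behind)
print(clock_skew(1200.5, 1200.0, 1201.0))  # None
```
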
 | 1278 |   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
 | 1279 |     """Check the node time.
 | 1280 |
 | 1281 |     @type ninfo: L{objects.Node}
 | 1282 |     @param ninfo: the node to check
 | 1283 |     @param nresult: the remote results for the node
 | 1284 |     @param vg_name: the configured VG name
 | 1285 |
 | 1286 |     """
 | 1287 |     if vg_name is None:
 | 1288 |       return
 | 1289 |
 | 1290 |     node = ninfo.name
 | 1291 |     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
 | 1292 |
 | 1293 |     # checks vg existence and size > 20G
 | 1294 |     vglist = nresult.get(constants.NV_VGLIST, None)
 | 1295 |     test = not vglist
 | 1296 |     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1214 | 1297 |     if not test:
1215 | |       for file_name in file_list:
1216 | |         node_is_mc = nodeinfo.master_candidate
1217 | |         must_have = (file_name not in master_files) or node_is_mc
1218 | |         # missing
1219 | |         test1 = file_name not in remote_cksum
1220 | |         # invalid checksum
1221 | |         test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1222 | |         # existing and good
1223 | |         test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1224 | |         _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1225 | |                  "file '%s' missing", file_name)
1226 | |         _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1227 | |                  "file '%s' has wrong checksum", file_name)
1228 | |         # not candidate and this is not a must-have file
1229 | |         _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1230 | |                  "file '%s' should not exist on non master"
1231 | |                  " candidates (and the file is outdated)", file_name)
1232 | |         # all good, except non-master/non-must have combination
1233 | |         _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1234 | |                  "file '%s' should not exist"
1235 | |                  " on non master candidates", file_name)
1236 | |
1237 | |     # checks ssh to any
1238 | |
1239 | |     test = constants.NV_NODELIST not in node_result
 | 1298 |       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
 | 1299 |                                             constants.MIN_VG_SIZE)
 | 1300 |       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
 | 1301 |
 | 1302 |     # check pv names
 | 1303 |     pvlist = nresult.get(constants.NV_PVLIST, None)
 | 1304 |     test = pvlist is None
 | 1305 |     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
 | 1306 |     if not test:
 | 1307 |       # check that ':' is not present in PV names, since it's a
 | 1308 |       # special character for lvcreate (denotes the range of PEs to
 | 1309 |       # use on the PV)
 | 1310 |       for _, pvname, owner_vg in pvlist:
 | 1311 |         test = ":" in pvname
 | 1312 |         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
 | 1313 |                  " '%s' of VG '%s'", pvname, owner_vg)
 | 1314 |
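
_VerifyNodeLVM additionally rejects PV names containing ':' because lvcreate interprets that character as the separator before a physical-extent range. A small sketch of the same scan, assuming pvlist rows unpack to (something, pv_name, owner_vg) triples as in the hunk above (the sample rows are made up):

```python
# Sketch of the PV-name sanity check; pvlist rows are assumed to be
# (something, pv_name, owner_vg) triples, matching the unpacking above.
def bad_pv_names(pvlist):
  """Yield (pv_name, vg_name) pairs whose PV name would confuse lvcreate."""
  for _, pvname, owner_vg in pvlist:
    if ":" in pvname:  # ':' delimits a PE range in lvcreate arguments
      yield pvname, owner_vg

pvs = [(10240, "/dev/sda3", "xenvg"), (10240, "/dev/disk:0", "xenvg")]
print(list(bad_pv_names(pvs)))  # [('/dev/disk:0', 'xenvg')]
```
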
 | 1315 |   def _VerifyNodeNetwork(self, ninfo, nresult):
 | 1316 |     """Check the node time.
 | 1317 |
 | 1318 |     @type ninfo: L{objects.Node}
 | 1319 |     @param ninfo: the node to check
 | 1320 |     @param nresult: the remote results for the node
 | 1321 |
 | 1322 |     """
 | 1323 |     node = ninfo.name
 | 1324 |     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
 | 1325 |
 | 1326 |     test = constants.NV_NODELIST not in nresult
1240 | 1327 |     _ErrorIf(test, self.ENODESSH, node,
1241 | 1328 |              "node hasn't returned node ssh connectivity data")
1242 | 1329 |     if not test:
1243 | |       if node_result[constants.NV_NODELIST]:
1244 | |         for a_node, a_msg in node_result[constants.NV_NODELIST].items():
 | 1330 |       if nresult[constants.NV_NODELIST]:
 | 1331 |         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1245 | 1332 |           _ErrorIf(True, self.ENODESSH, node,
1246 | 1333 |                    "ssh communication with node '%s': %s", a_node, a_msg)
1247 | 1334 |
1248 | |     test = constants.NV_NODENETTEST not in node_result
 | 1335 |     test = constants.NV_NODENETTEST not in nresult
1249 | 1336 |     _ErrorIf(test, self.ENODENET, node,
1250 | 1337 |              "node hasn't returned node tcp connectivity data")
1251 | 1338 |     if not test:
1252 | |       if node_result[constants.NV_NODENETTEST]:
1253 | |         nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
 | 1339 |       if nresult[constants.NV_NODENETTEST]:
 | 1340 |         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1254 | 1341 |         for anode in nlist:
1255 | 1342 |           _ErrorIf(True, self.ENODENET, node,
1256 | 1343 |                    "tcp communication with node '%s': %s",
1257 | |                    anode, node_result[constants.NV_NODENETTEST][anode])
1258 | |
1259 | |     hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
1260 | |     if isinstance(hyp_result, dict):
1261 | |       for hv_name, hv_result in hyp_result.iteritems():
1262 | |         test = hv_result is not None
1263 | |         _ErrorIf(test, self.ENODEHV, node,
1264 | |                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1265 | |
1266 | |     # check used drbd list
1267 | |     if vg_name is not None:
1268 | |       used_minors = node_result.get(constants.NV_DRBDLIST, [])
1269 | |       test = not isinstance(used_minors, (tuple, list))
1270 | |       _ErrorIf(test, self.ENODEDRBD, node,
1271 | |                "cannot parse drbd status file: %s", str(used_minors))
1272 | |       if not test:
1273 | |         for minor, (iname, must_exist) in drbd_map.items():
1274 | |           test = minor not in used_minors and must_exist
1275 | |           _ErrorIf(test, self.ENODEDRBD, node,
1276 | |                    "drbd minor %d of instance %s is not active",
1277 | |                    minor, iname)
1278 | |         for minor in used_minors:
1279 | |           test = minor not in drbd_map
1280 | |           _ErrorIf(test, self.ENODEDRBD, node,
1281 | |                    "unallocated drbd minor %d is in use", minor)
1282 | |     test = node_result.get(constants.NV_NODESETUP,
1283 | |                            ["Missing NODESETUP results"])
1284 | |     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1285 | |              "; ".join(test))
 | 1344 |                    anode, nresult[constants.NV_NODENETTEST][anode])
1286 | 1345 |
1287 | |     # check pv names
1288 | |     if vg_name is not None:
1289 | |       pvlist = node_result.get(constants.NV_PVLIST, None)
1290 | |       test = pvlist is None
1291 | |       _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1292 | |       if not test:
1293 | |         # check that ':' is not present in PV names, since it's a
1294 | |         # special character for lvcreate (denotes the range of PEs to
1295 | |         # use on the PV)
1296 | |         for _, pvname, owner_vg in pvlist:
1297 | |           test = ":" in pvname
1298 | |           _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1299 | |                    " '%s' of VG '%s'", pvname, owner_vg)
1300 | |
1301 | |   def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1302 | |                       node_instance, n_offline):
 | 1346 |   def _VerifyInstance(self, instance, instanceconfig, node_image):
1303 | 1347 |     """Verify an instance.
1304 | 1348 |
1305 | 1349 |     This function checks to see if the required block devices are
... | ... | ...
1313 | 1357 |     instanceconfig.MapLVsByNode(node_vol_should)
1314 | 1358 |
1315 | 1359 |     for node in node_vol_should:
1316 | |       if node in n_offline:
1317 | |         # ignore missing volumes on offline nodes
 | 1360 |       n_img = node_image[node]
 | 1361 |       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
 | 1362 |         # ignore missing volumes on offline or broken nodes
1318 | 1363 |         continue
1319 | 1364 |       for volume in node_vol_should[node]:
1320 | |         test = node not in node_vol_is or volume not in node_vol_is[node]
 | 1365 |         test = volume not in n_img.volumes
1321 | 1366 |         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1322 | 1367 |                  "volume %s missing on node %s", volume, node)
1323 | 1368 |
1324 | 1369 |     if instanceconfig.admin_up:
1325 | |       test = ((node_current not in node_instance or
1326 | |                not instance in node_instance[node_current]) and
1327 | |               node_current not in n_offline)
 | 1370 |       pri_img = node_image[node_current]
 | 1371 |       test = instance not in pri_img.instances and not pri_img.offline
1328 | 1372 |       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1329 | 1373 |                "instance not running on its primary node %s",
1330 | 1374 |                node_current)
1331 | 1375 |
1332 | |     for node in node_instance:
 | 1376 |     for node, n_img in node_image.items():
1333 | 1377 |       if (not node == node_current):
1334 | |         test = instance in node_instance[node]
 | 1378 |         test = instance in n_img.instances
1335 | 1379 |         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1336 | 1380 |                  "instance should not run on node %s", node)
1337 | 1381 |
1338 | |   def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
 | 1382 |   def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1339 | 1383 |     """Verify if there are any unknown volumes in the cluster.
1340 | 1384 |
1341 | 1385 |     The .os, .swap and backup volumes are ignored. All other volumes are
1342 | 1386 |     reported as unknown.
1343 | 1387 |
1344 | 1388 |     """
1345 | |     for node in node_vol_is:
1346 | |       for volume in node_vol_is[node]:
 | 1389 |     for node, n_img in node_image.items():
 | 1390 |       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
 | 1391 |         # skip non-healthy nodes
 | 1392 |         continue
 | 1393 |       for volume in n_img.volumes:
1347 | 1394 |         test = (node not in node_vol_should or
1348 | 1395 |                 volume not in node_vol_should[node])
1349 | 1396 |         self._ErrorIf(test, self.ENODEORPHANLV, node,
1350 | 1397 |                       "volume %s is unknown", volume)
1351 | 1398 |
1352 | |   def _VerifyOrphanInstances(self, instancelist, node_instance):
 | 1399 |   def _VerifyOrphanInstances(self, instancelist, node_image):
1353 | 1400 |     """Verify the list of running instances.
1354 | 1401 |
1355 | 1402 |     This checks what instances are running but unknown to the cluster.
1356 | 1403 |
1357 | 1404 |     """
1358 | |     for node in node_instance:
1359 | |       for o_inst in node_instance[node]:
 | 1405 |     for node, n_img in node_image.items():
 | 1406 |       for o_inst in n_img.instances:
1360 | 1407 |         test = o_inst not in instancelist
1361 | 1408 |         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1362 | 1409 |                       "instance %s on node %s should not exist", o_inst, node)
1363 | 1410 |
1364 | |   def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
 | 1411 |   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1365 | 1412 |     """Verify N+1 Memory Resilience.
1366 | 1413 |
1367 | |     Check that if one single node dies we can still start all the instances it
1368 | |     was primary for.
 | 1414 |     Check that if one single node dies we can still start all the
 | 1415 |     instances it was primary for.
1369 | 1416 |
1370 | 1417 |     """
1371 | |     for node, nodeinfo in node_info.iteritems():
1372 | |       # This code checks that every node which is now listed as secondary has
1373 | |       # enough memory to host all instances it is supposed to should a single
1374 | |       # other node in the cluster fail.
 | 1418 |     for node, n_img in node_image.items():
 | 1419 |       # This code checks that every node which is now listed as
 | 1420 |       # secondary has enough memory to host all instances it is
 | 1421 |       # supposed to should a single other node in the cluster fail.
1375 | 1422 |       # FIXME: not ready for failover to an arbitrary node
1376 | 1423 |       # FIXME: does not support file-backed instances
1377 | |       # WARNING: we currently take into account down instances as well as up
1378 | |       # ones, considering that even if they're down someone might want to start
1379 | |       # them even in the event of a node failure.
1380 | |       for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
 | 1424 |       # WARNING: we currently take into account down instances as well
 | 1425 |       # as up ones, considering that even if they're down someone
 | 1426 |       # might want to start them even in the event of a node failure.
 | 1427 |       for prinode, instances in n_img.sbp.items():
1381 | 1428 |         needed_mem = 0
1382 | 1429 |         for instance in instances:
1383 | 1430 |           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1384 | 1431 |           if bep[constants.BE_AUTO_BALANCE]:
1385 | 1432 |             needed_mem += bep[constants.BE_MEMORY]
1386 | |         test = nodeinfo['mfree'] < needed_mem
 | 1433 |         test = n_img.mfree < needed_mem
1387 | 1434 |         self._ErrorIf(test, self.ENODEN1, node,
1388 | 1435 |                       "not enough memory on to accommodate"
1389 | 1436 |                       " failovers should peer node %s fail", prinode)
1390 | 1437 |
def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum, |
|
1439 |
master_files): |
|
1440 |
"""Verifies and computes the node required file checksums. |
|
1441 |
|
|
1442 |
@type ninfo: L{objects.Node} |
|
1443 |
@param ninfo: the node to check |
|
1444 |
@param nresult: the remote results for the node |
|
1445 |
@param file_list: required list of files |
|
1446 |
@param local_cksum: dictionary of local files and their checksums |
|
1447 |
@param master_files: list of files that only masters should have |
|
1448 |
|
|
1449 |
""" |
|
1450 |
node = ninfo.name |
|
1451 |
_ErrorIf = self._ErrorIf # pylint: disable-msg=C0103 |
|
1452 |
|
|
1453 |
remote_cksum = nresult.get(constants.NV_FILELIST, None) |
|
1454 |
test = not isinstance(remote_cksum, dict) |
|
1455 |
_ErrorIf(test, self.ENODEFILECHECK, node, |
|
1456 |
"node hasn't returned file checksum data") |
|
1457 |
if test: |
|
1458 |
return |
|
1459 |
|
|
1460 |
for file_name in file_list: |
|
1461 |
node_is_mc = ninfo.master_candidate |
|
1462 |
must_have = (file_name not in master_files) or node_is_mc |
|
1463 |
# missing |
|
1464 |
test1 = file_name not in remote_cksum |
|
1465 |
# invalid checksum |
|
1466 |
test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name] |
|
1467 |
# existing and good |
|
1468 |
test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name] |
|
1469 |
_ErrorIf(test1 and must_have, self.ENODEFILECHECK, node, |
|
1470 |
"file '%s' missing", file_name) |
|
1471 |
_ErrorIf(test2 and must_have, self.ENODEFILECHECK, node, |
|
1472 |
"file '%s' has wrong checksum", file_name) |
|
1473 |
# not candidate and this is not a must-have file |
|
1474 |
_ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node, |
|
1475 |
"file '%s' should not exist on non master" |
|
1476 |
" candidates (and the file is outdated)", file_name) |
|
1477 |
# all good, except non-master/non-must have combination |
|
1478 |
_ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node, |
|
1479 |
"file '%s' should not exist" |
|
1480 |
" on non master candidates", file_name) |
|
1481 |
|
|
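
_VerifyNodeFiles distinguishes, per required file, between a missing file, a present file with the wrong checksum, and a correct file sitting on a node that should not carry it (a non-master-candidate holding a master-only file). A compact sketch of that classification (file names and checksums are invented):

```python
# Sketch of the per-file classification performed by _VerifyNodeFiles.
def file_problems(file_list, local_cksum, remote_cksum, master_files, is_mc):
  """Return a list of (file_name, problem) tuples for one node."""
  problems = []
  for name in file_list:
    must_have = (name not in master_files) or is_mc
    missing = name not in remote_cksum
    wrong = not missing and remote_cksum[name] != local_cksum[name]
    good = not missing and remote_cksum[name] == local_cksum[name]
    if missing and must_have:
      problems.append((name, "missing"))
    elif wrong and must_have:
      problems.append((name, "wrong checksum"))
    elif (wrong or good) and not must_have:
      problems.append((name, "should not exist on non-master-candidate"))
    # 'missing and not must_have' is fine: the node need not have the file
  return problems

local = {"/etc/ganeti/config": "abc", "/etc/hosts": "123"}
remote = {"/etc/ganeti/config": "abc", "/etc/hosts": "456"}
print(file_problems(local.keys(), local, remote,
                    master_files=["/etc/ganeti/config"], is_mc=False))
```
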
 | 1482 |   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
 | 1483 |     """Verifies and the node DRBD status.
 | 1484 |
 | 1485 |     @type ninfo: L{objects.Node}
 | 1486 |     @param ninfo: the node to check
 | 1487 |     @param nresult: the remote results for the node
 | 1488 |     @param instanceinfo: the dict of instances
 | 1489 |     @param drbd_map: the DRBD map as returned by
 | 1490 |       L{ganeti.config.ConfigWriter.ComputeDRBDMap}
 | 1491 |
 | 1492 |     """
 | 1493 |     node = ninfo.name
 | 1494 |     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
 | 1495 |
 | 1496 |     # compute the DRBD minors
 | 1497 |     node_drbd = {}
 | 1498 |     for minor, instance in drbd_map[node].items():
 | 1499 |       test = instance not in instanceinfo
 | 1500 |       _ErrorIf(test, self.ECLUSTERCFG, None,
 | 1501 |                "ghost instance '%s' in temporary DRBD map", instance)
 | 1502 |       # ghost instance should not be running, but otherwise we
 | 1503 |       # don't give double warnings (both ghost instance and
 | 1504 |       # unallocated minor in use)
 | 1505 |       if test:
 | 1506 |         node_drbd[minor] = (instance, False)
 | 1507 |       else:
 | 1508 |         instance = instanceinfo[instance]
 | 1509 |         node_drbd[minor] = (instance.name, instance.admin_up)
 | 1510 |
 | 1511 |     # and now check them
 | 1512 |     used_minors = nresult.get(constants.NV_DRBDLIST, [])
 | 1513 |     test = not isinstance(used_minors, (tuple, list))
 | 1514 |     _ErrorIf(test, self.ENODEDRBD, node,
 | 1515 |              "cannot parse drbd status file: %s", str(used_minors))
 | 1516 |     if test:
 | 1517 |       # we cannot check drbd status
 | 1518 |       return
 | 1519 |
 | 1520 |     for minor, (iname, must_exist) in node_drbd.items():
 | 1521 |       test = minor not in used_minors and must_exist
 | 1522 |       _ErrorIf(test, self.ENODEDRBD, node,
 | 1523 |                "drbd minor %d of instance %s is not active", minor, iname)
 | 1524 |     for minor in used_minors:
 | 1525 |       test = minor not in node_drbd
 | 1526 |       _ErrorIf(test, self.ENODEDRBD, node,
 | 1527 |                "unallocated drbd minor %d is in use", minor)
 | 1528 |
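
_VerifyNodeDrbd first derives the minors the configuration expects on the node, each tagged with whether it must be active, then compares that set with the minors the node reports as in use, in both directions. A minimal sketch of the two-way comparison with plain dicts in place of the config objects:

```python
# Sketch of the two-way DRBD minor check from _VerifyNodeDrbd.
# expected: minor -> (instance_name, must_exist); used: minors seen on the node.
def drbd_mismatches(expected, used):
  problems = []
  for minor, (iname, must_exist) in expected.items():
    if must_exist and minor not in used:
      problems.append("minor %d of instance %s is not active" % (minor, iname))
  for minor in used:
    if minor not in expected:
      problems.append("unallocated minor %d is in use" % minor)
  return problems

expected = {0: ("inst1", True), 1: ("inst2", False)}
print(drbd_mismatches(expected, used=[1, 2]))
# ['minor 0 of instance inst1 is not active', 'unallocated minor 2 is in use']
```
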
 | 1529 |   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
 | 1530 |     """Verifies and updates the node volume data.
 | 1531 |
 | 1532 |     This function will update a L{NodeImage}'s internal structures
 | 1533 |     with data from the remote call.
 | 1534 |
 | 1535 |     @type ninfo: L{objects.Node}
 | 1536 |     @param ninfo: the node to check
 | 1537 |     @param nresult: the remote results for the node
 | 1538 |     @param nimg: the node image object
 | 1539 |     @param vg_name: the configured VG name
 | 1540 |
 | 1541 |     """
 | 1542 |     node = ninfo.name
 | 1543 |     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
 | 1544 |
 | 1545 |     nimg.lvm_fail = True
 | 1546 |     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
 | 1547 |     if vg_name is None:
 | 1548 |       pass
 | 1549 |     elif isinstance(lvdata, basestring):
 | 1550 |       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
 | 1551 |                utils.SafeEncode(lvdata))
 | 1552 |     elif not isinstance(lvdata, dict):
 | 1553 |       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
 | 1554 |     else:
 | 1555 |       nimg.volumes = lvdata
 | 1556 |       nimg.lvm_fail = False
 | 1557 |
 | 1558 |   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
 | 1559 |     """Verifies and updates the node instance list.
 | 1560 |
 | 1561 |     If the listing was successful, then updates this node's instance
 | 1562 |     list. Otherwise, it marks the RPC call as failed for the instance
 | 1563 |     list key.
 | 1564 |
 | 1565 |     @type ninfo: L{objects.Node}
 | 1566 |     @param ninfo: the node to check
 | 1567 |     @param nresult: the remote results for the node
 | 1568 |     @param nimg: the node image object
 | 1569 |
 | 1570 |     """
 | 1571 |     idata = nresult.get(constants.NV_INSTANCELIST, None)
 | 1572 |     test = not isinstance(idata, list)
 | 1573 |     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
 | 1574 |                   " (instancelist): %s", utils.SafeEncode(str(idata)))
 | 1575 |     if test:
 | 1576 |       nimg.hyp_fail = True
 | 1577 |     else:
 | 1578 |       nimg.instances = idata
 | 1579 |
 | 1580 |   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
 | 1581 |     """Verifies and computes a node information map
 | 1582 |
 | 1583 |     @type ninfo: L{objects.Node}
 | 1584 |     @param ninfo: the node to check
 | 1585 |     @param nresult: the remote results for the node
 | 1586 |     @param nimg: the node image object
 | 1587 |     @param vg_name: the configured VG name
 | 1588 |
 | 1589 |     """
 | 1590 |     node = ninfo.name
 | 1591 |     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
 | 1592 |
 | 1593 |     # try to read free memory (from the hypervisor)
 | 1594 |     hv_info = nresult.get(constants.NV_HVINFO, None)
 | 1595 |     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
 | 1596 |     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
 | 1597 |     if not test:
 | 1598 |       try:
 | 1599 |         nimg.mfree = int(hv_info["memory_free"])
 | 1600 |       except (ValueError, TypeError):
 | 1601 |         _ErrorIf(True, self.ENODERPC, node,
 | 1602 |                  "node returned invalid nodeinfo, check hypervisor")
 | 1603 |
 | 1604 |     # FIXME: devise a free space model for file based instances as well
 | 1605 |     if vg_name is not None:
 | 1606 |       test = (constants.NV_VGLIST not in nresult or
 | 1607 |               vg_name not in nresult[constants.NV_VGLIST])
 | 1608 |       _ErrorIf(test, self.ENODELVM, node,
 | 1609 |                "node didn't return data for the volume group '%s'"
 | 1610 |                " - it is either missing or broken", vg_name)
 | 1611 |       if not test:
 | 1612 |         try:
 | 1613 |           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
 | 1614 |         except (ValueError, TypeError):
 | 1615 |           _ErrorIf(True, self.ENODERPC, node,
 | 1616 |                    "node returned invalid LVM info, check LVM status")
 | 1617 |
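
The three _Update* helpers above each consume one slice of the same per-node verify payload: the LV listing, the hypervisor's instance list, and the free memory/disk figures. An illustrative payload and the shape checks applied to it (the literal keys mirror the NV_* constants used in the diff; the values are made up):

```python
# Illustrative per-node verify payload as consumed by the _Update* helpers;
# key names mirror the NV_* constants used above, all values are invented.
nresult = {
    "lvlist": {"xenvg/disk0": ["...attrs..."]},  # dict on success, str on error
    "instancelist": ["inst1", "inst2"],          # instances the hypervisor runs
    "hvinfo": {"memory_free": "2048"},           # free memory figure
    "vglist": {"xenvg": 51200},                  # VG name -> free space
}

# Update pattern mirrored from _UpdateNodeVolumes: mark the sub-check failed
# first, clear the flag only once the data has the expected shape.
lvm_fail, volumes = True, {}
lvdata = nresult.get("lvlist", "Missing LV data")
if isinstance(lvdata, dict):
  volumes, lvm_fail = lvdata, False

mfree = int(nresult["hvinfo"]["memory_free"]) if "hvinfo" in nresult else 0
dfree = int(nresult.get("vglist", {}).get("xenvg", 0))
print(lvm_fail, sorted(volumes), mfree, dfree)
```
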
1391 | 1618 |   def CheckPrereq(self):
1392 | 1619 |     """Check prerequisites.
1393 | 1620 |
... | ... | ...
1442 | 1669 |                             for iname in instancelist)
1443 | 1670 |     i_non_redundant = [] # Non redundant instances
1444 | 1671 |     i_non_a_balanced = [] # Non auto-balanced instances
1445 | |     n_offline = [] # List of offline nodes
1446 | |     n_drained = [] # List of nodes being drained
1447 | |     node_volume = {}
1448 | |     node_instance = {}
1449 | |     node_info = {}
1450 | |     instance_cfg = {}
 | 1672 |     n_offline = 0 # Count of offline nodes
 | 1673 |     n_drained = 0 # Count of nodes being drained
 | 1674 |     node_vol_should = {}
1451 | 1675 |
1452 | 1676 |     # FIXME: verify OS list
1453 | 1677 |     # do local checksums
... | ... | ...
1481 | 1705 |       node_verify_param[constants.NV_PVLIST] = [vg_name]
1482 | 1706 |       node_verify_param[constants.NV_DRBDLIST] = None
1483 | 1707 |
 | 1708 |     # Build our expected cluster state
 | 1709 |     node_image = dict((node.name, self.NodeImage(offline=node.offline))
 | 1710 |                       for node in nodeinfo)
 | 1711 |
 | 1712 |     for instance in instancelist:
 | 1713 |       inst_config = instanceinfo[instance]
 | 1714 |
 | 1715 |       for nname in inst_config.all_nodes:
 | 1716 |         if nname not in node_image:
 | 1717 |           # ghost node
 | 1718 |           gnode = self.NodeImage()
 | 1719 |           gnode.ghost = True
 | 1720 |           node_image[nname] = gnode
 | 1721 |
 | 1722 |       inst_config.MapLVsByNode(node_vol_should)
 | 1723 |
 | 1724 |       pnode = inst_config.primary_node
 | 1725 |       node_image[pnode].pinst.append(instance)
 | 1726 |
 | 1727 |       for snode in inst_config.secondary_nodes:
 | 1728 |         nimg = node_image[snode]
 | 1729 |         nimg.sinst.append(instance)
 | 1730 |         if pnode not in nimg.sbp:
 | 1731 |           nimg.sbp[pnode] = []
 | 1732 |         nimg.sbp[pnode].append(instance)
 | 1733 |
 | 1734 |     # At this point, we have the in-memory data structures complete,
 | 1735 |     # except for the runtime information, which we'll gather next
 | 1736 |
# Due to the way our RPC system works, exact response times cannot be |
1485 | 1738 |
# guaranteed (e.g. a broken node could run into a timeout). By keeping the |
1486 | 1739 |
# time before and after executing the request, we can at least have a time |
... | ... | |
1497 | 1750 |
feedback_fn("* Verifying node status") |
1498 | 1751 |
for node_i in nodeinfo: |
1499 | 1752 |
node = node_i.name |
1753 |
nimg = node_image[node] |
|
1500 | 1754 |
|
1501 | 1755 |
if node_i.offline: |
1502 | 1756 |
if verbose: |
1503 | 1757 |
feedback_fn("* Skipping offline node %s" % (node,)) |
1504 |
n_offline.append(node)
|
|
1758 |
n_offline += 1
|
|
1505 | 1759 |
continue |
1506 | 1760 |
|
1507 | 1761 |
if node == master_node: |
... | ... | |
1510 | 1764 |
ntype = "master candidate" |
1511 | 1765 |
elif node_i.drained: |
1512 | 1766 |
ntype = "drained" |
1513 |
n_drained.append(node)
|
|
1767 |
n_drained += 1
|
|
1514 | 1768 |
else: |
1515 | 1769 |
ntype = "regular" |
1516 | 1770 |
if verbose: |
... | ... | |
1519 | 1773 |
msg = all_nvinfo[node].fail_msg |
1520 | 1774 |
_ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg) |
1521 | 1775 |
if msg: |
1776 |
nimg.rpc_fail = True |
|
1522 | 1777 |
continue |
1523 | 1778 |
|
1524 | 1779 |
nresult = all_nvinfo[node].payload |
1525 |
node_drbd = {} |
|
1526 |
for minor, instance in all_drbd_map[node].items(): |
|
1527 |
test = instance not in instanceinfo |
|
1528 |
_ErrorIf(test, self.ECLUSTERCFG, None, |
|
1529 |
"ghost instance '%s' in temporary DRBD map", instance) |
|
1530 |
# ghost instance should not be running, but otherwise we |
|
1531 |
# don't give double warnings (both ghost instance and |
|
1532 |
# unallocated minor in use) |
|
1533 |
if test: |
|
1534 |
node_drbd[minor] = (instance, False) |
|
1535 |
else: |
|
1536 |
instance = instanceinfo[instance] |
|
1537 |
node_drbd[minor] = (instance.name, instance.admin_up) |
|
1538 |
|
|
1539 |
self._VerifyNode(node_i, file_names, local_checksums, |
|
1540 |
nresult, master_files, node_drbd, vg_name) |
|
1541 |
|
|
1542 |
lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data") |
|
1543 |
if vg_name is None: |
|
1544 |
node_volume[node] = {} |
|
1545 |
elif isinstance(lvdata, basestring): |
|
1546 |
_ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s", |
|
1547 |
utils.SafeEncode(lvdata)) |
|
1548 |
node_volume[node] = {} |
|
1549 |
elif not isinstance(lvdata, dict): |
|
1550 |
_ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)") |
|
1551 |
continue |
|
1552 |
else: |
|
1553 |
node_volume[node] = lvdata |
|
1554 |
|
|
1555 |
# node_instance |
|
1556 |
idata = nresult.get(constants.NV_INSTANCELIST, None) |
|
1557 |
test = not isinstance(idata, list) |
|
1558 |
_ErrorIf(test, self.ENODEHV, node, |
|
1559 |
"rpc call to node failed (instancelist): %s", |
|
1560 |
utils.SafeEncode(str(idata))) |
|
1561 |
if test: |
|
1562 |
continue |
|
1563 |
|
|
1564 |
node_instance[node] = idata |
|
1565 |
|
|
1566 |
# node_info |
|
1567 |
nodeinfo = nresult.get(constants.NV_HVINFO, None) |
|
1568 |
test = not isinstance(nodeinfo, dict) |
|
1569 |
_ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)") |
|
1570 |
if test: |
|
1571 |
continue |
|
1572 |
|
|
1573 |
# Node time |
|
1574 |
ntime = nresult.get(constants.NV_TIME, None) |
|
1575 |
try: |
|
1576 |
ntime_merged = utils.MergeTime(ntime) |
|
1577 |
except (ValueError, TypeError): |
|
1578 |
_ErrorIf(True, self.ENODETIME, node, "Node returned invalid time") |
|
1579 | 1780 |
|
1580 |
if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW): |
|
1581 |
ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged) |
|
1582 |
elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW): |
|
1583 |
ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime) |
|
1584 |
else: |
|
1585 |
ntime_diff = None |
|
1781 |
nimg.call_ok = self._VerifyNode(node_i, nresult) |
|
1782 |
self._VerifyNodeNetwork(node_i, nresult) |
|
1783 |
self._VerifyNodeLVM(node_i, nresult, vg_name) |
|
1784 |
self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums, |
|
1785 |
master_files) |
|
1786 |
self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map) |
|
1787 |
self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime) |
|
1586 | 1788 |
|
1587 |
_ErrorIf(ntime_diff is not None, self.ENODETIME, node, |
|
1588 |
"Node time diverges by at least %s from master node time", |
|
1589 |
ntime_diff) |
|
1590 |
|
|
1591 |
if ntime_diff is not None: |
|
1592 |
continue |
|
1593 |
|
|
1594 |
try: |
|
1595 |
node_info[node] = { |
|
1596 |
"mfree": int(nodeinfo['memory_free']), |
|
1597 |
"pinst": [], |
|
1598 |
"sinst": [], |
|
1599 |
# dictionary holding all instances this node is secondary for, |
|
1600 |
# grouped by their primary node. Each key is a cluster node, and each |
|
1601 |
# value is a list of instances which have the key as primary and the |
|
1602 |
# current node as secondary. this is handy to calculate N+1 memory |
|
1603 |
# availability if you can only failover from a primary to its |
|
1604 |
# secondary. |
|
1605 |
"sinst-by-pnode": {}, |
|
1606 |
} |
|
1607 |
# FIXME: devise a free space model for file based instances as well |
|
1608 |
if vg_name is not None: |
|
1609 |
test = (constants.NV_VGLIST not in nresult or |
|
1610 |
vg_name not in nresult[constants.NV_VGLIST]) |
|
1611 |
_ErrorIf(test, self.ENODELVM, node, |
|
1612 |
"node didn't return data for the volume group '%s'" |
|
1613 |
" - it is either missing or broken", vg_name) |
|
1614 |
if test: |
|
1615 |
continue |
|
1616 |
node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name]) |
|
1617 |
except (ValueError, KeyError): |
|
1618 |
_ErrorIf(True, self.ENODERPC, node, |
|
1619 |
"node returned invalid nodeinfo, check lvm/hypervisor") |
|
1620 |
continue |
|
1621 |
|
|
1622 |
node_vol_should = {} |
|
1789 |
self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name) |
|
1790 |
self._UpdateNodeInstances(node_i, nresult, nimg) |
|
1791 |
self._UpdateNodeInfo(node_i, nresult, nimg, vg_name) |
|
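
With the helpers in place, each node gets a single pass through them, and nimg.call_ok merely records whether the basic RPC sanity check succeeded; one failing sub-check no longer short-circuits the rest of the checks for that node, as the old continue-laden loop did. A schematic sketch of that dispatch pattern (helper bodies reduced to stubs, all names invented):

```python
# Schematic dispatch pattern: every helper gets the same (node, payload) pair
# plus the shared image object, and failures only set flags on the image.
class Image(object):
  def __init__(self):
    self.call_ok = False
    self.mfree = 0

def verify_base(node, payload, img):
  # stand-in for the basic "did we get a sane payload" check
  img.call_ok = isinstance(payload, dict) and bool(payload)

def update_info(node, payload, img):
  # stand-in for filling runtime figures into the image
  img.mfree = int(payload.get("memory_free", 0))

img = Image()
payload = {"memory_free": "1024"}
for check in (verify_base, update_info):
  check("node1", payload, img)   # no check aborts the remaining ones
print(img.call_ok, img.mfree)
```
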
1623 | 1792 |
1624 | 1793 |     feedback_fn("* Verifying instance status")
1625 | 1794 |     for instance in instancelist:
1626 | 1795 |       if verbose:
1627 | 1796 |         feedback_fn("* Verifying instance %s" % instance)
1628 | 1797 |       inst_config = instanceinfo[instance]
1629 | |       self._VerifyInstance(instance, inst_config, node_volume,
1630 | |                            node_instance, n_offline)
 | 1798 |       self._VerifyInstance(instance, inst_config, node_image)
1631 | 1799 |       inst_nodes_offline = []
1632 | 1800 |
1633 | |       inst_config.MapLVsByNode(node_vol_should)
1634 | |
1635 | |       instance_cfg[instance] = inst_config
1636 | |
1637 | 1801 |       pnode = inst_config.primary_node
1638 | |       _ErrorIf(pnode not in node_info and pnode not in n_offline,
 | 1802 |       pnode_img = node_image[pnode]
 | 1803 |       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1639 | 1804 |                self.ENODERPC, pnode, "instance %s, connection to"
1640 | 1805 |                " primary node failed", instance)
1641 | |       if pnode in node_info:
1642 | |         node_info[pnode]['pinst'].append(instance)
1643 | 1806 |
1644 | |       if pnode in n_offline:
 | 1807 |       if pnode_img.offline:
1645 | 1808 |         inst_nodes_offline.append(pnode)
1646 | 1809 |
1647 | 1810 |       # If the instance is non-redundant we cannot survive losing its primary
... | ... | ...
1649 | 1812 |       # templates with more than one secondary so that situation is not well
1650 | 1813 |       # supported either.
1651 | 1814 |       # FIXME: does not support file-backed instances
1652 | |       if len(inst_config.secondary_nodes) == 0:
 | 1815 |       if not inst_config.secondary_nodes:
1653 | 1816 |         i_non_redundant.append(instance)
1654 | |       _ErrorIf(len(inst_config.secondary_nodes) > 1,
1655 | |                self.EINSTANCELAYOUT, instance,
1656 | |                "instance has multiple secondary nodes", code="WARNING")
 | 1817 |       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
 | 1818 |                instance, "instance has multiple secondary nodes: %s",
 | 1819 |                utils.CommaJoin(inst_config.secondary_nodes),
 | 1820 |                code=self.ETYPE_WARNING)
1657 | 1821 |
1658 | 1822 |       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1659 | 1823 |         i_non_a_balanced.append(instance)
1660 | 1824 |
1661 | 1825 |       for snode in inst_config.secondary_nodes:
1662 | |         _ErrorIf(snode not in node_info and snode not in n_offline,
1663 | |                  self.ENODERPC, snode,
1664 | |                  "instance %s, connection to secondary node"
1665 | |                  " failed", instance)
1666 | |
1667 | |         if snode in node_info:
1668 | |           node_info[snode]['sinst'].append(instance)
1669 | |           if pnode not in node_info[snode]['sinst-by-pnode']:
1670 | |             node_info[snode]['sinst-by-pnode'][pnode] = []
1671 | |           node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1672 | |
1673 | |         if snode in n_offline:
 | 1826 |         s_img = node_image[snode]
 | 1827 |         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
 | 1828 |                  "instance %s, connection to secondary node failed", instance)
 | 1829 |
 | 1830 |         if s_img.offline:
1674 | 1831 |           inst_nodes_offline.append(snode)
1675 | 1832 |
1676 | 1833 |       # warn that the instance lives on offline nodes
1677 | 1834 |       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1678 | 1835 |                "instance lives on offline node(s) %s",
1679 | 1836 |                utils.CommaJoin(inst_nodes_offline))
 | 1837 |       # ... or ghost nodes
 | 1838 |       for node in inst_config.all_nodes:
 | 1839 |         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
 | 1840 |                  "instance lives on ghost node %s", node)
1680 | 1841 |
1681 | 1842 |     feedback_fn("* Verifying orphan volumes")
1682 | |     self._VerifyOrphanVolumes(node_vol_should, node_volume)
 | 1843 |     self._VerifyOrphanVolumes(node_vol_should, node_image)
1683 | 1844 |
1684 | |     feedback_fn("* Verifying remaining instances")
1685 | |     self._VerifyOrphanInstances(instancelist, node_instance)
 | 1845 |     feedback_fn("* Verifying oprhan instances")
 | 1846 |     self._VerifyOrphanInstances(instancelist, node_image)
1686 | 1847 |
1687 | 1848 |     if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1688 | 1849 |       feedback_fn("* Verifying N+1 Memory redundancy")
1689 | |       self._VerifyNPlusOneMemory(node_info, instance_cfg)
 | 1850 |       self._VerifyNPlusOneMemory(node_image, instanceinfo)
1690 | 1851 |
1691 | 1852 |     feedback_fn("* Other Notes")
1692 | 1853 |     if i_non_redundant:
... | ... | ...
1698 | 1859 |                   % len(i_non_a_balanced))
1699 | 1860 |
1700 | 1861 |     if n_offline:
1701 | |       feedback_fn(" - NOTICE: %d offline node(s) found." % len(n_offline))
 | 1862 |       feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
1702 | 1863 |
1703 | 1864 |     if n_drained:
1704 | |       feedback_fn(" - NOTICE: %d drained node(s) found." % len(n_drained))
 | 1865 |       feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
1705 | 1866 |
1706 | 1867 |     return not self.bad
1707 | 1868 |