Revision da4a52a3: lib/cmdlib/cluster.py
--- a/lib/cmdlib/cluster.py
+++ b/lib/cmdlib/cluster.py
@@ -443,7 +443,7 @@
 
   def ExpandNames(self):
     if self.op.instances:
-      self.wanted_names = GetWantedInstances(self, self.op.instances)
+      (_, self.wanted_names) = GetWantedInstances(self, self.op.instances)
       # Not getting the node allocation lock as only a specific set of
       # instances (and their nodes) is going to be acquired
       self.needed_locks = {
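Note on this hunk: GetWantedInstances now appears to return a pair rather than a bare name list, and this caller keeps only the second element. A minimal standalone sketch of that assumed calling convention (the dict-based cfg and the helper are stand-ins, not the Ganeti API):

    # Assumed return shape: (instance_uuids, instance_names); this caller
    # keeps only the names, mirroring the tuple unpacking above.
    def get_wanted_instances(cfg, wanted):
        uuids = sorted(u for u, name in cfg.items() if name in wanted)
        return (uuids, [cfg[u] for u in uuids])

    cfg = {"uuid-1": "inst1.example.com", "uuid-2": "inst2.example.com"}
    (_, wanted_names) = get_wanted_instances(cfg, ["inst1.example.com"])
    assert wanted_names == ["inst1.example.com"]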
@@ -481,7 +481,7 @@
       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
 
     self.wanted_instances = \
-      map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
+      map(compat.snd, self.cfg.GetMultiInstanceInfoByName(self.wanted_names))
 
   def _EnsureChildSizes(self, disk):
     """Ensure children of the disk have the needed disk size.
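The lookup helper changes because wanted_names holds instance names while the configuration is now keyed by UUID; GetMultiInstanceInfoByName is the name-keyed variant. A stand-in sketch of the assumed (name, instance) pairs it yields and how compat.snd is used on them (everything below is illustrative, not the Ganeti API):

    import operator

    snd = operator.itemgetter(1)  # stand-in for compat.snd

    config = {"uuid-1": {"name": "inst1.example.com"},
              "uuid-2": {"name": "inst2.example.com"}}

    def get_multi_instance_info_by_name(names):
        # assumed to return [(name, instance), ...] for the given names
        return [(inst["name"], inst) for inst in config.values()
                if inst["name"] in names]

    wanted = list(map(snd,
                      get_multi_instance_info_by_name(["inst1.example.com"])))
    assert wanted == [{"name": "inst1.example.com"}]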
@@ -812,8 +812,8 @@
                                                for nuuid in inst.all_nodes)])
         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
         ipol = masterd.instance.CalculateGroupIPolicy(cluster, group)
-        new = ComputeNewInstanceViolations(ipol,
-                                           new_ipolicy, instances, self.cfg)
+        new = ComputeNewInstanceViolations(ipol, new_ipolicy, instances,
+                                           self.cfg)
         if new:
           violations.update(new)
 
@@ -1373,15 +1373,16 @@
 
     for inst in self.all_inst_info.values():
       if inst.primary_node in [node.uuid for node in dangling_nodes]:
-        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
+        dangling_instances.setdefault(inst.primary_node, []).append(inst)
       elif inst.primary_node not in self.all_node_info:
-        no_node_instances.append(inst.name)
+        no_node_instances.append(inst)
 
     pretty_dangling = [
         "%s (%s)" %
         (node.name,
-         utils.CommaJoin(dangling_instances.get(node.uuid,
-                                                ["no instances"])))
+         utils.CommaJoin(
+           self.cfg.GetInstanceNames(
+             dangling_instances.get(node.uuid, ["no instances"]))))
         for node in dangling_nodes]
 
     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
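The dangling-instance bookkeeping now carries instances rather than names, and names are resolved only when the error message is built. A stand-in sketch of that late resolution (the helper and data are hypothetical, not the Ganeti ConfigWriter):

    instances_by_uuid = {"uuid-1": "inst1.example.com"}

    def get_instance_names(keys):
        # stand-in for self.cfg.GetInstanceNames; unknown keys (such as
        # the "no instances" placeholder) are passed through unchanged
        return [instances_by_uuid.get(k, k) for k in keys]

    dangling = {"node-uuid-9": ["uuid-1"]}
    names = get_instance_names(dangling.get("node-uuid-9", ["no instances"]))
    pretty = "%s (%s)" % ("node9.example.com", ", ".join(names))
    assert pretty == "node9.example.com (inst1.example.com)"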
@@ -1392,7 +1393,8 @@
     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                   None,
                   "the following instances have a non-existing primary-node:"
-                  " %s", utils.CommaJoin(no_node_instances))
+                  " %s", utils.CommaJoin(
+                           self.cfg.GetInstanceNames(no_node_instances)))
 
     return not self.bad
 
@@ -1468,11 +1470,11 @@
     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
 
     # Get instances in node group; this is unsafe and needs verification later
-    inst_names = \
+    inst_uuids = \
       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
 
     self.needed_locks = {
-      locking.LEVEL_INSTANCE: inst_names,
+      locking.LEVEL_INSTANCE: self.cfg.GetInstanceNames(inst_uuids),
       locking.LEVEL_NODEGROUP: [self.group_uuid],
       locking.LEVEL_NODE: [],
 
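Locks are still acquired by instance name, while GetNodeGroupInstances now returns UUIDs, so the lock list is translated at the boundary. A stand-in sketch of that translation (hypothetical data and helper, not the Ganeti API):

    instances_by_uuid = {"uuid-1": "inst1.example.com",
                         "uuid-2": "inst2.example.com"}

    def get_instance_names(inst_uuids):
        # stand-in for self.cfg.GetInstanceNames
        return [instances_by_uuid[u] for u in inst_uuids]

    inst_uuids = ["uuid-1", "uuid-2"]
    needed_locks = {"LEVEL_INSTANCE": get_instance_names(inst_uuids)}
    assert needed_locks["LEVEL_INSTANCE"] == ["inst1.example.com",
                                              "inst2.example.com"]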
@@ -1489,17 +1491,16 @@
       # Get members of node group; this is unsafe and needs verification later
       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
 
-      all_inst_info = self.cfg.GetAllInstancesInfo()
-
       # In Exec(), we warn about mirrored instances that have primary and
       # secondary living in separate node groups. To fully verify that
       # volumes for these instances are healthy, we will need to do an
       # extra call to their secondaries. We ensure here those nodes will
       # be locked.
-      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
+      for inst_name in self.owned_locks(locking.LEVEL_INSTANCE):
         # Important: access only the instances whose lock is owned
-        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
-          nodes.update(all_inst_info[inst].secondary_nodes)
+        instance = self.cfg.GetInstanceInfoByName(inst_name)
+        if instance.disk_template in constants.DTS_INT_MIRROR:
+          nodes.update(instance.secondary_nodes)
 
       self.needed_locks[locking.LEVEL_NODE] = nodes
 
@@ -1508,14 +1509,16 @@
     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
 
     group_node_uuids = set(self.group_info.members)
-    group_instances = \
+    group_inst_uuids = \
       self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
 
     unlocked_node_uuids = \
       group_node_uuids.difference(self.owned_locks(locking.LEVEL_NODE))
 
-    unlocked_instances = \
-      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
+    unlocked_inst_uuids = \
+      group_inst_uuids.difference(
+        [self.cfg.GetInstanceInfoByName(name).uuid
+         for name in self.owned_locks(locking.LEVEL_INSTANCE)])
 
     if unlocked_node_uuids:
       raise errors.OpPrereqError(
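Owned instance locks are names and the group membership is UUIDs, so the missing-lock check maps the owned names back to UUIDs before taking the set difference. Stand-in sketch of the pattern this hunk introduces:

    uuid_by_name = {"inst1.example.com": "uuid-1",
                    "inst2.example.com": "uuid-2"}
    group_inst_uuids = set(["uuid-1", "uuid-2"])
    owned_lock_names = ["inst1.example.com"]

    # translate owned lock names into UUID space, then diff
    unlocked_inst_uuids = group_inst_uuids.difference(
        [uuid_by_name[name] for name in owned_lock_names])
    assert unlocked_inst_uuids == set(["uuid-2"])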
@@ -1523,10 +1526,11 @@
         utils.CommaJoin(self.cfg.GetNodeNames(unlocked_node_uuids)),
         errors.ECODE_STATE)
 
-    if unlocked_instances:
-      raise errors.OpPrereqError("Missing lock for instances: %s" %
-                                 utils.CommaJoin(unlocked_instances),
-                                 errors.ECODE_STATE)
+    if unlocked_inst_uuids:
+      raise errors.OpPrereqError(
+        "Missing lock for instances: %s" %
+        utils.CommaJoin(self.cfg.GetInstanceNames(unlocked_inst_uuids)),
+        errors.ECODE_STATE)
 
     self.all_node_info = self.cfg.GetAllNodesInfo()
     self.all_inst_info = self.cfg.GetAllInstancesInfo()
@@ -1535,9 +1539,9 @@
     self.my_node_info = dict((node_uuid, self.all_node_info[node_uuid])
                              for node_uuid in group_node_uuids)
 
-    self.my_inst_names = utils.NiceSort(group_instances)
-    self.my_inst_info = dict((name, self.all_inst_info[name])
-                             for name in self.my_inst_names)
+    self.my_inst_uuids = group_inst_uuids
+    self.my_inst_info = dict((inst_uuid, self.all_inst_info[inst_uuid])
+                             for inst_uuid in group_inst_uuids)
 
     # We detect here the nodes that will need the extra RPC calls for verifying
     # split LV volumes; they should be locked.
@@ -1817,8 +1821,7 @@
       msg = "cannot reach the master IP"
       self._ErrorIf(True, constants.CV_ENODENET, ninfo.name, msg)
 
-  def _VerifyInstance(self, instance, inst_config, node_image,
-                      diskstatus):
+  def _VerifyInstance(self, instance, node_image, diskstatus):
     """Verify an instance.
 
     This function checks to see if the required block devices are
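The separate inst_config parameter is gone: instance is now the objects.Instance itself, so name, UUID and configuration travel together on one object. A minimal stand-in sketch of the new shape (the Instance class and verify function below are illustrative only):

    class Instance(object):  # stand-in for objects.Instance
        def __init__(self, name, uuid, primary_node):
            self.name = name
            self.uuid = uuid
            self.primary_node = primary_node

    def verify_instance(instance, node_image, diskstatus):
        # name, UUID and configuration all come from the one object
        pnode_img = node_image[instance.primary_node]
        return instance.uuid in pnode_img

    node_image = {"node-uuid-1": set(["uuid-1"])}
    inst = Instance("inst1.example.com", "uuid-1", "node-uuid-1")
    assert verify_instance(inst, node_image, {})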
@@ -1826,39 +1829,39 @@
     state.
 
     """
-    pnode = inst_config.primary_node
-    pnode_img = node_image[pnode]
+    pnode_uuid = instance.primary_node
+    pnode_img = node_image[pnode_uuid]
     groupinfo = self.cfg.GetAllNodeGroupsInfo()
 
     node_vol_should = {}
-    inst_config.MapLVsByNode(node_vol_should)
+    instance.MapLVsByNode(node_vol_should)
 
     cluster = self.cfg.GetClusterInfo()
     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                             self.group_info)
-    err = ComputeIPolicyInstanceViolation(ipolicy, inst_config, self.cfg)
-    self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance,
+    err = ComputeIPolicyInstanceViolation(ipolicy, instance, self.cfg)
+    self._ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance.name,
                   utils.CommaJoin(err), code=self.ETYPE_WARNING)
 
-    for node in node_vol_should:
-      n_img = node_image[node]
+    for node_uuid in node_vol_should:
+      n_img = node_image[node_uuid]
       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
         # ignore missing volumes on offline or broken nodes
         continue
-      for volume in node_vol_should[node]:
+      for volume in node_vol_should[node_uuid]:
         test = volume not in n_img.volumes
-        self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
+        self._ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance.name,
                       "volume %s missing on node %s", volume,
-                      self.cfg.GetNodeName(node))
+                      self.cfg.GetNodeName(node_uuid))
 
-    if inst_config.admin_state == constants.ADMINST_UP:
-      test = instance not in pnode_img.instances and not pnode_img.offline
-      self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
+    if instance.admin_state == constants.ADMINST_UP:
+      test = instance.uuid not in pnode_img.instances and not pnode_img.offline
+      self._ErrorIf(test, constants.CV_EINSTANCEDOWN, instance.name,
                     "instance not running on its primary node %s",
-                    self.cfg.GetNodeName(pnode))
-      self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
-                    "instance is marked as running and lives on"
-                    " offline node %s", self.cfg.GetNodeName(pnode))
+                    self.cfg.GetNodeName(pnode_uuid))
+      self._ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE,
+                    instance.name, "instance is marked as running and lives on"
+                    " offline node %s", self.cfg.GetNodeName(pnode_uuid))
 
     diskdata = [(nname, success, status, idx)
                 for (nname, disks) in diskstatus.items()
@@ -1869,13 +1872,13 @@
         # node here
         snode = node_image[nname]
         bad_snode = snode.ghost or snode.offline
-        self._ErrorIf(inst_config.disks_active and
+        self._ErrorIf(instance.disks_active and
                       not success and not bad_snode,
-                      constants.CV_EINSTANCEFAULTYDISK, instance,
+                      constants.CV_EINSTANCEFAULTYDISK, instance.name,
                       "couldn't retrieve status for disk/%s on %s: %s",
                       idx, self.cfg.GetNodeName(nname), bdev_status)
 
-      if inst_config.disks_active and success and \
+      if instance.disks_active and success and \
         (bdev_status.is_degraded or
          bdev_status.ldisk_status != constants.LDS_OKAY):
         msg = "disk/%s on %s" % (idx, self.cfg.GetNodeName(nname))
@@ -1885,84 +1888,85 @@
         msg += "; state is '%s'" % \
                constants.LDS_NAMES[bdev_status.ldisk_status]
 
-        self._Error(constants.CV_EINSTANCEFAULTYDISK, instance, msg)
+        self._Error(constants.CV_EINSTANCEFAULTYDISK, instance.name, msg)
 
     self._ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
-                  constants.CV_ENODERPC, pnode, "instance %s, connection to"
-                  " primary node failed", instance)
-
-    self._ErrorIf(len(inst_config.secondary_nodes) > 1,
-                  constants.CV_EINSTANCELAYOUT,
-                  instance, "instance has multiple secondary nodes: %s",
-                  utils.CommaJoin(inst_config.secondary_nodes),
+                  constants.CV_ENODERPC, self.cfg.GetNodeName(pnode_uuid),
+                  "instance %s, connection to primary node failed",
+                  instance.name)
+
+    self._ErrorIf(len(instance.secondary_nodes) > 1,
+                  constants.CV_EINSTANCELAYOUT, instance.name,
+                  "instance has multiple secondary nodes: %s",
+                  utils.CommaJoin(instance.secondary_nodes),
                   code=self.ETYPE_WARNING)
 
-    es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
-                                               inst_config.all_nodes)
+    es_flags = rpc.GetExclusiveStorageForNodes(self.cfg, instance.all_nodes)
     if any(es_flags.values()):
-      if inst_config.disk_template not in constants.DTS_EXCL_STORAGE:
+      if instance.disk_template not in constants.DTS_EXCL_STORAGE:
        # Disk template not compatible with exclusive_storage: no instance
        # node should have the flag set
        es_nodes = [n
                    for (n, es) in es_flags.items()
                    if es]
-        self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance,
+        self._Error(constants.CV_EINSTANCEUNSUITABLENODE, instance.name,
                     "instance has template %s, which is not supported on nodes"
                     " that have exclusive storage set: %s",
-                    inst_config.disk_template,
+                    instance.disk_template,
                     utils.CommaJoin(self.cfg.GetNodeNames(es_nodes)))
-      for (idx, disk) in enumerate(inst_config.disks):
+      for (idx, disk) in enumerate(instance.disks):
         self._ErrorIf(disk.spindles is None,
-                      constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance,
+                      constants.CV_EINSTANCEMISSINGCFGPARAMETER, instance.name,
                       "number of spindles not configured for disk %s while"
                       " exclusive storage is enabled, try running"
                       " gnt-cluster repair-disk-sizes", idx)
 
-    if inst_config.disk_template in constants.DTS_INT_MIRROR:
-      instance_nodes = utils.NiceSort(inst_config.all_nodes)
+    if instance.disk_template in constants.DTS_INT_MIRROR:
+      instance_nodes = utils.NiceSort(instance.all_nodes)
       instance_groups = {}
 
-      for node in instance_nodes:
-        instance_groups.setdefault(self.all_node_info[node].group,
-                                   []).append(node)
+      for node_uuid in instance_nodes:
+        instance_groups.setdefault(self.all_node_info[node_uuid].group,
+                                   []).append(node_uuid)
 
       pretty_list = [
         "%s (group %s)" % (utils.CommaJoin(self.cfg.GetNodeNames(nodes)),
                            groupinfo[group].name)
         # Sort so that we always list the primary node first.
         for group, nodes in sorted(instance_groups.items(),
-                                   key=lambda (_, nodes): pnode in nodes,
+                                   key=lambda (_, nodes): pnode_uuid in nodes,
                                    reverse=True)]
 
       self._ErrorIf(len(instance_groups) > 1,
                     constants.CV_EINSTANCESPLITGROUPS,
-                    instance, "instance has primary and secondary nodes in"
+                    instance.name, "instance has primary and secondary nodes in"
                     " different groups: %s", utils.CommaJoin(pretty_list),
                     code=self.ETYPE_WARNING)
 
     inst_nodes_offline = []
-    for snode in inst_config.secondary_nodes:
+    for snode in instance.secondary_nodes:
       s_img = node_image[snode]
       self._ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
-                    snode, "instance %s, connection to secondary node failed",
-                    instance)
+                    self.cfg.GetNodeName(snode),
+                    "instance %s, connection to secondary node failed",
+                    instance.name)
 
       if s_img.offline:
         inst_nodes_offline.append(snode)
 
     # warn that the instance lives on offline nodes
-    self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
-                  "instance has offline secondary node(s) %s",
+    self._ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE,
+                  instance.name, "instance has offline secondary node(s) %s",
                   utils.CommaJoin(self.cfg.GetNodeNames(inst_nodes_offline)))
     # ... or ghost/non-vm_capable nodes
-    for node in inst_config.all_nodes:
-      self._ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
-                    instance, "instance lives on ghost node %s",
-                    self.cfg.GetNodeName(node))
-      self._ErrorIf(not node_image[node].vm_capable,
-                    constants.CV_EINSTANCEBADNODE, instance,
+    for node_uuid in instance.all_nodes:
+      self._ErrorIf(node_image[node_uuid].ghost, constants.CV_EINSTANCEBADNODE,
+                    instance.name, "instance lives on ghost node %s",
+                    self.cfg.GetNodeName(node_uuid))
+      self._ErrorIf(not node_image[node_uuid].vm_capable,
+                    constants.CV_EINSTANCEBADNODE, instance.name,
                     "instance lives on non-vm_capable node %s",
-                    self.cfg.GetNodeName(node))
+                    self.cfg.GetNodeName(node_uuid))
 
   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
     """Verify if there are any unknown volumes in the cluster.
@@ -1987,7 +1991,7 @@
                       self.cfg.GetNodeName(node_uuid),
                       "volume %s is unknown", volume)
 
-  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
+  def _VerifyNPlusOneMemory(self, node_image, all_insts):
     """Verify N+1 Memory Resilience.
 
     Check that if one single node dies we can still start all the
@@ -2012,10 +2016,10 @@
         # nodes, and that's enough warning
         continue
       #TODO(dynmem): also consider ballooning out other instances
-      for prinode, instances in n_img.sbp.items():
+      for prinode, inst_uuids in n_img.sbp.items():
         needed_mem = 0
-        for instance in instances:
-          bep = cluster_info.FillBE(instance_cfg[instance])
+        for inst_uuid in inst_uuids:
+          bep = cluster_info.FillBE(all_insts[inst_uuid])
           if bep[constants.BE_AUTO_BALANCE]:
             needed_mem += bep[constants.BE_MINMEM]
         test = n_img.mfree < needed_mem
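n_img.sbp now maps a primary node to the UUIDs of the instances it backs, and the renamed all_insts dict is the UUID-keyed lookup used when filling backend parameters. A stand-in sketch of the memory accounting (dicts below replace objects.Instance and cluster_info.FillBE):

    all_insts = {"uuid-1": {"auto_balance": True, "minmem": 128},
                 "uuid-2": {"auto_balance": False, "minmem": 256}}
    sbp = {"prinode-uuid": ["uuid-1", "uuid-2"]}  # secondaries by primary

    needed_mem = 0
    for inst_uuid in sbp["prinode-uuid"]:
        bep = all_insts[inst_uuid]  # stand-in for cluster_info.FillBE(...)
        if bep["auto_balance"]:
            needed_mem += bep["minmem"]
    assert needed_mem == 128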
@@ -2168,18 +2172,18 @@
 
     # compute the DRBD minors
     node_drbd = {}
-    for minor, instance in drbd_map[ninfo.uuid].items():
-      test = instance not in instanceinfo
+    for minor, inst_uuid in drbd_map[ninfo.uuid].items():
+      test = inst_uuid not in instanceinfo
       self._ErrorIf(test, constants.CV_ECLUSTERCFG, None,
-                    "ghost instance '%s' in temporary DRBD map", instance)
+                    "ghost instance '%s' in temporary DRBD map", inst_uuid)
       # ghost instance should not be running, but otherwise we
       # don't give double warnings (both ghost instance and
       # unallocated minor in use)
       if test:
-        node_drbd[minor] = (instance, False)
+        node_drbd[minor] = (inst_uuid, False)
       else:
-        instance = instanceinfo[instance]
-        node_drbd[minor] = (instance.name, instance.disks_active)
+        instance = instanceinfo[inst_uuid]
+        node_drbd[minor] = (inst_uuid, instance.disks_active)
 
     # and now check them
     used_minors = nresult.get(constants.NV_DRBDLIST, [])
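Both branches now store the instance UUID in node_drbd (previously the normal branch stored the name), so the map is uniformly keyed and names are resolved only at reporting time. Stand-in sketch of the mapping this hunk builds:

    instanceinfo = {"uuid-1": {"disks_active": True}}
    drbd_map = {0: "uuid-1", 1: "uuid-ghost"}

    node_drbd = {}
    for minor, inst_uuid in drbd_map.items():
        if inst_uuid not in instanceinfo:
            node_drbd[minor] = (inst_uuid, False)  # ghost instance
        else:
            node_drbd[minor] = (inst_uuid,
                                instanceinfo[inst_uuid]["disks_active"])
    assert node_drbd == {0: ("uuid-1", True), 1: ("uuid-ghost", False)}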
@@ -2190,10 +2194,11 @@
       # we cannot check drbd status
       return
 
-    for minor, (iname, must_exist) in node_drbd.items():
+    for minor, (inst_uuid, must_exist) in node_drbd.items():
       test = minor not in used_minors and must_exist
       self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
-                    "drbd minor %d of instance %s is not active", minor, iname)
+                    "drbd minor %d of instance %s is not active", minor,
+                    self.cfg.GetInstanceName(inst_uuid))
     for minor in used_minors:
       test = minor not in node_drbd
       self._ErrorIf(test, constants.CV_ENODEDRBD, ninfo.name,
@@ -2381,7 +2386,8 @@
     if test:
       nimg.hyp_fail = True
     else:
-      nimg.instances = idata
+      nimg.instances = [inst.uuid for (_, inst) in
+                        self.cfg.GetMultiInstanceInfoByName(idata)]
 
   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
     """Verifies and computes a node information map
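The hypervisor listing (idata) reports running instances by name, while the node image now stores UUIDs, hence the round-trip through the name-keyed multi-lookup. Stand-in sketch of that conversion (the helper and data are hypothetical):

    config = [("inst1.example.com", {"uuid": "uuid-1"}),
              ("inst2.example.com", {"uuid": "uuid-2"})]

    def get_multi_instance_info_by_name(names):
        # assumed to return (name, instance) pairs for the requested names
        return [(n, i) for (n, i) in config if n in names]

    idata = ["inst1.example.com"]  # names, as reported by the hypervisor
    instances = [inst["uuid"]
                 for (_, inst) in get_multi_instance_info_by_name(idata)]
    assert instances == ["uuid-1"]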
@@ -2424,9 +2430,9 @@
 
     @type node_uuids: list of strings
     @param node_uuids: Node UUIDs
-    @type node_image: dict of (name, L{objects.Node})
+    @type node_image: dict of (UUID, L{objects.Node})
     @param node_image: Node objects
-    @type instanceinfo: dict of (name, L{objects.Instance})
+    @type instanceinfo: dict of (UUID, L{objects.Instance})
     @param instanceinfo: Instance objects
     @rtype: {instance: {node: [(succes, payload)]}}
     @return: a dictionary of per-instance dictionaries with nodes as
@@ -2440,13 +2446,13 @@
     diskless = constants.DT_DISKLESS
 
     for nuuid in node_uuids:
-      node_instances = list(itertools.chain(node_image[nuuid].pinst,
-                                            node_image[nuuid].sinst))
-      diskless_instances.update(inst for inst in node_instances
-                                if instanceinfo[inst].disk_template == diskless)
-      disks = [(inst, disk)
-               for inst in node_instances
-               for disk in instanceinfo[inst].disks]
+      node_inst_uuids = list(itertools.chain(node_image[nuuid].pinst,
+                                             node_image[nuuid].sinst))
+      diskless_instances.update(uuid for uuid in node_inst_uuids
+                                if instanceinfo[uuid].disk_template == diskless)
+      disks = [(inst_uuid, disk)
+               for inst_uuid in node_inst_uuids
+               for disk in instanceinfo[inst_uuid].disks]
 
       if not disks:
         # No need to collect data
@@ -2456,8 +2462,9 @@
 
       # _AnnotateDiskParams makes already copies of the disks
       devonly = []
-      for (inst, dev) in disks:
-        (anno_disk,) = AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
+      for (inst_uuid, dev) in disks:
+        (anno_disk,) = AnnotateDiskParams(instanceinfo[inst_uuid], [dev],
+                                          self.cfg)
         self.cfg.SetDiskID(anno_disk, nuuid)
         devonly.append(anno_disk)
 
@@ -2497,13 +2504,14 @@
                       node.name, idx, i)
         data.append((False, "Invalid result from the remote node"))
 
-    for ((inst, _), status) in zip(disks, data):
-      instdisk.setdefault(inst, {}).setdefault(node.uuid, []).append(status)
+    for ((inst_uuid, _), status) in zip(disks, data):
+      instdisk.setdefault(inst_uuid, {}).setdefault(node.uuid, []) \
+        .append(status)
 
     # Add empty entries for diskless instances.
-    for inst in diskless_instances:
-      assert inst not in instdisk
-      instdisk[inst] = {}
+    for inst_uuid in diskless_instances:
+      assert inst_uuid not in instdisk
+      instdisk[inst_uuid] = {}
 
     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                       len(nuuids) <= len(instanceinfo[inst].all_nodes) and
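instdisk is now keyed by instance UUID end to end: the (inst_uuid, disk) pairs built earlier, the per-node status lists, and the empty entries for diskless instances all share the same key. Stand-in sketch of the resulting structure:

    disks = [("uuid-1", "disk/0"), ("uuid-1", "disk/1")]
    data = [(True, "ok"), (False, "degraded")]

    instdisk = {}
    for ((inst_uuid, _), status) in zip(disks, data):
        instdisk.setdefault(inst_uuid, {}).setdefault("node-uuid-1", []) \
          .append(status)
    for inst_uuid in ["uuid-diskless"]:  # diskless instances get empty entries
        instdisk[inst_uuid] = {}
    assert instdisk["uuid-1"]["node-uuid-1"] == [(True, "ok"),
                                                 (False, "degraded")]
    assert instdisk["uuid-diskless"] == {}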
@@ -2667,8 +2675,8 @@
     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
       bridges.add(default_nicpp[constants.NIC_LINK])
-    for instance in self.my_inst_info.values():
-      for nic in instance.nics:
+    for inst_uuid in self.my_inst_info.values():
+      for nic in inst_uuid.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])
@@ -2692,28 +2700,28 @@
     if oob_paths:
       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
 
-    for instance in self.my_inst_names:
-      inst_config = self.my_inst_info[instance]
-      if inst_config.admin_state == constants.ADMINST_OFFLINE:
+    for inst_uuid in self.my_inst_uuids:
+      instance = self.my_inst_info[inst_uuid]
+      if instance.admin_state == constants.ADMINST_OFFLINE:
         i_offline += 1
 
-      for nuuid in inst_config.all_nodes:
+      for nuuid in instance.all_nodes:
         if nuuid not in node_image:
           gnode = self.NodeImage(uuid=nuuid)
           gnode.ghost = (nuuid not in self.all_node_info)
           node_image[nuuid] = gnode
 
-      inst_config.MapLVsByNode(node_vol_should)
+      instance.MapLVsByNode(node_vol_should)
 
-      pnode = inst_config.primary_node
-      node_image[pnode].pinst.append(instance)
+      pnode = instance.primary_node
+      node_image[pnode].pinst.append(instance.uuid)
 
-      for snode in inst_config.secondary_nodes:
+      for snode in instance.secondary_nodes:
         nimg = node_image[snode]
-        nimg.sinst.append(instance)
+        nimg.sinst.append(instance.uuid)
         if pnode not in nimg.sbp:
           nimg.sbp[pnode] = []
-        nimg.sbp[pnode].append(instance)
+        nimg.sbp[pnode].append(instance.uuid)
 
     es_flags = rpc.GetExclusiveStorageForNodes(self.cfg,
                                                self.my_node_info.keys())
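With this hunk the node images track instances purely by UUID: pinst, sinst and the sbp (secondaries-by-primary) map all append instance.uuid. Stand-in sketch of the bookkeeping (the NodeImage class below is a hypothetical stand-in for the LU's per-node image):

    class NodeImage(object):  # stand-in for the LU's per-node image
        def __init__(self):
            self.pinst = []   # primary instances, by UUID
            self.sinst = []   # secondary instances, by UUID
            self.sbp = {}     # secondary instances grouped by primary node

    node_image = {"pnode-uuid": NodeImage(), "snode-uuid": NodeImage()}
    inst_uuid, pnode, snodes = "uuid-1", "pnode-uuid", ["snode-uuid"]

    node_image[pnode].pinst.append(inst_uuid)
    for snode in snodes:
        nimg = node_image[snode]
        nimg.sinst.append(inst_uuid)
        nimg.sbp.setdefault(pnode, []).append(inst_uuid)
    assert node_image["snode-uuid"].sbp == {"pnode-uuid": ["uuid-1"]}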
@@ -2842,14 +2850,15 @@
       self._VerifyNodeOS(node_i, nimg, refos_img)
       self._VerifyNodeBridges(node_i, nresult, bridges)
 
-      # Check whether all running instancies are primary for the node. (This
+      # Check whether all running instances are primary for the node. (This
       # can no longer be done from _VerifyInstance below, since some of the
       # wrong instances could be from other node groups.)
-      non_primary_inst = set(nimg.instances).difference(nimg.pinst)
+      non_primary_inst_uuids = set(nimg.instances).difference(nimg.pinst)
 
-      for inst in non_primary_inst:
-        test = inst in self.all_inst_info
-        self._ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
+      for inst_uuid in non_primary_inst_uuids:
+        test = inst_uuid in self.all_inst_info
+        self._ErrorIf(test, constants.CV_EINSTANCEWRONGNODE,
+                      self.cfg.GetInstanceName(inst_uuid),
                       "instance should not run on node %s", node_i.name)
         self._ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
-                      "node is running unknown instance %s", inst)
+                      "node is running unknown instance %s", inst_uuid)
@@ -2862,19 +2871,18 @@
                             node_image[node_uuid], vg_name)
 
     feedback_fn("* Verifying instance status")
-    for instance in self.my_inst_names:
+    for inst_uuid in self.my_inst_uuids:
+      instance = self.my_inst_info[inst_uuid]
       if verbose:
-        feedback_fn("* Verifying instance %s" % instance)
-      inst_config = self.my_inst_info[instance]
-      self._VerifyInstance(instance, inst_config, node_image,
-                           instdisk[instance])
+        feedback_fn("* Verifying instance %s" % instance.name)
+      self._VerifyInstance(instance, node_image, instdisk[inst_uuid])
 
       # If the instance is non-redundant we cannot survive losing its primary
       # node, so we are not N+1 compliant.
-      if inst_config.disk_template not in constants.DTS_MIRRORED:
+      if instance.disk_template not in constants.DTS_MIRRORED:
         i_non_redundant.append(instance)
 
-      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
+      if not cluster.FillBE(instance)[constants.BE_AUTO_BALANCE]:
         i_non_a_balanced.append(instance)
 
     feedback_fn("* Verifying orphan volumes")
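The verification loop now iterates my_inst_uuids, resolves the objects.Instance once from the UUID-keyed my_inst_info, and passes the object itself to _VerifyInstance. Stand-in sketch of the loop shape (dicts replace the real Instance objects and constants):

    my_inst_uuids = ["uuid-1"]
    my_inst_info = {"uuid-1": {"name": "inst1.example.com",
                               "disk_template": "plain"}}
    DTS_MIRRORED = set(["drbd"])  # stand-in for constants.DTS_MIRRORED

    i_non_redundant = []
    for inst_uuid in my_inst_uuids:
        instance = my_inst_info[inst_uuid]  # resolve the object once
        if instance["disk_template"] not in DTS_MIRRORED:
            i_non_redundant.append(instance)
    assert [i["name"] for i in i_non_redundant] == ["inst1.example.com"]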
@@ -2883,11 +2891,11 @@
     # We will get spurious "unknown volume" warnings if any node of this group
     # is secondary for an instance whose primary is in another group. To avoid
     # them, we find these instances and add their volumes to node_vol_should.
-    for inst in self.all_inst_info.values():
-      for secondary in inst.secondary_nodes:
+    for instance in self.all_inst_info.values():
+      for secondary in instance.secondary_nodes:
         if (secondary in self.my_node_info
-            and inst.name not in self.my_inst_info):
-          inst.MapLVsByNode(node_vol_should)
+            and instance.name not in self.my_inst_info):
+          instance.MapLVsByNode(node_vol_should)
           break
 
     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)