Revision 39eb39dc lib/cmdlib.py
```diff
--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -2610,24 +2610,27 @@
           msg = "cannot reach the master IP"
         _ErrorIf(True, constants.CV_ENODENET, node, msg)
 
-  def _VerifyInstance(self, instance, instanceconfig, node_image,
+  def _VerifyInstance(self, instance, inst_config, node_image,
                       diskstatus):
     """Verify an instance.
 
     This function checks to see if the required block devices are
-    available on the instance's node.
+    available on the instance's node, and that the nodes are in the correct
+    state.
 
     """
     _ErrorIf = self._ErrorIf # pylint: disable=C0103
-    node_current = instanceconfig.primary_node
+    pnode = inst_config.primary_node
+    pnode_img = node_image[pnode]
+    groupinfo = self.cfg.GetAllNodeGroupsInfo()
 
     node_vol_should = {}
-    instanceconfig.MapLVsByNode(node_vol_should)
+    inst_config.MapLVsByNode(node_vol_should)
 
     cluster = self.cfg.GetClusterInfo()
     ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                             self.group_info)
-    err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
+    err = _ComputeIPolicyInstanceViolation(ipolicy, inst_config)
     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err),
              code=self.ETYPE_WARNING)
 
```
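In the hunk above, `err` holds the instance-policy violations computed by `_ComputeIPolicyInstanceViolation` and is passed directly as the `_ErrorIf` condition, so the `CV_EINSTANCEPOLICY` warning fires only when that collection is non-empty. A minimal sketch of the truthiness pattern, with a made-up violation message (the value and output format are illustrative, not Ganeti's):

```python
# Illustrative only: an empty list of violations is falsy, so nothing is
# reported; any violation makes the condition true. The message text below
# is invented for the example.
violations = ["memory size 128 not in range [256, 32768]"]
if violations:  # the same truthiness test _ErrorIf(err, ...) relies on
  print("WARNING EINSTANCEPOLICY inst1: %s" % ", ".join(violations))
```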
```diff
@@ -2641,12 +2644,14 @@
         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                  "volume %s missing on node %s", volume, node)
 
-    if instanceconfig.admin_state == constants.ADMINST_UP:
-      pri_img = node_image[node_current]
-      test = instance not in pri_img.instances and not pri_img.offline
+    if inst_config.admin_state == constants.ADMINST_UP:
+      test = instance not in pnode_img.instances and not pnode_img.offline
       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
                "instance not running on its primary node %s",
-               node_current)
+               pnode)
+      _ErrorIf(pnode_img.offline, constants.CV_EINSTANCEBADNODE, instance,
+               "instance is marked as running and lives on offline node %s",
+               pnode)
 
     diskdata = [(nname, success, status, idx)
                 for (nname, disks) in diskstatus.items()
```
```diff
@@ -2657,16 +2662,68 @@
       # node here
       snode = node_image[nname]
       bad_snode = snode.ghost or snode.offline
-      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
+      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
-      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
+      _ErrorIf((inst_config.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)
 
+    _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
+             constants.CV_ENODERPC, pnode, "instance %s, connection to"
+             " primary node failed", instance)
+
+    _ErrorIf(len(inst_config.secondary_nodes) > 1,
+             constants.CV_EINSTANCELAYOUT,
+             instance, "instance has multiple secondary nodes: %s",
+             utils.CommaJoin(inst_config.secondary_nodes),
+             code=self.ETYPE_WARNING)
+
+    if inst_config.disk_template in constants.DTS_INT_MIRROR:
+      instance_nodes = utils.NiceSort(inst_config.all_nodes)
+      instance_groups = {}
+
+      for node in instance_nodes:
+        instance_groups.setdefault(self.all_node_info[node].group,
+                                   []).append(node)
+
+      pretty_list = [
+        "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
+        # Sort so that we always list the primary node first.
+        for group, nodes in sorted(instance_groups.items(),
+                                   key=lambda (_, nodes): pnode in nodes,
+                                   reverse=True)]
+
+      self._ErrorIf(len(instance_groups) > 1,
+                    constants.CV_EINSTANCESPLITGROUPS,
+                    instance, "instance has primary and secondary nodes in"
+                    " different groups: %s", utils.CommaJoin(pretty_list),
+                    code=self.ETYPE_WARNING)
+
+    inst_nodes_offline = []
+    for snode in inst_config.secondary_nodes:
+      s_img = node_image[snode]
+      _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
+               snode, "instance %s, connection to secondary node failed",
+               instance)
+
+      if s_img.offline:
+        inst_nodes_offline.append(snode)
+
+    # warn that the instance lives on offline nodes
+    _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
+             "instance has offline secondary node(s) %s",
+             utils.CommaJoin(inst_nodes_offline))
+    # ... or ghost/non-vm_capable nodes
+    for node in inst_config.all_nodes:
+      _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
+               instance, "instance lives on ghost node %s", node)
+      _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
+               instance, "instance lives on non-vm_capable node %s", node)
+
   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
     """Verify if there are any unknown volumes in the cluster.
 
```
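With this change `_VerifyInstance` itself performs the node-state checks that previously lived in the caller: primary-node RPC reachability, the warning for multiple secondary nodes, the split-groups check for `DTS_INT_MIRROR` disk templates, and the offline/ghost/non-vm_capable warnings. The snippet below is a minimal, standalone sketch of the split-groups logic only; the function and parameter names are illustrative, and plain dicts plus a returned message stand in for Ganeti's node objects and `_ErrorIf` reporting.

```python
# Standalone sketch of the split-groups check (not Ganeti code): group the
# instance's nodes by node group and complain when more than one group is
# involved, listing the primary node's group first.

def check_split_groups(instance, pnode, all_nodes, node_groups):
  """Return a warning string if `instance` spans several node groups.

  node_groups maps node name -> group name (a stand-in for
  self.all_node_info[node].group in the patch above).
  """
  instance_groups = {}
  for node in sorted(all_nodes):
    instance_groups.setdefault(node_groups[node], []).append(node)

  if len(instance_groups) <= 1:
    return None

  # Sort so the group containing the primary node comes first, mirroring
  # the key=.../reverse=True sort in the patch.
  pretty_list = [
    "%s (group %s)" % (", ".join(nodes), group)
    for group, nodes in sorted(instance_groups.items(),
                               key=lambda item: pnode in item[1],
                               reverse=True)]
  return ("instance %s has primary and secondary nodes in different"
          " groups: %s" % (instance, ", ".join(pretty_list)))


print(check_split_groups("inst1", "node1", ["node1", "node2"],
                         {"node1": "group-a", "node2": "group-b"}))
```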
```diff
@@ -3305,7 +3362,6 @@
     vg_name = self.cfg.GetVGName()
     drbd_helper = self.cfg.GetDRBDHelper()
     cluster = self.cfg.GetClusterInfo()
-    groupinfo = self.cfg.GetAllNodeGroupsInfo()
     hypervisors = cluster.enabled_hypervisors
     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
 
```
```diff
@@ -3584,76 +3640,15 @@
       inst_config = self.my_inst_info[instance]
       self._VerifyInstance(instance, inst_config, node_image,
                            instdisk[instance])
-      inst_nodes_offline = []
-
-      pnode = inst_config.primary_node
-      pnode_img = node_image[pnode]
-      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
-               constants.CV_ENODERPC, pnode, "instance %s, connection to"
-               " primary node failed", instance)
-
-      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
-               pnode_img.offline,
-               constants.CV_EINSTANCEBADNODE, instance,
-               "instance is marked as running and lives on offline node %s",
-               inst_config.primary_node)
 
       # If the instance is non-redundant we cannot survive losing its primary
       # node, so we are not N+1 compliant.
       if inst_config.disk_template not in constants.DTS_MIRRORED:
         i_non_redundant.append(instance)
 
-      _ErrorIf(len(inst_config.secondary_nodes) > 1,
-               constants.CV_EINSTANCELAYOUT,
-               instance, "instance has multiple secondary nodes: %s",
-               utils.CommaJoin(inst_config.secondary_nodes),
-               code=self.ETYPE_WARNING)
-
-      if inst_config.disk_template in constants.DTS_INT_MIRROR:
-        pnode = inst_config.primary_node
-        instance_nodes = utils.NiceSort(inst_config.all_nodes)
-        instance_groups = {}
-
-        for node in instance_nodes:
-          instance_groups.setdefault(self.all_node_info[node].group,
-                                     []).append(node)
-
-        pretty_list = [
-          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
-          # Sort so that we always list the primary node first.
-          for group, nodes in sorted(instance_groups.items(),
-                                     key=lambda (_, nodes): pnode in nodes,
-                                     reverse=True)]
-
-        self._ErrorIf(len(instance_groups) > 1,
-                      constants.CV_EINSTANCESPLITGROUPS,
-                      instance, "instance has primary and secondary nodes in"
-                      " different groups: %s", utils.CommaJoin(pretty_list),
-                      code=self.ETYPE_WARNING)
-
       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
         i_non_a_balanced.append(instance)
 
-      for snode in inst_config.secondary_nodes:
-        s_img = node_image[snode]
-        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
-                 snode, "instance %s, connection to secondary node failed",
-                 instance)
-
-        if s_img.offline:
-          inst_nodes_offline.append(snode)
-
-      # warn that the instance lives on offline nodes
-      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
-               "instance has offline secondary node(s) %s",
-               utils.CommaJoin(inst_nodes_offline))
-      # ... or ghost/non-vm_capable nodes
-      for node in inst_config.all_nodes:
-        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
-                 instance, "instance lives on ghost node %s", node)
-        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
-                 instance, "instance lives on non-vm_capable node %s", node)
-
     feedback_fn("* Verifying orphan volumes")
     reserved = utils.FieldSet(*cluster.reserved_lvs)
 
```