Revision d0d7d7cf lib/cmdlib/instance_migration.py

--- a/lib/cmdlib/instance_migration.py
+++ b/lib/cmdlib/instance_migration.py
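
This revision is one mechanical refactoring: the tasklet methods below stop
caching self.instance (and, in some hunks, the source/target node UUIDs) in
local variables and read the attributes directly, re-wrapping call sites
where the longer expressions no longer fit the line length. A minimal sketch
of the pattern, using toy classes rather than the Ganeti API:

    # Toy sketch of the alias-removal pattern; FakeInstance and Tasklet are
    # illustrative stand-ins, not Ganeti classes.
    class FakeInstance(object):
      def __init__(self, name, primary_node):
        self.name = name
        self.primary_node = primary_node

    class Tasklet(object):
      def __init__(self, inst):
        self.instance = inst

      def describe_old(self):
        instance = self.instance  # local alias, dropped by this revision
        return "%s/%s" % (instance.name, instance.primary_node)

      def describe_new(self):
        # direct attribute access, as the new code reads
        return "%s/%s" % (self.instance.name, self.instance.primary_node)

    t = Tasklet(FakeInstance("web1", "node-uuid-1"))
    assert t.describe_old() == t.describe_new()
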
@@ -281,28 +281,27 @@
 
     """
     instance_name = ExpandInstanceName(self.lu.cfg, self.instance_name)
-    instance = self.cfg.GetInstanceInfo(instance_name)
-    assert instance is not None
-    self.instance = instance
+    self.instance = self.cfg.GetInstanceInfo(instance_name)
+    assert self.instance is not None
     cluster = self.cfg.GetClusterInfo()
 
     if (not self.cleanup and
-        not instance.admin_state == constants.ADMINST_UP and
+        not self.instance.admin_state == constants.ADMINST_UP and
         not self.failover and self.fallback):
       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                       " switching to failover")
       self.failover = True
 
-    if instance.disk_template not in constants.DTS_MIRRORED:
+    if self.instance.disk_template not in constants.DTS_MIRRORED:
       if self.failover:
         text = "failovers"
       else:
         text = "migrations"
       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
-                                 " %s" % (instance.disk_template, text),
+                                 " %s" % (self.instance.disk_template, text),
                                  errors.ECODE_STATE)
 
-    if instance.disk_template in constants.DTS_EXT_MIRROR:
+    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
       CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
 
       if self.lu.op.iallocator:
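
The hunk above also carries the fallback decision applied before any other
check: a requested migration degrades to a failover only when the instance is
not running, no failover was requested in the first place, and the caller
allowed falling back. As a standalone predicate (the function itself is
illustrative; the flag names mirror the hunk):

    def should_switch_to_failover(cleanup, admin_up, failover, fallback):
      # mirrors: not cleanup and not admin_up and not failover and fallback
      return not cleanup and not admin_up and not failover and fallback

    assert should_switch_to_failover(False, False, False, True)
    assert not should_switch_to_failover(False, True, False, True)
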
@@ -318,33 +317,34 @@
         group_info = self.cfg.GetNodeGroup(nodeinfo.group)
         ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                                 group_info)
-        CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
-                               ignore=self.ignore_ipolicy)
+        CheckTargetNodeIPolicy(self.lu, ipolicy, self.instance, nodeinfo,
+                               self.cfg, ignore=self.ignore_ipolicy)
 
       # self.target_node is already populated, either directly or by the
       # iallocator run
       target_node_uuid = self.target_node_uuid
-      if self.target_node_uuid == instance.primary_node:
+      if self.target_node_uuid == self.instance.primary_node:
         raise errors.OpPrereqError(
           "Cannot migrate instance %s to its primary (%s)" %
-          (instance.name, self.cfg.GetNodeName(instance.primary_node)),
+          (self.instance.name,
+           self.cfg.GetNodeName(self.instance.primary_node)),
           errors.ECODE_STATE)
 
       if len(self.lu.tasklets) == 1:
         # It is safe to release locks only when we're the only tasklet
         # in the LU
         ReleaseLocks(self.lu, locking.LEVEL_NODE,
-                     keep=[instance.primary_node, self.target_node_uuid])
+                     keep=[self.instance.primary_node, self.target_node_uuid])
         ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
 
     else:
       assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
 
-      secondary_node_uuids = instance.secondary_nodes
+      secondary_node_uuids = self.instance.secondary_nodes
       if not secondary_node_uuids:
         raise errors.ConfigurationError("No secondary node but using"
                                         " %s disk template" %
-                                        instance.disk_template)
+                                        self.instance.disk_template)
       target_node_uuid = secondary_node_uuids[0]
       if self.lu.op.iallocator or \
         (self.lu.op.target_node_uuid and
@@ -357,24 +357,26 @@
                                    " be %s to arbitrary nodes"
                                    " (neither an iallocator nor a target"
                                    " node can be passed)" %
-                                   (instance.disk_template, text),
+                                   (self.instance.disk_template, text),
                                    errors.ECODE_INVAL)
       nodeinfo = self.cfg.GetNodeInfo(target_node_uuid)
       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
       ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                               group_info)
-      CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
-                             ignore=self.ignore_ipolicy)
+      CheckTargetNodeIPolicy(self.lu, ipolicy, self.instance, nodeinfo,
+                             self.cfg, ignore=self.ignore_ipolicy)
 
-    i_be = cluster.FillBE(instance)
+    i_be = cluster.FillBE(self.instance)
 
     # check memory requirements on the secondary node
     if (not self.cleanup and
-        (not self.failover or instance.admin_state == constants.ADMINST_UP)):
+        (not self.failover or
+         self.instance.admin_state == constants.ADMINST_UP)):
       self.tgt_free_mem = CheckNodeFreeMemory(
-        self.lu, target_node_uuid, "migrating instance %s" % instance.name,
-        i_be[constants.BE_MINMEM], instance.hypervisor,
-        self.cfg.GetClusterInfo().hvparams[instance.hypervisor])
+        self.lu, target_node_uuid,
+        "migrating instance %s" % self.instance.name,
+        i_be[constants.BE_MINMEM], self.instance.hypervisor,
+        self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor])
     else:
       self.lu.LogInfo("Not checking memory on the secondary node as"
                       " instance will not be started")
@@ -387,13 +389,14 @@
       self.failover = True
 
     # check bridge existance
-    CheckInstanceBridgesExist(self.lu, instance, node_uuid=target_node_uuid)
+    CheckInstanceBridgesExist(self.lu, self.instance,
+                              node_uuid=target_node_uuid)
 
     if not self.cleanup:
       CheckNodeNotDrained(self.lu, target_node_uuid)
       if not self.failover:
-        result = self.rpc.call_instance_migratable(instance.primary_node,
-                                                   instance)
+        result = self.rpc.call_instance_migratable(self.instance.primary_node,
+                                                   self.instance)
         if result.fail_msg and self.fallback:
           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                           " failover")
@@ -429,10 +432,10 @@
 
     if not (self.failover or self.cleanup):
       remote_info = self.rpc.call_instance_info(
-          instance.primary_node, instance.name, instance.hypervisor,
-          cluster.hvparams[instance.hypervisor])
+          self.instance.primary_node, self.instance.name,
+          self.instance.hypervisor, cluster.hvparams[self.instance.hypervisor])
       remote_info.Raise("Error checking instance on node %s" %
-                        self.cfg.GetNodeName(instance.primary_node))
+                        self.cfg.GetNodeName(self.instance.primary_node))
       instance_running = bool(remote_info.payload)
       if instance_running:
         self.current_mem = int(remote_info.payload["memory"])
@@ -544,23 +547,21 @@
     - wait again until disks are fully synchronized
 
     """
-    instance = self.instance
-    target_node_uuid = self.target_node_uuid
-    source_node_uuid = self.source_node_uuid
-
     # check running on only one node
     self.feedback_fn("* checking where the instance actually runs"
                      " (if this hangs, the hypervisor might be in"
                      " a bad state)")
     cluster_hvparams = self.cfg.GetClusterInfo().hvparams
     ins_l = self.rpc.call_instance_list(self.all_node_uuids,
-                                        [instance.hypervisor],
+                                        [self.instance.hypervisor],
                                         cluster_hvparams)
     for node_uuid, result in ins_l.items():
       result.Raise("Can't contact node %s" % node_uuid)
 
-    runningon_source = instance.name in ins_l[source_node_uuid].payload
-    runningon_target = instance.name in ins_l[target_node_uuid].payload
+    runningon_source = self.instance.name in \
+                         ins_l[self.source_node_uuid].payload
+    runningon_target = self.instance.name in \
+                         ins_l[self.target_node_uuid].payload
 
     if runningon_source and runningon_target:
       raise errors.OpExecError("Instance seems to be running on two nodes,"
@@ -578,17 +579,17 @@
       # the migration has actually succeeded, we need to update the config
       self.feedback_fn("* instance running on secondary node (%s),"
                        " updating config" %
-                       self.cfg.GetNodeName(target_node_uuid))
-      instance.primary_node = target_node_uuid
-      self.cfg.Update(instance, self.feedback_fn)
-      demoted_node_uuid = source_node_uuid
+                       self.cfg.GetNodeName(self.target_node_uuid))
+      self.instance.primary_node = self.target_node_uuid
+      self.cfg.Update(self.instance, self.feedback_fn)
+      demoted_node_uuid = self.source_node_uuid
     else:
       self.feedback_fn("* instance confirmed to be running on its"
                        " primary node (%s)" %
-                       self.cfg.GetNodeName(source_node_uuid))
-      demoted_node_uuid = target_node_uuid
+                       self.cfg.GetNodeName(self.source_node_uuid))
+      demoted_node_uuid = self.target_node_uuid
 
-    if instance.disk_template in constants.DTS_INT_MIRROR:
+    if self.instance.disk_template in constants.DTS_INT_MIRROR:
       self._EnsureSecondary(demoted_node_uuid)
       try:
         self._WaitUntilSync()
@@ -623,11 +624,9 @@
     """Call the hypervisor code to abort a started migration.
 
     """
-    instance = self.instance
-    migration_info = self.migration_info
-
     abort_result = self.rpc.call_instance_finalize_migration_dst(
-                     self.target_node_uuid, instance, migration_info, False)
+                     self.target_node_uuid, self.instance, self.migration_info,
+                     False)
     abort_msg = abort_result.fail_msg
     if abort_msg:
       logging.error("Aborting migration failed on target node %s: %s",
@@ -636,7 +635,7 @@
     # disk status, even if this step failed.
 
     abort_result = self.rpc.call_instance_finalize_migration_src(
-                     self.source_node_uuid, instance, False, self.live)
+                     self.source_node_uuid, self.instance, False, self.live)
     abort_msg = abort_result.fail_msg
     if abort_msg:
       logging.error("Aborting migration failed on source node %s: %s",
@@ -654,20 +653,17 @@
     - change disks into single-master mode
 
     """
-    instance = self.instance
-    target_node_uuid = self.target_node_uuid
-    source_node_uuid = self.source_node_uuid
-
     # Check for hypervisor version mismatch and warn the user.
-    hvspecs = [(instance.hypervisor,
-                self.cfg.GetClusterInfo().hvparams[instance.hypervisor])]
-    nodeinfo = self.rpc.call_node_info([source_node_uuid, target_node_uuid],
-                                       None, hvspecs, False)
+    hvspecs = [(self.instance.hypervisor,
+                self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor])]
+    nodeinfo = self.rpc.call_node_info(
+      [self.source_node_uuid, self.target_node_uuid], None, hvspecs,
+      False)
     for ninfo in nodeinfo.values():
       ninfo.Raise("Unable to retrieve node information from node '%s'" %
                   ninfo.node)
-    (_, _, (src_info, )) = nodeinfo[source_node_uuid].payload
-    (_, _, (dst_info, )) = nodeinfo[target_node_uuid].payload
+    (_, _, (src_info, )) = nodeinfo[self.source_node_uuid].payload
+    (_, _, (dst_info, )) = nodeinfo[self.target_node_uuid].payload
 
     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
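
Immediately after this check, the code compares the two reported hypervisor
versions and only warns on a mismatch. A sketch of that comparison (the key
name below is an assumption standing in for constants.HV_NODEINFO_KEY_VERSION,
and the payload layout is simplified):

    def warn_on_hv_version_mismatch(src_info, dst_info, warn,
                                    key="hv_version"):  # key name assumed
      if key in src_info and key in dst_info:
        src, dst = src_info[key], dst_info[key]
        if src != dst:
          warn("hypervisor version mismatch between source (%s) and"
               " target (%s) node" % (src, dst))
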
@@ -679,8 +675,9 @@
                        (src_version, dst_version))
 
     self.feedback_fn("* checking disk consistency between source and target")
-    for (idx, dev) in enumerate(instance.disks):
-      if not CheckDiskConsistency(self.lu, instance, dev, target_node_uuid,
+    for (idx, dev) in enumerate(self.instance.disks):
+      if not CheckDiskConsistency(self.lu, self.instance, dev,
+                                  self.target_node_uuid,
                                   False):
         raise errors.OpExecError("Disk %s is degraded or not fully"
                                  " synchronized on target node,"
@@ -691,21 +688,21 @@
       raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                " free memory to fit instance %s on target"
                                " node %s (have %dMB, need %dMB)" %
-                               (instance.name,
-                                self.cfg.GetNodeName(target_node_uuid),
+                               (self.instance.name,
+                                self.cfg.GetNodeName(self.target_node_uuid),
                                 self.tgt_free_mem, self.current_mem))
     self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
-    rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
-                                                   instance,
+    rpcres = self.rpc.call_instance_balloon_memory(self.instance.primary_node,
+                                                   self.instance,
                                                    self.tgt_free_mem)
     rpcres.Raise("Cannot modify instance runtime memory")
 
     # First get the migration information from the remote node
-    result = self.rpc.call_migration_info(source_node_uuid, instance)
+    result = self.rpc.call_migration_info(self.source_node_uuid, self.instance)
     msg = result.fail_msg
     if msg:
       log_err = ("Failed fetching source migration information from %s: %s" %
-                 (self.cfg.GetNodeName(source_node_uuid), msg))
+                 (self.cfg.GetNodeName(self.source_node_uuid), msg))
       logging.error(log_err)
       raise errors.OpExecError(log_err)
 
@@ -713,17 +710,17 @@
 
     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
       # Then switch the disks to master/master mode
-      self._EnsureSecondary(target_node_uuid)
+      self._EnsureSecondary(self.target_node_uuid)
       self._GoStandalone()
       self._GoReconnect(True)
       self._WaitUntilSync()
 
     self.feedback_fn("* preparing %s to accept the instance" %
-                     self.cfg.GetNodeName(target_node_uuid))
-    result = self.rpc.call_accept_instance(target_node_uuid,
-                                           instance,
+                     self.cfg.GetNodeName(self.target_node_uuid))
+    result = self.rpc.call_accept_instance(self.target_node_uuid,
+                                           self.instance,
                                            migration_info,
-                                           self.nodes_ip[target_node_uuid])
+                                           self.nodes_ip[self.target_node_uuid])
 
     msg = result.fail_msg
     if msg:
@@ -733,14 +730,14 @@
       self._AbortMigration()
       self._RevertDiskStatus()
       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
-                               (instance.name, msg))
+                               (self.instance.name, msg))
 
     self.feedback_fn("* migrating instance to %s" %
-                     self.cfg.GetNodeName(target_node_uuid))
+                     self.cfg.GetNodeName(self.target_node_uuid))
     cluster = self.cfg.GetClusterInfo()
     result = self.rpc.call_instance_migrate(
-        source_node_uuid, cluster.cluster_name, instance,
-        self.nodes_ip[target_node_uuid], self.live)
+        self.source_node_uuid, cluster.cluster_name, self.instance,
+        self.nodes_ip[self.target_node_uuid], self.live)
     msg = result.fail_msg
     if msg:
       logging.error("Instance migration failed, trying to revert"
@@ -749,13 +746,13 @@
       self._AbortMigration()
       self._RevertDiskStatus()
       raise errors.OpExecError("Could not migrate instance %s: %s" %
-                               (instance.name, msg))
+                               (self.instance.name, msg))
 
     self.feedback_fn("* starting memory transfer")
     last_feedback = time.time()
     while True:
-      result = self.rpc.call_instance_get_migration_status(source_node_uuid,
-                                                           instance)
+      result = self.rpc.call_instance_get_migration_status(
+                 self.source_node_uuid, self.instance)
       msg = result.fail_msg
       ms = result.payload   # MigrationStatus instance
       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
@@ -767,7 +764,7 @@
         if not msg:
           msg = "hypervisor returned failure"
         raise errors.OpExecError("Could not migrate instance %s: %s" %
-                                 (instance.name, msg))
+                                 (self.instance.name, msg))
 
       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
         self.feedback_fn("* memory transfer complete")
@@ -782,10 +779,8 @@
 
       time.sleep(self._MIGRATION_POLL_INTERVAL)
 
-    result = self.rpc.call_instance_finalize_migration_src(source_node_uuid,
-                                                           instance,
-                                                           True,
-                                                           self.live)
+    result = self.rpc.call_instance_finalize_migration_src(
+               self.source_node_uuid, self.instance, True, self.live)
     msg = result.fail_msg
     if msg:
       logging.error("Instance migration succeeded, but finalization failed"
@@ -793,15 +788,13 @@
       raise errors.OpExecError("Could not finalize instance migration: %s" %
                                msg)
 
-    instance.primary_node = target_node_uuid
+    self.instance.primary_node = self.target_node_uuid
 
     # distribute new instance config to the other nodes
-    self.cfg.Update(instance, self.feedback_fn)
+    self.cfg.Update(self.instance, self.feedback_fn)
 
-    result = self.rpc.call_instance_finalize_migration_dst(target_node_uuid,
-                                                           instance,
-                                                           migration_info,
-                                                           True)
+    result = self.rpc.call_instance_finalize_migration_dst(
+               self.target_node_uuid, self.instance, migration_info, True)
     msg = result.fail_msg
     if msg:
       logging.error("Instance migration succeeded, but finalization failed"
@@ -810,7 +803,7 @@
                                msg)
 
     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
-      self._EnsureSecondary(source_node_uuid)
+      self._EnsureSecondary(self.source_node_uuid)
       self._WaitUntilSync()
       self._GoStandalone()
       self._GoReconnect(False)
@@ -819,20 +812,21 @@
     # If the instance's disk template is `rbd' or `ext' and there was a
     # successful migration, unmap the device from the source node.
     if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
-      disks = ExpandCheckDisks(instance, instance.disks)
+      disks = ExpandCheckDisks(self.instance, self.instance.disks)
       self.feedback_fn("* unmapping instance's disks from %s" %
-                       self.cfg.GetNodeName(source_node_uuid))
+                       self.cfg.GetNodeName(self.source_node_uuid))
       for disk in disks:
-        result = self.rpc.call_blockdev_shutdown(source_node_uuid,
-                                                 (disk, instance))
+        result = self.rpc.call_blockdev_shutdown(self.source_node_uuid,
+                                                 (disk, self.instance))
         msg = result.fail_msg
         if msg:
           logging.error("Migration was successful, but couldn't unmap the"
                         " block device %s on source node %s: %s",
-                        disk.iv_name, self.cfg.GetNodeName(source_node_uuid),
-                        msg)
+                        disk.iv_name,
+                        self.cfg.GetNodeName(self.source_node_uuid), msg)
           logging.error("You need to unmap the device %s manually on %s",
-                        disk.iv_name, self.cfg.GetNodeName(source_node_uuid))
+                        disk.iv_name,
+                        self.cfg.GetNodeName(self.source_node_uuid))
 
     self.feedback_fn("* done")
 
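
For the externally mirrored rbd/ext templates handled above, the source node
still has the instance's volumes mapped after a successful migration, so each
block device is shut down there; failures are only logged because the
migration itself has already succeeded. In outline (shutdown_fn stands in for
the blockdev shutdown RPC):

    import logging

    def unmap_source_disks(disks, shutdown_fn):
      for disk in disks:
        err = shutdown_fn(disk)  # returns an error message or None
        if err:
          logging.error("couldn't unmap %s on the source node: %s", disk, err)
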
@@ -843,23 +837,21 @@
     starting it on the secondary.
 
     """
-    instance = self.instance
-    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
+    primary_node = self.cfg.GetNodeInfo(self.instance.primary_node)
 
-    source_node_uuid = instance.primary_node
-    target_node_uuid = self.target_node_uuid
+    source_node_uuid = self.instance.primary_node
 
-    if instance.disks_active:
+    if self.instance.disks_active:
       self.feedback_fn("* checking disk consistency between source and target")
-      for (idx, dev) in enumerate(instance.disks):
+      for (idx, dev) in enumerate(self.instance.disks):
         # for drbd, these are drbd over lvm
-        if not CheckDiskConsistency(self.lu, instance, dev, target_node_uuid,
-                                    False):
+        if not CheckDiskConsistency(self.lu, self.instance, dev,
+                                    self.target_node_uuid, False):
           if primary_node.offline:
             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                              " target node %s" %
                              (primary_node.name, idx,
-                              self.cfg.GetNodeName(target_node_uuid)))
+                              self.cfg.GetNodeName(self.target_node_uuid)))
           elif not self.ignore_consistency:
             raise errors.OpExecError("Disk %s is degraded on target node,"
                                      " aborting failover" % idx)
@@ -869,9 +861,9 @@
 
     self.feedback_fn("* shutting down instance on source node")
     logging.info("Shutting down instance %s on node %s",
-                 instance.name, self.cfg.GetNodeName(source_node_uuid))
+                 self.instance.name, self.cfg.GetNodeName(source_node_uuid))
 
-    result = self.rpc.call_instance_shutdown(source_node_uuid, instance,
+    result = self.rpc.call_instance_shutdown(source_node_uuid, self.instance,
                                              self.shutdown_timeout,
                                              self.lu.op.reason)
     msg = result.fail_msg
@@ -880,47 +872,48 @@
       self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                          " proceeding anyway; please make sure node"
                          " %s is down; error details: %s",
-                         instance.name,
+                         self.instance.name,
                          self.cfg.GetNodeName(source_node_uuid),
                          self.cfg.GetNodeName(source_node_uuid), msg)
     else:
       raise errors.OpExecError("Could not shutdown instance %s on"
                                " node %s: %s" %
-                               (instance.name,
+                               (self.instance.name,
                                 self.cfg.GetNodeName(source_node_uuid), msg))
 
     self.feedback_fn("* deactivating the instance's disks on source node")
-    if not ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
+    if not ShutdownInstanceDisks(self.lu, self.instance, ignore_primary=True):
       raise errors.OpExecError("Can't shut down the instance's disks")
 
-    instance.primary_node = target_node_uuid
+    self.instance.primary_node = self.target_node_uuid
     # distribute new instance config to the other nodes
-    self.cfg.Update(instance, self.feedback_fn)
+    self.cfg.Update(self.instance, self.feedback_fn)
 
     # Only start the instance if it's marked as up
-    if instance.admin_state == constants.ADMINST_UP:
+    if self.instance.admin_state == constants.ADMINST_UP:
       self.feedback_fn("* activating the instance's disks on target node %s" %
-                       self.cfg.GetNodeName(target_node_uuid))
-      logging.info("Starting instance %s on node %s",
-                   instance.name, self.cfg.GetNodeName(target_node_uuid))
+                       self.cfg.GetNodeName(self.target_node_uuid))
+      logging.info("Starting instance %s on node %s", self.instance.name,
+                   self.cfg.GetNodeName(self.target_node_uuid))
 
-      disks_ok, _ = AssembleInstanceDisks(self.lu, instance,
+      disks_ok, _ = AssembleInstanceDisks(self.lu, self.instance,
                                           ignore_secondaries=True)
       if not disks_ok:
-        ShutdownInstanceDisks(self.lu, instance)
+        ShutdownInstanceDisks(self.lu, self.instance)
        raise errors.OpExecError("Can't activate the instance's disks")
 
       self.feedback_fn("* starting the instance on the target node %s" %
-                       self.cfg.GetNodeName(target_node_uuid))
-      result = self.rpc.call_instance_start(target_node_uuid,
-                                            (instance, None, None), False,
+                       self.cfg.GetNodeName(self.target_node_uuid))
+      result = self.rpc.call_instance_start(self.target_node_uuid,
+                                            (self.instance, None, None), False,
                                             self.lu.op.reason)
       msg = result.fail_msg
       if msg:
-        ShutdownInstanceDisks(self.lu, instance)
+        ShutdownInstanceDisks(self.lu, self.instance)
         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
-                                 (instance.name,
-                                  self.cfg.GetNodeName(target_node_uuid), msg))
+                                 (self.instance.name,
+                                  self.cfg.GetNodeName(self.target_node_uuid),
                                   msg))
 
   def Exec(self, feedback_fn):
     """Perform the migration.