Revision 1c3231aa lib/cmdlib/instance_migration.py
b/lib/cmdlib/instance_migration.py | ||
---|---|---|
31 | 31 |
from ganeti import utils |
32 | 32 |
from ganeti.cmdlib.base import LogicalUnit, Tasklet |
33 | 33 |
from ganeti.cmdlib.common import ExpandInstanceName, \ |
34 |
CheckIAllocatorOrNode, ExpandNodeName |
|
34 |
CheckIAllocatorOrNode, ExpandNodeUuidAndName
|
|
35 | 35 |
from ganeti.cmdlib.instance_storage import CheckDiskConsistency, \ |
36 | 36 |
ExpandCheckDisks, ShutdownInstanceDisks, AssembleInstanceDisks |
37 | 37 |
from ganeti.cmdlib.instance_utils import BuildInstanceHookEnvByObject, \ |
... | ... | |
48 | 48 |
|
49 | 49 |
""" |
50 | 50 |
if lu.op.target_node is not None: |
51 |
lu.op.target_node = ExpandNodeName(lu.cfg, lu.op.target_node) |
|
51 |
(lu.op.target_node_uuid, lu.op.target_node) = \ |
|
52 |
ExpandNodeUuidAndName(lu.cfg, lu.op.target_node_uuid, lu.op.target_node) |
|
52 | 53 |
|
53 | 54 |
lu.needed_locks[locking.LEVEL_NODE] = [] |
54 | 55 |
lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE |
... | ... | |
81 | 82 |
lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET |
82 | 83 |
else: |
83 | 84 |
lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node, |
84 |
lu.op.target_node] |
|
85 |
lu.op.target_node_uuid]
|
|
85 | 86 |
del lu.recalculate_locks[locking.LEVEL_NODE] |
86 | 87 |
else: |
87 | 88 |
lu._LockInstancesNodes() # pylint: disable=W0212 |
... | ... | |
133 | 134 |
|
134 | 135 |
""" |
135 | 136 |
instance = self._migrater.instance |
136 |
source_node = instance.primary_node |
|
137 |
target_node = self.op.target_node |
|
137 |
source_node_uuid = instance.primary_node |
|
138 | 138 |
env = { |
139 | 139 |
"IGNORE_CONSISTENCY": self.op.ignore_consistency, |
140 | 140 |
"SHUTDOWN_TIMEOUT": self.op.shutdown_timeout, |
141 |
"OLD_PRIMARY": source_node,
|
|
142 |
"NEW_PRIMARY": target_node, |
|
141 |
"OLD_PRIMARY": self.cfg.GetNodeName(source_node_uuid),
|
|
142 |
"NEW_PRIMARY": self.op.target_node,
|
|
143 | 143 |
} |
144 | 144 |
|
145 | 145 |
if instance.disk_template in constants.DTS_INT_MIRROR: |
146 |
env["OLD_SECONDARY"] = instance.secondary_nodes[0]
|
|
147 |
env["NEW_SECONDARY"] = source_node
|
|
146 |
env["OLD_SECONDARY"] = self.cfg.GetNodeName(instance.secondary_nodes[0])
|
|
147 |
env["NEW_SECONDARY"] = self.cfg.GetNodeName(source_node_uuid)
|
|
148 | 148 |
else: |
149 | 149 |
env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = "" |
150 | 150 |
|
... | ... | |
195 | 195 |
|
196 | 196 |
""" |
197 | 197 |
instance = self._migrater.instance |
198 |
source_node = instance.primary_node |
|
199 |
target_node = self.op.target_node |
|
198 |
source_node_uuid = instance.primary_node |
|
200 | 199 |
env = BuildInstanceHookEnvByObject(self, instance) |
201 | 200 |
env.update({ |
202 | 201 |
"MIGRATE_LIVE": self._migrater.live, |
203 | 202 |
"MIGRATE_CLEANUP": self.op.cleanup, |
204 |
"OLD_PRIMARY": source_node,
|
|
205 |
"NEW_PRIMARY": target_node, |
|
203 |
"OLD_PRIMARY": self.cfg.GetNodeName(source_node_uuid),
|
|
204 |
"NEW_PRIMARY": self.op.target_node,
|
|
206 | 205 |
"ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes, |
207 | 206 |
}) |
208 | 207 |
|
209 | 208 |
if instance.disk_template in constants.DTS_INT_MIRROR: |
210 |
env["OLD_SECONDARY"] = target_node
|
|
211 |
env["NEW_SECONDARY"] = source_node
|
|
209 |
env["OLD_SECONDARY"] = self.cfg.GetNodeName(instance.secondary_nodes[0])
|
|
210 |
env["NEW_SECONDARY"] = self.cfg.GetNodeName(source_node_uuid)
|
|
212 | 211 |
else: |
213 | 212 |
env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None |
214 | 213 |
|
... | ... | |
219 | 218 |
|
220 | 219 |
""" |
221 | 220 |
instance = self._migrater.instance |
222 |
snodes = list(instance.secondary_nodes) |
|
223 |
nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes |
|
221 |
snode_uuids = list(instance.secondary_nodes)
|
|
222 |
nl = [self.cfg.GetMasterNode(), instance.primary_node] + snode_uuids
|
|
224 | 223 |
return (nl, nl) |
225 | 224 |
|
226 | 225 |
|
... | ... | |
234 | 233 |
@ivar cleanup: Whether we clean up from a failed migration |
235 | 234 |
@type iallocator: string |
236 | 235 |
@ivar iallocator: The iallocator used to determine target_node |
237 |
@type target_node: string |
|
238 |
@ivar target_node: If given, the target_node to reallocate the instance to |
|
236 |
@type target_node_uuid: string |
|
237 |
@ivar target_node_uuid: If given, the target node UUID to reallocate the |
|
238 |
instance to |
|
239 | 239 |
@type failover: boolean |
240 | 240 |
@ivar failover: Whether operation results in failover or migration |
241 | 241 |
@type fallback: boolean |
... | ... | |
309 | 309 |
assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC) |
310 | 310 |
self._RunAllocator() |
311 | 311 |
else: |
312 |
# We set set self.target_node as it is required by |
|
312 |
# We set self.target_node_uuid as it is required by
|
|
313 | 313 |
# BuildHooksEnv |
314 |
self.target_node = self.lu.op.target_node
|
|
314 |
self.target_node_uuid = self.lu.op.target_node_uuid
|
|
315 | 315 |
|
316 | 316 |
# Check that the target node is correct in terms of instance policy |
317 |
nodeinfo = self.cfg.GetNodeInfo(self.target_node) |
|
317 |
nodeinfo = self.cfg.GetNodeInfo(self.target_node_uuid)
|
|
318 | 318 |
group_info = self.cfg.GetNodeGroup(nodeinfo.group) |
319 | 319 |
ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, |
320 | 320 |
group_info) |
... | ... | |
323 | 323 |
|
324 | 324 |
# self.target_node_uuid is already populated, either directly or by the |
325 | 325 |
# iallocator run |
326 |
target_node = self.target_node
|
|
327 |
if self.target_node == instance.primary_node: |
|
328 |
raise errors.OpPrereqError("Cannot migrate instance %s"
|
|
329 |
" to its primary (%s)" %
|
|
330 |
(instance.name, instance.primary_node),
|
|
331 |
errors.ECODE_STATE)
|
|
326 |
target_node_uuid = self.target_node_uuid
|
|
327 |
if self.target_node_uuid == instance.primary_node:
|
|
328 |
raise errors.OpPrereqError( |
|
329 |
"Cannot migrate instance %s to its primary (%s)" %
|
|
330 |
(instance.name, self.cfg.GetNodeName(instance.primary_node)),
|
|
331 |
errors.ECODE_STATE) |
|
332 | 332 |
|
333 | 333 |
if len(self.lu.tasklets) == 1: |
334 | 334 |
# It is safe to release locks only when we're the only tasklet |
335 | 335 |
# in the LU |
336 | 336 |
ReleaseLocks(self.lu, locking.LEVEL_NODE, |
337 |
keep=[instance.primary_node, self.target_node]) |
|
337 |
keep=[instance.primary_node, self.target_node_uuid])
|
|
338 | 338 |
ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC) |
339 | 339 |
|
340 | 340 |
else: |
341 | 341 |
assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC) |
342 | 342 |
|
343 |
secondary_nodes = instance.secondary_nodes |
|
344 |
if not secondary_nodes: |
|
343 |
secondary_node_uuids = instance.secondary_nodes
|
|
344 |
if not secondary_node_uuids:
|
|
345 | 345 |
raise errors.ConfigurationError("No secondary node but using" |
346 | 346 |
" %s disk template" % |
347 | 347 |
instance.disk_template) |
348 |
target_node = secondary_nodes[0] |
|
349 |
if self.lu.op.iallocator or (self.lu.op.target_node and |
|
350 |
self.lu.op.target_node != target_node): |
|
348 |
target_node_uuid = secondary_node_uuids[0] |
|
349 |
if self.lu.op.iallocator or \ |
|
350 |
(self.lu.op.target_node_uuid and |
|
351 |
self.lu.op.target_node_uuid != target_node_uuid): |
|
351 | 352 |
if self.failover: |
352 | 353 |
text = "failed over" |
353 | 354 |
else: |
... | ... | |
358 | 359 |
" node can be passed)" % |
359 | 360 |
(instance.disk_template, text), |
360 | 361 |
errors.ECODE_INVAL) |
361 |
nodeinfo = self.cfg.GetNodeInfo(target_node) |
|
362 |
nodeinfo = self.cfg.GetNodeInfo(target_node_uuid)
|
|
362 | 363 |
group_info = self.cfg.GetNodeGroup(nodeinfo.group) |
363 | 364 |
ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, |
364 | 365 |
group_info) |
... | ... | |
371 | 372 |
if (not self.cleanup and |
372 | 373 |
(not self.failover or instance.admin_state == constants.ADMINST_UP)): |
373 | 374 |
self.tgt_free_mem = CheckNodeFreeMemory( |
374 |
self.lu, target_node, "migrating instance %s" % instance.name, |
|
375 |
self.lu, target_node_uuid, "migrating instance %s" % instance.name,
|
|
375 | 376 |
i_be[constants.BE_MINMEM], instance.hypervisor, |
376 | 377 |
self.cfg.GetClusterInfo().hvparams[instance.hypervisor]) |
377 | 378 |
else: |
... | ... | |
386 | 387 |
self.failover = True |
387 | 388 |
|
388 | 389 |
# check bridge existence |
389 |
CheckInstanceBridgesExist(self.lu, instance, node=target_node)
|
|
390 |
CheckInstanceBridgesExist(self.lu, instance, node_uuid=target_node_uuid)
|
|
390 | 391 |
|
391 | 392 |
if not self.cleanup: |
392 |
CheckNodeNotDrained(self.lu, target_node) |
|
393 |
CheckNodeNotDrained(self.lu, target_node_uuid)
|
|
393 | 394 |
if not self.failover: |
394 | 395 |
result = self.rpc.call_instance_migratable(instance.primary_node, |
395 | 396 |
instance) |
... | ... | |
431 | 432 |
instance.primary_node, instance.name, instance.hypervisor, |
432 | 433 |
cluster.hvparams[instance.hypervisor]) |
433 | 434 |
remote_info.Raise("Error checking instance on node %s" % |
434 |
instance.primary_node)
|
|
435 |
self.cfg.GetNodeName(instance.primary_node))
|
|
435 | 436 |
instance_running = bool(remote_info.payload) |
436 | 437 |
if instance_running: |
437 | 438 |
self.current_mem = int(remote_info.payload["memory"]) |
... | ... | |
443 | 444 |
assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC) |
444 | 445 |
|
445 | 446 |
# FIXME: add a self.ignore_ipolicy option |
446 |
req = iallocator.IAReqRelocate(name=self.instance_name, |
|
447 |
relocate_from=[self.instance.primary_node]) |
|
447 |
req = iallocator.IAReqRelocate( |
|
448 |
name=self.instance_name, |
|
449 |
relocate_from_node_uuids=[self.instance.primary_node]) |
|
448 | 450 |
ial = iallocator.IAllocator(self.cfg, self.rpc, req) |
449 | 451 |
|
450 | 452 |
ial.Run(self.lu.op.iallocator) |
... | ... | |
454 | 456 |
" iallocator '%s': %s" % |
455 | 457 |
(self.lu.op.iallocator, ial.info), |
456 | 458 |
errors.ECODE_NORES) |
457 |
self.target_node = ial.result[0]
|
|
459 |
self.target_node_uuid = self.cfg.GetNodeInfoByName(ial.result[0]).uuid
|
|
458 | 460 |
self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s", |
459 | 461 |
self.instance_name, self.lu.op.iallocator, |
460 | 462 |
utils.CommaJoin(ial.result)) |
... | ... | |
469 | 471 |
all_done = False |
470 | 472 |
while not all_done: |
471 | 473 |
all_done = True |
472 |
result = self.rpc.call_drbd_wait_sync(self.all_nodes, |
|
474 |
result = self.rpc.call_drbd_wait_sync(self.all_node_uuids,
|
|
473 | 475 |
self.nodes_ip, |
474 | 476 |
(self.instance.disks, |
475 | 477 |
self.instance)) |
476 | 478 |
min_percent = 100 |
477 |
for node, nres in result.items(): |
|
478 |
nres.Raise("Cannot resync disks on node %s" % node) |
|
479 |
for node_uuid, nres in result.items(): |
|
480 |
nres.Raise("Cannot resync disks on node %s" % |
|
481 |
self.cfg.GetNodeName(node_uuid)) |
|
479 | 482 |
node_done, node_percent = nres.payload |
480 | 483 |
all_done = all_done and node_done |
481 | 484 |
if node_percent is not None: |
... | ... | |
485 | 488 |
self.feedback_fn(" - progress: %.1f%%" % min_percent) |
486 | 489 |
time.sleep(2) |
487 | 490 |
|
488 |
def _EnsureSecondary(self, node): |
|
491 |
def _EnsureSecondary(self, node_uuid):
|
|
489 | 492 |
"""Demote a node to secondary. |
490 | 493 |
|
491 | 494 |
""" |
492 |
self.feedback_fn("* switching node %s to secondary mode" % node) |
|
495 |
self.feedback_fn("* switching node %s to secondary mode" % |
|
496 |
self.cfg.GetNodeName(node_uuid)) |
|
493 | 497 |
|
494 | 498 |
for dev in self.instance.disks: |
495 |
self.cfg.SetDiskID(dev, node) |
|
499 |
self.cfg.SetDiskID(dev, node_uuid)
|
|
496 | 500 |
|
497 |
result = self.rpc.call_blockdev_close(node, self.instance.name, |
|
501 |
result = self.rpc.call_blockdev_close(node_uuid, self.instance.name,
|
|
498 | 502 |
self.instance.disks) |
499 |
result.Raise("Cannot change disk to secondary on node %s" % node) |
|
503 |
result.Raise("Cannot change disk to secondary on node %s" % |
|
504 |
self.cfg.GetNodeName(node_uuid)) |
|
500 | 505 |
|
501 | 506 |
def _GoStandalone(self): |
502 | 507 |
"""Disconnect from the network. |
503 | 508 |
|
504 | 509 |
""" |
505 | 510 |
self.feedback_fn("* changing into standalone mode") |
506 |
result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip, |
|
511 |
result = self.rpc.call_drbd_disconnect_net(self.all_node_uuids, |
|
512 |
self.nodes_ip, |
|
507 | 513 |
self.instance.disks) |
508 |
for node, nres in result.items(): |
|
509 |
nres.Raise("Cannot disconnect disks node %s" % node) |
|
514 |
for node_uuid, nres in result.items(): |
|
515 |
nres.Raise("Cannot disconnect disks node %s" % |
|
516 |
self.cfg.GetNodeName(node_uuid)) |
|
510 | 517 |
|
511 | 518 |
def _GoReconnect(self, multimaster): |
512 | 519 |
"""Reconnect to the network. |
... | ... | |
517 | 524 |
else: |
518 | 525 |
msg = "single-master" |
519 | 526 |
self.feedback_fn("* changing disks into %s mode" % msg) |
520 |
result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip, |
|
527 |
result = self.rpc.call_drbd_attach_net(self.all_node_uuids, self.nodes_ip,
|
|
521 | 528 |
(self.instance.disks, self.instance), |
522 | 529 |
self.instance.name, multimaster) |
523 |
for node, nres in result.items(): |
|
524 |
nres.Raise("Cannot change disks config on node %s" % node) |
|
530 |
for node_uuid, nres in result.items(): |
|
531 |
nres.Raise("Cannot change disks config on node %s" % |
|
532 |
self.cfg.GetNodeName(node_uuid)) |
|
525 | 533 |
|
526 | 534 |
def _ExecCleanup(self): |
527 | 535 |
"""Try to cleanup after a failed migration. |
... | ... | |
537 | 545 |
|
538 | 546 |
""" |
539 | 547 |
instance = self.instance |
540 |
target_node = self.target_node
|
|
541 |
source_node = self.source_node
|
|
548 |
target_node_uuid = self.target_node_uuid
|
|
549 |
source_node_uuid = self.source_node_uuid
|
|
542 | 550 |
|
543 | 551 |
# check running on only one node |
544 | 552 |
self.feedback_fn("* checking where the instance actually runs" |
545 | 553 |
" (if this hangs, the hypervisor might be in" |
546 | 554 |
" a bad state)") |
547 | 555 |
cluster_hvparams = self.cfg.GetClusterInfo().hvparams |
548 |
ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor], |
|
556 |
ins_l = self.rpc.call_instance_list(self.all_node_uuids, |
|
557 |
[instance.hypervisor], |
|
549 | 558 |
cluster_hvparams) |
550 |
for node, result in ins_l.items(): |
|
551 |
result.Raise("Can't contact node %s" % node) |
|
559 |
for node_uuid, result in ins_l.items():
|
|
560 |
result.Raise("Can't contact node %s" % node_uuid)
|
|
552 | 561 |
|
553 |
runningon_source = instance.name in ins_l[source_node].payload |
|
554 |
runningon_target = instance.name in ins_l[target_node].payload |
|
562 |
runningon_source = instance.name in ins_l[source_node_uuid].payload
|
|
563 |
runningon_target = instance.name in ins_l[target_node_uuid].payload
|
|
555 | 564 |
|
556 | 565 |
if runningon_source and runningon_target: |
557 | 566 |
raise errors.OpExecError("Instance seems to be running on two nodes," |
... | ... | |
568 | 577 |
if runningon_target: |
569 | 578 |
# the migration has actually succeeded, we need to update the config |
570 | 579 |
self.feedback_fn("* instance running on secondary node (%s)," |
571 |
" updating config" % target_node) |
|
572 |
instance.primary_node = target_node |
|
580 |
" updating config" % |
|
581 |
self.cfg.GetNodeName(target_node_uuid)) |
|
582 |
instance.primary_node = target_node_uuid |
|
573 | 583 |
self.cfg.Update(instance, self.feedback_fn) |
574 |
demoted_node = source_node
|
|
584 |
demoted_node_uuid = source_node_uuid
|
|
575 | 585 |
else: |
576 | 586 |
self.feedback_fn("* instance confirmed to be running on its" |
577 |
" primary node (%s)" % source_node) |
|
578 |
demoted_node = target_node |
|
587 |
" primary node (%s)" % |
|
588 |
self.cfg.GetNodeName(source_node_uuid)) |
|
589 |
demoted_node_uuid = target_node_uuid |
|
579 | 590 |
|
580 | 591 |
if instance.disk_template in constants.DTS_INT_MIRROR: |
581 |
self._EnsureSecondary(demoted_node) |
|
592 |
self._EnsureSecondary(demoted_node_uuid)
|
|
582 | 593 |
try: |
583 | 594 |
self._WaitUntilSync() |
584 | 595 |
except errors.OpExecError: |
... | ... | |
595 | 606 |
"""Try to revert the disk status after a failed migration. |
596 | 607 |
|
597 | 608 |
""" |
598 |
target_node = self.target_node |
|
599 | 609 |
if self.instance.disk_template in constants.DTS_EXT_MIRROR: |
600 | 610 |
return |
601 | 611 |
|
602 | 612 |
try: |
603 |
self._EnsureSecondary(target_node)
|
|
613 |
self._EnsureSecondary(self.target_node_uuid)
|
|
604 | 614 |
self._GoStandalone() |
605 | 615 |
self._GoReconnect(False) |
606 | 616 |
self._WaitUntilSync() |
... | ... | |
614 | 624 |
|
615 | 625 |
""" |
616 | 626 |
instance = self.instance |
617 |
target_node = self.target_node |
|
618 |
source_node = self.source_node |
|
619 | 627 |
migration_info = self.migration_info |
620 | 628 |
|
621 |
abort_result = self.rpc.call_instance_finalize_migration_dst(target_node, |
|
622 |
instance, |
|
623 |
migration_info, |
|
624 |
False) |
|
629 |
abort_result = self.rpc.call_instance_finalize_migration_dst( |
|
630 |
self.target_node_uuid, instance, migration_info, False) |
|
625 | 631 |
abort_msg = abort_result.fail_msg |
626 | 632 |
if abort_msg: |
627 | 633 |
logging.error("Aborting migration failed on target node %s: %s", |
628 |
target_node, abort_msg)
|
|
634 |
self.cfg.GetNodeName(self.target_node_uuid), abort_msg)
|
|
629 | 635 |
# Don't raise an exception here, as we still have to try to revert the |
630 | 636 |
# disk status, even if this step failed. |
631 | 637 |
|
632 | 638 |
abort_result = self.rpc.call_instance_finalize_migration_src( |
633 |
source_node, instance, False, self.live)
|
|
639 |
self.source_node_uuid, instance, False, self.live)
|
|
634 | 640 |
abort_msg = abort_result.fail_msg |
635 | 641 |
if abort_msg: |
636 | 642 |
logging.error("Aborting migration failed on source node %s: %s", |
637 |
source_node, abort_msg)
|
|
643 |
self.cfg.GetNodeName(self.source_node_uuid), abort_msg)
|
|
638 | 644 |
|
639 | 645 |
def _ExecMigration(self): |
640 | 646 |
"""Migrate an instance. |
... | ... | |
649 | 655 |
|
650 | 656 |
""" |
651 | 657 |
instance = self.instance |
652 |
target_node = self.target_node
|
|
653 |
source_node = self.source_node
|
|
658 |
target_node_uuid = self.target_node_uuid
|
|
659 |
source_node_uuid = self.source_node_uuid
|
|
654 | 660 |
|
655 | 661 |
# Check for hypervisor version mismatch and warn the user. |
656 | 662 |
hvspecs = [(instance.hypervisor, |
657 | 663 |
self.cfg.GetClusterInfo().hvparams[instance.hypervisor])] |
658 |
nodeinfo = self.rpc.call_node_info([source_node, target_node],
|
|
664 |
nodeinfo = self.rpc.call_node_info([source_node_uuid, target_node_uuid],
|
|
659 | 665 |
None, hvspecs, False) |
660 | 666 |
for ninfo in nodeinfo.values(): |
661 | 667 |
ninfo.Raise("Unable to retrieve node information from node '%s'" % |
662 | 668 |
ninfo.node) |
663 |
(_, _, (src_info, )) = nodeinfo[source_node].payload |
|
664 |
(_, _, (dst_info, )) = nodeinfo[target_node].payload |
|
669 |
(_, _, (src_info, )) = nodeinfo[source_node_uuid].payload
|
|
670 |
(_, _, (dst_info, )) = nodeinfo[target_node_uuid].payload
|
|
665 | 671 |
|
666 | 672 |
if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and |
667 | 673 |
(constants.HV_NODEINFO_KEY_VERSION in dst_info)): |
... | ... | |
674 | 680 |
|
675 | 681 |
self.feedback_fn("* checking disk consistency between source and target") |
676 | 682 |
for (idx, dev) in enumerate(instance.disks): |
677 |
if not CheckDiskConsistency(self.lu, instance, dev, target_node, False): |
|
683 |
if not CheckDiskConsistency(self.lu, instance, dev, target_node_uuid, |
|
684 |
False): |
|
678 | 685 |
raise errors.OpExecError("Disk %s is degraded or not fully" |
679 | 686 |
" synchronized on target node," |
680 | 687 |
" aborting migration" % idx) |
... | ... | |
684 | 691 |
raise errors.OpExecError("Memory ballooning not allowed and not enough" |
685 | 692 |
" free memory to fit instance %s on target" |
686 | 693 |
" node %s (have %dMB, need %dMB)" % |
687 |
(instance.name, target_node, |
|
694 |
(instance.name, |
|
695 |
self.cfg.GetNodeName(target_node_uuid), |
|
688 | 696 |
self.tgt_free_mem, self.current_mem)) |
689 | 697 |
self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem) |
690 | 698 |
rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node, |
... | ... | |
693 | 701 |
rpcres.Raise("Cannot modify instance runtime memory") |
694 | 702 |
|
695 | 703 |
# First get the migration information from the remote node |
696 |
result = self.rpc.call_migration_info(source_node, instance) |
|
704 |
result = self.rpc.call_migration_info(source_node_uuid, instance)
|
|
697 | 705 |
msg = result.fail_msg |
698 | 706 |
if msg: |
699 | 707 |
log_err = ("Failed fetching source migration information from %s: %s" % |
700 |
(source_node, msg))
|
|
708 |
(self.cfg.GetNodeName(source_node_uuid), msg))
|
|
701 | 709 |
logging.error(log_err) |
702 | 710 |
raise errors.OpExecError(log_err) |
703 | 711 |
|
... | ... | |
705 | 713 |
|
706 | 714 |
if self.instance.disk_template not in constants.DTS_EXT_MIRROR: |
707 | 715 |
# Then switch the disks to master/master mode |
708 |
self._EnsureSecondary(target_node) |
|
716 |
self._EnsureSecondary(target_node_uuid)
|
|
709 | 717 |
self._GoStandalone() |
710 | 718 |
self._GoReconnect(True) |
711 | 719 |
self._WaitUntilSync() |
712 | 720 |
|
713 |
self.feedback_fn("* preparing %s to accept the instance" % target_node) |
|
714 |
result = self.rpc.call_accept_instance(target_node, |
|
721 |
self.feedback_fn("* preparing %s to accept the instance" % |
|
722 |
self.cfg.GetNodeName(target_node_uuid)) |
|
723 |
result = self.rpc.call_accept_instance(target_node_uuid, |
|
715 | 724 |
instance, |
716 | 725 |
migration_info, |
717 |
self.nodes_ip[target_node]) |
|
726 |
self.nodes_ip[target_node_uuid])
|
|
718 | 727 |
|
719 | 728 |
msg = result.fail_msg |
720 | 729 |
if msg: |
... | ... | |
726 | 735 |
raise errors.OpExecError("Could not pre-migrate instance %s: %s" % |
727 | 736 |
(instance.name, msg)) |
728 | 737 |
|
729 |
self.feedback_fn("* migrating instance to %s" % target_node) |
|
738 |
self.feedback_fn("* migrating instance to %s" % |
|
739 |
self.cfg.GetNodeName(target_node_uuid)) |
|
730 | 740 |
cluster = self.cfg.GetClusterInfo() |
731 | 741 |
result = self.rpc.call_instance_migrate( |
732 |
source_node, cluster.cluster_name, instance, self.nodes_ip[target_node],
|
|
733 |
self.live) |
|
742 |
source_node_uuid, cluster.cluster_name, instance,
|
|
743 |
self.nodes_ip[target_node_uuid], self.live)
|
|
734 | 744 |
msg = result.fail_msg |
735 | 745 |
if msg: |
736 | 746 |
logging.error("Instance migration failed, trying to revert" |
... | ... | |
744 | 754 |
self.feedback_fn("* starting memory transfer") |
745 | 755 |
last_feedback = time.time() |
746 | 756 |
while True: |
747 |
result = self.rpc.call_instance_get_migration_status(source_node, |
|
757 |
result = self.rpc.call_instance_get_migration_status(source_node_uuid,
|
|
748 | 758 |
instance) |
749 | 759 |
msg = result.fail_msg |
750 | 760 |
ms = result.payload # MigrationStatus instance |
... | ... | |
772 | 782 |
|
773 | 783 |
time.sleep(self._MIGRATION_POLL_INTERVAL) |
774 | 784 |
|
775 |
result = self.rpc.call_instance_finalize_migration_src(source_node, |
|
785 |
result = self.rpc.call_instance_finalize_migration_src(source_node_uuid,
|
|
776 | 786 |
instance, |
777 | 787 |
True, |
778 | 788 |
self.live) |
... | ... | |
783 | 793 |
raise errors.OpExecError("Could not finalize instance migration: %s" % |
784 | 794 |
msg) |
785 | 795 |
|
786 |
instance.primary_node = target_node |
|
796 |
instance.primary_node = target_node_uuid
|
|
787 | 797 |
|
788 | 798 |
# distribute new instance config to the other nodes |
789 | 799 |
self.cfg.Update(instance, self.feedback_fn) |
790 | 800 |
|
791 |
result = self.rpc.call_instance_finalize_migration_dst(target_node, |
|
801 |
result = self.rpc.call_instance_finalize_migration_dst(target_node_uuid,
|
|
792 | 802 |
instance, |
793 | 803 |
migration_info, |
794 | 804 |
True) |
... | ... | |
800 | 810 |
msg) |
801 | 811 |
|
802 | 812 |
if self.instance.disk_template not in constants.DTS_EXT_MIRROR: |
803 |
self._EnsureSecondary(source_node) |
|
813 |
self._EnsureSecondary(source_node_uuid)
|
|
804 | 814 |
self._WaitUntilSync() |
805 | 815 |
self._GoStandalone() |
806 | 816 |
self._GoReconnect(False) |
... | ... | |
810 | 820 |
# successful migration, unmap the device from the source node. |
811 | 821 |
if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT): |
812 | 822 |
disks = ExpandCheckDisks(instance, instance.disks) |
813 |
self.feedback_fn("* unmapping instance's disks from %s" % source_node) |
|
823 |
self.feedback_fn("* unmapping instance's disks from %s" % |
|
824 |
self.cfg.GetNodeName(source_node_uuid)) |
|
814 | 825 |
for disk in disks: |
815 |
result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance)) |
|
826 |
result = self.rpc.call_blockdev_shutdown(source_node_uuid, |
|
827 |
(disk, instance)) |
|
816 | 828 |
msg = result.fail_msg |
817 | 829 |
if msg: |
818 | 830 |
logging.error("Migration was successful, but couldn't unmap the" |
819 | 831 |
" block device %s on source node %s: %s", |
820 |
disk.iv_name, source_node, msg) |
|
832 |
disk.iv_name, self.cfg.GetNodeName(source_node_uuid), |
|
833 |
msg) |
|
821 | 834 |
logging.error("You need to unmap the device %s manually on %s", |
822 |
disk.iv_name, source_node)
|
|
835 |
disk.iv_name, self.cfg.GetNodeName(source_node_uuid))
|
|
823 | 836 |
|
824 | 837 |
self.feedback_fn("* done") |
825 | 838 |
|
... | ... | |
833 | 846 |
instance = self.instance |
834 | 847 |
primary_node = self.cfg.GetNodeInfo(instance.primary_node) |
835 | 848 |
|
836 |
source_node = instance.primary_node |
|
837 |
target_node = self.target_node
|
|
849 |
source_node_uuid = instance.primary_node
|
|
850 |
target_node_uuid = self.target_node_uuid
|
|
838 | 851 |
|
839 | 852 |
if instance.disks_active: |
840 | 853 |
self.feedback_fn("* checking disk consistency between source and target") |
841 | 854 |
for (idx, dev) in enumerate(instance.disks): |
842 | 855 |
# for drbd, these are drbd over lvm |
843 |
if not CheckDiskConsistency(self.lu, instance, dev, target_node, |
|
856 |
if not CheckDiskConsistency(self.lu, instance, dev, target_node_uuid,
|
|
844 | 857 |
False): |
845 | 858 |
if primary_node.offline: |
846 | 859 |
self.feedback_fn("Node %s is offline, ignoring degraded disk %s on" |
847 | 860 |
" target node %s" % |
848 |
(primary_node.name, idx, target_node)) |
|
861 |
(primary_node.name, idx, |
|
862 |
self.cfg.GetNodeName(target_node_uuid))) |
|
849 | 863 |
elif not self.ignore_consistency: |
850 | 864 |
raise errors.OpExecError("Disk %s is degraded on target node," |
851 | 865 |
" aborting failover" % idx) |
... | ... | |
855 | 869 |
|
856 | 870 |
self.feedback_fn("* shutting down instance on source node") |
857 | 871 |
logging.info("Shutting down instance %s on node %s", |
858 |
instance.name, source_node)
|
|
872 |
instance.name, self.cfg.GetNodeName(source_node_uuid))
|
|
859 | 873 |
|
860 |
result = self.rpc.call_instance_shutdown(source_node, instance, |
|
874 |
result = self.rpc.call_instance_shutdown(source_node_uuid, instance,
|
|
861 | 875 |
self.shutdown_timeout, |
862 | 876 |
self.lu.op.reason) |
863 | 877 |
msg = result.fail_msg |
... | ... | |
866 | 880 |
self.lu.LogWarning("Could not shutdown instance %s on node %s," |
867 | 881 |
" proceeding anyway; please make sure node" |
868 | 882 |
" %s is down; error details: %s", |
869 |
instance.name, source_node, source_node, msg) |
|
883 |
instance.name, |
|
884 |
self.cfg.GetNodeName(source_node_uuid), |
|
885 |
self.cfg.GetNodeName(source_node_uuid), msg) |
|
870 | 886 |
else: |
871 | 887 |
raise errors.OpExecError("Could not shutdown instance %s on" |
872 | 888 |
" node %s: %s" % |
873 |
(instance.name, source_node, msg)) |
|
889 |
(instance.name, |
|
890 |
self.cfg.GetNodeName(source_node_uuid), msg)) |
|
874 | 891 |
|
875 | 892 |
self.feedback_fn("* deactivating the instance's disks on source node") |
876 | 893 |
if not ShutdownInstanceDisks(self.lu, instance, ignore_primary=True): |
877 | 894 |
raise errors.OpExecError("Can't shut down the instance's disks") |
878 | 895 |
|
879 |
instance.primary_node = target_node |
|
896 |
instance.primary_node = target_node_uuid
|
|
880 | 897 |
# distribute new instance config to the other nodes |
881 | 898 |
self.cfg.Update(instance, self.feedback_fn) |
882 | 899 |
|
883 | 900 |
# Only start the instance if it's marked as up |
884 | 901 |
if instance.admin_state == constants.ADMINST_UP: |
885 | 902 |
self.feedback_fn("* activating the instance's disks on target node %s" % |
886 |
target_node)
|
|
903 |
self.cfg.GetNodeName(target_node_uuid))
|
|
887 | 904 |
logging.info("Starting instance %s on node %s", |
888 |
instance.name, target_node)
|
|
905 |
instance.name, self.cfg.GetNodeName(target_node_uuid))
|
|
889 | 906 |
|
890 | 907 |
disks_ok, _ = AssembleInstanceDisks(self.lu, instance, |
891 | 908 |
ignore_secondaries=True) |
... | ... | |
894 | 911 |
raise errors.OpExecError("Can't activate the instance's disks") |
895 | 912 |
|
896 | 913 |
self.feedback_fn("* starting the instance on the target node %s" % |
897 |
target_node) |
|
898 |
result = self.rpc.call_instance_start(target_node, (instance, None, None), |
|
899 |
False, self.lu.op.reason) |
|
914 |
self.cfg.GetNodeName(target_node_uuid)) |
|
915 |
result = self.rpc.call_instance_start(target_node_uuid, |
|
916 |
(instance, None, None), False, |
|
917 |
self.lu.op.reason) |
|
900 | 918 |
msg = result.fail_msg |
901 | 919 |
if msg: |
902 | 920 |
ShutdownInstanceDisks(self.lu, instance) |
903 | 921 |
raise errors.OpExecError("Could not start instance %s on node %s: %s" % |
904 |
(instance.name, target_node, msg)) |
|
922 |
(instance.name, |
|
923 |
self.cfg.GetNodeName(target_node_uuid), msg)) |
|
905 | 924 |
|
906 | 925 |
def Exec(self, feedback_fn): |
907 | 926 |
"""Perform the migration. |
908 | 927 |
|
909 | 928 |
""" |
910 | 929 |
self.feedback_fn = feedback_fn |
911 |
self.source_node = self.instance.primary_node |
|
930 |
self.source_node_uuid = self.instance.primary_node
|
|
912 | 931 |
|
913 | 932 |
# FIXME: if we implement migrate-to-any in DRBD, this needs fixing |
914 | 933 |
if self.instance.disk_template in constants.DTS_INT_MIRROR: |
915 |
self.target_node = self.instance.secondary_nodes[0] |
|
934 |
self.target_node_uuid = self.instance.secondary_nodes[0]
|
|
916 | 935 |
# Otherwise self.target_node_uuid has been populated either |
917 | 936 |
# directly, or through an iallocator. |
918 | 937 |
|
919 |
self.all_nodes = [self.source_node, self.target_node]
|
|
920 |
self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
|
|
921 |
in self.cfg.GetMultiNodeInfo(self.all_nodes)) |
|
938 |
self.all_node_uuids = [self.source_node_uuid, self.target_node_uuid]
|
|
939 |
self.nodes_ip = dict((uuid, node.secondary_ip) for (uuid, node)
|
|
940 |
in self.cfg.GetMultiNodeInfo(self.all_node_uuids))
|
|
922 | 941 |
|
923 | 942 |
if self.failover: |
924 | 943 |
feedback_fn("Failover instance %s" % self.instance.name) |
Also available in: Unified diff