                               (instance.name, target_node))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down: unlike failover, which
  requires the instance to be shut down, migration keeps the instance
  running while it is moved to its secondary node.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

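  # Locking: the instance lock is taken at expansion time; the node
  # locks are declared empty here and recalculated in DeclareLocks,
  # once the instance's primary and secondary nodes are known.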
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    brlist = [nic.bridge for nic in instance.nics]
    result = self.rpc.call_bridges_exist(target_node, brlist)
    if result.failed or not result.data:
      raise errors.OpPrereqError("One or more target bridges %s do not"
                                 " exist on destination node '%s'" %
                                 (brlist, target_node))

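    # in cleanup mode we are recovering from a failed migration and the
    # instance may be in an intermediate state, so the check that it can
    # be live-migrated only makes sense for a real migration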
    if not self.op.cleanup:
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      msg = result.RemoteFailMsg()
      if msg:
        raise errors.OpPrereqError("Can't migrate: %s - please use failover" %
                                   msg)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
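    # poll all nodes until every one reports its disks as fully synced;
    # each node's payload carries a (done, sync_percent) pair, unpacked
    # below from nres.data[1]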
self.feedback_fn("* wait until resync is done")
|
|
3476 |
all_done = False
|
|
3477 |
while not all_done:
|
|
3478 |
all_done = True
|
|
3479 |
result = self.rpc.call_drbd_wait_sync(self.all_nodes,
|
|
3480 |
self.nodes_ip,
|
|
3481 |
self.instance.disks)
|
|
3482 |
min_percent = 100
|
|
3483 |
for node, nres in result.items():
|
|
3484 |
msg = nres.RemoteFailMsg()
|
|
3485 |
if msg:
|
|
3486 |
raise errors.OpExecError("Cannot resync disks on node %s: %s" %
|
|
3487 |
(node, msg))
|
|
3488 |
node_done, node_percent = nres.data[1]
|
|
3489 |
all_done = all_done and node_done
|
|
3490 |
if node_percent is not None:
|
|
3491 |
min_percent = min(min_percent, node_percent)
|
|
3492 |
if not all_done:
|
|
3493 |
if min_percent < 100:
|
|
3494 |
self.feedback_fn(" - progress: %.1f%%" % min_percent)
|
|
3495 |
time.sleep(2)
|
|
3496 |
|
|
3497 |
def _EnsureSecondary(self, node):
|
|
3498 |
"""Demote a node to secondary.
|
|
3499 |
|
|
3500 |
"""
|
|
3501 |
self.feedback_fn("* switching node %s to secondary mode" % node)
|
|
3502 |
|
|
3503 |
for dev in self.instance.disks:
|
|
3504 |
self.cfg.SetDiskID(dev, node)
|
|
3505 |
|
|
3506 |
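    # closing the block devices is what demotes the DRBD devices on this
    # node to the secondary role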
    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    msg = result.RemoteFailMsg()
    if msg:
      raise errors.OpExecError("Cannot change disk to secondary on node %s,"
                               " error %s" % (node, msg))

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
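    # detach the DRBD devices from the network on all nodes (DRBD
    # standalone mode), so they can be reconnected with new settings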
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      msg = nres.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Cannot disconnect disks on node %s,"
                                 " error %s" % (node, msg))

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
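    # dual-master (DRBD dual-primary) mode makes the disks writable on
    # both nodes at once, which is required while the live migration is
    # in progress; single-master is the normal mode of operation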
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      msg = nres.RemoteFailMsg()
      if msg:
        raise errors.OpExecError("Cannot change disks config on node %s,"
                                 " error: %s" % (node, msg))

  def _ExecCleanup(self):
    """Try to clean up after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise()
      if not isinstance(result.data, list):
        raise errors.OpExecError("Can't contact node '%s'" % node)

    runningon_source = instance.name in ins_l[source_node].data
    runningon_target = instance.name in ins_l[target_node].data

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

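    # recovery sequence: demote the node the instance is not running on,
    # then reconnect the disks in single-master mode and wait for them to
    # resync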
    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

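    # demote the target's disks, then reconnect everything in dual-master
    # mode, so that both nodes can write to the disks during the actual
    # migration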
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* migrating instance to %s" % target_node)
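    # fixed settling delays surround the actual migration call,
    # presumably to let the disk role changes propagate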
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.op.live)
    msg = result.RemoteFailMsg()
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      try:
        self._EnsureSecondary(target_node)
        self._GoStandalone()
        self._GoReconnect(False)
        self._WaitUntilSync()
      except errors.OpExecError, err:
        self.LogWarning("Migration failed and I can't reconnect the"
                        " drives: error '%s'\n"
                        "Please check and recover the instance status" %
                        str(err))

      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

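    # the instance now runs on the target node: demote the old primary
    # and bring the disks back into single-master mode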
    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn

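    # cache the node layout; nodes_ip maps each node to its secondary IP,
    # which is the address the DRBD replication traffic uses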
    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }
    if self.op.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDevOnPrimary(lu, node, instance, device, info):
  """Create a tree of block devices on the primary node.
