+ self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
+ self.op.iallocator, self.op.remote_node,
+ self.op.disks, False, self.op.early_release)
+
+ self.tasklets = [self.replacer]
+
+ def DeclareLocks(self, level):
+ # If we're not already locking all nodes in the set we have to declare the
+ # instance's primary/secondary nodes.
+ if (level == locking.LEVEL_NODE and
+ self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
+ self._LockInstancesNodes()
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on the master, the primary and all the secondaries.
+
+ """
+ instance = self.replacer.instance
+ env = {
+ "MODE": self.op.mode,
+ "NEW_SECONDARY": self.op.remote_node,
+ "OLD_SECONDARY": instance.secondary_nodes[0],
+ }
+ env.update(_BuildInstanceHookEnvByObject(self, instance))
+ nl = [
+ self.cfg.GetMasterNode(),
+ instance.primary_node,
+ ]
+ if self.op.remote_node is not None:
+ nl.append(self.op.remote_node)
+ return env, nl, nl
+
+
+class LUEvacuateNode(LogicalUnit):
+ """Relocate the secondary instances from a node.
+
+ """
+ HPATH = "node-evacuate"
+ HTYPE = constants.HTYPE_NODE
+ _OP_REQP = ["node_name"]
+ REQ_BGL = False
+
+ def CheckArguments(self):
+ if not hasattr(self.op, "remote_node"):
+ self.op.remote_node = None
+ if not hasattr(self.op, "iallocator"):
+ self.op.iallocator = None
+ if not hasattr(self.op, "early_release"):
+ self.op.early_release = False
+
+ TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
+ self.op.remote_node,
+ self.op.iallocator)
+
+ def ExpandNames(self):
+ self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
+
+ self.needed_locks = {}
+
+ # Declare node locks
+ if self.op.iallocator is not None:
+ self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
+
+ elif self.op.remote_node is not None:
+ self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
+
+ # Warning: do not remove the locking of the new secondary here
+ # unless DRBD8.AddChildren is changed to work in parallel;
+ # currently it doesn't since parallel invocations of
+ # FindUnusedMinor will conflict
+ self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
+ self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
+
+ else:
+ raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
+
+ # Create tasklets for replacing disks for all secondary instances on this
+ # node
+ names = []
+ tasklets = []
+
+ for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
+ logging.debug("Replacing disks for instance %s", inst.name)
+ names.append(inst.name)
+
+ replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
+ self.op.iallocator, self.op.remote_node, [],
+ True, self.op.early_release)
+ tasklets.append(replacer)
+
+ self.tasklets = tasklets
+ self.instance_names = names
+
+ # Declare instance locks
+ self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
+
+ def DeclareLocks(self, level):
+ # If we're not already locking all nodes in the set we have to declare the
+ # instance's primary/secondary nodes.
+ if (level == locking.LEVEL_NODE and
+ self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
+ self._LockInstancesNodes()
+
+ def BuildHooksEnv(self):
+ """Build hooks env.
+
+ This runs on the master, the primary and all the secondaries.
+
+ """
+ env = {
+ "NODE_NAME": self.op.node_name,
+ }
+
+ nl = [self.cfg.GetMasterNode()]
+
+ if self.op.remote_node is not None:
+ env["NEW_SECONDARY"] = self.op.remote_node
+ nl.append(self.op.remote_node)
+
+ return (env, nl, nl)
+
+
+class TLReplaceDisks(Tasklet):
+ """Replaces disks for an instance.
+
+ Note: Locking is not within the scope of this class.
+
+ """
+ def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
+ disks, delay_iallocator, early_release):
+ """Initializes this class.
+
+ """
+ Tasklet.__init__(self, lu)
+
+ # Parameters
+ self.instance_name = instance_name
+ self.mode = mode
+ self.iallocator_name = iallocator_name
+ self.remote_node = remote_node
+ self.disks = disks
+ self.delay_iallocator = delay_iallocator
+ self.early_release = early_release
+
+ # Runtime data
+ self.instance = None
+ self.new_node = None
+ self.target_node = None
+ self.other_node = None
+ self.remote_node_info = None
+ self.node_secondary_ip = None
+
+ @staticmethod
+ def CheckArguments(mode, remote_node, iallocator):
+ """Helper function for users of this class.
+
+ """
+ # check for valid parameter combination
+ if mode == constants.REPLACE_DISK_CHG:
+ if remote_node is None and iallocator is None:
+ raise errors.OpPrereqError("When changing the secondary either an"
+ " iallocator script must be used or the"
+ " new node given", errors.ECODE_INVAL)
+
+ if remote_node is not None and iallocator is not None:
+ raise errors.OpPrereqError("Give either the iallocator or the new"
+ " secondary, not both", errors.ECODE_INVAL)
+
+ elif remote_node is not None or iallocator is not None:
+ # Not replacing the secondary
+ raise errors.OpPrereqError("The iallocator and new node options can"
+ " only be used when changing the"
+ " secondary node", errors.ECODE_INVAL)
+
+ @staticmethod
+ def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
+ """Compute a new secondary node using an IAllocator.
+
+ """
+ ial = IAllocator(lu.cfg, lu.rpc,
+ mode=constants.IALLOCATOR_MODE_RELOC,
+ name=instance_name,
+ relocate_from=relocate_from)
+
+ ial.Run(iallocator_name)
+
+ if not ial.success:
+ raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
+ " %s" % (iallocator_name, ial.info),
+ errors.ECODE_NORES)
+
+ if len(ial.nodes) != ial.required_nodes:
+ raise errors.OpPrereqError("iallocator '%s' returned invalid number"
+ " of nodes (%s), required %s" %
+ (iallocator_name,
+ len(ial.nodes), ial.required_nodes),
+ errors.ECODE_FAULT)
+
+ remote_node_name = ial.nodes[0]
+
+ lu.LogInfo("Selected new secondary for instance '%s': %s",
+ instance_name, remote_node_name)
+
+ return remote_node_name
+
+ def _FindFaultyDisks(self, node_name):
+ return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
+ node_name, True)
+
+ def CheckPrereq(self):
+ """Check prerequisites.
+
+ This checks that the instance is in the cluster.
+
+ """
+ self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
+ assert instance is not None, \
+ "Cannot retrieve locked instance %s" % self.instance_name
+
+ if instance.disk_template != constants.DT_DRBD8:
+ raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
+ " instances", errors.ECODE_INVAL)
+
+ if len(instance.secondary_nodes) != 1:
+ raise errors.OpPrereqError("The instance has a strange layout,"
+ " expected one secondary but found %d" %
+ len(instance.secondary_nodes),
+ errors.ECODE_FAULT)
+
+ if not self.delay_iallocator:
+ self._CheckPrereq2()
+
+ def _CheckPrereq2(self):
+ """Check prerequisites, second part.
+
+ This function should always be part of CheckPrereq. It was separated and is
+ now called from Exec because during node evacuation iallocator was only
+ called with an unmodified cluster model, not taking planned changes into
+ account.
+
+ """
+ instance = self.instance
+ secondary_node = instance.secondary_nodes[0]
+
+ if self.iallocator_name is None:
+ remote_node = self.remote_node
+ else:
+ remote_node = self._RunAllocator(self.lu, self.iallocator_name,
+ instance.name, instance.secondary_nodes)
+
+ if remote_node is not None:
+ self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
+ assert self.remote_node_info is not None, \
+ "Cannot retrieve locked node %s" % remote_node
+ else:
+ self.remote_node_info = None
+
+ if remote_node == self.instance.primary_node:
+ raise errors.OpPrereqError("The specified node is the primary node of"
+ " the instance.", errors.ECODE_INVAL)
+
+ if remote_node == secondary_node:
+ raise errors.OpPrereqError("The specified node is already the"
+ " secondary node of the instance.",
+ errors.ECODE_INVAL)
+
+ if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
+ constants.REPLACE_DISK_CHG):
+ raise errors.OpPrereqError("Cannot specify disks to be replaced",
+ errors.ECODE_INVAL)
+
+ if self.mode == constants.REPLACE_DISK_AUTO:
+ faulty_primary = self._FindFaultyDisks(instance.primary_node)
+ faulty_secondary = self._FindFaultyDisks(secondary_node)
+
+ if faulty_primary and faulty_secondary:
+ raise errors.OpPrereqError("Instance %s has faulty disks on more than"
+ " one node and can not be repaired"
+ " automatically" % self.instance_name,
+ errors.ECODE_STATE)
+
+ if faulty_primary:
+ self.disks = faulty_primary
+ self.target_node = instance.primary_node
+ self.other_node = secondary_node
+ check_nodes = [self.target_node, self.other_node]
+ elif faulty_secondary:
+ self.disks = faulty_secondary
+ self.target_node = secondary_node
+ self.other_node = instance.primary_node
+ check_nodes = [self.target_node, self.other_node]
+ else:
+ self.disks = []
+ check_nodes = []
+
+ else:
+ # Non-automatic modes
+ if self.mode == constants.REPLACE_DISK_PRI:
+ self.target_node = instance.primary_node
+ self.other_node = secondary_node
+ check_nodes = [self.target_node, self.other_node]
+
+ elif self.mode == constants.REPLACE_DISK_SEC:
+ self.target_node = secondary_node
+ self.other_node = instance.primary_node
+ check_nodes = [self.target_node, self.other_node]
+
+ elif self.mode == constants.REPLACE_DISK_CHG:
+ self.new_node = remote_node
+ self.other_node = instance.primary_node
+ self.target_node = secondary_node
+ check_nodes = [self.new_node, self.other_node]
+
+ _CheckNodeNotDrained(self.lu, remote_node)
+
+ old_node_info = self.cfg.GetNodeInfo(secondary_node)
+ assert old_node_info is not None
+ if old_node_info.offline and not self.early_release:
+ # doesn't make sense to delay the release
+ self.early_release = True
+ self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
+ " early-release mode", secondary_node)
+
+ else:
+ raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
+ self.mode)
+
+ # If not specified all disks should be replaced
+ if not self.disks:
+ self.disks = range(len(self.instance.disks))
+
+ for node in check_nodes:
+ _CheckNodeOnline(self.lu, node)
+
+ # Check whether disks are valid
+ for disk_idx in self.disks:
+ instance.FindDisk(disk_idx)
+
+ # Get secondary node IP addresses
+ node_2nd_ip = {}
+
+ for node_name in [self.target_node, self.other_node, self.new_node]:
+ if node_name is not None:
+ node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
+
+ self.node_secondary_ip = node_2nd_ip
+
+ def Exec(self, feedback_fn):
+ """Execute disk replacement.
+
+ This dispatches the disk replacement to the appropriate handler.
+
+ """
+ if self.delay_iallocator:
+ self._CheckPrereq2()
+
+ if not self.disks:
+ feedback_fn("No disks need replacement")
+ return
+
+ feedback_fn("Replacing disk(s) %s for %s" %
+ (utils.CommaJoin(self.disks), self.instance.name))
+
+ activate_disks = (not self.instance.admin_up)
+
+ # Activate the instance disks if we're replacing them on a down instance
+ if activate_disks:
+ _StartInstanceDisks(self.lu, self.instance, True)
+
+ try:
+ # Should we replace the secondary node?
+ if self.new_node is not None:
+ fn = self._ExecDrbd8Secondary
+ else:
+ fn = self._ExecDrbd8DiskOnly
+
+ return fn(feedback_fn)
+
+ finally:
+ # Deactivate the instance disks if we're replacing them on a
+ # down instance
+ if activate_disks:
+ _SafeShutdownInstanceDisks(self.lu, self.instance)
+
+ def _CheckVolumeGroup(self, nodes):
+ self.lu.LogInfo("Checking volume groups")
+
+ vgname = self.cfg.GetVGName()
+
+ # Make sure volume group exists on all involved nodes
+ results = self.rpc.call_vg_list(nodes)
+ if not results:
+ raise errors.OpExecError("Can't list volume groups on the nodes")
+
+ for node in nodes:
+ res = results[node]
+ res.Raise("Error checking node %s" % node)
+ if vgname not in res.payload:
+ raise errors.OpExecError("Volume group '%s' not found on node %s" %
+ (vgname, node))
+
+ def _CheckDisksExistence(self, nodes):
+ # Check disk existence
+ for idx, dev in enumerate(self.instance.disks):
+ if idx not in self.disks:
+ continue
+
+ for node in nodes:
+ self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
+ self.cfg.SetDiskID(dev, node)
+
+ result = self.rpc.call_blockdev_find(node, dev)
+
+ msg = result.fail_msg
+ if msg or not result.payload:
+ if not msg:
+ msg = "disk not found"
+ raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
+ (idx, node, msg))
+
+ def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
+ for idx, dev in enumerate(self.instance.disks):
+ if idx not in self.disks:
+ continue