self.op.remote_node = None
if not hasattr(self.op, "iallocator"):
self.op.iallocator = None
+ if not hasattr(self.op, "early_release"):
+ self.op.early_release = False
TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
self.op.iallocator)
self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
self.op.iallocator, self.op.remote_node,
- self.op.disks, False)
+ self.op.disks, False, self.op.early_release)
self.tasklets = [self.replacer]
self.op.remote_node = None
if not hasattr(self.op, "iallocator"):
self.op.iallocator = None
+ if not hasattr(self.op, "early_release"):
+ self.op.early_release = False
TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
self.op.remote_node,
replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
self.op.iallocator, self.op.remote_node, [],
- True)
+ True, self.op.early_release)
tasklets.append(replacer)
self.tasklets = tasklets
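A minimal sketch of how a caller could request the new behaviour at the opcode
level; the instance name is illustrative, and leaving the disks list empty
selects all disks, as when --disks is not given on the command line:

    op = opcodes.OpReplaceDisks(instance_name="instance1.example.com",
                                mode=constants.REPLACE_DISK_SEC,
                                disks=[],
                                early_release=True)

When early_release is omitted, the hasattr() backfill above makes it default
to False, so existing clients keep the old behaviour.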
"""
def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
- disks, delay_iallocator):
+ disks, delay_iallocator, early_release):
"""Initializes this class.
"""
self.remote_node = remote_node
self.disks = disks
self.delay_iallocator = delay_iallocator
+ self.early_release = early_release
# Runtime data
self.instance = None
self.lu.LogWarning("Can't remove old LV: %s" % msg,
hint="remove unused LVs manually")
+ def _ReleaseNodeLock(self, node_name):
+ """Releases the lock for a given node."""
+ self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
+
def _ExecDrbd8DiskOnly(self, feedback_fn):
"""Replace a disk on the primary or secondary for DRBD 8.
self.cfg.Update(self.instance, feedback_fn)
+ cstep = 5
+ if self.early_release:
+ self.lu.LogStep(cstep, steps_total, "Removing old storage")
+ cstep += 1
+ self._RemoveOldStorage(self.target_node, iv_names)
+ # only release the lock if we're doing secondary replace, since
+ # we use the primary node later
+ if self.target_node != self.instance.primary_node:
+ self._ReleaseNodeLock(self.target_node)
+
# Wait for sync
# This can fail as the old devices are degraded and _WaitForSync
# does a combined result over all disks, so we don't check its return value
- self.lu.LogStep(5, steps_total, "Sync devices")
+ self.lu.LogStep(cstep, steps_total, "Sync devices")
+ cstep += 1
_WaitForSync(self.lu, self.instance)
# Check all devices manually
self._CheckDevices(self.instance.primary_node, iv_names)
# Step: remove old storage
- self.lu.LogStep(6, steps_total, "Removing old storage")
- self._RemoveOldStorage(self.target_node, iv_names)
+ if not self.early_release:
+ self.lu.LogStep(cstep, steps_total, "Removing old storage")
+ cstep += 1
+ self._RemoveOldStorage(self.target_node, iv_names)
def _ExecDrbd8Secondary(self, feedback_fn):
"""Replace the secondary node for DRBD 8.
to_node, msg,
hint=("please do a gnt-instance info to see the"
" status of disks"))
+ cstep = 5
+ if self.early_release:
+ self.lu.LogStep(cstep, steps_total, "Removing old storage")
+ cstep += 1
+ self._RemoveOldStorage(self.target_node, iv_names)
+ self._ReleaseNodeLock([self.target_node, self.new_node])
# Wait for sync
# This can fail as the old devices are degraded and _WaitForSync
# does a combined result over all disks, so we don't check its return value
- self.lu.LogStep(5, steps_total, "Sync devices")
+ self.lu.LogStep(cstep, steps_total, "Sync devices")
+ cstep += 1
_WaitForSync(self.lu, self.instance)
# Check all devices manually
self._CheckDevices(self.instance.primary_node, iv_names)
# Step: remove old storage
- self.lu.LogStep(6, steps_total, "Removing old storage")
- self._RemoveOldStorage(self.target_node, iv_names)
+ if not self.early_release:
+ self.lu.LogStep(cstep, steps_total, "Removing old storage")
+ self._RemoveOldStorage(self.target_node, iv_names)
class LURepairNodeStorage(NoHooksLU):
<cmdsynopsis>
<command>replace-disks</command>
<arg>--submit</arg>
+ <arg>--early-release</arg>
<arg choice="req">-p</arg>
<arg>--disks <replaceable>idx</replaceable></arg>
<arg choice="req"><replaceable>instance</replaceable></arg>
<cmdsynopsis>
<command>replace-disks</command>
<arg>--submit</arg>
+ <arg>--early-release</arg>
<arg choice="req">-s</arg>
<arg>--disks <replaceable>idx</replaceable></arg>
<arg choice="req"><replaceable>instance</replaceable></arg>
<cmdsynopsis>
<command>replace-disks</command>
<arg>--submit</arg>
+ <arg>--early-release</arg>
<group choice="req">
<arg>--iallocator <replaceable>name</replaceable></arg>
<arg>--new-secondary <replaceable>NODE</replaceable></arg>
<cmdsynopsis>
<command>replace-disks</command>
<arg>--submit</arg>
+ <arg>--early-release</arg>
<arg choice="req">--auto</arg>
<arg choice="req"><replaceable>instance</replaceable></arg>
</cmdsynopsis>
</para>
<para>
+ The <option>--early-release</option> option causes the old
+ storage on the secondary node(s) to be removed early (before
+ the resync is complete) and the internal Ganeti locks for the
+ current (and new, if any) secondary node to be released as
+ well, allowing more parallelism in the cluster operation. Use
+ it only when recovering from a disk failure on the current
+ secondary (the old storage is already broken) or when the
+ storage on the primary node is known to be fine (the old
+ storage will not be needed for a potential recovery).
+ </para>
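A minimal usage sketch, assuming the command is reached through gnt-instance
and using an illustrative instance name:

  # gnt-instance replace-disks --early-release -s instance1.example.com

With --early-release, the old storage on the secondary is removed, and its
node lock released, before the resync has finished.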
+
+ <para>
Note that it is not possible to select an offline or drained
node as a new secondary.
</para>
cli.VERBOSE_OPT,
cli.NOIPCHECK_OPT,
cli.NONAMECHECK_OPT,
+ cli.EARLY_RELEASE_OPT,
cli.cli_option("--no-replace1", dest="do_replace1",
help="Skip disk replacement with the same secondary",
action="store_false", default=True),
for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
op = opcodes.OpReplaceDisks(instance_name=instance,
mode=mode,
- disks=[i for i in range(self.disk_count)])
+ disks=[i for i in range(self.disk_count)],
+ early_release=self.opts.early_release)
Log("run %s" % mode, indent=2)
ops.append(op)
self.ExecOrQueue(instance, *ops) # pylint: disable-msg=W0142
mode=mode,
remote_node=tnode,
iallocator=self.opts.iallocator,
- disks=[])
+ disks=[],
+ early_release=self.opts.early_release)
Log("run %s %s" % (mode, msg), indent=2)
self.ExecOrQueue(instance, op)