Revision 7ea7bcf6
b/lib/cli.py | ||
---|---|---|
56 | 56 |
"DISK_OPT", |
57 | 57 |
"DISK_TEMPLATE_OPT", |
58 | 58 |
"DRAINED_OPT", |
59 |
"EARLY_RELEASE_OPT", |
|
59 | 60 |
"ENABLED_HV_OPT", |
60 | 61 |
"ERROR_CODES_OPT", |
61 | 62 |
"FIELDS_OPT", |
... | ... | |
837 | 838 |
default=constants.DEFAULT_SHUTDOWN_TIMEOUT, |
838 | 839 |
help="Maximum time to wait for instance shutdown") |
839 | 840 |
|
841 |
EARLY_RELEASE_OPT = cli_option("--early-release", |
|
842 |
dest="early_release", default=False, |
|
843 |
action="store_true", |
|
844 |
help="Release the locks on the secondary" |
|
845 |
" node(s) early") |
|
846 |
|
|
840 | 847 |
|
841 | 848 |
def _ParseArgs(argv, commands, aliases): |
842 | 849 |
"""Parser for the command line arguments. |
b/lib/cmdlib.py | ||
---|---|---|
6332 | 6332 |
self.op.remote_node = None |
6333 | 6333 |
if not hasattr(self.op, "iallocator"): |
6334 | 6334 |
self.op.iallocator = None |
6335 |
if not hasattr(self.op, "early_release"): |
|
6336 |
self.op.early_release = False |
|
6335 | 6337 |
|
6336 | 6338 |
TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node, |
6337 | 6339 |
self.op.iallocator) |
... | ... | |
6363 | 6365 |
|
6364 | 6366 |
self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode, |
6365 | 6367 |
self.op.iallocator, self.op.remote_node, |
6366 |
self.op.disks, False) |
|
6368 |
self.op.disks, False, self.op.early_release)
|
|
6367 | 6369 |
|
6368 | 6370 |
self.tasklets = [self.replacer] |
6369 | 6371 |
|
... | ... | |
6410 | 6412 |
self.op.remote_node = None |
6411 | 6413 |
if not hasattr(self.op, "iallocator"): |
6412 | 6414 |
self.op.iallocator = None |
6415 |
if not hasattr(self.op, "early_release"): |
|
6416 |
self.op.early_release = False |
|
6413 | 6417 |
|
6414 | 6418 |
TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG, |
6415 | 6419 |
self.op.remote_node, |
... | ... | |
6456 | 6460 |
|
6457 | 6461 |
replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG, |
6458 | 6462 |
self.op.iallocator, self.op.remote_node, [], |
6459 |
True) |
|
6463 |
True, self.op.early_release)
|
|
6460 | 6464 |
tasklets.append(replacer) |
6461 | 6465 |
|
6462 | 6466 |
self.tasklets = tasklets |
... | ... | |
6498 | 6502 |
|
6499 | 6503 |
""" |
6500 | 6504 |
def __init__(self, lu, instance_name, mode, iallocator_name, remote_node, |
6501 |
disks, delay_iallocator): |
|
6505 |
disks, delay_iallocator, early_release):
|
|
6502 | 6506 |
"""Initializes this class. |
6503 | 6507 |
|
6504 | 6508 |
""" |
... | ... | |
6511 | 6515 |
self.remote_node = remote_node |
6512 | 6516 |
self.disks = disks |
6513 | 6517 |
self.delay_iallocator = delay_iallocator |
6518 |
self.early_release = early_release |
|
6514 | 6519 |
|
6515 | 6520 |
# Runtime data |
6516 | 6521 |
self.instance = None |
... | ... | |
6853 | 6858 |
self.lu.LogWarning("Can't remove old LV: %s" % msg, |
6854 | 6859 |
hint="remove unused LVs manually") |
6855 | 6860 |
|
6861 |
def _ReleaseNodeLock(self, node_name): |
|
6862 |
"""Releases the lock for a given node.""" |
|
6863 |
self.lu.context.glm.release(locking.LEVEL_NODE, node_name) |
|
6864 |
|
|
6856 | 6865 |
def _ExecDrbd8DiskOnly(self, feedback_fn): |
6857 | 6866 |
"""Replace a disk on the primary or secondary for DRBD 8. |
6858 | 6867 |
|
... | ... | |
6963 | 6972 |
|
6964 | 6973 |
self.cfg.Update(self.instance, feedback_fn) |
6965 | 6974 |
|
6975 |
cstep = 5 |
|
6976 |
if self.early_release: |
|
6977 |
self.lu.LogStep(cstep, steps_total, "Removing old storage") |
|
6978 |
cstep += 1 |
|
6979 |
self._RemoveOldStorage(self.target_node, iv_names) |
|
6980 |
# only release the lock if we're doing secondary replace, since |
|
6981 |
# we use the primary node later |
|
6982 |
if self.target_node != self.instance.primary_node: |
|
6983 |
self._ReleaseNodeLock(self.target_node) |
|
6984 |
|
|
6966 | 6985 |
# Wait for sync |
6967 | 6986 |
# This can fail as the old devices are degraded and _WaitForSync |
6968 | 6987 |
# does a combined result over all disks, so we don't check its return value |
6969 |
self.lu.LogStep(5, steps_total, "Sync devices") |
|
6988 |
self.lu.LogStep(cstep, steps_total, "Sync devices") |
|
6989 |
cstep += 1 |
|
6970 | 6990 |
_WaitForSync(self.lu, self.instance) |
6971 | 6991 |
|
6972 | 6992 |
# Check all devices manually |
6973 | 6993 |
self._CheckDevices(self.instance.primary_node, iv_names) |
6974 | 6994 |
|
6975 | 6995 |
# Step: remove old storage |
6976 |
self.lu.LogStep(6, steps_total, "Removing old storage") |
|
6977 |
self._RemoveOldStorage(self.target_node, iv_names) |
|
6996 |
if not self.early_release: |
|
6997 |
self.lu.LogStep(cstep, steps_total, "Removing old storage") |
|
6998 |
cstep += 1 |
|
6999 |
self._RemoveOldStorage(self.target_node, iv_names) |
|
6978 | 7000 |
|
6979 | 7001 |
def _ExecDrbd8Secondary(self, feedback_fn): |
6980 | 7002 |
"""Replace the secondary node for DRBD 8. |
... | ... | |
7108 | 7130 |
to_node, msg, |
7109 | 7131 |
hint=("please do a gnt-instance info to see the" |
7110 | 7132 |
" status of disks")) |
7133 |
cstep = 5 |
|
7134 |
if self.early_release: |
|
7135 |
self.lu.LogStep(cstep, steps_total, "Removing old storage") |
|
7136 |
cstep += 1 |
|
7137 |
self._RemoveOldStorage(self.target_node, iv_names) |
|
7138 |
self._ReleaseNodeLock([self.target_node, self.new_node]) |
|
7111 | 7139 |
|
7112 | 7140 |
# Wait for sync |
7113 | 7141 |
# This can fail as the old devices are degraded and _WaitForSync |
7114 | 7142 |
# does a combined result over all disks, so we don't check its return value |
7115 |
self.lu.LogStep(5, steps_total, "Sync devices") |
|
7143 |
self.lu.LogStep(cstep, steps_total, "Sync devices") |
|
7144 |
cstep += 1 |
|
7116 | 7145 |
_WaitForSync(self.lu, self.instance) |
7117 | 7146 |
|
7118 | 7147 |
# Check all devices manually |
7119 | 7148 |
self._CheckDevices(self.instance.primary_node, iv_names) |
7120 | 7149 |
|
7121 | 7150 |
# Step: remove old storage |
7122 |
self.lu.LogStep(6, steps_total, "Removing old storage") |
|
7123 |
self._RemoveOldStorage(self.target_node, iv_names) |
|
7151 |
if not self.early_release: |
|
7152 |
self.lu.LogStep(cstep, steps_total, "Removing old storage") |
|
7153 |
self._RemoveOldStorage(self.target_node, iv_names) |
|
7124 | 7154 |
|
7125 | 7155 |
|
7126 | 7156 |
class LURepairNodeStorage(NoHooksLU): |
b/lib/opcodes.py | ||
---|---|---|
419 | 419 |
OP_ID = "OP_NODE_EVACUATE" |
420 | 420 |
OP_DSC_FIELD = "node_name" |
421 | 421 |
__slots__ = [ |
422 |
"node_name", "remote_node", "iallocator", |
|
422 |
"node_name", "remote_node", "iallocator", "early_release",
|
|
423 | 423 |
] |
424 | 424 |
|
425 | 425 |
|
... | ... | |
509 | 509 |
OP_DSC_FIELD = "instance_name" |
510 | 510 |
__slots__ = [ |
511 | 511 |
"instance_name", "remote_node", "mode", "disks", "iallocator", |
512 |
"early_release", |
|
512 | 513 |
] |
513 | 514 |
|
514 | 515 |
|
b/man/gnt-instance.sgml | ||
---|---|---|
1828 | 1828 |
<cmdsynopsis> |
1829 | 1829 |
<command>replace-disks</command> |
1830 | 1830 |
<arg>--submit</arg> |
1831 |
<arg>--early-release</arg> |
|
1831 | 1832 |
<arg choice="req">-p</arg> |
1832 | 1833 |
<arg>--disks <replaceable>idx</replaceable></arg> |
1833 | 1834 |
<arg choice="req"><replaceable>instance</replaceable></arg> |
... | ... | |
1836 | 1837 |
<cmdsynopsis> |
1837 | 1838 |
<command>replace-disks</command> |
1838 | 1839 |
<arg>--submit</arg> |
1840 |
<arg>--early-release</arg> |
|
1839 | 1841 |
<arg choice="req">-s</arg> |
1840 | 1842 |
<arg>--disks <replaceable>idx</replaceable></arg> |
1841 | 1843 |
<arg choice="req"><replaceable>instance</replaceable></arg> |
... | ... | |
1844 | 1846 |
<cmdsynopsis> |
1845 | 1847 |
<command>replace-disks</command> |
1846 | 1848 |
<arg>--submit</arg> |
1849 |
<arg>--early-release</arg> |
|
1847 | 1850 |
<group choice="req"> |
1848 | 1851 |
<arg>--iallocator <replaceable>name</replaceable></arg> |
1849 | 1852 |
<arg>--new-secondary <replaceable>NODE</replaceable></arg> |
... | ... | |
1855 | 1858 |
<cmdsynopsis> |
1856 | 1859 |
<command>replace-disks</command> |
1857 | 1860 |
<arg>--submit</arg> |
1861 |
<arg>--early-release</arg> |
|
1858 | 1862 |
<arg choice="req">--auto</arg> |
1859 | 1863 |
<arg choice="req"><replaceable>instance</replaceable></arg> |
1860 | 1864 |
</cmdsynopsis> |
... | ... | |
1906 | 1910 |
</para> |
1907 | 1911 |
|
1908 | 1912 |
<para> |
1913 |
The <option>--early-release</option> option changes the behavior so |
|
1914 |
that the old storage on secondary node(s) is removed early |
|
1915 |
(before the resync is completed) and the internal Ganeti |
|
1916 |
locks for the current (and new, if any) secondary node are |
|
1917 |
also released, thus allowing more parallelism in the cluster |
|
1918 |
operation. This should be used only when recovering from a |
|
1919 |
disk failure on the current secondary (thus the old storage |
|
1920 |
is already broken) or when the storage on the primary node |
|
1921 |
is known to be fine (thus we won't need the old storage for |
|
1922 |
potential recovery). |
|
1923 |
</para> |
|
1924 |
|
|
1925 |
<para> |
|
1909 | 1926 |
Note that it is not possible to select an offline or drained |
1910 | 1927 |
node as a new secondary. |
1911 | 1928 |
</para> |
b/man/gnt-node.sgml | ||
---|---|---|
143 | 143 |
<cmdsynopsis> |
144 | 144 |
<command>evacuate</command> |
145 | 145 |
<arg>-f</arg> |
146 |
<arg>--early-release</arg> |
|
146 | 147 |
<group> |
147 | 148 |
<arg>--iallocator <replaceable>NAME</replaceable></arg> |
148 | 149 |
<arg>--new-secondary <replaceable>destination_node</replaceable></arg> |
... | ... | |
173 | 174 |
</para> |
174 | 175 |
|
175 | 176 |
<para> |
177 |
The <option>--early-release</option> option changes the behavior so that |
|
178 |
the old storage on the node being evacuated is removed early |
|
179 |
(before the resync is completed) and the internal Ganeti locks |
|
180 |
are also released for both the current secondary and the new |
|
181 |
secondary, thus allowing more parallelism in the cluster |
|
182 |
operation. This should be used only when recovering from a |
|
183 |
disk failure on the current secondary (thus the old storage is |
|
184 |
already broken) or when the storage on the primary node is |
|
185 |
known to be fine (thus we won't need the old storage for |
|
186 |
potential recovery). |
|
187 |
</para> |
|
188 |
|
|
189 |
<para> |
|
176 | 190 |
Example: |
177 | 191 |
<screen> |
178 | 192 |
# gnt-node evacuate -I dumb node3.example.com |
b/scripts/gnt-instance | ||
---|---|---|
807 | 807 |
|
808 | 808 |
op = opcodes.OpReplaceDisks(instance_name=args[0], disks=disks, |
809 | 809 |
remote_node=new_2ndary, mode=mode, |
810 |
iallocator=iallocator) |
|
810 |
iallocator=iallocator, |
|
811 |
early_release=opts.early_release) |
|
811 | 812 |
SubmitOrSend(op, opts) |
812 | 813 |
return 0 |
813 | 814 |
|
... | ... | |
1400 | 1401 |
"<instance> <new_name>", "Rename the instance"), |
1401 | 1402 |
'replace-disks': ( |
1402 | 1403 |
ReplaceDisks, ARGS_ONE_INSTANCE, |
1403 |
[AUTO_REPLACE_OPT, DISKIDX_OPT, IALLOCATOR_OPT, |
|
1404 |
[AUTO_REPLACE_OPT, DISKIDX_OPT, IALLOCATOR_OPT, EARLY_RELEASE_OPT,
|
|
1404 | 1405 |
NEW_SECONDARY_OPT, ON_PRIMARY_OPT, ON_SECONDARY_OPT, SUBMIT_OPT], |
1405 | 1406 |
"[-s|-p|-n NODE|-I NAME] <instance>", |
1406 | 1407 |
"Replaces all disks for the instance"), |
b/tools/burnin | ||
---|---|---|
120 | 120 |
cli.VERBOSE_OPT, |
121 | 121 |
cli.NOIPCHECK_OPT, |
122 | 122 |
cli.NONAMECHECK_OPT, |
123 |
cli.EARLY_RELEASE_OPT, |
|
123 | 124 |
cli.cli_option("--no-replace1", dest="do_replace1", |
124 | 125 |
help="Skip disk replacement with the same secondary", |
125 | 126 |
action="store_false", default=True), |
... | ... | |
544 | 545 |
for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI: |
545 | 546 |
op = opcodes.OpReplaceDisks(instance_name=instance, |
546 | 547 |
mode=mode, |
547 |
disks=[i for i in range(self.disk_count)]) |
|
548 |
disks=[i for i in range(self.disk_count)], |
|
549 |
early_release=self.opts.early_release) |
|
548 | 550 |
Log("run %s" % mode, indent=2) |
549 | 551 |
ops.append(op) |
550 | 552 |
self.ExecOrQueue(instance, *ops) # pylint: disable-msg=W0142 |
... | ... | |
568 | 570 |
mode=mode, |
569 | 571 |
remote_node=tnode, |
570 | 572 |
iallocator=self.opts.iallocator, |
571 |
disks=[]) |
|
573 |
disks=[], |
|
574 |
early_release=self.opts.early_release) |
|
572 | 575 |
Log("run %s %s" % (mode, msg), indent=2) |
573 | 576 |
self.ExecOrQueue(instance, op) |
574 | 577 |
|
Also available in: Unified diff