Revision 7ea7bcf6

b/lib/cli.py
   "DISK_OPT",
   "DISK_TEMPLATE_OPT",
   "DRAINED_OPT",
+  "EARLY_RELEASE_OPT",
   "ENABLED_HV_OPT",
   "ERROR_CODES_OPT",
   "FIELDS_OPT",
......
                          default=constants.DEFAULT_SHUTDOWN_TIMEOUT,
                          help="Maximum time to wait for instance shutdown")

+EARLY_RELEASE_OPT = cli_option("--early-release",
+                               dest="early_release", default=False,
+                               action="store_true",
+                               help="Release the locks on the secondary"
+                               " node(s) early")
+

 def _ParseArgs(argv, commands, aliases):
   """Parser for the command line arguments.
b/lib/cmdlib.py
       self.op.remote_node = None
     if not hasattr(self.op, "iallocator"):
       self.op.iallocator = None
+    if not hasattr(self.op, "early_release"):
+      self.op.early_release = False

     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                   self.op.iallocator)
......

     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                    self.op.iallocator, self.op.remote_node,
-                                   self.op.disks, False)
+                                   self.op.disks, False, self.op.early_release)

     self.tasklets = [self.replacer]

......
       self.op.remote_node = None
     if not hasattr(self.op, "iallocator"):
       self.op.iallocator = None
+    if not hasattr(self.op, "early_release"):
+      self.op.early_release = False

     TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                   self.op.remote_node,
......

       replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                 self.op.iallocator, self.op.remote_node, [],
-                                True)
+                                True, self.op.early_release)
       tasklets.append(replacer)

     self.tasklets = tasklets
......

   """
   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
-               disks, delay_iallocator):
+               disks, delay_iallocator, early_release):
     """Initializes this class.

     """
......
     self.remote_node = remote_node
     self.disks = disks
     self.delay_iallocator = delay_iallocator
+    self.early_release = early_release

     # Runtime data
     self.instance = None
......
           self.lu.LogWarning("Can't remove old LV: %s" % msg,
                              hint="remove unused LVs manually")

+  def _ReleaseNodeLock(self, node_name):
+    """Releases the lock for a given node."""
+    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
+
   def _ExecDrbd8DiskOnly(self, feedback_fn):
     """Replace a disk on the primary or secondary for DRBD 8.

......

       self.cfg.Update(self.instance, feedback_fn)

+    cstep = 5
+    if self.early_release:
+      self.lu.LogStep(cstep, steps_total, "Removing old storage")
+      cstep += 1
+      self._RemoveOldStorage(self.target_node, iv_names)
+      # only release the lock if we're doing secondary replace, since
+      # we use the primary node later
+      if self.target_node != self.instance.primary_node:
+        self._ReleaseNodeLock(self.target_node)
+
     # Wait for sync
     # This can fail as the old devices are degraded and _WaitForSync
     # does a combined result over all disks, so we don't check its return value
-    self.lu.LogStep(5, steps_total, "Sync devices")
+    self.lu.LogStep(cstep, steps_total, "Sync devices")
+    cstep += 1
     _WaitForSync(self.lu, self.instance)

     # Check all devices manually
     self._CheckDevices(self.instance.primary_node, iv_names)

     # Step: remove old storage
-    self.lu.LogStep(6, steps_total, "Removing old storage")
-    self._RemoveOldStorage(self.target_node, iv_names)
+    if not self.early_release:
+      self.lu.LogStep(cstep, steps_total, "Removing old storage")
+      cstep += 1
+      self._RemoveOldStorage(self.target_node, iv_names)

   def _ExecDrbd8Secondary(self, feedback_fn):
     """Replace the secondary node for DRBD 8.

......
                            to_node, msg,
                            hint=("please do a gnt-instance info to see the"
                                  " status of disks"))
+    cstep = 5
+    if self.early_release:
+      self.lu.LogStep(cstep, steps_total, "Removing old storage")
+      cstep += 1
+      self._RemoveOldStorage(self.target_node, iv_names)
+      self._ReleaseNodeLock([self.target_node, self.new_node])

     # Wait for sync
     # This can fail as the old devices are degraded and _WaitForSync
     # does a combined result over all disks, so we don't check its return value
-    self.lu.LogStep(5, steps_total, "Sync devices")
+    self.lu.LogStep(cstep, steps_total, "Sync devices")
+    cstep += 1
     _WaitForSync(self.lu, self.instance)

     # Check all devices manually
     self._CheckDevices(self.instance.primary_node, iv_names)

     # Step: remove old storage
-    self.lu.LogStep(6, steps_total, "Removing old storage")
-    self._RemoveOldStorage(self.target_node, iv_names)
+    if not self.early_release:
+      self.lu.LogStep(cstep, steps_total, "Removing old storage")
+      self._RemoveOldStorage(self.target_node, iv_names)


 class LURepairNodeStorage(NoHooksLU):
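Taken together, the hasattr guards above give early_release a default of False for opcodes submitted without the new field, and the two _ExecDrbd8* methods apply the same reordering: with early release, the old storage is removed (and the node lock released) before the resync rather than after it, while the running cstep counter keeps the LogStep numbering consistent in either ordering. A simplified, self-contained sketch of that flow, with plain callables standing in for the TLReplaceDisks helpers (this is not the actual Ganeti code):

  def finish_replace(early_release, release_lock_early, log_step,
                     remove_old_storage, release_locks, wait_for_sync,
                     check_devices, steps_total=6):
    """Final step ordering shared by both _ExecDrbd8* methods (simplified)."""
    cstep = 5
    if early_release:
      # Old storage is dropped *before* the resync and the node lock(s) are
      # released, so other jobs can use those nodes sooner.
      log_step(cstep, steps_total, "Removing old storage")
      cstep += 1
      remove_old_storage()
      if release_lock_early:
        release_locks()
    log_step(cstep, steps_total, "Sync devices")
    cstep += 1
    wait_for_sync()
    check_devices()
    if not early_release:
      # Default behaviour: keep the old storage until the resync has
      # finished, then remove it as the last step.
      log_step(cstep, steps_total, "Removing old storage")
      remove_old_storage()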
b/lib/opcodes.py
   OP_ID = "OP_NODE_EVACUATE"
   OP_DSC_FIELD = "node_name"
   __slots__ = [
-    "node_name", "remote_node", "iallocator",
+    "node_name", "remote_node", "iallocator", "early_release",
     ]


......
   OP_DSC_FIELD = "instance_name"
   __slots__ = [
     "instance_name", "remote_node", "mode", "disks", "iallocator",
+    "early_release",
     ]


b/man/gnt-instance.sgml
         <cmdsynopsis>
           <command>replace-disks</command>
           <arg>--submit</arg>
+          <arg>--early-release</arg>
           <arg choice="req">-p</arg>
           <arg>--disks <replaceable>idx</replaceable></arg>
           <arg choice="req"><replaceable>instance</replaceable></arg>
......
         <cmdsynopsis>
           <command>replace-disks</command>
           <arg>--submit</arg>
+          <arg>--early-release</arg>
           <arg choice="req">-s</arg>
           <arg>--disks <replaceable>idx</replaceable></arg>
           <arg choice="req"><replaceable>instance</replaceable></arg>
......
         <cmdsynopsis>
           <command>replace-disks</command>
           <arg>--submit</arg>
+          <arg>--early-release</arg>
           <group choice="req">
             <arg>--iallocator <replaceable>name</replaceable></arg>
             <arg>--new-secondary <replaceable>NODE</replaceable></arg>
......
         <cmdsynopsis>
           <command>replace-disks</command>
           <arg>--submit</arg>
+          <arg>--early-release</arg>
           <arg choice="req">--auto</arg>
           <arg choice="req"><replaceable>instance</replaceable></arg>
         </cmdsynopsis>
......
         </para>

         <para>
+          The <option>--early-release</option> changes the code so
+          that the old storage on secondary node(s) is removed early
+          (before the resync is completed) and the internal Ganeti
+          locks for the current (and new, if any) secondary node are
+          also released, thus allowing more parallelism in the cluster
+          operation. This should be used only when recovering from a
+          disk failure on the current secondary (thus the old storage
+          is already broken) or when the storage on the primary node
+          is known to be fine (thus we won't need the old storage for
+          potential recovery).
+        </para>
+
+        <para>
           Note that it is not possible to select an offline or drained
           node as a new secondary.
         </para>
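With the man-page text above, the typical use is a secondary-disk replacement that hands the node locks back early; an illustrative invocation (the instance name is invented for the example):

  # gnt-instance replace-disks --early-release -s instance1.example.com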
b/man/gnt-node.sgml
       <cmdsynopsis>
         <command>evacuate</command>
         <arg>-f</arg>
+        <arg>--early-release</arg>
         <group>
           <arg>--iallocator <replaceable>NAME</replaceable></arg>
           <arg>--new-secondary <replaceable>destination_node</replaceable></arg>
......
       </para>

       <para>
+        The <option>--early-release</option> changes the code so that
+        the old storage on node being evacuated is removed early
+        (before the resync is completed) and the internal Ganeti locks
+        are also released for both the current secondary and the new
+        secondary, thus allowing more parallelism in the cluster
+        operation. This should be used only when recovering from a
+        disk failure on the current secondary (thus the old storage is
+        already broken) or when the storage on the primary node is
+        known to be fine (thus we won't need the old storage for
+        potential recovery).
+      </para>
+
+      <para>
         Example:
         <screen>
           # gnt-node evacuate -I dumb node3.example.com
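Building on the evacuate example already shown in this man page, the same evacuation with early lock release would look like this (illustrative):

  # gnt-node evacuate --early-release -I dumb node3.example.com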
b/scripts/gnt-instance

   op = opcodes.OpReplaceDisks(instance_name=args[0], disks=disks,
                               remote_node=new_2ndary, mode=mode,
-                              iallocator=iallocator)
+                              iallocator=iallocator,
+                              early_release=opts.early_release)
   SubmitOrSend(op, opts)
   return 0

......
     "<instance> <new_name>", "Rename the instance"),
   'replace-disks': (
     ReplaceDisks, ARGS_ONE_INSTANCE,
-    [AUTO_REPLACE_OPT, DISKIDX_OPT, IALLOCATOR_OPT,
+    [AUTO_REPLACE_OPT, DISKIDX_OPT, IALLOCATOR_OPT, EARLY_RELEASE_OPT,
      NEW_SECONDARY_OPT, ON_PRIMARY_OPT, ON_SECONDARY_OPT, SUBMIT_OPT],
     "[-s|-p|-n NODE|-I NAME] <instance>",
     "Replaces all disks for the instance"),
b/tools/burnin
   cli.VERBOSE_OPT,
   cli.NOIPCHECK_OPT,
   cli.NONAMECHECK_OPT,
+  cli.EARLY_RELEASE_OPT,
   cli.cli_option("--no-replace1", dest="do_replace1",
                  help="Skip disk replacement with the same secondary",
                  action="store_false", default=True),
......
       for mode in constants.REPLACE_DISK_SEC, constants.REPLACE_DISK_PRI:
         op = opcodes.OpReplaceDisks(instance_name=instance,
                                     mode=mode,
-                                    disks=[i for i in range(self.disk_count)])
+                                    disks=[i for i in range(self.disk_count)],
+                                    early_release=self.opts.early_release)
         Log("run %s" % mode, indent=2)
         ops.append(op)
       self.ExecOrQueue(instance, *ops) # pylint: disable-msg=W0142
......
                                   mode=mode,
                                   remote_node=tnode,
                                   iallocator=self.opts.iallocator,
-                                  disks=[])
+                                  disks=[],
+                                  early_release=self.opts.early_release)
       Log("run %s %s" % (mode, msg), indent=2)
       self.ExecOrQueue(instance, op)

