Revision 0834c866 lib/cmdlib.py

--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -1051,7 +1051,8 @@
         logger.ToStderr("Can't compute data for node %s/%s" %
                         (node, instance.disks[i].iv_name))
         continue
-      perc_done, est_time, is_degraded = mstat
+      # we ignore the ldisk parameter
+      perc_done, est_time, is_degraded, _ = mstat
       cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
       if perc_done is not None:
         done = False
@@ -1078,11 +1079,19 @@
   return not cumul_degraded
 
 
-def _CheckDiskConsistency(cfgw, dev, node, on_primary):
+def _CheckDiskConsistency(cfgw, dev, node, on_primary, ldisk=False):
   """Check that mirrors are not degraded.
 
+  The ldisk parameter, if True, will change the test from the
+  is_degraded attribute (which represents overall non-ok status for
+  the device(s)) to the ldisk (representing the local storage status).
+
   """
   cfgw.SetDiskID(dev, node)
+  if ldisk:
+    idx = 6
+  else:
+    idx = 5
 
   result = True
   if on_primary or dev.AssembleOnSecondary():
@@ -1091,7 +1100,7 @@
       logger.ToStderr("Can't get any data from node %s" % node)
       result = False
     else:
-      result = result and (not rstats[5])
+      result = result and (not rstats[idx])
   if dev.children:
     for child in dev.children:
       result = result and _CheckDiskConsistency(cfgw, child, node, on_primary)
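The two hunks above hinge on the layout of the device status returned by rpc.call_blockdev_find: index 5 carries the overall is_degraded flag and index 6 the local-disk ("ldisk") status, which _WaitForSync now also unpacks and deliberately discards. A minimal standalone sketch of the index selection; the helper name and the sample tuple are illustrative, not part of the revision:

def _status_index(ldisk=False):
  """Return which field of the block-device status tuple to test."""
  # assumption: index 5 = overall is_degraded, index 6 = local-disk status,
  # matching the rstats[5] -> rstats[idx] change in the hunk above
  return 6 if ldisk else 5

# hypothetical status tuple: overall state OK, but local storage degraded
rstats = ("connected", "uptodate", None, None, None, False, True)
assert rstats[_status_index(ldisk=False)] is False   # device not degraded
assert rstats[_status_index(ldisk=True)] is True     # ldisk reports trouble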
@@ -3360,8 +3369,12 @@
       "OLD_SECONDARY": self.instance.secondary_nodes[0],
       }
     env.update(_BuildInstanceHookEnvByObject(self.instance))
-    nl = [self.sstore.GetMasterNode(),
-          self.instance.primary_node] + list(self.instance.secondary_nodes)
+    nl = [
+      self.sstore.GetMasterNode(),
+      self.instance.primary_node,
+      ]
+    if self.op.remote_node is not None:
+      nl.append(self.op.remote_node)
     return env, nl, nl
 
   def CheckPrereq(self):
@@ -3401,8 +3414,13 @@
       raise errors.OpPrereqError("The specified node is the primary node of"
                                  " the instance.")
     elif remote_node == self.sec_node:
+      if self.op.mode == constants.REPLACE_DISK_SEC:
+        # this is for DRBD8, where we can't execute the same mode of
+        # replacement as for drbd7 (no different port allocated)
+        raise errors.OpPrereqError("Same secondary given, cannot execute"
+                                   " replacement")
       # the user gave the current secondary, switch to
-      # 'no-replace-secondary' mode
+      # 'no-replace-secondary' mode for drbd7
       remote_node = None
     if (instance.disk_template == constants.DT_REMOTE_RAID1 and
         self.op.mode != constants.REPLACE_DISK_ALL):
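Out of context the prereq logic above is easy to misread, so here is a simplified, self-contained restatement; the mode string stands in for constants.REPLACE_DISK_SEC and nothing here is the actual Ganeti API:

def _normalize_remote_node(mode, remote_node, current_secondary):
  """Mirror of the check above: reject 'replace secondary with itself'."""
  if remote_node == current_secondary:
    if mode == "replace_secondary":   # stand-in for constants.REPLACE_DISK_SEC
      # DRBD8 cannot re-use the same secondary (no different port allocated)
      raise ValueError("Same secondary given, cannot execute replacement")
    # drbd7 path: fall back to 'no-replace-secondary' mode
    return None
  return remote_node

assert _normalize_remote_node("replace_all", "node2", "node2") is None
assert _normalize_remote_node("replace_secondary", "node3", "node2") == "node3"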
@@ -3717,7 +3735,10 @@
       - remove all disks from the old secondary
 
     Failures are not very well handled.
+
     """
+    steps_total = 6
+    warning, info = (self.processor.LogWarning, self.processor.LogInfo)
     instance = self.instance
     iv_names = {}
     vgname = self.cfg.GetVGName()
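The steps_total constant and the warning/info aliases introduced here feed the processor's step logging, which the later hunks call as LogStep(n, steps_total, ...). A rough illustration of how such output could be produced; this Processor class is a stand-in written for the example, not the Ganeti processor, and the two-argument warning() call simply mirrors what the diff passes:

class Processor(object):
  """Stand-in exposing the three logging methods the diff relies on."""
  def LogStep(self, current, total, message):
    print("STEP %d/%d %s" % (current, total, message))
  def LogInfo(self, message):
    print(" - INFO: %s" % message)
  def LogWarning(self, message, hint=None):
    # the diff passes a second positional argument, treated here as a hint
    print(" - WARNING: %s" % message)
    if hint:
      print("   HINT: %s" % hint)

proc = Processor()
steps_total = 6
warning, info = (proc.LogWarning, proc.LogInfo)
proc.LogStep(1, steps_total, "check device existence")
info("checking volume groups")
warning("Failed to shutdown drbd for sda on old node",
        "Please clean up this device manually as soon as possible")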
@@ -3726,10 +3747,44 @@
     old_node = self.tgt_node
     new_node = self.new_node
     pri_node = instance.primary_node
+
+    # Step: check device activation
+    self.processor.LogStep(1, steps_total, "check device existence")
+    info("checking volume groups")
+    my_vg = cfg.GetVGName()
+    results = rpc.call_vg_list([pri_node, new_node])
+    if not results:
+      raise errors.OpExecError("Can't list volume groups on the nodes")
+    for node in pri_node, new_node:
+      res = results.get(node, False)
+      if not res or my_vg not in res:
+        raise errors.OpExecError("Volume group '%s' not found on %s" %
+                                 (my_vg, node))
+    for dev in instance.disks:
+      if not dev.iv_name in self.op.disks:
+        continue
+      info("checking %s on %s" % (dev.iv_name, pri_node))
+      cfg.SetDiskID(dev, pri_node)
+      if not rpc.call_blockdev_find(pri_node, dev):
+        raise errors.OpExecError("Can't find device %s on node %s" %
+                                 (dev.iv_name, pri_node))
+
+    # Step: check other node consistency
+    self.processor.LogStep(2, steps_total, "check peer consistency")
+    for dev in instance.disks:
+      if not dev.iv_name in self.op.disks:
+        continue
+      info("checking %s consistency on %s" % (dev.iv_name, pri_node))
+      if not _CheckDiskConsistency(self.cfg, dev, pri_node, True, ldisk=True):
+        raise errors.OpExecError("Primary node (%s) has degraded storage,"
+                                 " unsafe to replace the secondary" %
+                                 pri_node)
+
+    # Step: create new storage
+    self.processor.LogStep(3, steps_total, "allocate new storage")
     for dev in instance.disks:
       size = dev.size
-      logger.Info("adding new local storage on %s for %s" %
-                  (new_node, dev.iv_name))
+      info("adding new local storage on %s for %s" % (new_node, dev.iv_name))
       # since we *always* want to create this LV, we use the
       # _Create...OnPrimary (which forces the creation), even if we
       # are talking about the secondary node
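Step 1 above treats the per-node result of rpc.call_vg_list as a collection of volume-group names (or a false value when the node could not be reached), which is what the membership test implies. Isolated, the check looks roughly like this; the node names, VG name, and the plain RuntimeError are illustrative only:

def _check_volume_group(results, nodes, my_vg):
  """Fail if any node did not report the expected volume group."""
  if not results:
    raise RuntimeError("Can't list volume groups on the nodes")
  for node in nodes:
    res = results.get(node, False)
    if not res or my_vg not in res:
      raise RuntimeError("Volume group '%s' not found on %s" % (my_vg, node))

# both nodes report the cluster volume group, so this passes silently
_check_volume_group({"node1": ["xenvg"], "node2": ["xenvg", "data"]},
                    ["node1", "node2"], "xenvg")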
@@ -3740,6 +3795,12 @@
                                    " node '%s'" %
                                    (new_lv.logical_id[1], new_node))
 
+      iv_names[dev.iv_name] = (dev, dev.children)
+
+    self.processor.LogStep(4, steps_total, "changing drbd configuration")
+    for dev in instance.disks:
+      size = dev.size
+      info("activating a new drbd on %s for %s" % (new_node, dev.iv_name))
       # create new devices on new_node
       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                               logical_id=(pri_node, new_node,
@@ -3751,31 +3812,34 @@
         raise errors.OpExecError("Failed to create new DRBD on"
                                  " node '%s'" % new_node)
 
+    for dev in instance.disks:
       # we have new devices, shutdown the drbd on the old secondary
+      info("shutting down drbd for %s on old node" % dev.iv_name)
       cfg.SetDiskID(dev, old_node)
       if not rpc.call_blockdev_shutdown(old_node, dev):
-        raise errors.OpExecError("Failed to shutdown DRBD on old node")
+        warning("Failed to shutdown drbd for %s on old node" % dev.iv_name,
+                "Please clean up this device manually as soon as possible")
 
       # we have new storage, we 'rename' the network on the primary
+      info("switching primary drbd for %s to new secondary node" % dev.iv_name)
       cfg.SetDiskID(dev, pri_node)
       # rename to the ip of the new node
       new_uid = list(dev.physical_id)
       new_uid[2] = self.remote_node_info.secondary_ip
       rlist = [(dev, tuple(new_uid))]
       if not rpc.call_blockdev_rename(pri_node, rlist):
-        raise errors.OpExecError("Can't detach re-attach drbd %s on node"
+        raise errors.OpExecError("Can't detach & re-attach drbd %s on node"
                                  " %s from %s to %s" %
                                  (dev.iv_name, pri_node, old_node, new_node))
       dev.logical_id = (pri_node, new_node, dev.logical_id[2])
       cfg.SetDiskID(dev, pri_node)
       cfg.Update(instance)
 
-      iv_names[dev.iv_name] = (dev, dev.children)
 
     # this can fail as the old devices are degraded and _WaitForSync
     # does a combined result over all disks, so we don't check its
     # return value
-    logger.Info("Done changing drbd configs, waiting for sync")
+    self.processor.LogStep(5, steps_total, "sync devices")
     _WaitForSync(cfg, instance, unlock=True)
 
     # so check manually all the devices
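The "rename" in the hunk above re-points the DRBD device at the new secondary purely by rewriting its physical_id: the entry at index 2 is replaced with the new node's secondary IP and the (device, new_id) pairs are handed to rpc.call_blockdev_rename. In isolation, under the assumption that only index 2 matters (the rest of the sample tuple layout is hypothetical):

def _retarget_physical_id(physical_id, new_secondary_ip):
  """Return a copy of physical_id pointing at the new secondary's IP."""
  new_uid = list(physical_id)
  new_uid[2] = new_secondary_ip   # index 2 holds the peer IP per the hunk above
  return tuple(new_uid)

# hypothetical id: (local ip, port, peer ip, port)
old_id = ("192.0.2.10", 11000, "192.0.2.11", 11000)
assert _retarget_physical_id(old_id, "192.0.2.12") == \
       ("192.0.2.10", 11000, "192.0.2.12", 11000)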
@@ -3785,14 +3849,14 @@
       if is_degr:
         raise errors.OpExecError("DRBD device %s is degraded!" % name)
 
+    self.processor.LogStep(6, steps_total, "removing old storage")
     for name, (dev, old_lvs) in iv_names.iteritems():
-      logger.Info("remove logical volumes for %s" % name)
+      info("remove logical volumes for %s" % name)
       for lv in old_lvs:
         cfg.SetDiskID(lv, old_node)
         if not rpc.call_blockdev_remove(old_node, lv):
-          logger.Error("Can't cleanup child device, skipping. You need to"
-                       " fix manually!")
-          continue
+          warning("Can't remove LV on old secondary",
+                  "Cleanup stale volumes by hand")
 
   def Exec(self, feedback_fn):
     """Execute disk replacement.
