Revision 97da6b71

b/htools/Ganeti/HTools/Cluster.hs
922 922
        ops = iMoveToJob nl' il' idx (ReplaceSecondary ndx)
923 923
    return (nl', il', ops)
924 924

  
925
-- The algorithm for ChangeAll is as follows:
926
--
927
-- * generate all (primary, secondary) node pairs for the target groups
928
-- * for each pair, execute the needed moves (r:s, f, r:s) and compute
929
--   the final node list state and group score
930
-- * select the best choice via a foldl that uses the same Either
931
--   String solution as the ChangeSecondary mode
925 932
nodeEvacInstance nl il ChangeAll
926 933
                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
927 934
                 gdx avail_nodes =
928 935
  do
929
    let primary = Container.find (Instance.pNode inst) nl
930
        idx = Instance.idx inst
931
        no_nodes = Left "no nodes available"
932
    -- if the primary is offline, then we first failover
933
    (nl1, inst1, ops1) <-
934
        if Node.offline primary
935
        then do
936
          (nl', inst', _, _) <-
937
              annotateResult "Failing over to the secondary" $
938
              opToResult $ applyMove nl inst Failover
939
          return (nl', inst', [Failover])
940
        else return (nl, inst, [])
941
    -- we now need to execute a replace secondary to the future
942
    -- primary node
943
    (nl2, inst2, _, new_pdx) <- annotateResult "Searching for a new primary" $
944
                                eitherToResult $
945
                                foldl' (evacDrbdSecondaryInner nl1 inst1 gdx)
946
                                no_nodes avail_nodes
947
    let ops2 = ReplaceSecondary new_pdx:ops1
948
    -- since we chose the new primary, we remove it from the list of
949
    -- available nodes
950
    let avail_nodes_sec = new_pdx `delete` avail_nodes
951
    -- we now execute another failover, the primary stays fixed now
952
    (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
953
                          opToResult $ applyMove nl2 inst2 Failover
954
    let ops3 = Failover:ops2
955
    -- and finally another replace secondary, to the final secondary
956
    (nl4, inst4, _, new_sdx) <-
957
        annotateResult "Searching for a new secondary" $
936
    let no_nodes = Left "no nodes available"
937
        node_pairs = [(p,s) | p <- avail_nodes, s <- avail_nodes, p /= s]
938
    (nl', il', ops, _) <-
939
        annotateResult "Can't find any good nodes for relocation" $
958 940
        eitherToResult $
959
        foldl' (evacDrbdSecondaryInner nl3 inst3 gdx) no_nodes avail_nodes_sec
960
    let ops4 = ReplaceSecondary new_sdx:ops3
961
        il' = Container.add idx inst4 il
962
        ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
963
    return (nl4, il', ops)
941
        foldl'
942
        (\accu nodes -> case evacDrbdAllInner nl il inst gdx nodes of
943
                          Bad msg ->
944
                              case accu of
945
                                Right _ -> accu
946
                                -- we don't need more details (which
947
                                -- nodes, etc.) as we only selected
948
                                -- this group if we can allocate on
949
                                -- it, hence failures will not
950
                                -- propagate out of this fold loop
951
                                Left _ -> Left $ "Allocation failed: " ++ msg
952
                          Ok result@(_, _, _, new_cv) ->
953
                              let new_accu = Right result in
954
                              case accu of
955
                                Left _ -> new_accu
956
                                Right (_, _, _, old_cv) ->
957
                                    if old_cv < new_cv
958
                                    then accu
959
                                    else new_accu
960
        ) no_nodes node_pairs
961

  
962
    return (nl', il', ops)
964 963

  
965 964
-- | Inner fold function for changing secondary of a DRBD instance.
966 965
--
967
-- The "running" solution is either a @Left String@, which means we
966
-- The running solution is either a @Left String@, which means we
968 967
-- don't yet have a working solution, or a @Right (...)@, which
969 968
-- represents a valid solution; it holds the modified node list, the
970 969
-- modified instance (after evacuation), the score of that solution,
......
1003 1002
                   then accu
1004 1003
                   else new_accu
1005 1004

  
1005
-- | Compute result of changing all nodes of a DRBD instance.
1006
--
1007
-- Given the target primary and secondary node (which might be in a
1008
-- different group or not), this function will 'execute' all the
1009
-- required steps and, assuming all operations succeed, will return
1010
-- the modified node and instance lists, the opcodes needed for this
1011
-- and the new group score.
1012
evacDrbdAllInner :: Node.List         -- ^ Cluster node list
1013
                 -> Instance.List     -- ^ Cluster instance list
1014
                 -> Instance.Instance -- ^ The instance to be moved
1015
                 -> Gdx               -- ^ The target group index
1016
                                      -- (which can differ from the
1017
                                      -- current group of the
1018
                                      -- instance)
1019
                 -> (Ndx, Ndx)        -- ^ Tuple of new
1020
                                      -- primary\/secondary nodes
1021
                 -> Result (Node.List, Instance.List, [OpCodes.OpCode], Score)
1022
evacDrbdAllInner nl il inst gdx (t_pdx, t_sdx) =
1023
  do
1024
    let primary = Container.find (Instance.pNode inst) nl
1025
        idx = Instance.idx inst
1026
    -- if the primary is offline, then we first failover
1027
    (nl1, inst1, ops1) <-
1028
        if Node.offline primary
1029
        then do
1030
          (nl', inst', _, _) <-
1031
              annotateResult "Failing over to the secondary" $
1032
              opToResult $ applyMove nl inst Failover
1033
          return (nl', inst', [Failover])
1034
        else return (nl, inst, [])
1035
    let (o1, o2, o3) = (ReplaceSecondary t_pdx,
1036
                        Failover,
1037
                        ReplaceSecondary t_sdx)
1038
    -- we now need to execute a replace secondary to the future
1039
    -- primary node
1040
    (nl2, inst2, _, _) <-
1041
        annotateResult "Changing secondary to new primary" $
1042
        opToResult $
1043
        applyMove nl1 inst1 o1
1044
    let ops2 = o1:ops1
1045
    -- we now execute another failover, the primary stays fixed now
1046
    (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
1047
                          opToResult $ applyMove nl2 inst2 o2
1048
    let ops3 = o2:ops2
1049
    -- and finally another replace secondary, to the final secondary
1050
    (nl4, inst4, _, _) <-
1051
        annotateResult "Changing secondary to final secondary" $
1052
        opToResult $
1053
        applyMove nl3 inst3 o3
1054
    let ops4 = o3:ops3
1055
        il' = Container.add idx inst4 il
1056
        ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
1057
    let nodes = Container.elems nl4
1058
        -- The fromJust below is ugly (it can fail nastily), but
1059
        -- at this point we shouldn't have any internal mismatches,
1060
        -- and adding a monad here would be quite involved
1061
        grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
1062
        new_cv = compCVNodes grpnodes
1063
    return (nl4, il', ops, new_cv)
1064

  
1006 1065
-- | Computes the nodes in a given group which are available for
1007 1066
-- allocation.
1008 1067
availableGroupNodes :: [(Gdx, [Ndx])] -- ^ Group index/node index assoc list

Also available in: Unified diff