, tryAlloc
, tryMGAlloc
, tryReloc
- , tryEvac
, tryNodeEvac
, tryChangeGroup
, collapseFailures
import qualified Data.IntSet as IntSet
import Data.List
-import Data.Maybe (fromJust)
+import Data.Maybe (fromJust, isNothing)
import Data.Ord (comparing)
import Text.Printf (printf)
import Control.Monad
-- | Allocation\/relocation solution.
data AllocSolution = AllocSolution
- { asFailures :: [FailMode] -- ^ Failure counts
- , asAllocs :: Int -- ^ Good allocation count
- , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
- -- of the list depends on the
- -- allocation/relocation mode
- , asLog :: [String] -- ^ A list of informational messages
+ { asFailures :: [FailMode] -- ^ Failure counts
+ , asAllocs :: Int -- ^ Good allocation count
+ , asSolution :: Maybe Node.AllocElement -- ^ The actual allocation result
+ , asLog :: [String] -- ^ Informational messages
}
-- | Node evacuation/group change iallocator result type. This result
-- | The empty solution we start with when computing allocations.
emptyAllocSolution :: AllocSolution
emptyAllocSolution = AllocSolution { asFailures = [], asAllocs = 0
- , asSolutions = [], asLog = [] }
+ , asSolution = Nothing, asLog = [] }
-- | The empty evac solution.
emptyEvacSolution :: EvacSolution
all_inst' = if evac_mode
then let bad_nodes = map Node.idx . filter Node.offline $
Container.elems ini_nl
- in filter (\e -> Instance.sNode e `elem` bad_nodes ||
- Instance.pNode e `elem` bad_nodes)
+ in filter (any (`elem` bad_nodes) . Instance.allNodes)
all_inst
else all_inst
reloc_inst = filter Instance.movable all_inst'
map (\k -> (k, foldl' (\a e -> if e == k then a + 1 else a) 0 flst))
[minBound..maxBound]
+-- | Compares two Maybe AllocElement and chooses the besst score.
+bestAllocElement :: Maybe Node.AllocElement
+ -> Maybe Node.AllocElement
+ -> Maybe Node.AllocElement
+bestAllocElement a Nothing = a
+bestAllocElement Nothing b = b
+bestAllocElement a@(Just (_, _, _, ascore)) b@(Just (_, _, _, bscore)) =
+ if ascore < bscore then a else b
+
-- | Update current Allocation solution and failure stats with new
-- elements.
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
-concatAllocs as (OpGood ns@(_, _, _, nscore)) =
+concatAllocs as (OpGood ns) =
let -- Choose the old or new solution, based on the cluster score
cntok = asAllocs as
- osols = asSolutions as
- nsols = case osols of
- [] -> [ns]
- (_, _, _, oscore):[] ->
- if oscore < nscore
- then osols
- else [ns]
- -- FIXME: here we simply concat to lists with more
- -- than one element; we should instead abort, since
- -- this is not a valid usage of this function
- xs -> ns:xs
+ osols = asSolution as
+ nsols = bestAllocElement osols (Just ns)
nsuc = cntok + 1
-- Note: we force evaluation of nsols here in order to keep the
-- memory profile low - we know that we will need nsols for sure
-- in the next cycle, so we force evaluation of nsols, since the
-- foldl' in the caller will only evaluate the tuple, but not the
-- elements of the tuple
- in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
+ in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolution = nsols }
-- | Given a solution, generates a reasonable description for it.
describeSolution :: AllocSolution -> String
describeSolution as =
let fcnt = asFailures as
- sols = asSolutions as
+ sols = asSolution as
freasons =
intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
filter ((> 0) . snd) . collapseFailures $ fcnt
- in if null sols
- then "No valid allocation solutions, failure reasons: " ++
- (if null fcnt
- then "unknown reasons"
- else freasons)
- else let (_, _, nodes, cv) = head sols
- in printf ("score: %.8f, successes %d, failures %d (%s)" ++
- " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
- (intercalate "/" . map Node.name $ nodes)
+ in case sols of
+ Nothing -> "No valid allocation solutions, failure reasons: " ++
+ (if null fcnt then "unknown reasons" else freasons)
+ Just (_, _, nodes, cv) ->
+ printf ("score: %.8f, successes %d, failures %d (%s)" ++
+ " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
+ (intercalate "/" . map Node.name $ nodes)
-- | Annotates a solution with the appropriate string.
annotateSolution :: AllocSolution -> AllocSolution
fn accu (gdx, rasol) =
case rasol of
Bad _ -> accu
- Ok sol | null (asSolutions sol) -> accu
+ Ok sol | isNothing (asSolution sol) -> accu
| unallocable gdx -> accu
| otherwise -> (gdx, sol):accu
sortMGResults gl sols =
let extractScore (_, _, _, x) = x
solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
- (extractScore . head . asSolutions) sol)
+ (extractScore . fromJust . asSolution) sol)
in sortBy (comparing solScore) sols
-- | Finds the best group for an instance on a multi-group cluster.
\destinations required (" ++ show reqn ++
"), only one supported"
--- | Change an instance's secondary node.
-evacInstance :: (Monad m) =>
- [Ndx] -- ^ Excluded nodes
- -> Instance.List -- ^ The current instance list
- -> (Node.List, AllocSolution) -- ^ The current state
- -> Idx -- ^ The instance to evacuate
- -> m (Node.List, AllocSolution)
-evacInstance ex_ndx il (nl, old_as) idx = do
- -- FIXME: hardcoded one node here
-
- -- Longer explanation: evacuation is currently hardcoded to DRBD
- -- instances (which have one secondary); hence, even if the
- -- IAllocator protocol can request N nodes for an instance, and all
- -- the message parsing/loading pass this, this implementation only
- -- supports one; this situation needs to be revisited if we ever
- -- support more than one secondary, or if we change the storage
- -- model
- new_as <- tryReloc nl il idx 1 ex_ndx
- case asSolutions new_as of
- -- an individual relocation succeeded, we kind of compose the data
- -- from the two solutions
- csol@(nl', _, _, _):_ ->
- return (nl', new_as { asSolutions = csol:asSolutions old_as })
- -- this relocation failed, so we fail the entire evac
- _ -> fail $ "Can't evacuate instance " ++
- Instance.name (Container.find idx il) ++
- ": " ++ describeSolution new_as
-
--- | Try to evacuate a list of nodes.
-tryEvac :: (Monad m) =>
- Node.List -- ^ The node list
- -> Instance.List -- ^ The instance list
- -> [Idx] -- ^ Instances to be evacuated
- -> [Ndx] -- ^ Restricted nodes (the ones being evacuated)
- -> m AllocSolution -- ^ Solution list
-tryEvac nl il idxs ex_ndx = do
- (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptyAllocSolution) idxs
- return sol
-
-- | Function which fails if the requested mode is change secondary.
--
-- This is useful since except DRBD, no other disk template can
ops = iMoveToJob nl' il' idx (ReplaceSecondary ndx)
return (nl', il', ops)
+-- The algorithm for ChangeAll is as follows:
+--
+-- * generate all (primary, secondary) node pairs for the target groups
+-- * for each pair, execute the needed moves (r:s, f, r:s) and compute
+-- the final node list state and group score
+-- * select the best choice via a foldl that uses the same Either
+-- String solution as the ChangeSecondary mode
nodeEvacInstance nl il ChangeAll
inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
gdx avail_nodes =
do
- let primary = Container.find (Instance.pNode inst) nl
- idx = Instance.idx inst
- no_nodes = Left "no nodes available"
- -- if the primary is offline, then we first failover
- (nl1, inst1, ops1) <-
- if Node.offline primary
- then do
- (nl', inst', _, _) <-
- annotateResult "Failing over to the secondary" $
- opToResult $ applyMove nl inst Failover
- return (nl', inst', [Failover])
- else return (nl, inst, [])
- -- we now need to execute a replace secondary to the future
- -- primary node
- (nl2, inst2, _, new_pdx) <- annotateResult "Searching for a new primary" $
- eitherToResult $
- foldl' (evacDrbdSecondaryInner nl1 inst1 gdx)
- no_nodes avail_nodes
- let ops2 = ReplaceSecondary new_pdx:ops1
- -- since we chose the new primary, we remove it from the list of
- -- available nodes
- let avail_nodes_sec = new_pdx `delete` avail_nodes
- -- we now execute another failover, the primary stays fixed now
- (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
- opToResult $ applyMove nl2 inst2 Failover
- let ops3 = Failover:ops2
- -- and finally another replace secondary, to the final secondary
- (nl4, inst4, _, new_sdx) <-
- annotateResult "Searching for a new secondary" $
+ let no_nodes = Left "no nodes available"
+ node_pairs = [(p,s) | p <- avail_nodes, s <- avail_nodes, p /= s]
+ (nl', il', ops, _) <-
+ annotateResult "Can't find any good nodes for relocation" $
eitherToResult $
- foldl' (evacDrbdSecondaryInner nl3 inst3 gdx) no_nodes avail_nodes_sec
- let ops4 = ReplaceSecondary new_sdx:ops3
- il' = Container.add idx inst4 il
- ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
- return (nl4, il', ops)
+ foldl'
+ (\accu nodes -> case evacDrbdAllInner nl il inst gdx nodes of
+ Bad msg ->
+ case accu of
+ Right _ -> accu
+ -- we don't need more details (which
+ -- nodes, etc.) as we only selected
+ -- this group if we can allocate on
+ -- it, hence failures will not
+ -- propagate out of this fold loop
+ Left _ -> Left $ "Allocation failed: " ++ msg
+ Ok result@(_, _, _, new_cv) ->
+ let new_accu = Right result in
+ case accu of
+ Left _ -> new_accu
+ Right (_, _, _, old_cv) ->
+ if old_cv < new_cv
+ then accu
+ else new_accu
+ ) no_nodes node_pairs
+
+ return (nl', il', ops)
-- | Inner fold function for changing secondary of a DRBD instance.
--
--- The "running" solution is either a @Left String@, which means we
+-- The running solution is either a @Left String@, which means we
-- don't have yet a working solution, or a @Right (...)@, which
-- represents a valid solution; it holds the modified node list, the
-- modified instance (after evacuation), the score of that solution,
then accu
else new_accu
+-- | Compute result of changing all nodes of a DRBD instance.
+--
+-- Given the target primary and secondary node (which might be in a
+-- different group or not), this function will 'execute' all the
+-- required steps and assuming all operations succceed, will return
+-- the modified node and instance lists, the opcodes needed for this
+-- and the new group score.
+evacDrbdAllInner :: Node.List -- ^ Cluster node list
+ -> Instance.List -- ^ Cluster instance list
+ -> Instance.Instance -- ^ The instance to be moved
+ -> Gdx -- ^ The target group index
+ -- (which can differ from the
+ -- current group of the
+ -- instance)
+ -> (Ndx, Ndx) -- ^ Tuple of new
+ -- primary\/secondary nodes
+ -> Result (Node.List, Instance.List, [OpCodes.OpCode], Score)
+evacDrbdAllInner nl il inst gdx (t_pdx, t_sdx) =
+ do
+ let primary = Container.find (Instance.pNode inst) nl
+ idx = Instance.idx inst
+ -- if the primary is offline, then we first failover
+ (nl1, inst1, ops1) <-
+ if Node.offline primary
+ then do
+ (nl', inst', _, _) <-
+ annotateResult "Failing over to the secondary" $
+ opToResult $ applyMove nl inst Failover
+ return (nl', inst', [Failover])
+ else return (nl, inst, [])
+ let (o1, o2, o3) = (ReplaceSecondary t_pdx,
+ Failover,
+ ReplaceSecondary t_sdx)
+ -- we now need to execute a replace secondary to the future
+ -- primary node
+ (nl2, inst2, _, _) <-
+ annotateResult "Changing secondary to new primary" $
+ opToResult $
+ applyMove nl1 inst1 o1
+ let ops2 = o1:ops1
+ -- we now execute another failover, the primary stays fixed now
+ (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
+ opToResult $ applyMove nl2 inst2 o2
+ let ops3 = o2:ops2
+ -- and finally another replace secondary, to the final secondary
+ (nl4, inst4, _, _) <-
+ annotateResult "Changing secondary to final secondary" $
+ opToResult $
+ applyMove nl3 inst3 o3
+ let ops4 = o3:ops3
+ il' = Container.add idx inst4 il
+ ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
+ let nodes = Container.elems nl4
+ -- The fromJust below is ugly (it can fail nastily), but
+ -- at this point we should have any internal mismatches,
+ -- and adding a monad here would be quite involved
+ grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
+ new_cv = compCVNodes grpnodes
+ return (nl4, il', ops, new_cv)
+
-- | Computes the nodes in a given group which are available for
-- allocation.
availableGroupNodes :: [(Gdx, [Ndx])] -- ^ Group index/node index assoc list
splitCluster ini_nl ini_il
(fin_nl, fin_il, esol) =
foldl' (\state@(nl, il, _) inst ->
- let gdx = instancePriGroup nl inst in
+ let gdx = instancePriGroup nl inst
+ pdx = Instance.pNode inst in
updateEvacSolution state (Instance.idx inst) $
availableGroupNodes group_ndx
- excl_ndx gdx >>=
+ (IntSet.insert pdx excl_ndx) gdx >>=
nodeEvacInstance nl il mode inst gdx
)
(ini_nl, ini_il, emptyEvacSolution)
newlimit = fmap (flip (-) 1) limit
in case tryAlloc nl il newi2 allocnodes of
Bad s -> Bad s
- Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
+ Ok (AllocSolution { asFailures = errs, asSolution = sols3 }) ->
let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in
case sols3 of
- [] -> newsol
- (xnl, xi, _, _):[] ->
+ Nothing -> newsol
+ Just (xnl, xi, _, _) ->
if limit == Just 0
then newsol
else iterateAlloc xnl (Container.add newidx xi il)
newlimit newinst allocnodes (xi:ixes)
(totalResources xnl:cstats)
- _ -> Bad "Internal error: multiple solutions for single\
- \ allocation"
-- | The core of the tiered allocation mode.
tieredAlloc :: Node.List
cmds)
-- | Return the instance and involved nodes in an instance move.
-involvedNodes :: Instance.List -> Placement -> [Ndx]
+--
+-- Note that the output list length can vary, and is not required nor
+-- guaranteed to be of any specific length.
+involvedNodes :: Instance.List -- ^ Instance list, used for retrieving
+ -- the instance from its index; note
+ -- that this /must/ be the original
+ -- instance list, so that we can
+ -- retrieve the old nodes
+ -> Placement -- ^ The placement we're investigating,
+ -- containing the new nodes and
+ -- instance index
+ -> [Ndx] -- ^ Resulting list of node indices
involvedNodes il plc =
let (i, np, ns, _, _) = plc
inst = Container.find i il
- op = Instance.pNode inst
- os = Instance.sNode inst
- in nub [np, ns, op, os]
+ in nub $ [np, ns] ++ Instance.allNodes inst
-- | Inner function for splitJobs, that either appends the next job to
-- the current jobset, or starts a new jobset.
let inst = Container.find idx il
iname = Instance.name inst
lookNode = Just . Container.nameOf nl
- opF = OpCodes.OpInstanceMigrate iname True False True
+ opF = OpCodes.OpInstanceMigrate iname True False True Nothing
opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
OpCodes.ReplaceNewSecondary [] Nothing
in case move of