, tryAlloc
, tryMGAlloc
, tryReloc
- , tryMGReloc
- , tryEvac
- , tryMGEvac
, tryNodeEvac
+ , tryChangeGroup
, collapseFailures
-- * Allocation functions
, iterateAlloc
, tieredAlloc
- , tieredSpecMap
-- * Node group functions
, instanceGroup
, findSplitInstances
, splitCluster
) where
-import Data.Function (on)
import qualified Data.IntSet as IntSet
import Data.List
-import Data.Maybe (fromJust)
+import Data.Maybe (fromJust, isNothing)
import Data.Ord (comparing)
import Text.Printf (printf)
import Control.Monad
-import Control.Parallel.Strategies
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Instance as Instance
import qualified Ganeti.HTools.Group as Group
import Ganeti.HTools.Types
import Ganeti.HTools.Utils
+import Ganeti.HTools.Compat
import qualified Ganeti.OpCodes as OpCodes
-- * Types
-- | Allocation\/relocation solution.
data AllocSolution = AllocSolution
- { asFailures :: [FailMode] -- ^ Failure counts
- , asAllocs :: Int -- ^ Good allocation count
- , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
- -- of the list depends on the
- -- allocation/relocation mode
- , asLog :: [String] -- ^ A list of informational messages
+ { asFailures :: [FailMode] -- ^ Failure counts
+ , asAllocs :: Int -- ^ Good allocation count
+ , asSolution :: Maybe Node.AllocElement -- ^ The actual allocation result
+ , asLog :: [String] -- ^ Informational messages
}
-- | Node evacuation/group change iallocator result type. This result
-- type consists of actual opcodes (a restricted subset) that are
-- transmitted back to Ganeti.
data EvacSolution = EvacSolution
- { esMoved :: [String] -- ^ Instance moved successfully
- , esFailed :: [String] -- ^ Instance which were not
+ { esMoved :: [(Idx, Gdx, [Ndx])] -- ^ Instances moved successfully
+ , esFailed :: [(Idx, String)] -- ^ Instances which were not
-- relocated
, esOpCodes :: [[[OpCodes.OpCode]]] -- ^ List of lists of jobs
}
-- | The empty solution we start with when computing allocations.
emptyAllocSolution :: AllocSolution
emptyAllocSolution = AllocSolution { asFailures = [], asAllocs = 0
- , asSolutions = [], asLog = [] }
+ , asSolution = Nothing, asLog = [] }
-- | The empty evac solution.
emptyEvacSolution :: EvacSolution
data Table = Table Node.List Instance.List Score [Placement]
deriving (Show, Read)
+-- | Cluster statistics data type.
data CStats = CStats { csFmem :: Integer -- ^ Cluster free mem
, csFdsk :: Integer -- ^ Cluster free disk
, csAmem :: Integer -- ^ Cluster allocatable mem
, (2, "pri_tags_score")
]
+-- | Holds the weights used by 'compCVNodes' for each metric.
detailedCVWeights :: [Double]
detailedCVWeights = map fst detailedCVInfo
compCV :: Node.List -> Double
compCV = compCVNodes . Container.elems
-
-- | Compute online nodes from a 'Node.List'.
getOnline :: Node.List -> [Node.Node]
getOnline = filter (not . Node.offline) . Container.elems
all_inst' = if evac_mode
then let bad_nodes = map Node.idx . filter Node.offline $
Container.elems ini_nl
- in filter (\e -> Instance.sNode e `elem` bad_nodes ||
- Instance.pNode e `elem` bad_nodes)
+ in filter (any (`elem` bad_nodes) . Instance.allNodes)
all_inst
else all_inst
reloc_inst = filter Instance.movable all_inst'
map (\k -> (k, foldl' (\a e -> if e == k then a + 1 else a) 0 flst))
[minBound..maxBound]
+-- | Compares two Maybe AllocElement and chooses the besst score.
+bestAllocElement :: Maybe Node.AllocElement
+ -> Maybe Node.AllocElement
+ -> Maybe Node.AllocElement
+bestAllocElement a Nothing = a
+bestAllocElement Nothing b = b
+bestAllocElement a@(Just (_, _, _, ascore)) b@(Just (_, _, _, bscore)) =
+ if ascore < bscore then a else b
+
-- | Update current Allocation solution and failure stats with new
-- elements.
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
-concatAllocs as (OpGood ns@(_, _, _, nscore)) =
+concatAllocs as (OpGood ns) =
let -- Choose the old or new solution, based on the cluster score
cntok = asAllocs as
- osols = asSolutions as
- nsols = case osols of
- [] -> [ns]
- (_, _, _, oscore):[] ->
- if oscore < nscore
- then osols
- else [ns]
- -- FIXME: here we simply concat to lists with more
- -- than one element; we should instead abort, since
- -- this is not a valid usage of this function
- xs -> ns:xs
+ osols = asSolution as
+ nsols = bestAllocElement osols (Just ns)
nsuc = cntok + 1
-- Note: we force evaluation of nsols here in order to keep the
-- memory profile low - we know that we will need nsols for sure
-- in the next cycle, so we force evaluation of nsols, since the
-- foldl' in the caller will only evaluate the tuple, but not the
-- elements of the tuple
- in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
-
--- | Sums two allocation solutions (e.g. for two separate node groups).
-sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
-sumAllocs (AllocSolution af aa as al) (AllocSolution bf ba bs bl) =
- AllocSolution (af ++ bf) (aa + ba) (as ++ bs) (al ++ bl)
+ in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolution = nsols }
-- | Given a solution, generates a reasonable description for it.
describeSolution :: AllocSolution -> String
describeSolution as =
let fcnt = asFailures as
- sols = asSolutions as
+ sols = asSolution as
freasons =
intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
filter ((> 0) . snd) . collapseFailures $ fcnt
- in if null sols
- then "No valid allocation solutions, failure reasons: " ++
- (if null fcnt
- then "unknown reasons"
- else freasons)
- else let (_, _, nodes, cv) = head sols
- in printf ("score: %.8f, successes %d, failures %d (%s)" ++
- " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
- (intercalate "/" . map Node.name $ nodes)
+ in case sols of
+ Nothing -> "No valid allocation solutions, failure reasons: " ++
+ (if null fcnt then "unknown reasons" else freasons)
+ Just (_, _, nodes, cv) ->
+ printf ("score: %.8f, successes %d, failures %d (%s)" ++
+ " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
+ (intercalate "/" . map Node.name $ nodes)
-- | Annotates a solution with the appropriate string.
annotateSolution :: AllocSolution -> AllocSolution
fn accu (gdx, rasol) =
case rasol of
Bad _ -> accu
- Ok sol | null (asSolutions sol) -> accu
+ Ok sol | isNothing (asSolution sol) -> accu
| unallocable gdx -> accu
| otherwise -> (gdx, sol):accu
sortMGResults gl sols =
let extractScore (_, _, _, x) = x
solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
- (extractScore . head . asSolutions) sol)
+ (extractScore . fromJust . asSolution) sol)
in sortBy (comparing solScore) sols
-- | Finds the best group for an instance on a multi-group cluster.
+--
+-- Only solutions in @preferred@ and @last_resort@ groups will be
+-- accepted as valid, and additionally if the allowed groups parameter
+-- is not null then allocation will only be run for those group
+-- indices.
findBestAllocGroup :: Group.List -- ^ The group list
-> Node.List -- ^ The node list
-> Instance.List -- ^ The instance list
+ -> Maybe [Gdx] -- ^ The allowed groups
-> Instance.Instance -- ^ The instance to allocate
-> Int -- ^ Required number of nodes
-> Result (Gdx, AllocSolution, [String])
-findBestAllocGroup mggl mgnl mgil inst cnt =
+findBestAllocGroup mggl mgnl mgil allowed_gdxs inst cnt =
let groups = splitCluster mgnl mgil
+ groups' = maybe groups (\gs -> filter ((`elem` gs) . fst) groups)
+ allowed_gdxs
sols = map (\(gid, (nl, il)) ->
(gid, genAllocNodes mggl nl cnt False >>=
tryAlloc nl il inst))
- groups::[(Gdx, Result AllocSolution)]
+ groups'::[(Gdx, Result AllocSolution)]
all_msgs = concatMap (solutionDescription mggl) sols
goodSols = filterMGResults mggl sols
sortedSols = sortMGResults mggl goodSols
-> Result AllocSolution -- ^ Possible solution list
tryMGAlloc mggl mgnl mgil inst cnt = do
(best_group, solution, all_msgs) <-
- findBestAllocGroup mggl mgnl mgil inst cnt
+ findBestAllocGroup mggl mgnl mgil Nothing inst cnt
let group_name = Group.name $ Container.find best_group mggl
selmsg = "Selected group: " ++ group_name
return $ solution { asLog = selmsg:all_msgs }
\destinations required (" ++ show reqn ++
"), only one supported"
-tryMGReloc :: (Monad m) =>
- Group.List -- ^ The group list
- -> Node.List -- ^ The node list
- -> Instance.List -- ^ The instance list
- -> Idx -- ^ The index of the instance to move
- -> Int -- ^ The number of nodes required
- -> [Ndx] -- ^ Nodes which should not be used
- -> m AllocSolution -- ^ Solution list
-tryMGReloc _ mgnl mgil xid ncount ex_ndx = do
- let groups = splitCluster mgnl mgil
- -- TODO: we only relocate inside the group for now
- inst = Container.find xid mgil
- (nl, il) <- case lookup (instancePriGroup mgnl inst) groups of
- Nothing -> fail $ "Cannot find group for instance " ++
- Instance.name inst
- Just v -> return v
- tryReloc nl il xid ncount ex_ndx
-
--- | Change an instance's secondary node.
-evacInstance :: (Monad m) =>
- [Ndx] -- ^ Excluded nodes
- -> Instance.List -- ^ The current instance list
- -> (Node.List, AllocSolution) -- ^ The current state
- -> Idx -- ^ The instance to evacuate
- -> m (Node.List, AllocSolution)
-evacInstance ex_ndx il (nl, old_as) idx = do
- -- FIXME: hardcoded one node here
-
- -- Longer explanation: evacuation is currently hardcoded to DRBD
- -- instances (which have one secondary); hence, even if the
- -- IAllocator protocol can request N nodes for an instance, and all
- -- the message parsing/loading pass this, this implementation only
- -- supports one; this situation needs to be revisited if we ever
- -- support more than one secondary, or if we change the storage
- -- model
- new_as <- tryReloc nl il idx 1 ex_ndx
- case asSolutions new_as of
- -- an individual relocation succeeded, we kind of compose the data
- -- from the two solutions
- csol@(nl', _, _, _):_ ->
- return (nl', new_as { asSolutions = csol:asSolutions old_as })
- -- this relocation failed, so we fail the entire evac
- _ -> fail $ "Can't evacuate instance " ++
- Instance.name (Container.find idx il) ++
- ": " ++ describeSolution new_as
-
--- | Try to evacuate a list of nodes.
-tryEvac :: (Monad m) =>
- Node.List -- ^ The node list
- -> Instance.List -- ^ The instance list
- -> [Idx] -- ^ Instances to be evacuated
- -> [Ndx] -- ^ Restricted nodes (the ones being evacuated)
- -> m AllocSolution -- ^ Solution list
-tryEvac nl il idxs ex_ndx = do
- (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptyAllocSolution) idxs
- return sol
-
--- | Multi-group evacuation of a list of nodes.
-tryMGEvac :: (Monad m) =>
- Group.List -- ^ The group list
- -> Node.List -- ^ The node list
- -> Instance.List -- ^ The instance list
- -> [Ndx] -- ^ Nodes to be evacuated
- -> m AllocSolution -- ^ Solution list
-tryMGEvac _ nl il ex_ndx =
- let ex_nodes = map (`Container.find` nl) ex_ndx
- all_insts = nub . concatMap Node.sList $ ex_nodes
- all_insts' = associateIdxs all_insts $ splitCluster nl il
- in do
- results <- mapM (\(_, (gnl, gil, idxs)) -> tryEvac gnl gil idxs ex_ndx)
- all_insts'
- let sol = foldl' sumAllocs emptyAllocSolution results
- return $ annotateSolution sol
-
-- | Function which fails if the requested mode is change secondary.
--
-- This is useful since except DRBD, no other disk template can
failOnSecondaryChange _ _ = return ()
-- | Run evacuation for a single instance.
+--
+-- /Note:/ this function should correctly execute both intra-group
+-- evacuations (in all modes) and inter-group evacuations (in the
+-- 'ChangeAll' mode). Of course, this requires that the correct list
+-- of target nodes is passed.
nodeEvacInstance :: Node.List -- ^ The node list (cluster-wide)
-> Instance.List -- ^ Instance list (cluster-wide)
-> EvacMode -- ^ The evacuation mode
-> Instance.Instance -- ^ The instance to be evacuated
+ -> Gdx -- ^ The group we're targetting
-> [Ndx] -- ^ The list of available nodes
-- for allocation
-> Result (Node.List, Instance.List, [OpCodes.OpCode])
nodeEvacInstance _ _ mode (Instance.Instance
- {Instance.diskTemplate = dt@DTDiskless}) _ =
+ {Instance.diskTemplate = dt@DTDiskless}) _ _ =
failOnSecondaryChange mode dt >>
fail "Diskless relocations not implemented yet"
nodeEvacInstance _ _ _ (Instance.Instance
- {Instance.diskTemplate = DTPlain}) _ =
+ {Instance.diskTemplate = DTPlain}) _ _ =
fail "Instances of type plain cannot be relocated"
nodeEvacInstance _ _ _ (Instance.Instance
- {Instance.diskTemplate = DTFile}) _ =
+ {Instance.diskTemplate = DTFile}) _ _ =
fail "Instances of type file cannot be relocated"
nodeEvacInstance _ _ mode (Instance.Instance
- {Instance.diskTemplate = dt@DTSharedFile}) _ =
+ {Instance.diskTemplate = dt@DTSharedFile}) _ _ =
failOnSecondaryChange mode dt >>
fail "Shared file relocations not implemented yet"
nodeEvacInstance _ _ mode (Instance.Instance
- {Instance.diskTemplate = dt@DTBlock}) _ =
+ {Instance.diskTemplate = dt@DTBlock}) _ _ =
failOnSecondaryChange mode dt >>
fail "Block device relocations not implemented yet"
nodeEvacInstance nl il ChangePrimary
- inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8}) _ =
+ inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
+ _ _ =
do
(nl', inst', _, _) <- opToResult $ applyMove nl inst Failover
let idx = Instance.idx inst
nodeEvacInstance nl il ChangeSecondary
inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
- avail_nodes =
+ gdx avail_nodes =
do
- let gdx = instancePriGroup nl inst
(nl', inst', _, ndx) <- annotateResult "Can't find any good node" $
eitherToResult $
foldl' (evacDrbdSecondaryInner nl inst gdx)
ops = iMoveToJob nl' il' idx (ReplaceSecondary ndx)
return (nl', il', ops)
+-- The algorithm for ChangeAll is as follows:
+--
+-- * generate all (primary, secondary) node pairs for the target groups
+-- * for each pair, execute the needed moves (r:s, f, r:s) and compute
+-- the final node list state and group score
+-- * select the best choice via a foldl that uses the same Either
+-- String solution as the ChangeSecondary mode
nodeEvacInstance nl il ChangeAll
inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
- avail_nodes =
+ gdx avail_nodes =
do
- let primary = Container.find (Instance.pNode inst) nl
- idx = Instance.idx inst
- gdx = instancePriGroup nl inst
- no_nodes = Left "no nodes available"
- -- if the primary is offline, then we first failover
- (nl1, inst1, ops1) <-
- if Node.offline primary
- then do
- (nl', inst', _, _) <-
- annotateResult "Failing over to the secondary" $
- opToResult $ applyMove nl inst Failover
- return (nl', inst', [Failover])
- else return (nl, inst, [])
- -- we now need to execute a replace secondary to the future
- -- primary node
- (nl2, inst2, _, new_pdx) <- annotateResult "Searching for a new primary" $
- eitherToResult $
- foldl' (evacDrbdSecondaryInner nl1 inst1 gdx)
- no_nodes avail_nodes
- let ops2 = ReplaceSecondary new_pdx:ops1
- -- since we chose the new primary, we remove it from the list of
- -- available nodes
- let avail_nodes_sec = new_pdx `delete` avail_nodes
- -- we now execute another failover, the primary stays fixed now
- (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
- opToResult $ applyMove nl2 inst2 Failover
- let ops3 = Failover:ops2
- -- and finally another replace secondary, to the final secondary
- (nl4, inst4, _, new_sdx) <-
- annotateResult "Searching for a new secondary" $
+ let no_nodes = Left "no nodes available"
+ node_pairs = [(p,s) | p <- avail_nodes, s <- avail_nodes, p /= s]
+ (nl', il', ops, _) <-
+ annotateResult "Can't find any good nodes for relocation" $
eitherToResult $
- foldl' (evacDrbdSecondaryInner nl3 inst3 gdx) no_nodes avail_nodes_sec
- let ops4 = ReplaceSecondary new_sdx:ops3
- il' = Container.add idx inst4 il
- ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
- return (nl4, il', ops)
+ foldl'
+ (\accu nodes -> case evacDrbdAllInner nl il inst gdx nodes of
+ Bad msg ->
+ case accu of
+ Right _ -> accu
+ -- we don't need more details (which
+ -- nodes, etc.) as we only selected
+ -- this group if we can allocate on
+ -- it, hence failures will not
+ -- propagate out of this fold loop
+ Left _ -> Left $ "Allocation failed: " ++ msg
+ Ok result@(_, _, _, new_cv) ->
+ let new_accu = Right result in
+ case accu of
+ Left _ -> new_accu
+ Right (_, _, _, old_cv) ->
+ if old_cv < new_cv
+ then accu
+ else new_accu
+ ) no_nodes node_pairs
+
+ return (nl', il', ops)
-- | Inner fold function for changing secondary of a DRBD instance.
--
--- The "running" solution is either a @Left String@, which means we
+-- The running solution is either a @Left String@, which means we
-- don't have yet a working solution, or a @Right (...)@, which
-- represents a valid solution; it holds the modified node list, the
-- modified instance (after evacuation), the score of that solution,
-- The fromJust below is ugly (it can fail nastily), but
-- at this point we should have any internal mismatches,
-- and adding a monad here would be quite involved
- grpnodes = fromJust (gdx `lookup` (Node.computeGroups nodes))
+ grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
new_cv = compCVNodes grpnodes
new_accu = Right (nl', inst', new_cv, ndx)
in case accu of
then accu
else new_accu
+-- | Compute result of changing all nodes of a DRBD instance.
+--
+-- Given the target primary and secondary node (which might be in a
+-- different group or not), this function will 'execute' all the
+-- required steps and assuming all operations succceed, will return
+-- the modified node and instance lists, the opcodes needed for this
+-- and the new group score.
+evacDrbdAllInner :: Node.List -- ^ Cluster node list
+ -> Instance.List -- ^ Cluster instance list
+ -> Instance.Instance -- ^ The instance to be moved
+ -> Gdx -- ^ The target group index
+ -- (which can differ from the
+ -- current group of the
+ -- instance)
+ -> (Ndx, Ndx) -- ^ Tuple of new
+ -- primary\/secondary nodes
+ -> Result (Node.List, Instance.List, [OpCodes.OpCode], Score)
+evacDrbdAllInner nl il inst gdx (t_pdx, t_sdx) =
+ do
+ let primary = Container.find (Instance.pNode inst) nl
+ idx = Instance.idx inst
+ -- if the primary is offline, then we first failover
+ (nl1, inst1, ops1) <-
+ if Node.offline primary
+ then do
+ (nl', inst', _, _) <-
+ annotateResult "Failing over to the secondary" $
+ opToResult $ applyMove nl inst Failover
+ return (nl', inst', [Failover])
+ else return (nl, inst, [])
+ let (o1, o2, o3) = (ReplaceSecondary t_pdx,
+ Failover,
+ ReplaceSecondary t_sdx)
+ -- we now need to execute a replace secondary to the future
+ -- primary node
+ (nl2, inst2, _, _) <-
+ annotateResult "Changing secondary to new primary" $
+ opToResult $
+ applyMove nl1 inst1 o1
+ let ops2 = o1:ops1
+ -- we now execute another failover, the primary stays fixed now
+ (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
+ opToResult $ applyMove nl2 inst2 o2
+ let ops3 = o2:ops2
+ -- and finally another replace secondary, to the final secondary
+ (nl4, inst4, _, _) <-
+ annotateResult "Changing secondary to final secondary" $
+ opToResult $
+ applyMove nl3 inst3 o3
+ let ops4 = o3:ops3
+ il' = Container.add idx inst4 il
+ ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
+ let nodes = Container.elems nl4
+ -- The fromJust below is ugly (it can fail nastily), but
+ -- at this point we should have any internal mismatches,
+ -- and adding a monad here would be quite involved
+ grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
+ new_cv = compCVNodes grpnodes
+ return (nl4, il', ops, new_cv)
+
-- | Computes the nodes in a given group which are available for
-- allocation.
availableGroupNodes :: [(Gdx, [Ndx])] -- ^ Group index/node index assoc list
-- | Updates the evac solution with the results of an instance
-- evacuation.
updateEvacSolution :: (Node.List, Instance.List, EvacSolution)
- -> Instance.Instance
+ -> Idx
-> Result (Node.List, Instance.List, [OpCodes.OpCode])
-> (Node.List, Instance.List, EvacSolution)
-updateEvacSolution (nl, il, es) inst (Bad msg) =
- (nl, il, es { esFailed = (Instance.name inst ++ ": " ++ msg):esFailed es})
-updateEvacSolution (_, _, es) inst (Ok (nl, il, opcodes)) =
- (nl, il, es { esMoved = Instance.name inst:esMoved es
+updateEvacSolution (nl, il, es) idx (Bad msg) =
+ (nl, il, es { esFailed = (idx, msg):esFailed es})
+updateEvacSolution (_, _, es) idx (Ok (nl, il, opcodes)) =
+ (nl, il, es { esMoved = new_elem:esMoved es
, esOpCodes = [opcodes]:esOpCodes es })
+ where inst = Container.find idx il
+ new_elem = (idx,
+ instancePriGroup nl inst,
+ Instance.allNodes inst)
-- | Node-evacuation IAllocator mode main function.
tryNodeEvac :: Group.List -- ^ The cluster groups
-> Instance.List -- ^ Instance list (cluster-wide)
-> EvacMode -- ^ The evacuation mode
-> [Idx] -- ^ List of instance (indices) to be evacuated
- -> Result EvacSolution
+ -> Result (Node.List, Instance.List, EvacSolution)
tryNodeEvac _ ini_nl ini_il mode idxs =
let evac_ndx = nodesToEvacuate ini_il mode idxs
offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
(Container.elems nl))) $
splitCluster ini_nl ini_il
- (_, _, esol) =
+ (fin_nl, fin_il, esol) =
foldl' (\state@(nl, il, _) inst ->
- updateEvacSolution state inst $
+ let gdx = instancePriGroup nl inst
+ pdx = Instance.pNode inst in
+ updateEvacSolution state (Instance.idx inst) $
availableGroupNodes group_ndx
- excl_ndx (instancePriGroup nl inst) >>=
- nodeEvacInstance nl il mode inst
+ (IntSet.insert pdx excl_ndx) gdx >>=
+ nodeEvacInstance nl il mode inst gdx
)
(ini_nl, ini_il, emptyEvacSolution)
(map (`Container.find` ini_il) idxs)
- in return $ reverseEvacSolution esol
+ in return (fin_nl, fin_il, reverseEvacSolution esol)
+
+-- | Change-group IAllocator mode main function.
+--
+-- This is very similar to 'tryNodeEvac', the only difference is that
+-- we don't choose as target group the current instance group, but
+-- instead:
+--
+-- 1. at the start of the function, we compute which are the target
+-- groups; either no groups were passed in, in which case we choose
+-- all groups out of which we don't evacuate instance, or there were
+-- some groups passed, in which case we use those
+--
+-- 2. for each instance, we use 'findBestAllocGroup' to choose the
+-- best group to hold the instance, and then we do what
+-- 'tryNodeEvac' does, except for this group instead of the current
+-- instance group.
+--
+-- Note that the correct behaviour of this function relies on the
+-- function 'nodeEvacInstance' to be able to do correctly both
+-- intra-group and inter-group moves when passed the 'ChangeAll' mode.
+tryChangeGroup :: Group.List -- ^ The cluster groups
+ -> Node.List -- ^ The node list (cluster-wide)
+ -> Instance.List -- ^ Instance list (cluster-wide)
+ -> [Gdx] -- ^ Target groups; if empty, any
+ -- groups not being evacuated
+ -> [Idx] -- ^ List of instance (indices) to be evacuated
+ -> Result (Node.List, Instance.List, EvacSolution)
+tryChangeGroup gl ini_nl ini_il gdxs idxs =
+ let evac_gdxs = nub $ map (instancePriGroup ini_nl .
+ flip Container.find ini_il) idxs
+ target_gdxs = (if null gdxs
+ then Container.keys gl
+ else gdxs) \\ evac_gdxs
+ offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
+ excl_ndx = foldl' (flip IntSet.insert) IntSet.empty offline
+ group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
+ (Container.elems nl))) $
+ splitCluster ini_nl ini_il
+ (fin_nl, fin_il, esol) =
+ foldl' (\state@(nl, il, _) inst ->
+ let solution = do
+ let ncnt = Instance.requiredNodes $
+ Instance.diskTemplate inst
+ (gdx, _, _) <- findBestAllocGroup gl nl il
+ (Just target_gdxs) inst ncnt
+ av_nodes <- availableGroupNodes group_ndx
+ excl_ndx gdx
+ nodeEvacInstance nl il ChangeAll inst
+ gdx av_nodes
+ in updateEvacSolution state
+ (Instance.idx inst) solution
+ )
+ (ini_nl, ini_il, emptyEvacSolution)
+ (map (`Container.find` ini_il) idxs)
+ in return (fin_nl, fin_il, reverseEvacSolution esol)
-- | Recursively place instances on the cluster until we're out of space.
iterateAlloc :: Node.List
newlimit = fmap (flip (-) 1) limit
in case tryAlloc nl il newi2 allocnodes of
Bad s -> Bad s
- Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
+ Ok (AllocSolution { asFailures = errs, asSolution = sols3 }) ->
let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in
case sols3 of
- [] -> newsol
- (xnl, xi, _, _):[] ->
+ Nothing -> newsol
+ Just (xnl, xi, _, _) ->
if limit == Just 0
then newsol
else iterateAlloc xnl (Container.add newidx xi il)
newlimit newinst allocnodes (xi:ixes)
(totalResources xnl:cstats)
- _ -> Bad "Internal error: multiple solutions for single\
- \ allocation"
-- | The core of the tiered allocation mode.
tieredAlloc :: Node.List
Ok newinst' -> tieredAlloc nl' il' newlimit
newinst' allocnodes ixes' cstats'
--- | Compute the tiered spec string description from a list of
--- allocated instances.
-tieredSpecMap :: [Instance.Instance]
- -> [String]
-tieredSpecMap trl_ixes =
- let fin_trl_ixes = reverse trl_ixes
- ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
- spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
- ix_byspec
- in map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
- (rspecDsk spec) (rspecCpu spec) cnt) spec_map
-
-- * Formatting functions
-- | Given the original and final nodes, computes the relocation description.
cmds)
-- | Return the instance and involved nodes in an instance move.
-involvedNodes :: Instance.List -> Placement -> [Ndx]
+--
+-- Note that the output list length can vary, and is not required nor
+-- guaranteed to be of any specific length.
+involvedNodes :: Instance.List -- ^ Instance list, used for retrieving
+ -- the instance from its index; note
+ -- that this /must/ be the original
+ -- instance list, so that we can
+ -- retrieve the old nodes
+ -> Placement -- ^ The placement we're investigating,
+ -- containing the new nodes and
+ -- instance index
+ -> [Ndx] -- ^ Resulting list of node indices
involvedNodes il plc =
let (i, np, ns, _, _) = plc
inst = Container.find i il
- op = Instance.pNode inst
- os = Instance.sNode inst
- in nub [np, ns, op, os]
+ in nub $ [np, ns] ++ Instance.allNodes inst
-- | Inner function for splitJobs, that either appends the next job to
-- the current jobset, or starts a new jobset.
in intercalate ", " formatted
-- | Convert a placement into a list of OpCodes (basically a job).
-iMoveToJob :: Node.List -> Instance.List
- -> Idx -> IMove -> [OpCodes.OpCode]
+iMoveToJob :: Node.List -- ^ The node list; only used for node
+ -- names, so any version is good
+ -- (before or after the operation)
+ -> Instance.List -- ^ The instance list; also used for
+ -- names only
+ -> Idx -- ^ The index of the instance being
+ -- moved
+ -> IMove -- ^ The actual move to be described
+ -> [OpCodes.OpCode] -- ^ The list of opcodes equivalent to
+ -- the given move
iMoveToJob nl il idx move =
let inst = Container.find idx il
iname = Instance.name inst
lookNode = Just . Container.nameOf nl
- opF = OpCodes.OpInstanceMigrate iname True False True
+ opF = OpCodes.OpInstanceMigrate iname True False True Nothing
opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
OpCodes.ReplaceNewSecondary [] Nothing
in case move of
instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
in (guuid, (Container.fromList nodes', instances))) ngroups
--- | Split a global instance index map into per-group, and associate
--- it with the group/node/instance lists.
-associateIdxs :: [Idx] -- ^ Instance indices to be split/associated
- -> [(Gdx, (Node.List, Instance.List))] -- ^ Input groups
- -> [(Gdx, (Node.List, Instance.List, [Idx]))] -- ^ Result
-associateIdxs idxs =
- map (\(gdx, (nl, il)) ->
- (gdx, (nl, il, filter (`Container.member` il) idxs)))
-
-- | Compute the list of nodes that are to be evacuated, given a list
-- of instances and an evacuation mode.
nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list