{-| Implementation of cluster-wide logic.
This module holds all pure cluster-logic; I\/O related functionality
-goes into the "Main" module for the individual binaries.
+goes into the /Main/ module for the individual binaries.
-}
(
-- * Types
AllocSolution(..)
+ , EvacSolution(..)
, Table(..)
, CStats(..)
, AllocStats
, doNextBalance
, tryBalance
, compCV
+ , compCVNodes
, compDetailedCV
, printStats
, iMoveToJob
, tryMGReloc
, tryEvac
, tryMGEvac
+ , tryNodeEvac
, collapseFailures
-- * Allocation functions
, iterateAlloc
) where
import Data.Function (on)
+import qualified Data.IntSet as IntSet
import Data.List
+import Data.Maybe (fromJust)
import Data.Ord (comparing)
import Text.Printf (printf)
import Control.Monad
, asLog :: [String] -- ^ A list of informational messages
}
+-- | Node evacuation/group change iallocator result type. This result
+-- type consists of actual opcodes (a restricted subset) that are
+-- transmitted back to Ganeti.
+data EvacSolution = EvacSolution
+  { esMoved :: [String]              -- ^ Instances moved successfully
+  , esFailed :: [String]             -- ^ Instances that were not
+                                     -- relocated
+ , esOpCodes :: [[[OpCodes.OpCode]]] -- ^ List of lists of jobs
+ }
+
-- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
type AllocResult = (FailStats, Node.List, Instance.List,
[Instance.Instance], [CStats])
-
-- | A type denoting the valid allocation mode/pairs.
+--
-- For a one-node allocation, this will be a @Left ['Node.Node']@,
-- whereas for a two-node allocation, this will be a @Right
-- [('Node.Node', 'Node.Node')]@.
type AllocNodes = Either [Ndx] [(Ndx, Ndx)]
--- | The empty solution we start with when computing allocations
-emptySolution :: AllocSolution
-emptySolution = AllocSolution { asFailures = [], asAllocs = 0
- , asSolutions = [], asLog = [] }
+-- | The empty solution we start with when computing allocations.
+emptyAllocSolution :: AllocSolution
+emptyAllocSolution = AllocSolution { asFailures = [], asAllocs = 0
+ , asSolutions = [], asLog = [] }
--- | The complete state for the balancing solution
+-- | The empty evac solution.
+emptyEvacSolution :: EvacSolution
+emptyEvacSolution = EvacSolution { esMoved = []
+ , esFailed = []
+ , esOpCodes = []
+ }
+
+-- | The complete state for the balancing solution.
data Table = Table Node.List Instance.List Score [Placement]
deriving (Show, Read)
-data CStats = CStats { csFmem :: Int -- ^ Cluster free mem
- , csFdsk :: Int -- ^ Cluster free disk
- , csAmem :: Int -- ^ Cluster allocatable mem
- , csAdsk :: Int -- ^ Cluster allocatable disk
- , csAcpu :: Int -- ^ Cluster allocatable cpus
- , csMmem :: Int -- ^ Max node allocatable mem
- , csMdsk :: Int -- ^ Max node allocatable disk
- , csMcpu :: Int -- ^ Max node allocatable cpu
- , csImem :: Int -- ^ Instance used mem
- , csIdsk :: Int -- ^ Instance used disk
- , csIcpu :: Int -- ^ Instance used cpu
- , csTmem :: Double -- ^ Cluster total mem
- , csTdsk :: Double -- ^ Cluster total disk
- , csTcpu :: Double -- ^ Cluster total cpus
- , csVcpu :: Int -- ^ Cluster virtual cpus (if
- -- node pCpu has been set,
- -- otherwise -1)
- , csXmem :: Int -- ^ Unnacounted for mem
- , csNmem :: Int -- ^ Node own memory
- , csScore :: Score -- ^ The cluster score
- , csNinst :: Int -- ^ The total number of instances
+data CStats = CStats { csFmem :: Integer -- ^ Cluster free mem
+ , csFdsk :: Integer -- ^ Cluster free disk
+ , csAmem :: Integer -- ^ Cluster allocatable mem
+ , csAdsk :: Integer -- ^ Cluster allocatable disk
+ , csAcpu :: Integer -- ^ Cluster allocatable cpus
+ , csMmem :: Integer -- ^ Max node allocatable mem
+ , csMdsk :: Integer -- ^ Max node allocatable disk
+ , csMcpu :: Integer -- ^ Max node allocatable cpu
+ , csImem :: Integer -- ^ Instance used mem
+ , csIdsk :: Integer -- ^ Instance used disk
+ , csIcpu :: Integer -- ^ Instance used cpu
+ , csTmem :: Double -- ^ Cluster total mem
+ , csTdsk :: Double -- ^ Cluster total disk
+ , csTcpu :: Double -- ^ Cluster total cpus
+ , csVcpu :: Integer -- ^ Cluster virtual cpus (if
+ -- node pCpu has been set,
+ -- otherwise -1)
+                     , csXmem :: Integer -- ^ Unaccounted for mem
+ , csNmem :: Integer -- ^ Node own memory
+ , csScore :: Score -- ^ The cluster score
+ , csNinst :: Int -- ^ The total number of instances
}
deriving (Show, Read)
--- | Currently used, possibly to allocate, unallocable
+-- | Currently used, possibly to allocate, unallocable.
type AllocStats = (RSpec, RSpec, RSpec)
-- * Utility functions
in
(bad_nodes, bad_instances)
--- | Zero-initializer for the CStats type
+-- | Zero-initializer for the CStats type.
emptyCStats :: CStats
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
--- | Update stats with data from a new node
+-- | Update stats with data from a new node.
updateCStats :: CStats -> Node.Node -> CStats
updateCStats cs node =
let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
inc_vcpu = Node.hiCpu node
inc_acpu = Node.availCpu node
- in cs { csFmem = x_fmem + Node.fMem node
- , csFdsk = x_fdsk + Node.fDsk node
- , csAmem = x_amem + inc_amem'
- , csAdsk = x_adsk + inc_adsk
- , csAcpu = x_acpu + inc_acpu
- , csMmem = max x_mmem inc_amem'
- , csMdsk = max x_mdsk inc_adsk
- , csMcpu = max x_mcpu inc_acpu
- , csImem = x_imem + inc_imem
- , csIdsk = x_idsk + inc_idsk
- , csIcpu = x_icpu + inc_icpu
+ in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
+ , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
+ , csAmem = x_amem + fromIntegral inc_amem'
+ , csAdsk = x_adsk + fromIntegral inc_adsk
+ , csAcpu = x_acpu + fromIntegral inc_acpu
+ , csMmem = max x_mmem (fromIntegral inc_amem')
+ , csMdsk = max x_mdsk (fromIntegral inc_adsk)
+ , csMcpu = max x_mcpu (fromIntegral inc_acpu)
+ , csImem = x_imem + fromIntegral inc_imem
+ , csIdsk = x_idsk + fromIntegral inc_idsk
+ , csIcpu = x_icpu + fromIntegral inc_icpu
, csTmem = x_tmem + Node.tMem node
, csTdsk = x_tdsk + Node.tDsk node
, csTcpu = x_tcpu + Node.tCpu node
- , csVcpu = x_vcpu + inc_vcpu
- , csXmem = x_xmem + Node.xMem node
- , csNmem = x_nmem + Node.nMem node
+ , csVcpu = x_vcpu + fromIntegral inc_vcpu
+ , csXmem = x_xmem + fromIntegral (Node.xMem node)
+ , csNmem = x_nmem + fromIntegral (Node.nMem node)
, csNinst = x_ninst + length (Node.pList node)
}
let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
- rini = RSpec i_icpu i_imem i_idsk
- rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
- un_cpu = v_cpu - f_icpu
- runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
+ rini = RSpec (fromIntegral i_icpu) (fromIntegral i_imem)
+ (fromIntegral i_idsk)
+ rfin = RSpec (fromIntegral (f_icpu - i_icpu))
+ (fromIntegral (f_imem - i_imem))
+ (fromIntegral (f_idsk - i_idsk))
+ un_cpu = fromIntegral (v_cpu - f_icpu)::Int
+ runa = RSpec un_cpu (truncate t_mem - fromIntegral f_imem)
+ (truncate t_dsk - fromIntegral f_idsk)
in (rini, rfin, runa)
--- | The names and weights of the individual elements in the CV list
+-- | The names and weights of the individual elements in the CV list.
detailedCVInfo :: [(Double, String)]
detailedCVInfo = [ (1, "free_mem_cv")
, (1, "free_disk_cv")
detailedCVWeights = map fst detailedCVInfo
-- | Compute the mem and disk covariance.
-compDetailedCV :: Node.List -> [Double]
-compDetailedCV nl =
+compDetailedCV :: [Node.Node] -> [Double]
+compDetailedCV all_nodes =
let
- all_nodes = Container.elems nl
(offline, nodes) = partition Node.offline all_nodes
mem_l = map Node.pMem nodes
dsk_l = map Node.pDsk nodes
, pri_tags_score ]
-- | Compute the /total/ variance.
+compCVNodes :: [Node.Node] -> Double
+compCVNodes = sum . zipWith (*) detailedCVWeights . compDetailedCV
+
+-- | Wrapper over 'compCVNodes' for callers that have a 'Node.List'.
compCV :: Node.List -> Double
-compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
+compCV = compCVNodes . Container.elems
+
--- | Compute online nodes from a Node.List
+-- | Compute online nodes from a 'Node.List'.
getOnline :: Node.List -> [Node.Node]
getOnline = filter (not . Node.offline) . Container.elems
--- * hbal functions
+-- * Balancing functions
-- | Compute best table. Note that the ordering of the arguments is important.
compareTables :: Table -> Table -> Table
then ini_tbl -- no advancement
else best_tbl
--- | Check if we are allowed to go deeper in the balancing
+-- | Check if we are allowed to go deeper in the balancing.
doNextBalance :: Table -- ^ The starting table
-> Int -- ^ Remaining length
-> Score -- ^ Score at which to stop
ini_plc_len = length ini_plc
in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
--- | Run a balance move
+-- | Run a balance move.
tryBalance :: Table -- ^ The starting table
-> Bool -- ^ Allow disk moves
-> Bool -- ^ Allow instance moves
-- * Allocation functions
--- | Build failure stats out of a list of failures
+-- | Build failure stats out of a list of failures.
collapseFailures :: [FailMode] -> FailStats
collapseFailures flst =
- map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
+ map (\k -> (k, foldl' (\a e -> if e == k then a + 1 else a) 0 flst))
+ [minBound..maxBound]
-- | Update current Allocation solution and failure stats with new
--- elements
+-- elements.
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
sumAllocs (AllocSolution af aa as al) (AllocSolution bf ba bs bl) =
AllocSolution (af ++ bf) (aa + ba) (as ++ bs) (al ++ bl)
--- | Given a solution, generates a reasonable description for it
+-- | Given a solution, generates a reasonable description for it.
describeSolution :: AllocSolution -> String
describeSolution as =
let fcnt = asFailures as
" for node(s) %s") cv (asAllocs as) (length fcnt) freasons
(intercalate "/" . map Node.name $ nodes)
--- | Annotates a solution with the appropriate string
+-- | Annotates a solution with the appropriate string.
annotateSolution :: AllocSolution -> AllocSolution
annotateSolution as = as { asLog = describeSolution as : asLog as }
+-- | Reverses an evacuation solution.
+--
+-- Rationale: we always concat the results to the top of the lists, so
+-- for proper jobset execution, we should reverse all lists.
+reverseEvacSolution :: EvacSolution -> EvacSolution
+reverseEvacSolution (EvacSolution f m o) =
+ EvacSolution (reverse f) (reverse m) (reverse o)
+
-- | Generate the valid node allocation singles or pairs for a new instance.
genAllocNodes :: Group.List -- ^ Group list
-> Node.List -- ^ The node map
tryAlloc nl _ inst (Right ok_pairs) =
let sols = foldl' (\cstate (p, s) ->
concatAllocs cstate $ allocateOnPair nl inst p s
- ) emptySolution ok_pairs
+ ) emptyAllocSolution ok_pairs
in if null ok_pairs -- means we have just one node
then fail "Not enough online nodes"
tryAlloc nl _ inst (Left all_nodes) =
let sols = foldl' (\cstate ->
concatAllocs cstate . allocateOnSingle nl inst
- ) emptySolution all_nodes
+ ) emptyAllocSolution all_nodes
in if null all_nodes
then fail "No online nodes"
else return $ annotateSolution sols
--- | Given a group/result, describe it as a nice (list of) messages
+-- | Given a group/result, describe it as a nice (list of) messages.
solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
solutionDescription gl (groupId, result) =
case result of
pol = apolToString (Group.allocPolicy grp)
-- | From a list of possibly bad and possibly empty solutions, filter
--- only the groups with a valid result
+-- only the groups with a valid result. Note that the result will be
+-- reversed compared to the original list.
filterMGResults :: Group.List
-> [(Gdx, Result AllocSolution)]
-> [(Gdx, AllocSolution)]
-filterMGResults gl=
- filter ((/= AllocUnallocable) . Group.allocPolicy .
- flip Container.find gl . fst) .
- filter (not . null . asSolutions . snd) .
- map (\(y, Ok x) -> (y, x)) .
- filter (isOk . snd)
-
--- | Sort multigroup results based on policy and score
+filterMGResults gl = foldl' fn []
+ where unallocable = not . Group.isAllocable . flip Container.find gl
+ fn accu (gdx, rasol) =
+ case rasol of
+ Bad _ -> accu
+ Ok sol | null (asSolutions sol) -> accu
+ | unallocable gdx -> accu
+ | otherwise -> (gdx, sol):accu
+
+-- | Sort multigroup results based on policy and score.
sortMGResults :: Group.List
-> [(Gdx, AllocSolution)]
-> [(Gdx, AllocSolution)]
return (mnl, i, [Container.find x mnl],
compCV mnl)
in concatAllocs cstate em
- ) emptySolution valid_idxes
+ ) emptyAllocSolution valid_idxes
in return sols1
tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \
Just v -> return v
tryReloc nl il xid ncount ex_ndx
--- | Change an instance's secondary node
+-- | Change an instance's secondary node.
evacInstance :: (Monad m) =>
[Ndx] -- ^ Excluded nodes
-> Instance.List -- ^ The current instance list
-> [Ndx] -- ^ Restricted nodes (the ones being evacuated)
-> m AllocSolution -- ^ Solution list
tryEvac nl il idxs ex_ndx = do
- (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptySolution) idxs
+ (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptyAllocSolution) idxs
return sol
-- | Multi-group evacuation of a list of nodes.
tryMGEvac _ nl il ex_ndx =
let ex_nodes = map (`Container.find` nl) ex_ndx
all_insts = nub . concatMap Node.sList $ ex_nodes
- gni = splitCluster nl il
- -- we run the instance index list through a couple of maps to
- -- get finally to a structure of the type [(group index,
- -- [instance indices])]
- all_insts' = map (\idx ->
- (instancePriGroup nl (Container.find idx il),
- idx)) all_insts
- all_insts'' = groupBy ((==) `on` fst) all_insts'
- all_insts3 = map (\xs -> let (gdxs, idxs) = unzip xs
- in (head gdxs, idxs)) all_insts''
+ all_insts' = associateIdxs all_insts $ splitCluster nl il
in do
- -- that done, we now add the per-group nl/il to the tuple
- all_insts4 <-
- mapM (\(gdx, idxs) ->
- case lookup gdx gni of
- Nothing -> fail $ "Can't find group index " ++ show gdx
- Just (gnl, gil) -> return (gdx, gnl, gil, idxs))
- all_insts3
- results <- mapM (\(_, gnl, gil, idxs) -> tryEvac gnl gil idxs ex_ndx)
- all_insts4
- let sol = foldl' sumAllocs emptySolution results
+ results <- mapM (\(_, (gnl, gil, idxs)) -> tryEvac gnl gil idxs ex_ndx)
+ all_insts'
+ let sol = foldl' sumAllocs emptyAllocSolution results
return $ annotateSolution sol
--- | Recursively place instances on the cluster until we're out of space
+-- | Function which fails if the requested mode is change secondary.
+--
+-- This is useful since, apart from DRBD, no other disk template can
+-- execute change secondary; thus, we can just call this function
+-- instead of always checking for secondary mode. After the call to
+-- this function, whatever mode we have is just a primary change.
+failOnSecondaryChange :: (Monad m) => EvacMode -> DiskTemplate -> m ()
+failOnSecondaryChange ChangeSecondary dt =
+ fail $ "Instances with disk template '" ++ dtToString dt ++
+ "' can't execute change secondary"
+failOnSecondaryChange _ _ = return ()
+
+-- | Run evacuation for a single instance.
+nodeEvacInstance :: Node.List -- ^ The node list (cluster-wide)
+ -> Instance.List -- ^ Instance list (cluster-wide)
+ -> EvacMode -- ^ The evacuation mode
+ -> Instance.Instance -- ^ The instance to be evacuated
+ -> [Ndx] -- ^ The list of available nodes
+ -- for allocation
+ -> Result (Node.List, Instance.List, [OpCodes.OpCode])
+nodeEvacInstance _ _ mode (Instance.Instance
+ {Instance.diskTemplate = dt@DTDiskless}) _ =
+ failOnSecondaryChange mode dt >>
+ fail "Diskless relocations not implemented yet"
+
+nodeEvacInstance _ _ _ (Instance.Instance
+ {Instance.diskTemplate = DTPlain}) _ =
+ fail "Instances of type plain cannot be relocated"
+
+nodeEvacInstance _ _ _ (Instance.Instance
+ {Instance.diskTemplate = DTFile}) _ =
+ fail "Instances of type file cannot be relocated"
+
+nodeEvacInstance _ _ mode (Instance.Instance
+ {Instance.diskTemplate = dt@DTSharedFile}) _ =
+ failOnSecondaryChange mode dt >>
+ fail "Shared file relocations not implemented yet"
+
+nodeEvacInstance _ _ mode (Instance.Instance
+ {Instance.diskTemplate = dt@DTBlock}) _ =
+ failOnSecondaryChange mode dt >>
+ fail "Block device relocations not implemented yet"
+
+nodeEvacInstance nl il ChangePrimary
+ inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8}) _ =
+ do
+ (nl', inst', _, _) <- opToResult $ applyMove nl inst Failover
+ let idx = Instance.idx inst
+ il' = Container.add idx inst' il
+ ops = iMoveToJob nl' il' idx Failover
+ return (nl', il', ops)
+
+nodeEvacInstance nl il ChangeSecondary
+ inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
+ avail_nodes =
+ do
+ let gdx = instancePriGroup nl inst
+ (nl', inst', _, ndx) <- annotateResult "Can't find any good node" $
+ eitherToResult $
+ foldl' (evacDrbdSecondaryInner nl inst gdx)
+ (Left "no nodes available") avail_nodes
+ let idx = Instance.idx inst
+ il' = Container.add idx inst' il
+ ops = iMoveToJob nl' il' idx (ReplaceSecondary ndx)
+ return (nl', il', ops)
+
+nodeEvacInstance nl il ChangeAll
+ inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
+ avail_nodes =
+ do
+ let primary = Container.find (Instance.pNode inst) nl
+ idx = Instance.idx inst
+ gdx = instancePriGroup nl inst
+ no_nodes = Left "no nodes available"
+ -- if the primary is offline, then we first failover
+ (nl1, inst1, ops1) <-
+ if Node.offline primary
+ then do
+ (nl', inst', _, _) <-
+ annotateResult "Failing over to the secondary" $
+ opToResult $ applyMove nl inst Failover
+ return (nl', inst', [Failover])
+ else return (nl, inst, [])
+ -- we now need to execute a replace secondary to the future
+ -- primary node
+ (nl2, inst2, _, new_pdx) <- annotateResult "Searching for a new primary" $
+ eitherToResult $
+ foldl' (evacDrbdSecondaryInner nl1 inst1 gdx)
+ no_nodes avail_nodes
+ let ops2 = ReplaceSecondary new_pdx:ops1
+ -- since we chose the new primary, we remove it from the list of
+ -- available nodes
+ let avail_nodes_sec = new_pdx `delete` avail_nodes
+ -- we now execute another failover, the primary stays fixed now
+ (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
+ opToResult $ applyMove nl2 inst2 Failover
+ let ops3 = Failover:ops2
+ -- and finally another replace secondary, to the final secondary
+ (nl4, inst4, _, new_sdx) <-
+ annotateResult "Searching for a new secondary" $
+ eitherToResult $
+ foldl' (evacDrbdSecondaryInner nl3 inst3 gdx) no_nodes avail_nodes_sec
+ let ops4 = ReplaceSecondary new_sdx:ops3
+ il' = Container.add idx inst4 il
+ ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
+ return (nl4, il', ops)
+
+-- | Inner fold function for changing secondary of a DRBD instance.
+--
+-- The "running" solution is either a @Left String@, which means we
+-- don't have yet a working solution, or a @Right (...)@, which
+-- represents a valid solution; it holds the modified node list, the
+-- modified instance (after evacuation), the score of that solution,
+-- and the new secondary node index.
+evacDrbdSecondaryInner :: Node.List -- ^ Cluster node list
+ -> Instance.Instance -- ^ Instance being evacuated
+ -> Gdx -- ^ The group index of the instance
+ -> Either String ( Node.List
+ , Instance.Instance
+ , Score
+ , Ndx) -- ^ Current best solution
+ -> Ndx -- ^ Node we're evaluating as new secondary
+ -> Either String ( Node.List
+ , Instance.Instance
+ , Score
+ , Ndx) -- ^ New best solution
+evacDrbdSecondaryInner nl inst gdx accu ndx =
+ case applyMove nl inst (ReplaceSecondary ndx) of
+ OpFail fm ->
+ case accu of
+ Right _ -> accu
+ Left _ -> Left $ "Node " ++ Container.nameOf nl ndx ++
+ " failed: " ++ show fm
+ OpGood (nl', inst', _, _) ->
+ let nodes = Container.elems nl'
+ -- The fromJust below is ugly (it can fail nastily), but
+        -- at this point we should not have any internal mismatches,
+ -- and adding a monad here would be quite involved
+ grpnodes = fromJust (gdx `lookup` (Node.computeGroups nodes))
+ new_cv = compCVNodes grpnodes
+ new_accu = Right (nl', inst', new_cv, ndx)
+ in case accu of
+ Left _ -> new_accu
+ Right (_, _, old_cv, _) ->
+ if old_cv < new_cv
+ then accu
+ else new_accu
+
+-- | Computes the local nodes of a given instance which are available
+-- for allocation.
+availableLocalNodes :: Node.List
+ -> [(Gdx, [Ndx])]
+ -> IntSet.IntSet
+ -> Instance.Instance
+ -> Result [Ndx]
+availableLocalNodes nl group_nodes excl_ndx inst = do
+ let gdx = instancePriGroup nl inst
+ local_nodes <- maybe (Bad $ "Can't find group with index " ++ show gdx)
+ Ok (lookup gdx group_nodes)
+ let avail_nodes = filter (not . flip IntSet.member excl_ndx) local_nodes
+ return avail_nodes
+
+-- | Updates the evac solution with the results of an instance
+-- evacuation.
+updateEvacSolution :: (Node.List, Instance.List, EvacSolution)
+ -> Instance.Instance
+ -> Result (Node.List, Instance.List, [OpCodes.OpCode])
+ -> (Node.List, Instance.List, EvacSolution)
+updateEvacSolution (nl, il, es) inst (Bad msg) =
+ (nl, il, es { esFailed = (Instance.name inst ++ ": " ++ msg):esFailed es})
+updateEvacSolution (_, _, es) inst (Ok (nl, il, opcodes)) =
+ (nl, il, es { esMoved = Instance.name inst:esMoved es
+ , esOpCodes = [opcodes]:esOpCodes es })
+
+-- | Node-evacuation IAllocator mode main function.
+tryNodeEvac :: Group.List -- ^ The cluster groups
+ -> Node.List -- ^ The node list (cluster-wide, not per group)
+ -> Instance.List -- ^ Instance list (cluster-wide)
+ -> EvacMode -- ^ The evacuation mode
+ -> [Idx] -- ^ List of instance (indices) to be evacuated
+ -> Result EvacSolution
+tryNodeEvac _ ini_nl ini_il mode idxs =
+ let evac_ndx = nodesToEvacuate ini_il mode idxs
+ offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
+ excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline
+ group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
+ (Container.elems nl))) $
+ splitCluster ini_nl ini_il
+ (_, _, esol) =
+ foldl' (\state@(nl, il, _) inst ->
+ updateEvacSolution state inst $
+ availableLocalNodes nl group_ndx excl_ndx inst >>=
+ nodeEvacInstance nl il mode inst
+ )
+ (ini_nl, ini_il, emptyEvacSolution)
+ (map (`Container.find` ini_il) idxs)
+ in return $ reverseEvacSolution esol
+
+-- | Recursively place instances on the cluster until we're out of space.
iterateAlloc :: Node.List
-> Instance.List
+ -> Maybe Int
-> Instance.Instance
-> AllocNodes
-> [Instance.Instance]
-> [CStats]
-> Result AllocResult
-iterateAlloc nl il newinst allocnodes ixes cstats =
+iterateAlloc nl il limit newinst allocnodes ixes cstats =
let depth = length ixes
newname = printf "new-%d" depth::String
newidx = length (Container.elems il) + depth
newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
+ newlimit = fmap (flip (-) 1) limit
in case tryAlloc nl il newi2 allocnodes of
Bad s -> Bad s
Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
+ let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in
case sols3 of
- [] -> Ok (collapseFailures errs, nl, il, ixes, cstats)
+ [] -> newsol
(xnl, xi, _, _):[] ->
- iterateAlloc xnl (Container.add newidx xi il)
- newinst allocnodes (xi:ixes)
- (totalResources xnl:cstats)
+ if limit == Just 0
+ then newsol
+ else iterateAlloc xnl (Container.add newidx xi il)
+ newlimit newinst allocnodes (xi:ixes)
+ (totalResources xnl:cstats)
_ -> Bad "Internal error: multiple solutions for single\
\ allocation"
--- | The core of the tiered allocation mode
+-- | The core of the tiered allocation mode.
tieredAlloc :: Node.List
-> Instance.List
+ -> Maybe Int
-> Instance.Instance
-> AllocNodes
-> [Instance.Instance]
-> [CStats]
-> Result AllocResult
-tieredAlloc nl il newinst allocnodes ixes cstats =
- case iterateAlloc nl il newinst allocnodes ixes cstats of
+tieredAlloc nl il limit newinst allocnodes ixes cstats =
+ case iterateAlloc nl il limit newinst allocnodes ixes cstats of
Bad s -> Bad s
Ok (errs, nl', il', ixes', cstats') ->
+ let newsol = Ok (errs, nl', il', ixes', cstats')
+ ixes_cnt = length ixes'
+ (stop, newlimit) = case limit of
+ Nothing -> (False, Nothing)
+ Just n -> (n <= ixes_cnt,
+ Just (n - ixes_cnt)) in
+ if stop then newsol else
case Instance.shrinkByType newinst . fst . last $
sortBy (comparing snd) errs of
- Bad _ -> Ok (errs, nl', il', ixes', cstats')
- Ok newinst' ->
- tieredAlloc nl' il' newinst' allocnodes ixes' cstats'
+ Bad _ -> newsol
+ Ok newinst' -> tieredAlloc nl' il' newlimit
+ newinst' allocnodes ixes' cstats'
-- | Compute the tiered spec string description from a list of
-- allocated instances.
in if sdx == Node.noSecondary
then ""
else Container.nameOf nl sdx
- , if Instance.auto_balance inst then "Y" else "N"
+ , if Instance.autoBalance inst then "Y" else "N"
, printf "%3d" $ Instance.vcpus inst
, printf "%5d" $ Instance.mem inst
, printf "%5d" $ Instance.dsk inst `div` 1024
-- | Shows statistics for a given node list.
printStats :: Node.List -> String
printStats nl =
- let dcvs = compDetailedCV nl
+ let dcvs = compDetailedCV $ Container.elems nl
(weights, names) = unzip detailedCVInfo
hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
formatted = map (\(w, header, val) ->
-- * Node group functions
--- | Computes the group of an instance
+-- | Computes the group of an instance.
instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
instanceGroup nl i =
let sidx = Instance.sNode i
show pgroup ++ ", secondary " ++ show sgroup)
else return pgroup
--- | Computes the group of an instance per the primary node
+-- | Computes the group of an instance per the primary node.
instancePriGroup :: Node.List -> Instance.Instance -> Gdx
instancePriGroup nl i =
let pnode = Container.find (Instance.pNode i) nl
in Node.group pnode
-- | Compute the list of badly allocated instances (split across node
--- groups)
+-- groups).
findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
findSplitInstances nl =
filter (not . isOk . instanceGroup nl) . Container.elems
--- | Splits a cluster into the component node groups
+-- | Splits a cluster into the component node groups.
splitCluster :: Node.List -> Instance.List ->
[(Gdx, (Node.List, Instance.List))]
splitCluster nl il =
nodes' = zip nidxs nodes
instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
in (guuid, (Container.fromList nodes', instances))) ngroups
+
+-- | Split a global instance index map into per-group, and associate
+-- it with the group/node/instance lists.
+associateIdxs :: [Idx] -- ^ Instance indices to be split/associated
+ -> [(Gdx, (Node.List, Instance.List))] -- ^ Input groups
+ -> [(Gdx, (Node.List, Instance.List, [Idx]))] -- ^ Result
+associateIdxs idxs =
+ map (\(gdx, (nl, il)) ->
+ (gdx, (nl, il, filter (`Container.member` il) idxs)))
+
+-- | Compute the list of nodes that are to be evacuated, given a list
+-- of instances and an evacuation mode.
+nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list
+ -> EvacMode -- ^ The evacuation mode we're using
+ -> [Idx] -- ^ List of instance indices being evacuated
+ -> IntSet.IntSet -- ^ Set of node indices
+nodesToEvacuate il mode =
+ IntSet.delete Node.noSecondary .
+ foldl' (\ns idx ->
+ let i = Container.find idx il
+ pdx = Instance.pNode i
+ sdx = Instance.sNode i
+ dt = Instance.diskTemplate i
+ withSecondary = case dt of
+ DTDrbd8 -> IntSet.insert sdx ns
+ _ -> ns
+ in case mode of
+ ChangePrimary -> IntSet.insert pdx ns
+ ChangeSecondary -> withSecondary
+ ChangeAll -> IntSet.insert pdx withSecondary
+ ) IntSet.empty