{-
-Copyright (C) 2009 Google Inc.
+Copyright (C) 2009, 2010 Google Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
module Ganeti.HTools.Cluster
(
-- * Types
- AllocSolution
+ AllocSolution(..)
, Table(..)
, CStats(..)
+ , AllocStats
-- * Generic functions
, totalResources
+ , computeAllocationDelta
-- * First phase functions
, computeBadItems
-- * Second phase functions
- , printSolution
, printSolutionLine
, formatCmds
, involvedNodes
, printInsts
-- * Balacing functions
, checkMove
+ , doNextBalance
, tryBalance
, compCV
+ , compDetailedCV
, printStats
, iMoveToJob
-- * IAllocator functions
, tryAlloc
+ , tryMGAlloc
, tryReloc
+ , tryEvac
, collapseFailures
+ -- * Allocation functions
+ , iterateAlloc
+ , tieredAlloc
+ , tieredSpecMap
+ -- * Node group functions
+ , instanceGroup
+ , findSplitInstances
+ , splitCluster
) where
+import Data.Function (on)
import Data.List
+import Data.Ord (comparing)
import Text.Printf (printf)
-import Data.Function
import Control.Monad
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Instance as Instance
import qualified Ganeti.HTools.Node as Node
+import qualified Ganeti.HTools.Group as Group
import Ganeti.HTools.Types
import Ganeti.HTools.Utils
import qualified Ganeti.OpCodes as OpCodes
-- * Types
-- | Allocation\/relocation solution.
-type AllocSolution = ([FailMode], Int, Maybe (Score, AllocElement))
-
--- | Allocation\/relocation element.
-type AllocElement = (Node.List, Instance.Instance, [Node.Node])
-
+-- | Allocation\/relocation solution; built up incrementally via
+-- 'concatAllocs' starting from 'emptySolution'.
+data AllocSolution = AllocSolution
+ { asFailures :: [FailMode] -- ^ Reasons of the failed allocation
+ -- attempts (one entry per failure)
+ , asAllocs :: Int -- ^ Good allocation count
+ , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
+ -- of the list depends on the
+ -- allocation/relocation mode
+ , asLog :: [String] -- ^ A list of informational messages
+ }
+
+-- | The empty solution we start with when computing allocations;
+-- used as the fold seed in 'tryAlloc'/'tryReloc'.
+emptySolution :: AllocSolution
+emptySolution = AllocSolution { asFailures = [], asAllocs = 0
+ , asSolutions = [], asLog = [] }
-- | The complete state for the balancing solution
data Table = Table Node.List Instance.List Score [Placement]
- deriving (Show)
+ deriving (Show, Read)
data CStats = CStats { csFmem :: Int -- ^ Cluster free mem
, csFdsk :: Int -- ^ Cluster free disk
, csTmem :: Double -- ^ Cluster total mem
, csTdsk :: Double -- ^ Cluster total disk
, csTcpu :: Double -- ^ Cluster total cpus
+ , csVcpu :: Int -- ^ Cluster virtual cpus (if
+ -- node pCpu has been set,
+ -- otherwise -1)
, csXmem :: Int -- ^ Unnacounted for mem
, csNmem :: Int -- ^ Node own memory
, csScore :: Score -- ^ The cluster score
, csNinst :: Int -- ^ The total number of instances
}
+ deriving (Show, Read)
+
+-- | Allocation statistics triple: (resources currently used,
+-- resources that could still be allocated, resources left
+-- unallocable); computed by 'computeAllocationDelta'.
+type AllocStats = (RSpec, RSpec, RSpec)
-- * Utility functions
([Node.Node], [Instance.Instance])
computeBadItems nl il =
let bad_nodes = verifyN1 $ getOnline nl
- bad_instances = map (\idx -> Container.find idx il) .
+ bad_instances = map (`Container.find` il) .
sort . nub $
concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
in
-- | Zero-initializer for the CStats type
emptyCStats :: CStats
-emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-- | Update stats with data from a new node
updateCStats :: CStats -> Node.Node -> CStats
csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
+ csVcpu = x_vcpu,
csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
}
= cs
- Node.xMem node - Node.fMem node
inc_icpu = Node.uCpu node
inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
+ inc_vcpu = Node.hiCpu node
in cs { csFmem = x_fmem + Node.fMem node
, csFdsk = x_fdsk + Node.fDsk node
, csTmem = x_tmem + Node.tMem node
, csTdsk = x_tdsk + Node.tDsk node
, csTcpu = x_tcpu + Node.tCpu node
+ , csVcpu = x_vcpu + inc_vcpu
, csXmem = x_xmem + Node.xMem node
, csNmem = x_nmem + Node.nMem node
, csNinst = x_ninst + length (Node.pList node)
let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
in cs { csScore = compCV nl }
--- | The names of the individual elements in the CV list
-detailedCVNames :: [String]
-detailedCVNames = [ "free_mem_cv"
- , "free_disk_cv"
- , "n1_score"
- , "reserved_mem_cv"
- , "offline_score"
- , "vcpu_ratio_cv"
- , "cpu_load_cv"
- , "mem_load_cv"
- , "disk_load_cv"
- , "net_load_cv"
- ]
+-- | Compute the delta between two cluster state.
+--
+-- This is used when doing allocations, to understand better the
+-- available cluster resources. The return value is a triple of the
+-- current used values, the delta that was still allocated, and what
+-- was left unallocated.
+computeAllocationDelta :: CStats -> CStats -> AllocStats
+computeAllocationDelta cini cfin =
+ let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
+ CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
+ csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
+ -- resources in use before the allocation run
+ rini = RSpec i_icpu i_imem i_idsk
+ -- resources consumed by the run itself (final minus initial)
+ rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
+ -- NOTE(review): csVcpu is -1 when node pCpu was never set, which
+ -- would make un_cpu negative/meaningless -- confirm callers only
+ -- use this when vcpu data is available
+ un_cpu = v_cpu - f_icpu
+ -- what remains unallocable out of the cluster totals
+ runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
+ in (rini, rfin, runa)
+
+-- | The names and weights of the individual elements in the CV list
+detailedCVInfo :: [(Double, String)]
+detailedCVInfo = [ (1, "free_mem_cv")
+ , (1, "free_disk_cv")
+ , (1, "n1_cnt")
+ , (1, "reserved_mem_cv")
+ , (4, "offline_all_cnt")
+ , (16, "offline_pri_cnt")
+ , (1, "vcpu_ratio_cv")
+ , (1, "cpu_load_cv")
+ , (1, "mem_load_cv")
+ , (1, "disk_load_cv")
+ , (1, "net_load_cv")
+ , (2, "pri_tags_score")
+ ]
+
+-- | The weights of the cluster score components, in the same order
+-- as the metrics produced by 'compDetailedCV' (used by 'compCV').
+detailedCVWeights :: [Double]
+detailedCVWeights = map fst detailedCVInfo
-- | Compute the mem and disk covariance.
compDetailedCV :: Node.List -> [Double]
(offline, nodes) = partition Node.offline all_nodes
mem_l = map Node.pMem nodes
dsk_l = map Node.pDsk nodes
- mem_cv = varianceCoeff mem_l
- dsk_cv = varianceCoeff dsk_l
- n1_l = length $ filter Node.failN1 nodes
- n1_score = fromIntegral n1_l /
- fromIntegral (length nodes)::Double
+ -- metric: memory covariance
+ mem_cv = stdDev mem_l
+ -- metric: disk covariance
+ dsk_cv = stdDev dsk_l
+ -- metric: count of instances living on N1 failing nodes
+ n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
+ length (Node.pList n)) .
+ filter Node.failN1 $ nodes :: Double
res_l = map Node.pRem nodes
- res_cv = varianceCoeff res_l
- offline_inst = sum . map (\n -> (length . Node.pList $ n) +
- (length . Node.sList $ n)) $ offline
- online_inst = sum . map (\n -> (length . Node.pList $ n) +
- (length . Node.sList $ n)) $ nodes
- off_score = if offline_inst == 0
- then 0::Double
- else fromIntegral offline_inst /
- fromIntegral (offline_inst + online_inst)::Double
+ -- metric: reserved memory covariance
+ res_cv = stdDev res_l
+ -- offline instances metrics
+ offline_ipri = sum . map (length . Node.pList) $ offline
+ offline_isec = sum . map (length . Node.sList) $ offline
+ -- metric: count of instances on offline nodes
+ off_score = fromIntegral (offline_ipri + offline_isec)::Double
+ -- metric: count of primary instances on offline nodes (this
+ -- helps with evacuation/failover of primary instances on
+ -- 2-node clusters with one node offline)
+ off_pri_score = fromIntegral offline_ipri::Double
cpu_l = map Node.pCpu nodes
- cpu_cv = varianceCoeff cpu_l
+ -- metric: covariance of vcpu/pcpu ratio
+ cpu_cv = stdDev cpu_l
+ -- metrics: covariance of cpu, memory, disk and network load
(c_load, m_load, d_load, n_load) = unzip4 $
map (\n ->
let DynUtil c1 m1 d1 n1 = Node.utilLoad n
DynUtil c2 m2 d2 n2 = Node.utilPool n
in (c1/c2, m1/m2, d1/d2, n1/n2)
) nodes
- in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv
- , varianceCoeff c_load, varianceCoeff m_load
- , varianceCoeff d_load, varianceCoeff n_load]
+ -- metric: conflicting instance count
+ pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
+ pri_tags_score = fromIntegral pri_tags_inst::Double
+ in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
+ , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
+ , pri_tags_score ]
-- | Compute the /total/ variance.
compCV :: Node.List -> Double
-compCV = sum . compDetailedCV
+compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
-- | Compute online nodes from a Node.List
getOnline :: Node.List -> [Node.Node]
old_s = Container.find old_sdx nl
int_p = Node.removePri old_p inst
int_s = Node.removeSec old_s inst
+ force_p = Node.offline old_p
new_nl = do -- Maybe monad
- new_p <- Node.addPri int_s inst
+ new_p <- Node.addPriEx force_p int_s inst
new_s <- Node.addSec int_p inst old_sdx
let new_inst = Instance.setBoth inst old_sdx old_pdx
return (Container.addTwo old_pdx new_s old_sdx new_p nl,
tgt_n = Container.find new_pdx nl
int_p = Node.removePri old_p inst
int_s = Node.removeSec old_s inst
+ force_p = Node.offline old_p
new_nl = do -- Maybe monad
-- check that the current secondary can host the instance
-- during the migration
- tmp_s <- Node.addPri int_s inst
+ tmp_s <- Node.addPriEx force_p int_s inst
let tmp_s' = Node.removePri tmp_s inst
- new_p <- Node.addPri tgt_n inst
- new_s <- Node.addSec tmp_s' inst new_pdx
+ new_p <- Node.addPriEx force_p tgt_n inst
+ new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
let new_inst = Instance.setPri inst new_pdx
return (Container.add new_pdx new_p $
Container.addTwo old_pdx int_p old_sdx new_s nl,
old_s = Container.find old_sdx nl
tgt_n = Container.find new_sdx nl
int_s = Node.removeSec old_s inst
+ force_s = Node.offline old_s
new_inst = Instance.setSec inst new_sdx
- new_nl = Node.addSec tgt_n inst old_pdx >>=
+ new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
\new_s -> return (Container.addTwo new_sdx
new_s old_sdx int_s nl,
new_inst, old_pdx, new_sdx)
tgt_n = Container.find new_pdx nl
int_p = Node.removePri old_p inst
int_s = Node.removeSec old_s inst
+ force_s = Node.offline old_s
new_nl = do -- Maybe monad
new_p <- Node.addPri tgt_n inst
- new_s <- Node.addSec int_p inst new_pdx
+ new_s <- Node.addSecEx force_s int_p inst new_pdx
let new_inst = Instance.setBoth inst new_pdx old_pdx
return (Container.add new_pdx new_p $
Container.addTwo old_pdx new_s old_sdx int_s nl,
tgt_n = Container.find new_sdx nl
int_p = Node.removePri old_p inst
int_s = Node.removeSec old_s inst
+ force_p = Node.offline old_p
new_nl = do -- Maybe monad
- new_p <- Node.addPri int_s inst
- new_s <- Node.addSec tgt_n inst old_sdx
+ new_p <- Node.addPriEx force_p int_s inst
+ new_s <- Node.addSecEx force_p tgt_n inst old_sdx
let new_inst = Instance.setBoth inst old_sdx new_sdx
return (Container.add new_sdx new_s $
Container.addTwo old_sdx new_p old_pdx int_p nl,
-- | Tries to allocate an instance on one given node.
allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
- -> OpResult AllocElement
+ -> OpResult Node.AllocElement
allocateOnSingle nl inst p =
let new_pdx = Node.idx p
new_inst = Instance.setBoth inst new_pdx Node.noSecondary
- new_nl = Node.addPri p inst >>= \new_p ->
- return (Container.add new_pdx new_p nl, new_inst, [new_p])
- in new_nl
+ in Node.addPri p inst >>= \new_p -> do
+ let new_nl = Container.add new_pdx new_p nl
+ -- score the *updated* node list, consistent with
+ -- allocateOnPair; scoring the pre-allocation list would
+ -- yield the same value for every candidate node, breaking
+ -- solution ranking in concatAllocs
+ new_score = compCV new_nl
+ return (new_nl, new_inst, [new_p], new_score)
-- | Tries to allocate an instance on a given pair of nodes.
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
- -> OpResult AllocElement
+ -> OpResult Node.AllocElement
allocateOnPair nl inst tgt_p tgt_s =
let new_pdx = Node.idx tgt_p
new_sdx = Node.idx tgt_s
- new_nl = do -- Maybe monad
- new_p <- Node.addPri tgt_p inst
- new_s <- Node.addSec tgt_s inst new_pdx
- let new_inst = Instance.setBoth inst new_pdx new_sdx
- return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst,
- [new_p, new_s])
- in new_nl
+ in do
+ new_p <- Node.addPri tgt_p inst
+ new_s <- Node.addSec tgt_s inst new_pdx
+ let new_inst = Instance.setBoth inst new_pdx new_sdx
+ new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
+ return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
-- | Tries to perform an instance move and returns the best table
-- between the original one and the new one.
best_tbl =
foldl'
(\ step_tbl em ->
- if Instance.sNode em == Node.noSecondary then step_tbl
- else compareTables step_tbl $
- checkInstanceMove nodes_idx disk_moves ini_tbl em)
+ compareTables step_tbl $
+ checkInstanceMove nodes_idx disk_moves ini_tbl em)
ini_tbl victims
Table _ _ _ best_plc = best_tbl
- in
- if length best_plc == length ini_plc then -- no advancement
- ini_tbl
- else
- best_tbl
+ in if length best_plc == length ini_plc
+ then ini_tbl -- no advancement
+ else best_tbl
+
+-- | Check if we are allowed to go deeper in the balancing
+doNextBalance :: Table -- ^ The starting table
+ -> Int -- ^ Maximum number of rounds (negative
+ -- means no limit)
+ -> Score -- ^ Score at which to stop
+ -> Bool -- ^ True if another balancing round
+ -- should be attempted
+doNextBalance ini_tbl max_rounds min_score =
+ let Table _ _ ini_cv ini_plc = ini_tbl
+ ini_plc_len = length ini_plc
+ -- each placement done so far counts as one round
+ in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
-- | Run a balance move
-
tryBalance :: Table -- ^ The starting table
- -> Int -- ^ Remaining length
-> Bool -- ^ Allow disk moves
- -> Score -- ^ Score at which to stop
+ -> Bool -- ^ Only evacuate moves
+ -> Score -- ^ Min gain threshold
+ -> Score -- ^ Min gain
-> Maybe Table -- ^ The resulting table and commands
-tryBalance ini_tbl max_rounds disk_moves min_score =
- let Table ini_nl ini_il ini_cv ini_plc = ini_tbl
- ini_plc_len = length ini_plc
- allowed_next = (max_rounds < 0 || ini_plc_len < max_rounds) &&
- ini_cv > min_score
+tryBalance ini_tbl disk_moves evac_mode mg_limit min_gain =
+ let Table ini_nl ini_il ini_cv _ = ini_tbl
+ all_inst = Container.elems ini_il
+ all_inst' = if evac_mode
+ then let bad_nodes = map Node.idx . filter Node.offline $
+ Container.elems ini_nl
+ in filter (\e -> Instance.sNode e `elem` bad_nodes ||
+ Instance.pNode e `elem` bad_nodes)
+ all_inst
+ else all_inst
+ reloc_inst = filter Instance.movable all_inst'
+ node_idx = map Node.idx . filter (not . Node.offline) $
+ Container.elems ini_nl
+ fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
+ (Table _ _ fin_cv _) = fin_tbl
in
- if allowed_next
- then let all_inst = Container.elems ini_il
- node_idx = map Node.idx . filter (not . Node.offline) $
- Container.elems ini_nl
- fin_tbl = checkMove node_idx disk_moves ini_tbl all_inst
- (Table _ _ fin_cv _) = fin_tbl
- in
- if fin_cv < ini_cv
- then Just fin_tbl -- this round made success, try deeper
- else Nothing
+ if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
+ then Just fin_tbl -- this round made success, return the new table
else Nothing
-- * Allocation functions
-- | Build failure stats out of a list of failures
collapseFailures :: [FailMode] -> FailStats
collapseFailures flst =
- map (\k -> (k, length $ filter ((==) k) flst)) [minBound..maxBound]
+ map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
-- | Update current Allocation solution and failure stats with new
-- elements
-concatAllocs :: AllocSolution -> OpResult AllocElement -> AllocSolution
-concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols)
+concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
+concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
-concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) =
- let nscore = compCV nl
- -- Choose the old or new solution, based on the cluster score
+concatAllocs as (OpGood ns@(_, _, _, nscore)) =
+ let -- Choose the old or new solution, based on the cluster score
+ cntok = asAllocs as
+ osols = asSolutions as
nsols = case osols of
- Nothing -> Just (nscore, ns)
- Just (oscore, _) ->
+ [] -> [ns]
+ (_, _, _, oscore):[] ->
if oscore < nscore
then osols
- else Just (nscore, ns)
+ else [ns]
+ -- FIXME: here we simply concat to lists with more
+ -- than one element; we should instead abort, since
+ -- this is not a valid usage of this function
+ xs -> ns:xs
nsuc = cntok + 1
-- Note: we force evaluation of nsols here in order to keep the
-- memory profile low - we know that we will need nsols for sure
-- in the next cycle, so we force evaluation of nsols, since the
-- foldl' in the caller will only evaluate the tuple, but not the
-- elements of the tuple
- in nsols `seq` nsuc `seq` (flst, nsuc, nsols)
+ in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
+
+-- | Given a solution, generates a reasonable description for it
+describeSolution :: AllocSolution -> String
+describeSolution as =
+ let fcnt = asFailures as
+ sols = asSolutions as
+ -- textual summary of failure reasons; zero counts are elided
+ freasons =
+ intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
+ filter ((> 0) . snd) . collapseFailures $ fcnt
+ in if null sols
+ then "No valid allocation solutions, failure reasons: " ++
+ (if null fcnt
+ then "unknown reasons"
+ else freasons)
+ -- 'head' is safe here: the null case was handled above
+ else let (_, _, nodes, cv) = head sols
+ in printf ("score: %.8f, successes %d, failures %d (%s)" ++
+ " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
+ (intercalate "/" . map Node.name $ nodes)
+
+-- | Annotates a solution with the appropriate string (prepends the
+-- description, so the newest message comes first in 'asLog')
+annotateSolution :: AllocSolution -> AllocSolution
+annotateSolution as = as { asLog = describeSolution as : asLog as }
-- | Try to allocate an instance on the cluster.
tryAlloc :: (Monad m) =>
ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
sols = foldl' (\cstate (p, s) ->
concatAllocs cstate $ allocateOnPair nl inst p s
- ) ([], 0, Nothing) ok_pairs
- in return sols
+ ) emptySolution ok_pairs
+
+ in if null ok_pairs -- means we have just one node
+ then fail "Not enough online nodes"
+ else return $ annotateSolution sols
tryAlloc nl _ inst 1 =
let all_nodes = getOnline nl
sols = foldl' (\cstate ->
concatAllocs cstate . allocateOnSingle nl inst
- ) ([], 0, Nothing) all_nodes
- in return sols
+ ) emptySolution all_nodes
+ in if null all_nodes
+ then fail "No online nodes"
+ else return $ annotateSolution sols
tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
\destinations required (" ++ show reqn ++
"), only two supported"
--- | Try to allocate an instance on the cluster.
+-- | Given a group/result, describe it as a nice (list of) messages;
+-- each log line of a good solution is prefixed with the group name
+-- and allocation policy, errors produce a single message.
+solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
+solutionDescription gl (groupId, result) =
+ case result of
+ Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution)
+ Bad message -> [printf "Group %s: error %s" gname message]
+ where grp = Container.find groupId gl
+ gname = Group.name grp
+ pol = apolToString (Group.allocPolicy grp)
+
+-- | From a list of possibly bad and possibly empty solutions, filter
+-- only the groups with a valid result and an allocation policy other
+-- than 'AllocUnallocable'.
+filterMGResults :: Group.List
+ -> [(Gdx, Result AllocSolution)]
+ -> [(Gdx, AllocSolution)]
+filterMGResults gl =
+ -- the partial pattern (y, Ok x) is safe: failed results have
+ -- already been dropped by the 'isOk' filter just below it
+ filter ((/= AllocUnallocable) . Group.allocPolicy .
+ flip Container.find gl . fst) .
+ filter (not . null . asSolutions . snd) .
+ map (\(y, Ok x) -> (y, x)) .
+ filter (isOk . snd)
+
+-- | Sort multigroup results: primary key is the group's allocation
+-- policy, secondary key the cluster score of its best solution.
+sortMGResults :: Group.List
+ -> [(Gdx, AllocSolution)]
+ -> [(Gdx, AllocSolution)]
+sortMGResults gl sols = sortBy (comparing solScore) sols
+ where extractScore (_, _, _, x) = x
+ solScore (gdx, sol) =
+ (Group.allocPolicy (Container.find gdx gl),
+ extractScore . head . asSolutions $ sol)
+
+-- | Try to allocate an instance on a multi-group cluster.
+tryMGAlloc :: Group.List -- ^ The group list
+ -> Node.List -- ^ The node list
+ -> Instance.List -- ^ The instance list
+ -> Instance.Instance -- ^ The instance to allocate
+ -> Int -- ^ Required number of nodes
+ -> Result AllocSolution -- ^ Possible solution list
+tryMGAlloc mggl mgnl mgil inst cnt =
+ let groups = splitCluster mgnl mgil
+ -- TODO: currently we consider all groups preferred
+ -- each group is tried independently, on its own node/instance
+ -- sub-lists
+ sols = map (\(gid, (nl, il)) ->
+ (gid, tryAlloc nl il inst cnt)) groups::
+ [(Gdx, Result AllocSolution)]
+ all_msgs = concatMap (solutionDescription mggl) sols
+ goodSols = filterMGResults mggl sols
+ -- NOTE(review): ascending sort on (policy, score), so the head
+ -- is the most-preferred-policy, lowest-score group -- depends on
+ -- the Ord instance of the allocation policy type; confirm
+ sortedSols = sortMGResults mggl goodSols
+ in if null sortedSols
+ then Bad $ intercalate ", " all_msgs
+ -- 'head' is safe here: the null case was handled above
+ else let (final_group, final_sol) = head sortedSols
+ final_name = Group.name $ Container.find final_group mggl
+ selmsg = "Selected group: " ++ final_name
+ in Ok $ final_sol { asLog = selmsg:all_msgs }
+
+-- | Try to relocate an instance on the cluster.
tryReloc :: (Monad m) =>
Node.List -- ^ The node list
-> Instance.List -- ^ The instance list
let em = do
(mnl, i, _, _) <-
applyMove nl inst (ReplaceSecondary x)
- return (mnl, i, [Container.find x mnl])
+ return (mnl, i, [Container.find x mnl],
+ compCV mnl)
in concatAllocs cstate em
- ) ([], 0, Nothing) valid_idxes
+ ) emptySolution valid_idxes
in return sols1
tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \
\destinations required (" ++ show reqn ++
"), only one supported"
+-- | Try to evacuate a list of nodes.
+--
+-- NOTE(review): only instances having a *secondary* on the evacuated
+-- nodes are gathered (Node.sList); primaries on those nodes are not
+-- relocated here -- confirm callers fail them over beforehand.
+tryEvac :: (Monad m) =>
+ Node.List -- ^ The node list
+ -> Instance.List -- ^ The instance list
+ -> [Ndx] -- ^ Nodes to be evacuated
+ -> m AllocSolution -- ^ Solution list
+tryEvac nl il ex_ndx =
+ let ex_nodes = map (`Container.find` nl) ex_ndx
+ all_insts = nub . concatMap Node.sList $ ex_nodes
+ in do
+ -- fold over the instances, threading the progressively-updated
+ -- node list through each individual relocation
+ (_, sol) <- foldM (\(nl', old_as) idx -> do
+ -- FIXME: hardcoded one node here
+ -- (fm, cs, aes)
+ new_as <- tryReloc nl' il idx 1 ex_ndx
+ case asSolutions new_as of
+ csol@(nl'', _, _, _):_ ->
+ -- an individual relocation succeeded,
+ -- we kind of compose the data from
+ -- the two solutions
+ return (nl'',
+ new_as { asSolutions =
+ csol:asSolutions old_as })
+ -- this relocation failed, so we fail
+ -- the entire evac
+ _ -> fail $ "Can't evacuate instance " ++
+ Instance.name (Container.find idx il) ++
+ ": " ++ describeSolution new_as
+ ) (nl, emptySolution) all_insts
+ return $ annotateSolution sol
+
+-- | Recursively place instances on the cluster until we're out of space.
+--
+-- Returns the collapsed failure counts, the final node and instance
+-- lists, and the instances placed (newest first).
+iterateAlloc :: Node.List
+ -> Instance.List
+ -> Instance.Instance
+ -> Int
+ -> [Instance.Instance]
+ -> Result (FailStats, Node.List, Instance.List,
+ [Instance.Instance])
+iterateAlloc nl il newinst nreq ixes =
+ let depth = length ixes
+ -- synthetic names/indices for the new instances ("new-0", ...)
+ newname = printf "new-%d" depth::String
+ newidx = length (Container.elems il) + depth
+ newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
+ in case tryAlloc nl il newi2 nreq of
+ Bad s -> Bad s
+ Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
+ case sols3 of
+ -- no placement possible any more: recursion ends here
+ [] -> Ok (collapseFailures errs, nl, il, ixes)
+ (xnl, xi, _, _):[] ->
+ iterateAlloc xnl (Container.add newidx xi il)
+ newinst nreq $! (xi:ixes)
+ _ -> Bad "Internal error: multiple solutions for single\
+ \ allocation"
+
+-- | The core of the tiered allocation mode: allocate at the current
+-- spec until full, then shrink the instance spec and retry.
+tieredAlloc :: Node.List
+ -> Instance.List
+ -> Instance.Instance
+ -> Int
+ -> [Instance.Instance]
+ -> Result (FailStats, Node.List, Instance.List,
+ [Instance.Instance])
+tieredAlloc nl il newinst nreq ixes =
+ case iterateAlloc nl il newinst nreq ixes of
+ Bad s -> Bad s
+ Ok (errs, nl', il', ixes') ->
+ -- shrink along the resource with the most failures ('last' of
+ -- the count-sorted list); NOTE(review): assumes errs is
+ -- non-empty, which holds since collapseFailures enumerates
+ -- every failure mode -- confirm
+ case Instance.shrinkByType newinst . fst . last $
+ sortBy (comparing snd) errs of
+ -- can't shrink further: return what we allocated so far
+ Bad _ -> Ok (errs, nl', il', ixes')
+ Ok newinst' ->
+ tieredAlloc nl' il' newinst' nreq ixes'
+
+-- | Compute the tiered spec string description from a list of
+-- allocated instances (given in reverse allocation order).
+tieredSpecMap :: [Instance.Instance]
+ -> [String]
+tieredSpecMap trl_ixes = map fmtGroup spec_counts
+ where -- restore chronological allocation order
+ ordered = reverse trl_ixes
+ -- runs of consecutive instances sharing the same spec;
+ -- groupBy yields non-empty groups, so 'head' is safe
+ runs = groupBy ((==) `on` Instance.specOf) ordered
+ spec_counts = [ (Instance.specOf (head run), length run)
+ | run <- runs ]
+ fmtGroup (spec, cnt) =
+ printf "%d,%d,%d=%d" (rspecMem spec)
+ (rspecDsk spec) (rspecCpu spec) cnt
+
-- * Formatting functions
-- | Given the original and final nodes, computes the relocation description.
pmlen = (2*nmlen + 1)
(i, p, s, mv, c) = plc
inst = Container.find i il
- inam = Instance.name inst
- npri = Container.nameOf nl p
- nsec = Container.nameOf nl s
- opri = Container.nameOf nl $ Instance.pNode inst
- osec = Container.nameOf nl $ Instance.sNode inst
+ inam = Instance.alias inst
+ npri = Node.alias $ Container.find p nl
+ nsec = Node.alias $ Container.find s nl
+ opri = Node.alias $ Container.find (Instance.pNode inst) nl
+ osec = Node.alias $ Container.find (Instance.sNode inst) nl
(moves, cmds) = computeMoves inst inam mv npri nsec
ostr = printf "%s:%s" opri osec::String
nstr = printf "%s:%s" npri nsec::String
(zip [1..] js)) .
zip [1..]
--- | Converts a solution to string format.
-printSolution :: Node.List
- -> Instance.List
- -> [Placement]
- -> ([String], [[String]])
-printSolution nl il sol =
- let
- nmlen = Container.maxNameLen nl
- imlen = Container.maxNameLen il
- in
- unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..]
-
-- | Print the node list.
-printNodes :: Node.List -> String
-printNodes nl =
- let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
- header = ["F", "Name"
- , "t_mem", "n_mem", "i_mem", "x_mem", "f_mem", "r_mem"
- , "t_dsk", "f_dsk", "pcpu", "vcpu", "pri", "sec"
- , "p_fmem", "p_fdsk", "r_cpu"
- , "lCpu", "lMem", "lDsk", "lNet" ]
- isnum = False:False:repeat True
+printNodes :: Node.List -> [String] -> String
+printNodes nl fs =
+ let fields = case fs of
+ [] -> Node.defaultFields
+ "+":rest -> Node.defaultFields ++ rest
+ _ -> fs
+ snl = sortBy (comparing Node.idx) (Container.elems nl)
+ (header, isnum) = unzip $ map Node.showHeader fields
in unlines . map ((:) ' ' . intercalate " ") $
- formatTable (header:map Node.list snl) isnum
+ formatTable (header:map (Node.list fields) snl) isnum
-- | Print the instance list.
printInsts :: Node.List -> Instance.List -> String
printInsts nl il =
- let sil = sortBy (compare `on` Instance.idx) (Container.elems il)
- helper inst = [ (Instance.name inst)
- , (Container.nameOf nl (Instance.pNode inst))
- , (let sdx = Instance.sNode inst
- in if sdx == Node.noSecondary
- then ""
- else Container.nameOf nl sdx) ]
- header = ["Name", "Pri_node", "Sec_node"]
- isnum = repeat False
+ let sil = sortBy (comparing Instance.idx) (Container.elems il)
+ helper inst = [ if Instance.running inst then "R" else " "
+ , Instance.name inst
+ , Container.nameOf nl (Instance.pNode inst)
+ , let sdx = Instance.sNode inst
+ in if sdx == Node.noSecondary
+ then ""
+ else Container.nameOf nl sdx
+ , printf "%3d" $ Instance.vcpus inst
+ , printf "%5d" $ Instance.mem inst
+ , printf "%5d" $ Instance.dsk inst `div` 1024
+ , printf "%5.3f" lC
+ , printf "%5.3f" lM
+ , printf "%5.3f" lD
+ , printf "%5.3f" lN
+ ]
+ where DynUtil lC lM lD lN = Instance.util inst
+ header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
+ , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
+ isnum = False:False:False:False:repeat True
in unlines . map ((:) ' ' . intercalate " ") $
formatTable (header:map helper sil) isnum
printStats :: Node.List -> String
printStats nl =
let dcvs = compDetailedCV nl
- hd = zip (detailedCVNames ++ repeat "unknown") dcvs
- formatted = map (\(header, val) ->
- printf "%s=%.8f" header val::String) hd
+ (weights, names) = unzip detailedCVInfo
+ hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
+ formatted = map (\(w, header, val) ->
+ printf "%s=%.8f(x%.2f)" header val w::String) hd
in intercalate ", " formatted
-- | Convert a placement into a list of OpCodes (basically a job).
-iMoveToJob :: String -> Node.List -> Instance.List
+iMoveToJob :: Node.List -> Instance.List
-> Idx -> IMove -> [OpCodes.OpCode]
-iMoveToJob csf nl il idx move =
+iMoveToJob nl il idx move =
let inst = Container.find idx il
- iname = Instance.name inst ++ csf
- lookNode n = Just (Container.nameOf nl n ++ csf)
+ iname = Instance.name inst
+ lookNode = Just . Container.nameOf nl
opF = if Instance.running inst
then OpCodes.OpMigrateInstance iname True False
else OpCodes.OpFailoverInstance iname False
ReplaceSecondary ns -> [ opR ns ]
ReplaceAndFailover np -> [ opR np, opF ]
FailoverAndReplace ns -> [ opF, opR ns ]
+
+-- * Node group functions
+
+-- | Computes the group of an instance.
+--
+-- An instance with no secondary belongs to its primary's group; an
+-- instance whose primary and secondary live in different groups is an
+-- error.
+instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
+instanceGroup nl i =
+ let sidx = Instance.sNode i
+ pnode = Container.find (Instance.pNode i) nl
+ snode = if sidx == Node.noSecondary
+ then pnode
+ else Container.find sidx nl
+ pgroup = Node.group pnode
+ sgroup = Node.group snode
+ in if pgroup /= sgroup
+ -- fixed misspelling in the error message ("accross")
+ then fail ("Instance placed across two node groups, primary " ++
+ show pgroup ++ ", secondary " ++ show sgroup)
+ else return pgroup
+
+-- | Compute the list of badly allocated instances (split across node
+-- groups)
+findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
+findSplitInstances nl il =
+ [ inst | inst <- Container.elems il
+ , not . isOk $ instanceGroup nl inst ]
+
+-- | Splits a cluster into the component node groups.
+--
+-- NOTE(review): instances are assigned to a group by their *primary*
+-- node only; split instances end up in their primary's group.
+splitCluster :: Node.List -> Instance.List ->
+ [(Gdx, (Node.List, Instance.List))]
+splitCluster nl il =
+ let ngroups = Node.computeGroups (Container.elems nl)
+ in map (\(guuid, nodes) ->
+ let nidxs = map Node.idx nodes
+ nodes' = zip nidxs nodes
+ instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
+ in (guuid, (Container.fromAssocList nodes', instances))) ngroups