X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/8c9af2f03de610fe7fc55f79e005806078f4d720..6bc3997090f4d390047f228c7e82eefb46b3a0cf:/Ganeti/HTools/Cluster.hs diff --git a/Ganeti/HTools/Cluster.hs b/Ganeti/HTools/Cluster.hs index d8337ae..1a22866 100644 --- a/Ganeti/HTools/Cluster.hs +++ b/Ganeti/HTools/Cluster.hs @@ -7,7 +7,7 @@ goes into the "Main" module for the individual binaries. {- -Copyright (C) 2009 Google Inc. +Copyright (C) 2009, 2010 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,15 +29,16 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA module Ganeti.HTools.Cluster ( -- * Types - AllocSolution + AllocSolution(..) , Table(..) , CStats(..) + , AllocStats -- * Generic functions , totalResources + , computeAllocationDelta -- * First phase functions , computeBadItems -- * Second phase functions - , printSolution , printSolutionLine , formatCmds , involvedNodes @@ -47,24 +48,38 @@ module Ganeti.HTools.Cluster , printInsts -- * Balacing functions , checkMove + , doNextBalance , tryBalance , compCV + , compDetailedCV , printStats , iMoveToJob -- * IAllocator functions , tryAlloc + , tryMGAlloc , tryReloc + , tryEvac , collapseFailures + -- * Allocation functions + , iterateAlloc + , tieredAlloc + , tieredSpecMap + -- * Node group functions + , instanceGroup + , findSplitInstances + , splitCluster ) where +import Data.Function (on) import Data.List +import Data.Ord (comparing) import Text.Printf (printf) -import Data.Function import Control.Monad import qualified Ganeti.HTools.Container as Container import qualified Ganeti.HTools.Instance as Instance import qualified Ganeti.HTools.Node as Node +import qualified Ganeti.HTools.Group as Group import Ganeti.HTools.Types import Ganeti.HTools.Utils import qualified Ganeti.OpCodes as OpCodes @@ -72,15 +87,23 @@ import qualified Ganeti.OpCodes as OpCodes -- * Types -- | Allocation\/relocation solution. -type AllocSolution = ([FailMode], Int, Maybe (Score, AllocElement)) - --- | Allocation\/relocation element. -type AllocElement = (Node.List, Instance.Instance, [Node.Node]) - +data AllocSolution = AllocSolution + { asFailures :: [FailMode] -- ^ Failure counts + , asAllocs :: Int -- ^ Good allocation count + , asSolutions :: [Node.AllocElement] -- ^ The actual result, length + -- of the list depends on the + -- allocation/relocation mode + , asLog :: [String] -- ^ A list of informational messages + } + +-- | The empty solution we start with when computing allocations +emptySolution :: AllocSolution +emptySolution = AllocSolution { asFailures = [], asAllocs = 0 + , asSolutions = [], asLog = [] } -- | The complete state for the balancing solution data Table = Table Node.List Instance.List Score [Placement] - deriving (Show) + deriving (Show, Read) data CStats = CStats { csFmem :: Int -- ^ Cluster free mem , csFdsk :: Int -- ^ Cluster free disk @@ -96,11 +119,18 @@ data CStats = CStats { csFmem :: Int -- ^ Cluster free mem , csTmem :: Double -- ^ Cluster total mem , csTdsk :: Double -- ^ Cluster total disk , csTcpu :: Double -- ^ Cluster total cpus + , csVcpu :: Int -- ^ Cluster virtual cpus (if + -- node pCpu has been set, + -- otherwise -1) , csXmem :: Int -- ^ Unnacounted for mem , csNmem :: Int -- ^ Node own memory , csScore :: Score -- ^ The cluster score , csNinst :: Int -- ^ The total number of instances } + deriving (Show, Read) + +-- | Currently used, possibly to allocate, unallocable +type AllocStats = (RSpec, RSpec, RSpec) -- * Utility functions @@ -119,7 +149,7 @@ computeBadItems :: Node.List -> Instance.List -> ([Node.Node], [Instance.Instance]) computeBadItems nl il = let bad_nodes = verifyN1 $ getOnline nl - bad_instances = map (\idx -> Container.find idx il) . + bad_instances = map (`Container.find` il) . sort . nub $ concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes in @@ -127,7 +157,7 @@ computeBadItems nl il = -- | Zero-initializer for the CStats type emptyCStats :: CStats -emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -- | Update stats with data from a new node updateCStats :: CStats -> Node.Node -> CStats @@ -137,6 +167,7 @@ updateCStats cs node = csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu, csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu, csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu, + csVcpu = x_vcpu, csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst } = cs @@ -147,6 +178,7 @@ updateCStats cs node = - Node.xMem node - Node.fMem node inc_icpu = Node.uCpu node inc_idsk = truncate (Node.tDsk node) - Node.fDsk node + inc_vcpu = Node.hiCpu node in cs { csFmem = x_fmem + Node.fMem node , csFdsk = x_fdsk + Node.fDsk node @@ -162,6 +194,7 @@ updateCStats cs node = , csTmem = x_tmem + Node.tMem node , csTdsk = x_tdsk + Node.tDsk node , csTcpu = x_tcpu + Node.tCpu node + , csVcpu = x_vcpu + inc_vcpu , csXmem = x_xmem + Node.xMem node , csNmem = x_nmem + Node.nMem node , csNinst = x_ninst + length (Node.pList node) @@ -173,19 +206,41 @@ totalResources nl = let cs = foldl' updateCStats emptyCStats . Container.elems $ nl in cs { csScore = compCV nl } --- | The names of the individual elements in the CV list -detailedCVNames :: [String] -detailedCVNames = [ "free_mem_cv" - , "free_disk_cv" - , "n1_score" - , "reserved_mem_cv" - , "offline_score" - , "vcpu_ratio_cv" - , "cpu_load_cv" - , "mem_load_cv" - , "disk_load_cv" - , "net_load_cv" - ] +-- | Compute the delta between two cluster state. +-- +-- This is used when doing allocations, to understand better the +-- available cluster resources. The return value is a triple of the +-- current used values, the delta that was still allocated, and what +-- was left unallocated. +computeAllocationDelta :: CStats -> CStats -> AllocStats +computeAllocationDelta cini cfin = + let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini + CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu, + csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin + rini = RSpec i_icpu i_imem i_idsk + rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk) + un_cpu = v_cpu - f_icpu + runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk) + in (rini, rfin, runa) + +-- | The names and weights of the individual elements in the CV list +detailedCVInfo :: [(Double, String)] +detailedCVInfo = [ (1, "free_mem_cv") + , (1, "free_disk_cv") + , (1, "n1_cnt") + , (1, "reserved_mem_cv") + , (4, "offline_all_cnt") + , (16, "offline_pri_cnt") + , (1, "vcpu_ratio_cv") + , (1, "cpu_load_cv") + , (1, "mem_load_cv") + , (1, "disk_load_cv") + , (1, "net_load_cv") + , (2, "pri_tags_score") + ] + +detailedCVWeights :: [Double] +detailedCVWeights = map fst detailedCVInfo -- | Compute the mem and disk covariance. compDetailedCV :: Node.List -> [Double] @@ -195,36 +250,46 @@ compDetailedCV nl = (offline, nodes) = partition Node.offline all_nodes mem_l = map Node.pMem nodes dsk_l = map Node.pDsk nodes - mem_cv = varianceCoeff mem_l - dsk_cv = varianceCoeff dsk_l - n1_l = length $ filter Node.failN1 nodes - n1_score = fromIntegral n1_l / - fromIntegral (length nodes)::Double + -- metric: memory covariance + mem_cv = stdDev mem_l + -- metric: disk covariance + dsk_cv = stdDev dsk_l + -- metric: count of instances living on N1 failing nodes + n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) + + length (Node.pList n)) . + filter Node.failN1 $ nodes :: Double res_l = map Node.pRem nodes - res_cv = varianceCoeff res_l - offline_inst = sum . map (\n -> (length . Node.pList $ n) + - (length . Node.sList $ n)) $ offline - online_inst = sum . map (\n -> (length . Node.pList $ n) + - (length . Node.sList $ n)) $ nodes - off_score = if offline_inst == 0 - then 0::Double - else fromIntegral offline_inst / - fromIntegral (offline_inst + online_inst)::Double + -- metric: reserved memory covariance + res_cv = stdDev res_l + -- offline instances metrics + offline_ipri = sum . map (length . Node.pList) $ offline + offline_isec = sum . map (length . Node.sList) $ offline + -- metric: count of instances on offline nodes + off_score = fromIntegral (offline_ipri + offline_isec)::Double + -- metric: count of primary instances on offline nodes (this + -- helps with evacuation/failover of primary instances on + -- 2-node clusters with one node offline) + off_pri_score = fromIntegral offline_ipri::Double cpu_l = map Node.pCpu nodes - cpu_cv = varianceCoeff cpu_l + -- metric: covariance of vcpu/pcpu ratio + cpu_cv = stdDev cpu_l + -- metrics: covariance of cpu, memory, disk and network load (c_load, m_load, d_load, n_load) = unzip4 $ map (\n -> let DynUtil c1 m1 d1 n1 = Node.utilLoad n DynUtil c2 m2 d2 n2 = Node.utilPool n in (c1/c2, m1/m2, d1/d2, n1/n2) ) nodes - in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv - , varianceCoeff c_load, varianceCoeff m_load - , varianceCoeff d_load, varianceCoeff n_load] + -- metric: conflicting instance count + pri_tags_inst = sum $ map Node.conflictingPrimaries nodes + pri_tags_score = fromIntegral pri_tags_inst::Double + in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv + , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load + , pri_tags_score ] -- | Compute the /total/ variance. compCV :: Node.List -> Double -compCV = sum . compDetailedCV +compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV -- | Compute online nodes from a Node.List getOnline :: Node.List -> [Node.Node] @@ -248,8 +313,9 @@ applyMove nl inst Failover = old_s = Container.find old_sdx nl int_p = Node.removePri old_p inst int_s = Node.removeSec old_s inst + force_p = Node.offline old_p new_nl = do -- Maybe monad - new_p <- Node.addPri int_s inst + new_p <- Node.addPriEx force_p int_s inst new_s <- Node.addSec int_p inst old_sdx let new_inst = Instance.setBoth inst old_sdx old_pdx return (Container.addTwo old_pdx new_s old_sdx new_p nl, @@ -265,13 +331,14 @@ applyMove nl inst (ReplacePrimary new_pdx) = tgt_n = Container.find new_pdx nl int_p = Node.removePri old_p inst int_s = Node.removeSec old_s inst + force_p = Node.offline old_p new_nl = do -- Maybe monad -- check that the current secondary can host the instance -- during the migration - tmp_s <- Node.addPri int_s inst + tmp_s <- Node.addPriEx force_p int_s inst let tmp_s' = Node.removePri tmp_s inst - new_p <- Node.addPri tgt_n inst - new_s <- Node.addSec tmp_s' inst new_pdx + new_p <- Node.addPriEx force_p tgt_n inst + new_s <- Node.addSecEx force_p tmp_s' inst new_pdx let new_inst = Instance.setPri inst new_pdx return (Container.add new_pdx new_p $ Container.addTwo old_pdx int_p old_sdx new_s nl, @@ -285,8 +352,9 @@ applyMove nl inst (ReplaceSecondary new_sdx) = old_s = Container.find old_sdx nl tgt_n = Container.find new_sdx nl int_s = Node.removeSec old_s inst + force_s = Node.offline old_s new_inst = Instance.setSec inst new_sdx - new_nl = Node.addSec tgt_n inst old_pdx >>= + new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>= \new_s -> return (Container.addTwo new_sdx new_s old_sdx int_s nl, new_inst, old_pdx, new_sdx) @@ -301,9 +369,10 @@ applyMove nl inst (ReplaceAndFailover new_pdx) = tgt_n = Container.find new_pdx nl int_p = Node.removePri old_p inst int_s = Node.removeSec old_s inst + force_s = Node.offline old_s new_nl = do -- Maybe monad new_p <- Node.addPri tgt_n inst - new_s <- Node.addSec int_p inst new_pdx + new_s <- Node.addSecEx force_s int_p inst new_pdx let new_inst = Instance.setBoth inst new_pdx old_pdx return (Container.add new_pdx new_p $ Container.addTwo old_pdx new_s old_sdx int_s nl, @@ -319,9 +388,10 @@ applyMove nl inst (FailoverAndReplace new_sdx) = tgt_n = Container.find new_sdx nl int_p = Node.removePri old_p inst int_s = Node.removeSec old_s inst + force_p = Node.offline old_p new_nl = do -- Maybe monad - new_p <- Node.addPri int_s inst - new_s <- Node.addSec tgt_n inst old_sdx + new_p <- Node.addPriEx force_p int_s inst + new_s <- Node.addSecEx force_p tgt_n inst old_sdx let new_inst = Instance.setBoth inst old_sdx new_sdx return (Container.add new_sdx new_s $ Container.addTwo old_sdx new_p old_pdx int_p nl, @@ -330,27 +400,27 @@ applyMove nl inst (FailoverAndReplace new_sdx) = -- | Tries to allocate an instance on one given node. allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node - -> OpResult AllocElement + -> OpResult Node.AllocElement allocateOnSingle nl inst p = let new_pdx = Node.idx p new_inst = Instance.setBoth inst new_pdx Node.noSecondary - new_nl = Node.addPri p inst >>= \new_p -> - return (Container.add new_pdx new_p nl, new_inst, [new_p]) - in new_nl + in Node.addPri p inst >>= \new_p -> do + let new_nl = Container.add new_pdx new_p nl + new_score = compCV nl + return (new_nl, new_inst, [new_p], new_score) -- | Tries to allocate an instance on a given pair of nodes. allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node - -> OpResult AllocElement + -> OpResult Node.AllocElement allocateOnPair nl inst tgt_p tgt_s = let new_pdx = Node.idx tgt_p new_sdx = Node.idx tgt_s - new_nl = do -- Maybe monad - new_p <- Node.addPri tgt_p inst - new_s <- Node.addSec tgt_s inst new_pdx - let new_inst = Instance.setBoth inst new_pdx new_sdx - return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst, - [new_p, new_s]) - in new_nl + in do + new_p <- Node.addPri tgt_p inst + new_s <- Node.addSec tgt_s inst new_pdx + let new_inst = Instance.setBoth inst new_pdx new_sdx + new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl + return (new_nl, new_inst, [new_p, new_s], compCV new_nl) -- | Tries to perform an instance move and returns the best table -- between the original one and the new one. @@ -425,40 +495,49 @@ checkMove nodes_idx disk_moves ini_tbl victims = best_tbl = foldl' (\ step_tbl em -> - if Instance.sNode em == Node.noSecondary then step_tbl - else compareTables step_tbl $ - checkInstanceMove nodes_idx disk_moves ini_tbl em) + compareTables step_tbl $ + checkInstanceMove nodes_idx disk_moves ini_tbl em) ini_tbl victims Table _ _ _ best_plc = best_tbl - in - if length best_plc == length ini_plc then -- no advancement - ini_tbl - else - best_tbl + in if length best_plc == length ini_plc + then ini_tbl -- no advancement + else best_tbl + +-- | Check if we are allowed to go deeper in the balancing +doNextBalance :: Table -- ^ The starting table + -> Int -- ^ Remaining length + -> Score -- ^ Score at which to stop + -> Bool -- ^ The resulting table and commands +doNextBalance ini_tbl max_rounds min_score = + let Table _ _ ini_cv ini_plc = ini_tbl + ini_plc_len = length ini_plc + in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score -- | Run a balance move - tryBalance :: Table -- ^ The starting table - -> Int -- ^ Remaining length -> Bool -- ^ Allow disk moves - -> Score -- ^ Score at which to stop + -> Bool -- ^ Only evacuate moves + -> Score -- ^ Min gain threshold + -> Score -- ^ Min gain -> Maybe Table -- ^ The resulting table and commands -tryBalance ini_tbl max_rounds disk_moves min_score = - let Table ini_nl ini_il ini_cv ini_plc = ini_tbl - ini_plc_len = length ini_plc - allowed_next = (max_rounds < 0 || ini_plc_len < max_rounds) && - ini_cv > min_score +tryBalance ini_tbl disk_moves evac_mode mg_limit min_gain = + let Table ini_nl ini_il ini_cv _ = ini_tbl + all_inst = Container.elems ini_il + all_inst' = if evac_mode + then let bad_nodes = map Node.idx . filter Node.offline $ + Container.elems ini_nl + in filter (\e -> Instance.sNode e `elem` bad_nodes || + Instance.pNode e `elem` bad_nodes) + all_inst + else all_inst + reloc_inst = filter Instance.movable all_inst' + node_idx = map Node.idx . filter (not . Node.offline) $ + Container.elems ini_nl + fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst + (Table _ _ fin_cv _) = fin_tbl in - if allowed_next - then let all_inst = Container.elems ini_il - node_idx = map Node.idx . filter (not . Node.offline) $ - Container.elems ini_nl - fin_tbl = checkMove node_idx disk_moves ini_tbl all_inst - (Table _ _ fin_cv _) = fin_tbl - in - if fin_cv < ini_cv - then Just fin_tbl -- this round made success, try deeper - else Nothing + if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain) + then Just fin_tbl -- this round made success, return the new table else Nothing -- * Allocation functions @@ -466,29 +545,56 @@ tryBalance ini_tbl max_rounds disk_moves min_score = -- | Build failure stats out of a list of failures collapseFailures :: [FailMode] -> FailStats collapseFailures flst = - map (\k -> (k, length $ filter ((==) k) flst)) [minBound..maxBound] + map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound] -- | Update current Allocation solution and failure stats with new -- elements -concatAllocs :: AllocSolution -> OpResult AllocElement -> AllocSolution -concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols) +concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution +concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as } -concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) = - let nscore = compCV nl - -- Choose the old or new solution, based on the cluster score +concatAllocs as (OpGood ns@(_, _, _, nscore)) = + let -- Choose the old or new solution, based on the cluster score + cntok = asAllocs as + osols = asSolutions as nsols = case osols of - Nothing -> Just (nscore, ns) - Just (oscore, _) -> + [] -> [ns] + (_, _, _, oscore):[] -> if oscore < nscore then osols - else Just (nscore, ns) + else [ns] + -- FIXME: here we simply concat to lists with more + -- than one element; we should instead abort, since + -- this is not a valid usage of this function + xs -> ns:xs nsuc = cntok + 1 -- Note: we force evaluation of nsols here in order to keep the -- memory profile low - we know that we will need nsols for sure -- in the next cycle, so we force evaluation of nsols, since the -- foldl' in the caller will only evaluate the tuple, but not the -- elements of the tuple - in nsols `seq` nsuc `seq` (flst, nsuc, nsols) + in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols } + +-- | Given a solution, generates a reasonable description for it +describeSolution :: AllocSolution -> String +describeSolution as = + let fcnt = asFailures as + sols = asSolutions as + freasons = + intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) . + filter ((> 0) . snd) . collapseFailures $ fcnt + in if null sols + then "No valid allocation solutions, failure reasons: " ++ + (if null fcnt + then "unknown reasons" + else freasons) + else let (_, _, nodes, cv) = head sols + in printf ("score: %.8f, successes %d, failures %d (%s)" ++ + " for node(s) %s") cv (asAllocs as) (length fcnt) freasons + (intercalate "/" . map Node.name $ nodes) + +-- | Annotates a solution with the appropriate string +annotateSolution :: AllocSolution -> AllocSolution +annotateSolution as = as { asLog = describeSolution as : asLog as } -- | Try to allocate an instance on the cluster. tryAlloc :: (Monad m) => @@ -503,21 +609,81 @@ tryAlloc nl _ inst 2 = ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs sols = foldl' (\cstate (p, s) -> concatAllocs cstate $ allocateOnPair nl inst p s - ) ([], 0, Nothing) ok_pairs - in return sols + ) emptySolution ok_pairs + + in if null ok_pairs -- means we have just one node + then fail "Not enough online nodes" + else return $ annotateSolution sols tryAlloc nl _ inst 1 = let all_nodes = getOnline nl sols = foldl' (\cstate -> concatAllocs cstate . allocateOnSingle nl inst - ) ([], 0, Nothing) all_nodes - in return sols + ) emptySolution all_nodes + in if null all_nodes + then fail "No online nodes" + else return $ annotateSolution sols tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \ \destinations required (" ++ show reqn ++ "), only two supported" --- | Try to allocate an instance on the cluster. +-- | Given a group/result, describe it as a nice (list of) messages +solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String] +solutionDescription gl (groupId, result) = + case result of + Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution) + Bad message -> [printf "Group %s: error %s" gname message] + where grp = Container.find groupId gl + gname = Group.name grp + pol = apolToString (Group.allocPolicy grp) + +-- | From a list of possibly bad and possibly empty solutions, filter +-- only the groups with a valid result +filterMGResults :: Group.List + -> [(Gdx, Result AllocSolution)] + -> [(Gdx, AllocSolution)] +filterMGResults gl= + filter ((/= AllocUnallocable) . Group.allocPolicy . + flip Container.find gl . fst) . + filter (not . null . asSolutions . snd) . + map (\(y, Ok x) -> (y, x)) . + filter (isOk . snd) + +-- | Sort multigroup results based on policy and score +sortMGResults :: Group.List + -> [(Gdx, AllocSolution)] + -> [(Gdx, AllocSolution)] +sortMGResults gl sols = + let extractScore = \(_, _, _, x) -> x + solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl), + (extractScore . head . asSolutions) sol) + in sortBy (comparing solScore) sols + +-- | Try to allocate an instance on a multi-group cluster. +tryMGAlloc :: Group.List -- ^ The group list + -> Node.List -- ^ The node list + -> Instance.List -- ^ The instance list + -> Instance.Instance -- ^ The instance to allocate + -> Int -- ^ Required number of nodes + -> Result AllocSolution -- ^ Possible solution list +tryMGAlloc mggl mgnl mgil inst cnt = + let groups = splitCluster mgnl mgil + -- TODO: currently we consider all groups preferred + sols = map (\(gid, (nl, il)) -> + (gid, tryAlloc nl il inst cnt)) groups:: + [(Gdx, Result AllocSolution)] + all_msgs = concatMap (solutionDescription mggl) sols + goodSols = filterMGResults mggl sols + sortedSols = sortMGResults mggl goodSols + in if null sortedSols + then Bad $ intercalate ", " all_msgs + else let (final_group, final_sol) = head sortedSols + final_name = Group.name $ Container.find final_group mggl + selmsg = "Selected group: " ++ final_name + in Ok $ final_sol { asLog = selmsg:all_msgs } + +-- | Try to relocate an instance on the cluster. tryReloc :: (Monad m) => Node.List -- ^ The node list -> Instance.List -- ^ The instance list @@ -535,15 +701,100 @@ tryReloc nl il xid 1 ex_idx = let em = do (mnl, i, _, _) <- applyMove nl inst (ReplaceSecondary x) - return (mnl, i, [Container.find x mnl]) + return (mnl, i, [Container.find x mnl], + compCV mnl) in concatAllocs cstate em - ) ([], 0, Nothing) valid_idxes + ) emptySolution valid_idxes in return sols1 tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \ \destinations required (" ++ show reqn ++ "), only one supported" +-- | Try to evacuate a list of nodes. +tryEvac :: (Monad m) => + Node.List -- ^ The node list + -> Instance.List -- ^ The instance list + -> [Ndx] -- ^ Nodes to be evacuated + -> m AllocSolution -- ^ Solution list +tryEvac nl il ex_ndx = + let ex_nodes = map (`Container.find` nl) ex_ndx + all_insts = nub . concatMap Node.sList $ ex_nodes + in do + (_, sol) <- foldM (\(nl', old_as) idx -> do + -- FIXME: hardcoded one node here + -- (fm, cs, aes) + new_as <- tryReloc nl' il idx 1 ex_ndx + case asSolutions new_as of + csol@(nl'', _, _, _):_ -> + -- an individual relocation succeeded, + -- we kind of compose the data from + -- the two solutions + return (nl'', + new_as { asSolutions = + csol:asSolutions old_as }) + -- this relocation failed, so we fail + -- the entire evac + _ -> fail $ "Can't evacuate instance " ++ + Instance.name (Container.find idx il) ++ + ": " ++ describeSolution new_as + ) (nl, emptySolution) all_insts + return $ annotateSolution sol + +-- | Recursively place instances on the cluster until we're out of space +iterateAlloc :: Node.List + -> Instance.List + -> Instance.Instance + -> Int + -> [Instance.Instance] + -> Result (FailStats, Node.List, Instance.List, + [Instance.Instance]) +iterateAlloc nl il newinst nreq ixes = + let depth = length ixes + newname = printf "new-%d" depth::String + newidx = length (Container.elems il) + depth + newi2 = Instance.setIdx (Instance.setName newinst newname) newidx + in case tryAlloc nl il newi2 nreq of + Bad s -> Bad s + Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) -> + case sols3 of + [] -> Ok (collapseFailures errs, nl, il, ixes) + (xnl, xi, _, _):[] -> + iterateAlloc xnl (Container.add newidx xi il) + newinst nreq $! (xi:ixes) + _ -> Bad "Internal error: multiple solutions for single\ + \ allocation" + +-- | The core of the tiered allocation mode +tieredAlloc :: Node.List + -> Instance.List + -> Instance.Instance + -> Int + -> [Instance.Instance] + -> Result (FailStats, Node.List, Instance.List, + [Instance.Instance]) +tieredAlloc nl il newinst nreq ixes = + case iterateAlloc nl il newinst nreq ixes of + Bad s -> Bad s + Ok (errs, nl', il', ixes') -> + case Instance.shrinkByType newinst . fst . last $ + sortBy (comparing snd) errs of + Bad _ -> Ok (errs, nl', il', ixes') + Ok newinst' -> + tieredAlloc nl' il' newinst' nreq ixes' + +-- | Compute the tiered spec string description from a list of +-- allocated instances. +tieredSpecMap :: [Instance.Instance] + -> [String] +tieredSpecMap trl_ixes = + let fin_trl_ixes = reverse trl_ixes + ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes + spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs)) + ix_byspec + in map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec) + (rspecDsk spec) (rspecCpu spec) cnt) spec_map + -- * Formatting functions -- | Given the original and final nodes, computes the relocation description. @@ -582,11 +833,11 @@ printSolutionLine nl il nmlen imlen plc pos = pmlen = (2*nmlen + 1) (i, p, s, mv, c) = plc inst = Container.find i il - inam = Instance.name inst - npri = Container.nameOf nl p - nsec = Container.nameOf nl s - opri = Container.nameOf nl $ Instance.pNode inst - osec = Container.nameOf nl $ Instance.sNode inst + inam = Instance.alias inst + npri = Node.alias $ Container.find p nl + nsec = Node.alias $ Container.find s nl + opri = Node.alias $ Container.find (Instance.pNode inst) nl + osec = Node.alias $ Container.find (Instance.sNode inst) nl (moves, cmds) = computeMoves inst inam mv npri nsec ostr = printf "%s:%s" opri osec::String nstr = printf "%s:%s" npri nsec::String @@ -639,43 +890,41 @@ formatCmds = (zip [1..] js)) . zip [1..] --- | Converts a solution to string format. -printSolution :: Node.List - -> Instance.List - -> [Placement] - -> ([String], [[String]]) -printSolution nl il sol = - let - nmlen = Container.maxNameLen nl - imlen = Container.maxNameLen il - in - unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..] - -- | Print the node list. -printNodes :: Node.List -> String -printNodes nl = - let snl = sortBy (compare `on` Node.idx) (Container.elems nl) - header = ["F", "Name" - , "t_mem", "n_mem", "i_mem", "x_mem", "f_mem", "r_mem" - , "t_dsk", "f_dsk", "pcpu", "vcpu", "pri", "sec" - , "p_fmem", "p_fdsk", "r_cpu" - , "lCpu", "lMem", "lDsk", "lNet" ] - isnum = False:False:repeat True +printNodes :: Node.List -> [String] -> String +printNodes nl fs = + let fields = case fs of + [] -> Node.defaultFields + "+":rest -> Node.defaultFields ++ rest + _ -> fs + snl = sortBy (comparing Node.idx) (Container.elems nl) + (header, isnum) = unzip $ map Node.showHeader fields in unlines . map ((:) ' ' . intercalate " ") $ - formatTable (header:map Node.list snl) isnum + formatTable (header:map (Node.list fields) snl) isnum -- | Print the instance list. printInsts :: Node.List -> Instance.List -> String printInsts nl il = - let sil = sortBy (compare `on` Instance.idx) (Container.elems il) - helper inst = [ (Instance.name inst) - , (Container.nameOf nl (Instance.pNode inst)) - , (let sdx = Instance.sNode inst - in if sdx == Node.noSecondary - then "" - else Container.nameOf nl sdx) ] - header = ["Name", "Pri_node", "Sec_node"] - isnum = repeat False + let sil = sortBy (comparing Instance.idx) (Container.elems il) + helper inst = [ if Instance.running inst then "R" else " " + , Instance.name inst + , Container.nameOf nl (Instance.pNode inst) + , let sdx = Instance.sNode inst + in if sdx == Node.noSecondary + then "" + else Container.nameOf nl sdx + , printf "%3d" $ Instance.vcpus inst + , printf "%5d" $ Instance.mem inst + , printf "%5d" $ Instance.dsk inst `div` 1024 + , printf "%5.3f" lC + , printf "%5.3f" lM + , printf "%5.3f" lD + , printf "%5.3f" lN + ] + where DynUtil lC lM lD lN = Instance.util inst + header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem" + , "dsk", "lCpu", "lMem", "lDsk", "lNet" ] + isnum = False:False:False:False:repeat True in unlines . map ((:) ' ' . intercalate " ") $ formatTable (header:map helper sil) isnum @@ -683,18 +932,19 @@ printInsts nl il = printStats :: Node.List -> String printStats nl = let dcvs = compDetailedCV nl - hd = zip (detailedCVNames ++ repeat "unknown") dcvs - formatted = map (\(header, val) -> - printf "%s=%.8f" header val::String) hd + (weights, names) = unzip detailedCVInfo + hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs + formatted = map (\(w, header, val) -> + printf "%s=%.8f(x%.2f)" header val w::String) hd in intercalate ", " formatted -- | Convert a placement into a list of OpCodes (basically a job). -iMoveToJob :: String -> Node.List -> Instance.List +iMoveToJob :: Node.List -> Instance.List -> Idx -> IMove -> [OpCodes.OpCode] -iMoveToJob csf nl il idx move = +iMoveToJob nl il idx move = let inst = Container.find idx il - iname = Instance.name inst ++ csf - lookNode n = Just (Container.nameOf nl n ++ csf) + iname = Instance.name inst + lookNode = Just . Container.nameOf nl opF = if Instance.running inst then OpCodes.OpMigrateInstance iname True False else OpCodes.OpFailoverInstance iname False @@ -706,3 +956,37 @@ iMoveToJob csf nl il idx move = ReplaceSecondary ns -> [ opR ns ] ReplaceAndFailover np -> [ opR np, opF ] FailoverAndReplace ns -> [ opF, opR ns ] + +-- * Node group functions + +-- | Computes the group of an instance +instanceGroup :: Node.List -> Instance.Instance -> Result Gdx +instanceGroup nl i = + let sidx = Instance.sNode i + pnode = Container.find (Instance.pNode i) nl + snode = if sidx == Node.noSecondary + then pnode + else Container.find sidx nl + pgroup = Node.group pnode + sgroup = Node.group snode + in if pgroup /= sgroup + then fail ("Instance placed accross two node groups, primary " ++ + show pgroup ++ ", secondary " ++ show sgroup) + else return pgroup + +-- | Compute the list of badly allocated instances (split across node +-- groups) +findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance] +findSplitInstances nl il = + filter (not . isOk . instanceGroup nl) (Container.elems il) + +-- | Splits a cluster into the component node groups +splitCluster :: Node.List -> Instance.List -> + [(Gdx, (Node.List, Instance.List))] +splitCluster nl il = + let ngroups = Node.computeGroups (Container.elems nl) + in map (\(guuid, nodes) -> + let nidxs = map Node.idx nodes + nodes' = zip nidxs nodes + instances = Container.filter ((`elem` nidxs) . Instance.pNode) il + in (guuid, (Container.fromAssocList nodes', instances))) ngroups