X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/8423f76b14778331cb9deff38548638b4198723b..7d3f42530a2e1cdd6ec09a6098402c7e05fc3bdf:/Ganeti/HTools/Cluster.hs diff --git a/Ganeti/HTools/Cluster.hs b/Ganeti/HTools/Cluster.hs index c440c20..c52093d 100644 --- a/Ganeti/HTools/Cluster.hs +++ b/Ganeti/HTools/Cluster.hs @@ -7,7 +7,7 @@ goes into the "Main" module for the individual binaries. {- -Copyright (C) 2009 Google Inc. +Copyright (C) 2009, 2010 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -39,7 +39,6 @@ module Ganeti.HTools.Cluster -- * First phase functions , computeBadItems -- * Second phase functions - , printSolution , printSolutionLine , formatCmds , involvedNodes @@ -59,6 +58,12 @@ module Ganeti.HTools.Cluster , tryReloc , tryEvac , collapseFailures + -- * Allocation functions + , iterateAlloc + , tieredAlloc + , instanceGroup + , findSplitInstances + , splitCluster ) where import Data.List @@ -200,21 +205,24 @@ computeAllocationDelta cini cfin = runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk) in (rini, rfin, runa) --- | The names of the individual elements in the CV list -detailedCVNames :: [String] -detailedCVNames = [ "free_mem_cv" - , "free_disk_cv" - , "n1_cnt" - , "reserved_mem_cv" - , "offline_all_cnt" - , "offline_pri_cnt" - , "vcpu_ratio_cv" - , "cpu_load_cv" - , "mem_load_cv" - , "disk_load_cv" - , "net_load_cv" - , "pri_tags_score" - ] +-- | The names and weights of the individual elements in the CV list +detailedCVInfo :: [(Double, String)] +detailedCVInfo = [ (1, "free_mem_cv") + , (1, "free_disk_cv") + , (1, "n1_cnt") + , (1, "reserved_mem_cv") + , (4, "offline_all_cnt") + , (16, "offline_pri_cnt") + , (1, "vcpu_ratio_cv") + , (1, "cpu_load_cv") + , (1, "mem_load_cv") + , (1, "disk_load_cv") + , (1, "net_load_cv") + , (2, "pri_tags_score") + ] + +detailedCVWeights :: [Double] +detailedCVWeights = map fst detailedCVInfo -- | Compute the mem and disk covariance. compDetailedCV :: Node.List -> [Double] @@ -228,9 +236,10 @@ compDetailedCV nl = mem_cv = varianceCoeff mem_l -- metric: disk covariance dsk_cv = varianceCoeff dsk_l - n1_l = length $ filter Node.failN1 nodes - -- metric: count of failN1 nodes - n1_score = fromIntegral n1_l::Double + -- metric: count of instances living on N1 failing nodes + n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) + + length (Node.pList n)) . + filter Node.failN1 $ nodes :: Double res_l = map Node.pRem nodes -- metric: reserved memory covariance res_cv = varianceCoeff res_l @@ -263,7 +272,7 @@ compDetailedCV nl = -- | Compute the /total/ variance. compCV :: Node.List -> Double -compCV = sum . compDetailedCV +compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV -- | Compute online nodes from a Node.List getOnline :: Node.List -> [Node.Node] @@ -287,8 +296,9 @@ applyMove nl inst Failover = old_s = Container.find old_sdx nl int_p = Node.removePri old_p inst int_s = Node.removeSec old_s inst + force_p = Node.offline old_p new_nl = do -- Maybe monad - new_p <- Node.addPri int_s inst + new_p <- Node.addPriEx force_p int_s inst new_s <- Node.addSec int_p inst old_sdx let new_inst = Instance.setBoth inst old_sdx old_pdx return (Container.addTwo old_pdx new_s old_sdx new_p nl, @@ -304,13 +314,14 @@ applyMove nl inst (ReplacePrimary new_pdx) = tgt_n = Container.find new_pdx nl int_p = Node.removePri old_p inst int_s = Node.removeSec old_s inst + force_p = Node.offline old_p new_nl = do -- Maybe monad -- check that the current secondary can host the instance -- during the migration - tmp_s <- Node.addPri int_s inst + tmp_s <- Node.addPriEx force_p int_s inst let tmp_s' = Node.removePri tmp_s inst - new_p <- Node.addPri tgt_n inst - new_s <- Node.addSec tmp_s' inst new_pdx + new_p <- Node.addPriEx force_p tgt_n inst + new_s <- Node.addSecEx force_p tmp_s' inst new_pdx let new_inst = Instance.setPri inst new_pdx return (Container.add new_pdx new_p $ Container.addTwo old_pdx int_p old_sdx new_s nl, @@ -324,8 +335,9 @@ applyMove nl inst (ReplaceSecondary new_sdx) = old_s = Container.find old_sdx nl tgt_n = Container.find new_sdx nl int_s = Node.removeSec old_s inst + force_s = Node.offline old_s new_inst = Instance.setSec inst new_sdx - new_nl = Node.addSec tgt_n inst old_pdx >>= + new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>= \new_s -> return (Container.addTwo new_sdx new_s old_sdx int_s nl, new_inst, old_pdx, new_sdx) @@ -340,9 +352,10 @@ applyMove nl inst (ReplaceAndFailover new_pdx) = tgt_n = Container.find new_pdx nl int_p = Node.removePri old_p inst int_s = Node.removeSec old_s inst + force_s = Node.offline old_s new_nl = do -- Maybe monad new_p <- Node.addPri tgt_n inst - new_s <- Node.addSec int_p inst new_pdx + new_s <- Node.addSecEx force_s int_p inst new_pdx let new_inst = Instance.setBoth inst new_pdx old_pdx return (Container.add new_pdx new_p $ Container.addTwo old_pdx new_s old_sdx int_s nl, @@ -358,9 +371,10 @@ applyMove nl inst (FailoverAndReplace new_sdx) = tgt_n = Container.find new_sdx nl int_p = Node.removePri old_p inst int_s = Node.removeSec old_s inst + force_p = Node.offline old_p new_nl = do -- Maybe monad - new_p <- Node.addPri int_s inst - new_s <- Node.addSec tgt_n inst old_sdx + new_p <- Node.addPriEx force_p int_s inst + new_s <- Node.addSecEx force_p tgt_n inst old_sdx let new_inst = Instance.setBoth inst old_sdx new_sdx return (Container.add new_sdx new_s $ Container.addTwo old_sdx new_p old_pdx int_p nl, @@ -373,9 +387,10 @@ allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node allocateOnSingle nl inst p = let new_pdx = Node.idx p new_inst = Instance.setBoth inst new_pdx Node.noSecondary - new_nl = Node.addPri p inst >>= \new_p -> - return (Container.add new_pdx new_p nl, new_inst, [new_p]) - in new_nl + in Node.addPri p inst >>= \new_p -> do + let new_nl = Container.add new_pdx new_p nl + new_score = compCV nl + return (new_nl, new_inst, [new_p], new_score) -- | Tries to allocate an instance on a given pair of nodes. allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node @@ -383,13 +398,12 @@ allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node allocateOnPair nl inst tgt_p tgt_s = let new_pdx = Node.idx tgt_p new_sdx = Node.idx tgt_s - new_nl = do -- Maybe monad - new_p <- Node.addPri tgt_p inst - new_s <- Node.addSec tgt_s inst new_pdx - let new_inst = Instance.setBoth inst new_pdx new_sdx - return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst, - [new_p, new_s]) - in new_nl + in do + new_p <- Node.addPri tgt_p inst + new_s <- Node.addSec tgt_s inst new_pdx + let new_inst = Instance.setBoth inst new_pdx new_sdx + new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl + return (new_nl, new_inst, [new_p, new_s], compCV new_nl) -- | Tries to perform an instance move and returns the best table -- between the original one and the new one. @@ -473,23 +487,23 @@ checkMove nodes_idx disk_moves ini_tbl victims = else best_tbl -- | Check if we are allowed to go deeper in the balancing - -doNextBalance :: Table -- ^ The starting table - -> Int -- ^ Remaining length - -> Score -- ^ Score at which to stop - -> Bool -- ^ The resulting table and commands +doNextBalance :: Table -- ^ The starting table + -> Int -- ^ Remaining length + -> Score -- ^ Score at which to stop + -> Bool -- ^ The resulting table and commands doNextBalance ini_tbl max_rounds min_score = let Table _ _ ini_cv ini_plc = ini_tbl ini_plc_len = length ini_plc in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score -- | Run a balance move - tryBalance :: Table -- ^ The starting table -> Bool -- ^ Allow disk moves -> Bool -- ^ Only evacuate moves + -> Score -- ^ Min gain threshold + -> Score -- ^ Min gain -> Maybe Table -- ^ The resulting table and commands -tryBalance ini_tbl disk_moves evac_mode = +tryBalance ini_tbl disk_moves evac_mode mg_limit min_gain = let Table ini_nl ini_il ini_cv _ = ini_tbl all_inst = Container.elems ini_il all_inst' = if evac_mode @@ -505,7 +519,7 @@ tryBalance ini_tbl disk_moves evac_mode = fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst (Table _ _ fin_cv _) = fin_tbl in - if fin_cv < ini_cv + if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain) then Just fin_tbl -- this round made success, return the new table else Nothing @@ -521,9 +535,8 @@ collapseFailures flst = concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols) -concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) = - let nscore = compCV nl - -- Choose the old or new solution, based on the cluster score +concatAllocs (flst, cntok, osols) (OpGood ns@(_, _, _, nscore)) = + let -- Choose the old or new solution, based on the cluster score nsols = case osols of [] -> [(nscore, ns)] (oscore, _):[] -> @@ -587,7 +600,8 @@ tryReloc nl il xid 1 ex_idx = let em = do (mnl, i, _, _) <- applyMove nl inst (ReplaceSecondary x) - return (mnl, i, [Container.find x mnl]) + return (mnl, i, [Container.find x mnl], + compCV mnl) in concatAllocs cstate em ) ([], 0, []) valid_idxes in return sols1 @@ -596,7 +610,7 @@ tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \ \destinations required (" ++ show reqn ++ "), only one supported" --- | Try to allocate an instance on the cluster. +-- | Try to evacuate a list of nodes. tryEvac :: (Monad m) => Node.List -- ^ The node list -> Instance.List -- ^ The instance list @@ -610,13 +624,54 @@ tryEvac nl il ex_ndx = -- FIXME: hardcoded one node here (fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx case aes of - csol@(_, (nl'', _, _)):_ -> + csol@(_, (nl'', _, _, _)):_ -> return (nl'', (fm, cs, csol:rsols)) _ -> fail $ "Can't evacuate instance " ++ - show idx + Instance.name (Container.find idx il) ) (nl, ([], 0, [])) all_insts return sol +-- | Recursively place instances on the cluster until we're out of space +iterateAlloc :: Node.List + -> Instance.List + -> Instance.Instance + -> Int + -> [Instance.Instance] + -> Result (FailStats, Node.List, Instance.List, + [Instance.Instance]) +iterateAlloc nl il newinst nreq ixes = + let depth = length ixes + newname = printf "new-%d" depth::String + newidx = length (Container.elems il) + depth + newi2 = Instance.setIdx (Instance.setName newinst newname) newidx + in case tryAlloc nl il newi2 nreq of + Bad s -> Bad s + Ok (errs, _, sols3) -> + case sols3 of + [] -> Ok (collapseFailures errs, nl, il, ixes) + (_, (xnl, xi, _, _)):[] -> + iterateAlloc xnl (Container.add newidx xi il) + newinst nreq $! (xi:ixes) + _ -> Bad "Internal error: multiple solutions for single\ + \ allocation" + +tieredAlloc :: Node.List + -> Instance.List + -> Instance.Instance + -> Int + -> [Instance.Instance] + -> Result (FailStats, Node.List, Instance.List, + [Instance.Instance]) +tieredAlloc nl il newinst nreq ixes = + case iterateAlloc nl il newinst nreq ixes of + Bad s -> Bad s + Ok (errs, nl', il', ixes') -> + case Instance.shrinkByType newinst . fst . last $ + sortBy (comparing snd) errs of + Bad _ -> Ok (errs, nl', il', ixes') + Ok newinst' -> + tieredAlloc nl' il' newinst' nreq ixes' + -- * Formatting functions -- | Given the original and final nodes, computes the relocation description. @@ -655,11 +710,11 @@ printSolutionLine nl il nmlen imlen plc pos = pmlen = (2*nmlen + 1) (i, p, s, mv, c) = plc inst = Container.find i il - inam = Instance.name inst - npri = Container.nameOf nl p - nsec = Container.nameOf nl s - opri = Container.nameOf nl $ Instance.pNode inst - osec = Container.nameOf nl $ Instance.sNode inst + inam = Instance.alias inst + npri = Node.alias $ Container.find p nl + nsec = Node.alias $ Container.find s nl + opri = Node.alias $ Container.find (Instance.pNode inst) nl + osec = Node.alias $ Container.find (Instance.sNode inst) nl (moves, cmds) = computeMoves inst inam mv npri nsec ostr = printf "%s:%s" opri osec::String nstr = printf "%s:%s" npri nsec::String @@ -712,24 +767,13 @@ formatCmds = (zip [1..] js)) . zip [1..] --- | Converts a solution to string format. -printSolution :: Node.List - -> Instance.List - -> [Placement] - -> ([String], [[String]]) -printSolution nl il sol = - let - nmlen = Container.maxNameLen nl - imlen = Container.maxNameLen il - in - unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..] - -- | Print the node list. printNodes :: Node.List -> [String] -> String printNodes nl fs = - let fields = if null fs - then Node.defaultFields - else fs + let fields = case fs of + [] -> Node.defaultFields + "+":rest -> Node.defaultFields ++ rest + _ -> fs snl = sortBy (comparing Node.idx) (Container.elems nl) (header, isnum) = unzip $ map Node.showHeader fields in unlines . map ((:) ' ' . intercalate " ") $ @@ -765,9 +809,10 @@ printInsts nl il = printStats :: Node.List -> String printStats nl = let dcvs = compDetailedCV nl - hd = zip (detailedCVNames ++ repeat "unknown") dcvs - formatted = map (\(header, val) -> - printf "%s=%.8f" header val::String) hd + (weights, names) = unzip detailedCVInfo + hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs + formatted = map (\(w, header, val) -> + printf "%s=%.8f(x%.2f)" header val w::String) hd in intercalate ", " formatted -- | Convert a placement into a list of OpCodes (basically a job). @@ -788,3 +833,35 @@ iMoveToJob nl il idx move = ReplaceSecondary ns -> [ opR ns ] ReplaceAndFailover np -> [ opR np, opF ] FailoverAndReplace ns -> [ opF, opR ns ] + +-- | Computes the group of an instance +instanceGroup :: Node.List -> Instance.Instance -> Result GroupID +instanceGroup nl i = + let sidx = Instance.sNode i + pnode = Container.find (Instance.pNode i) nl + snode = if sidx == Node.noSecondary + then pnode + else Container.find sidx nl + puuid = Node.group pnode + suuid = Node.group snode + in if puuid /= suuid + then fail ("Instance placed accross two node groups, primary " ++ puuid ++ + ", secondary " ++ suuid) + else return puuid + +-- | Compute the list of badly allocated instances (split across node +-- groups) +findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance] +findSplitInstances nl il = + filter (not . isOk . instanceGroup nl) (Container.elems il) + +-- | Splits a cluster into the component node groups +splitCluster :: Node.List -> Instance.List -> + [(GroupID, (Node.List, Instance.List))] +splitCluster nl il = + let ngroups = Node.computeGroups (Container.elems nl) + in map (\(guuid, nodes) -> + let nidxs = map Node.idx nodes + nodes' = zip nidxs nodes + instances = Container.filter ((`elem` nidxs) . Instance.pNode) il + in (guuid, (Container.fromAssocList nodes', instances))) ngroups