From 478df6860fe53fa36264bd8a7ca6cfdda4afd124 Mon Sep 17 00:00:00 2001 From: Iustin Pop Date: Thu, 9 Jul 2009 14:44:24 +0200 Subject: [PATCH 1/1] Change the tryAlloc/tryReloc workflow Currently, the tryAlloc and tryReloc function return a list with all the results, both failures and successes. This is fine for hail, which does one round of allocations, but is not so good for hspace, which does iterative rounds; since at each (successful) step we only take the best solution, it means that we're using lots of heap space to compute and store node lists which are thrown away at the end of the step. This patch changes these two functions and their callers in hail/hspace to only return the best solution, and error/success counters. This allows hspace to run in a much smaller space, and reduces GC cost greatly. Overall, it is a cleanup, as hail/hspace did a lot of work to chose this best solution, whereas now it's automatically promoted within Cluster.concatAllocs. --- Ganeti/HTools/Cluster.hs | 60 +++++++++++++++++++++++++++++++++++++--------- Ganeti/HTools/Types.hs | 4 ++++ hail.hs | 52 ++++++++++++++-------------------------- hspace.hs | 44 ++++------------------------------ 4 files changed, 76 insertions(+), 84 deletions(-) diff --git a/Ganeti/HTools/Cluster.hs b/Ganeti/HTools/Cluster.hs index c4dba4c..7fa8ed8 100644 --- a/Ganeti/HTools/Cluster.hs +++ b/Ganeti/HTools/Cluster.hs @@ -51,6 +51,7 @@ module Ganeti.HTools.Cluster -- * IAllocator functions , tryAlloc , tryReloc + , collapseFailures ) where import Data.List @@ -73,7 +74,10 @@ type Score = Double type Placement = (Idx, Ndx, Ndx, Score) -- | Allocation\/relocation solution. -type AllocSolution = [OpResult (Node.List, Instance.Instance, [Node.Node])] +type AllocSolution = ([FailMode], Int, Maybe (Score, AllocElement)) + +-- | Allocation\/relocation element. +type AllocElement = (Node.List, Instance.Instance, [Node.Node]) -- | An instance move definition data IMove = Failover -- ^ Failover the instance (f) @@ -332,7 +336,7 @@ applyMove nl inst (FailoverAndReplace new_sdx) = -- | Tries to allocate an instance on one given node. allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node - -> OpResult (Node.List, Instance.Instance, [Node.Node]) + -> OpResult AllocElement allocateOnSingle nl inst p = let new_pdx = Node.idx p new_inst = Instance.setBoth inst new_pdx Node.noSecondary @@ -342,7 +346,7 @@ allocateOnSingle nl inst p = -- | Tries to allocate an instance on a given pair of nodes. allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node - -> OpResult (Node.List, Instance.Instance, [Node.Node]) + -> OpResult AllocElement allocateOnPair nl inst tgt_p tgt_s = let new_pdx = Node.idx tgt_p new_sdx = Node.idx tgt_s @@ -432,7 +436,34 @@ checkMove nodes_idx ini_tbl victims = else best_tbl --- * Alocation functions +-- * Allocation functions + +-- | Build failure stats out of a list of failures +collapseFailures :: [FailMode] -> FailStats +collapseFailures flst = + map (\k -> (k, length $ filter ((==) k) flst)) [minBound..maxBound] + +-- | Update current Allocation solution and failure stats with new +-- elements +concatAllocs :: AllocSolution -> OpResult AllocElement -> AllocSolution +concatAllocs (flst, succ, sols) (OpFail reason) = (reason:flst, succ, sols) + +concatAllocs (flst, succ, osols) (OpGood ns@(nl, _, _)) = + let nscore = compCV nl + -- Choose the old or new solution, based on the cluster score + nsols = case osols of + Nothing -> Just (nscore, ns) + Just (oscore, _) -> + if oscore < nscore + then osols + else Just (nscore, ns) + nsuc = succ + 1 + -- Note: we force evaluation of nsols here in order to keep the + -- memory profile low - we know that we will need nsols for sure + -- in the next cycle, so we force evaluation of nsols, since the + -- foldl' in the caller will only evaluate the tuple, but not the + -- *elements* of the tuple + in nsols `seq` nsuc `seq` (flst, nsuc, nsols) -- | Try to allocate an instance on the cluster. tryAlloc :: (Monad m) => @@ -445,12 +476,16 @@ tryAlloc nl _ inst 2 = let all_nodes = getOnline nl all_pairs = liftM2 (,) all_nodes all_nodes ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs - sols = map (uncurry $ allocateOnPair nl inst) ok_pairs + sols = foldl' (\cstate (p, s) -> + concatAllocs cstate $ allocateOnPair nl inst p s + ) ([], 0, Nothing) ok_pairs in return sols tryAlloc nl _ inst 1 = let all_nodes = getOnline nl - sols = map (allocateOnSingle nl inst) all_nodes + sols = foldl' (\cstate p -> + concatAllocs cstate $ allocateOnSingle nl inst p + ) ([], 0, Nothing) all_nodes in return sols tryAlloc _ _ _ reqn = fail $ "Unsupported number of alllocation \ @@ -462,7 +497,7 @@ tryReloc :: (Monad m) => Node.List -- ^ The node list -> Instance.List -- ^ The instance list -> Idx -- ^ The index of the instance to move - -> Int -- ^ The numver of nodes required + -> Int -- ^ The number of nodes required -> [Ndx] -- ^ Nodes which should not be used -> m AllocSolution -- ^ Solution list tryReloc nl il xid 1 ex_idx = @@ -471,10 +506,13 @@ tryReloc nl il xid 1 ex_idx = ex_idx' = Instance.pnode inst:ex_idx valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes valid_idxes = map Node.idx valid_nodes - sols1 = map (\x -> do - (mnl, i, _, _) <- applyMove nl inst (ReplaceSecondary x) - return (mnl, i, [Container.find x nl]) - ) valid_idxes + sols1 = foldl' (\cstate x -> + let elem = do + (mnl, i, _, _) <- + applyMove nl inst (ReplaceSecondary x) + return (mnl, i, [Container.find x mnl]) + in concatAllocs cstate elem + ) ([], 0, Nothing) valid_idxes in return sols1 tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \ diff --git a/Ganeti/HTools/Types.hs b/Ganeti/HTools/Types.hs index cb384e8..4a009f4 100644 --- a/Ganeti/HTools/Types.hs +++ b/Ganeti/HTools/Types.hs @@ -30,6 +30,7 @@ module Ganeti.HTools.Types , Result(..) , Element(..) , FailMode(..) + , FailStats , OpResult(..) ) where @@ -67,6 +68,9 @@ data FailMode = FailMem -- ^ Failed due to not enough RAM | FailN1 -- ^ Failed due to not passing N1 checks deriving (Eq, Enum, Bounded, Show) +-- | List with failure statistics +type FailStats = [(FailMode, Int)] + -- | Either-like data-type customized for our failure modes data OpResult a = OpFail FailMode -- ^ Failed operation | OpGood a -- ^ Success operation diff --git a/hail.hs b/hail.hs index b412680..0167d55 100644 --- a/hail.hs +++ b/hail.hs @@ -37,7 +37,6 @@ import Text.Printf (printf) import qualified Ganeti.HTools.Cluster as Cluster import qualified Ganeti.HTools.Node as Node -import qualified Ganeti.HTools.Instance as Instance import qualified Ganeti.HTools.CLI as CLI import Ganeti.HTools.IAlloc import Ganeti.HTools.Types @@ -72,37 +71,21 @@ options = ] -filterFails :: (Monad m) => - [OpResult (Node.List, Instance.Instance, [Node.Node])] - -> m [(Node.List, [Node.Node])] -filterFails sols = - if null sols then fail "No nodes onto which to allocate at all" - else let sols' = concatMap (\ e -> - case e of - OpFail _ -> [] - OpGood (gnl, _, nn) -> [(gnl, nn)] - ) sols - in - if null sols' - then fail "No valid allocation solutions" - else return sols' - -processResults :: (Monad m) => [(Node.List, [Node.Node])] - -> m (String, [Node.Node]) -processResults sols = - let sols' = map (\(nl', ns) -> (Cluster.compCV nl', ns)) sols - sols'' = sortBy (compare `on` fst) sols' - (best, w) = head sols'' - (worst, l) = last sols'' - info = printf "Valid results: %d, best score: %.8f for node(s) %s, \ - \worst score: %.8f for node(s) %s" (length sols'') - best (intercalate "/" . map Node.name $ w) - worst (intercalate "/" . map Node.name $ l)::String - in return (info, w) +processResults :: (Monad m) => Cluster.AllocSolution -> m (String, [Node.Node]) +processResults (fstats, succ, sols) = + case sols of + Nothing -> fail "No valid allocation solutions" + Just (best, (_, _, w)) -> + let tfails = length fstats + info = printf "successes %d, failures %d,\ + \ best score: %.8f for node(s) %s" + succ tfails + best (intercalate "/" . map Node.name $ w)::String + in return (info, w) -- | Process a request and return new node lists processRequest :: Request - -> Result [OpResult (Node.List, Instance.Instance, [Node.Node])] + -> Result Cluster.AllocSolution processRequest request = let Request rqtype nl il _ = request in case rqtype of @@ -129,10 +112,11 @@ main = do Ok rq -> return rq let Request _ _ _ csf = request - sols = processRequest request >>= filterFails >>= processResults - let (ok, info, rn) = case sols of - Ok (info, sn) -> (True, "Request successful: " ++ info, - map ((++ csf) . Node.name) sn) - Bad s -> (False, "Request failed: " ++ s, []) + sols = processRequest request >>= processResults + let (ok, info, rn) = + case sols of + Ok (info, sn) -> (True, "Request successful: " ++ info, + map ((++ csf) . Node.name) sn) + Bad s -> (False, "Request failed: " ++ s, []) resp = formatResponse ok info rn putStrLn resp diff --git a/hspace.hs b/hspace.hs index 23b8acf..ce09d5c 100644 --- a/hspace.hs +++ b/hspace.hs @@ -192,39 +192,6 @@ clusterData = [ ("MEM", printf "%.0f" . Cluster.cs_tmem) , ("CPU", printf "%.0f" . Cluster.cs_tcpu) ] --- | Build failure stats out of a list of failure reasons -concatFailure :: [(FailMode, Int)] -> FailMode -> [(FailMode, Int)] -concatFailure flst reason = - let cval = lookup reason flst - in case cval of - Nothing -> (reason, 1):flst - Just val -> let plain = filter (\(x, _) -> x /= reason) flst - in (reason, val+1):plain - --- | Build list of failures and placements out of an list of possible --- | allocations -filterFails :: Cluster.AllocSolution - -> ([(FailMode, Int)], - [(Node.List, Instance.Instance, [Node.Node])]) -filterFails sols = - let (alst, blst) = unzip . map (\ e -> - case e of - OpFail reason -> ([reason], []) - OpGood (gnl, i, nn) -> - ([], [(gnl, i, nn)]) - ) $ sols - aval = concat alst - bval = concat blst - in (foldl' concatFailure [(x, 0) | x <- [minBound..maxBound]] aval, bval) - --- | Get the placement with best score out of a list of possible placements -processResults :: [(Node.List, Instance.Instance, [Node.Node])] - -> (Node.List, Instance.Instance, [Node.Node]) -processResults sols = - let sols' = map (\e@(nl', _, _) -> (Cluster.compCV nl', e)) sols - sols'' = sortBy (compare `on` fst) sols' - in snd $ head sols'' - -- | Recursively place instances on the cluster until we're out of space iterateDepth :: Node.List -> Instance.List @@ -241,12 +208,11 @@ iterateDepth nl il newinst nreq ixes = OpResult Cluster.AllocSolution in case sols of OpFail _ -> ([], nl, ixes) - OpGood sols' -> - let (errs, sols3) = filterFails sols' - in if null sols3 - then (errs, nl, ixes) - else let (xnl, xi, _) = processResults sols3 - in iterateDepth xnl il newinst nreq (xi:ixes) + OpGood (errs, _, sols3) -> + case sols3 of + Nothing -> (Cluster.collapseFailures errs, nl, ixes) + Just (_, (xnl, xi, _)) -> + iterateDepth xnl il newinst nreq $! (xi:ixes) -- | Function to print stats for a given phase printStats :: Phase -> Cluster.CStats -> [(String, String)] -- 1.7.10.4