X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/1a7eff0ecbe5b7c7b957806cb196c8c45d4713b8..41c3b292188e3c8cebe6cb03b5001620865752ea:/Ganeti/HTools/Cluster.hs diff --git a/Ganeti/HTools/Cluster.hs b/Ganeti/HTools/Cluster.hs index eb5e95b..dc766a8 100644 --- a/Ganeti/HTools/Cluster.hs +++ b/Ganeti/HTools/Cluster.hs @@ -31,9 +31,7 @@ module Ganeti.HTools.Cluster -- * Types Placement , AllocSolution - , Solution(..) , Table(..) - , Removal , Score , IMove(..) , CStats(..) @@ -42,8 +40,6 @@ module Ganeti.HTools.Cluster -- * First phase functions , computeBadItems -- * Second phase functions - , computeSolution - , applySolution , printSolution , printSolutionLine , formatCmds @@ -61,7 +57,6 @@ module Ganeti.HTools.Cluster ) where import Data.List -import Data.Maybe (isNothing, fromJust) import Text.Printf (printf) import Data.Function import Control.Monad @@ -81,15 +76,7 @@ type Score = Double type Placement = (Idx, Ndx, Ndx, Score) -- | Allocation\/relocation solution. -type AllocSolution = [(Maybe Node.List, Instance.Instance, [Node.Node])] - --- | A cluster solution described as the solution delta and the list --- of placements. -data Solution = Solution Int [Placement] - deriving (Eq, Ord, Show) - --- | A removal set. -data Removal = Removal Node.List [Instance.Instance] +type AllocSolution = [(OpResult Node.List, Instance.Instance, [Node.Node])] -- | An instance move definition data IMove = Failover -- ^ Failover the instance (f) @@ -115,27 +102,9 @@ data CStats = CStats { cs_fmem :: Int -- ^ Cluster free mem -- * Utility functions --- | Returns the delta of a solution or -1 for Nothing. -solutionDelta :: Maybe Solution -> Int -solutionDelta sol = case sol of - Just (Solution d _) -> d - _ -> -1 - --- | Cap the removal list if needed. -capRemovals :: [a] -> Int -> [a] -capRemovals removals max_removals = - if max_removals > 0 then - take max_removals removals - else - removals - --- | Check if the given node list fails the N+1 check. -verifyN1Check :: [Node.Node] -> Bool -verifyN1Check nl = any Node.failN1 nl - -- | Verifies the N+1 status and return the affected nodes. verifyN1 :: [Node.Node] -> [Node.Node] -verifyN1 nl = filter Node.failN1 nl +verifyN1 = filter Node.failN1 {-| Computes the pair of bad nodes and instances. @@ -148,9 +117,9 @@ computeBadItems :: Node.List -> Instance.List -> ([Node.Node], [Instance.Instance]) computeBadItems nl il = let bad_nodes = verifyN1 $ getOnline nl - bad_instances = map (\idx -> Container.find idx il) $ - sort $ nub $ concat $ - map (\ n -> (Node.slist n) ++ (Node.plist n)) bad_nodes + bad_instances = map (\idx -> Container.find idx il) . + sort . nub $ + concatMap (\ n -> Node.slist n ++ Node.plist n) bad_nodes in (bad_nodes, bad_instances) @@ -171,15 +140,16 @@ updateCStats cs node = cs_amem = x_amem, cs_acpu = x_acpu, cs_adsk = x_adsk, cs_mmem = x_mmem, cs_mdsk = x_mdsk, cs_mcpu = x_mcpu } = cs - inc_amem = (Node.f_mem node) - (Node.r_mem node) + inc_amem = Node.f_mem node - Node.r_mem node inc_amem' = if inc_amem > 0 then inc_amem else 0 - in CStats { cs_fmem = x_fmem + (Node.f_mem node) - , cs_fdsk = x_fdsk + (Node.f_dsk node) + inc_adsk = Node.availDisk node + in CStats { cs_fmem = x_fmem + Node.f_mem node + , cs_fdsk = x_fdsk + Node.f_dsk node , cs_amem = x_amem + inc_amem' - , cs_adsk = x_adsk + , cs_adsk = x_adsk + inc_adsk , cs_acpu = x_acpu , cs_mmem = max x_mmem inc_amem' - , cs_mdsk = max x_mdsk (Node.f_dsk node) + , cs_mdsk = max x_mdsk inc_adsk , cs_mcpu = x_mcpu } @@ -198,16 +168,16 @@ compDetailedCV nl = mem_cv = varianceCoeff mem_l dsk_cv = varianceCoeff dsk_l n1_l = length $ filter Node.failN1 nodes - n1_score = ((fromIntegral n1_l) / - (fromIntegral $ length nodes))::Double + n1_score = fromIntegral n1_l / + fromIntegral (length nodes)::Double res_l = map Node.p_rem nodes res_cv = varianceCoeff res_l offline_inst = sum . map (\n -> (length . Node.plist $ n) + (length . Node.slist $ n)) $ offline online_inst = sum . map (\n -> (length . Node.plist $ n) + (length . Node.slist $ n)) $ nodes - off_score = ((fromIntegral offline_inst) / - (fromIntegral $ online_inst + offline_inst))::Double + off_score = fromIntegral offline_inst / + fromIntegral (online_inst + offline_inst)::Double cpu_l = map Node.p_cpu nodes cpu_cv = varianceCoeff cpu_l in (mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv) @@ -223,260 +193,6 @@ compCV nl = getOnline :: Node.List -> [Node.Node] getOnline = filter (not . Node.offline) . Container.elems --- * hn1 functions - --- | Add an instance and return the new node and instance maps. -addInstance :: Node.List -> Instance.Instance -> - Node.Node -> Node.Node -> Maybe Node.List -addInstance nl idata pri sec = - let pdx = Node.idx pri - sdx = Node.idx sec - in do - pnode <- Node.addPri pri idata - snode <- Node.addSec sec idata pdx - new_nl <- return $ Container.addTwo sdx snode - pdx pnode nl - return new_nl - --- | Remove an instance and return the new node and instance maps. -removeInstance :: Node.List -> Instance.Instance -> Node.List -removeInstance nl idata = - let pnode = Instance.pnode idata - snode = Instance.snode idata - pn = Container.find pnode nl - sn = Container.find snode nl - new_nl = Container.addTwo - pnode (Node.removePri pn idata) - snode (Node.removeSec sn idata) nl in - new_nl - --- | Remove an instance and return the new node map. -removeInstances :: Node.List -> [Instance.Instance] -> Node.List -removeInstances = foldl' removeInstance - - -{-| Compute a new version of a cluster given a solution. - -This is not used for computing the solutions, but for applying a -(known-good) solution to the original cluster for final display. - -It first removes the relocated instances after which it places them on -their new nodes. - - -} -applySolution :: Node.List -> Instance.List -> [Placement] -> Node.List -applySolution nl il sol = - let odxes = map (\ (a, b, c, _) -> (Container.find a il, - Node.idx (Container.find b nl), - Node.idx (Container.find c nl)) - ) sol - idxes = (\ (x, _, _) -> x) (unzip3 odxes) - nc = removeInstances nl idxes - in - foldl' (\ nz (a, b, c) -> - let new_p = Container.find b nz - new_s = Container.find c nz in - fromJust (addInstance nz a new_p new_s) - ) nc odxes - - --- ** First phase functions - -{-| Given a list 1,2,3..n build a list of pairs [(1, [2..n]), (2, - [3..n]), ...] - --} -genParts :: [a] -> Int -> [(a, [a])] -genParts l count = - case l of - [] -> [] - x:xs -> - if length l < count then - [] - else - (x, xs) : (genParts xs count) - --- | Generates combinations of count items from the names list. -genNames :: Int -> [b] -> [[b]] -genNames count1 names1 = - let aux_fn count names current = - case count of - 0 -> [current] - _ -> - concatMap - (\ (x, xs) -> aux_fn (count - 1) xs (x:current)) - (genParts names count) - in - aux_fn count1 names1 [] - -{-| Checks if removal of instances results in N+1 pass. - -Note: the check removal cannot optimize by scanning only the affected -nodes, since the cluster is known to be not healthy; only the check -placement can make this shortcut. - --} -checkRemoval :: Node.List -> [Instance.Instance] -> Maybe Removal -checkRemoval nl victims = - let nx = removeInstances nl victims - failN1 = verifyN1Check (Container.elems nx) - in - if failN1 then - Nothing - else - Just $ Removal nx victims - - --- | Computes the removals list for a given depth. -computeRemovals :: Node.List - -> [Instance.Instance] - -> Int - -> [Maybe Removal] -computeRemovals nl bad_instances depth = - map (checkRemoval nl) $ genNames depth bad_instances - --- ** Second phase functions - --- | Single-node relocation cost. -nodeDelta :: Ndx -> Ndx -> Ndx -> Int -nodeDelta i p s = - if i == p || i == s then - 0 - else - 1 - --- | Compute best solution. --- --- This function compares two solutions, choosing the minimum valid --- solution. -compareSolutions :: Maybe Solution -> Maybe Solution -> Maybe Solution -compareSolutions a b = case (a, b) of - (Nothing, x) -> x - (x, Nothing) -> x - (x, y) -> min x y - --- | Check if a given delta is worse then an existing solution. -tooHighDelta :: Maybe Solution -> Int -> Int -> Bool -tooHighDelta sol new_delta max_delta = - if new_delta > max_delta && max_delta >=0 then - True - else - case sol of - Nothing -> False - Just (Solution old_delta _) -> old_delta <= new_delta - -{-| Check if placement of instances still keeps the cluster N+1 compliant. - - This is the workhorse of the allocation algorithm: given the - current node and instance maps, the list of instances to be - placed, and the current solution, this will return all possible - solution by recursing until all target instances are placed. - --} -checkPlacement :: Node.List -- ^ The current node list - -> [Instance.Instance] -- ^ List of instances still to place - -> [Placement] -- ^ Partial solution until now - -> Int -- ^ The delta of the partial solution - -> Maybe Solution -- ^ The previous solution - -> Int -- ^ Abort if the we go above this delta - -> Maybe Solution -- ^ The new solution -checkPlacement nl victims current current_delta prev_sol max_delta = - let target = head victims - opdx = Instance.pnode target - osdx = Instance.snode target - vtail = tail victims - have_tail = (length vtail) > 0 - nodes = Container.elems nl - iidx = Instance.idx target - in - foldl' - (\ accu_p pri -> - let - pri_idx = Node.idx pri - upri_delta = current_delta + nodeDelta pri_idx opdx osdx - new_pri = Node.addPri pri target - fail_delta1 = tooHighDelta accu_p upri_delta max_delta - in - if fail_delta1 || isNothing(new_pri) then accu_p - else let pri_nl = Container.add pri_idx (fromJust new_pri) nl in - foldl' - (\ accu sec -> - let - sec_idx = Node.idx sec - upd_delta = upri_delta + - nodeDelta sec_idx opdx osdx - fail_delta2 = tooHighDelta accu upd_delta max_delta - new_sec = Node.addSec sec target pri_idx - in - if sec_idx == pri_idx || fail_delta2 || - isNothing new_sec then accu - else let - nx = Container.add sec_idx (fromJust new_sec) pri_nl - upd_cv = compCV nx - plc = (iidx, pri_idx, sec_idx, upd_cv) - c2 = plc:current - result = - if have_tail then - checkPlacement nx vtail c2 upd_delta - accu max_delta - else - Just (Solution upd_delta c2) - in compareSolutions accu result - ) accu_p nodes - ) prev_sol nodes - -{-| Auxiliary function for solution computation. - -We write this in an explicit recursive fashion in order to control -early-abort in case we have met the min delta. We can't use foldr -instead of explicit recursion since we need the accumulator for the -abort decision. - --} -advanceSolution :: [Maybe Removal] -- ^ The removal to process - -> Int -- ^ Minimum delta parameter - -> Int -- ^ Maximum delta parameter - -> Maybe Solution -- ^ Current best solution - -> Maybe Solution -- ^ New best solution -advanceSolution [] _ _ sol = sol -advanceSolution (Nothing:xs) m n sol = advanceSolution xs m n sol -advanceSolution ((Just (Removal nx removed)):xs) min_d max_d prev_sol = - let new_sol = checkPlacement nx removed [] 0 prev_sol max_d - new_delta = solutionDelta $! new_sol - in - if new_delta >= 0 && new_delta <= min_d then - new_sol - else - advanceSolution xs min_d max_d new_sol - --- | Computes the placement solution. -solutionFromRemovals :: [Maybe Removal] -- ^ The list of (possible) removals - -> Int -- ^ Minimum delta parameter - -> Int -- ^ Maximum delta parameter - -> Maybe Solution -- ^ The best solution found -solutionFromRemovals removals min_delta max_delta = - advanceSolution removals min_delta max_delta Nothing - -{-| Computes the solution at the given depth. - -This is a wrapper over both computeRemovals and -solutionFromRemovals. In case we have no solution, we return Nothing. - --} -computeSolution :: Node.List -- ^ The original node data - -> [Instance.Instance] -- ^ The list of /bad/ instances - -> Int -- ^ The /depth/ of removals - -> Int -- ^ Maximum number of removals to process - -> Int -- ^ Minimum delta parameter - -> Int -- ^ Maximum delta parameter - -> Maybe Solution -- ^ The best solution found (or Nothing) -computeSolution nl bad_instances depth max_removals min_delta max_delta = - let - removals = computeRemovals nl bad_instances depth - removals' = capRemovals removals max_removals - in - solutionFromRemovals removals' min_delta max_delta - -- * hbal functions -- | Compute best table. Note that the ordering of the arguments is important. @@ -486,7 +202,7 @@ compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) = -- | Applies an instance move to a given node list and instance. applyMove :: Node.List -> Instance.Instance - -> IMove -> (Maybe Node.List, Instance.Instance, Ndx, Ndx) + -> IMove -> (OpResult Node.List, Instance.Instance, Ndx, Ndx) -- Failover (f) applyMove nl inst Failover = let old_pdx = Instance.pnode inst @@ -517,7 +233,7 @@ applyMove nl inst (ReplacePrimary new_pdx) = let tmp_s' = Node.removePri tmp_s inst new_p <- Node.addPri tgt_n inst new_s <- Node.addSec tmp_s' inst new_pdx - return $ Container.add new_pdx new_p $ + return . Container.add new_pdx new_p $ Container.addTwo old_pdx int_p old_sdx new_s nl in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx) @@ -545,7 +261,7 @@ applyMove nl inst (ReplaceAndFailover new_pdx) = new_nl = do -- Maybe monad new_p <- Node.addPri tgt_n inst new_s <- Node.addSec int_p inst new_pdx - return $ Container.add new_pdx new_p $ + return . Container.add new_pdx new_p $ Container.addTwo old_pdx new_s old_sdx int_s nl in (new_nl, Instance.setBoth inst new_pdx old_pdx, new_pdx, old_pdx) @@ -561,13 +277,13 @@ applyMove nl inst (FailoverAndReplace new_sdx) = new_nl = do -- Maybe monad new_p <- Node.addPri int_s inst new_s <- Node.addSec tgt_n inst old_sdx - return $ Container.add new_sdx new_s $ + return . Container.add new_sdx new_s $ Container.addTwo old_sdx new_p old_pdx int_p nl in (new_nl, Instance.setBoth inst old_sdx new_sdx, old_sdx, new_sdx) -- | Tries to allocate an instance on one given node. allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node - -> (Maybe Node.List, Instance.Instance) + -> (OpResult Node.List, Instance.Instance) allocateOnSingle nl inst p = let new_pdx = Node.idx p new_nl = Node.addPri p inst >>= \new_p -> @@ -576,7 +292,7 @@ allocateOnSingle nl inst p = -- | Tries to allocate an instance on a given pair of nodes. allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node - -> (Maybe Node.List, Instance.Instance) + -> (OpResult Node.List, Instance.Instance) allocateOnPair nl inst tgt_p tgt_s = let new_pdx = Node.idx tgt_p new_sdx = Node.idx tgt_s @@ -598,16 +314,16 @@ checkSingleStep ini_tbl target cur_tbl move = Table ini_nl ini_il _ ini_plc = ini_tbl (tmp_nl, new_inst, pri_idx, sec_idx) = applyMove ini_nl target move in - if isNothing tmp_nl then cur_tbl - else - let tgt_idx = Instance.idx target - upd_nl = fromJust tmp_nl - upd_cvar = compCV upd_nl - upd_il = Container.add tgt_idx new_inst ini_il - upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):ini_plc - upd_tbl = Table upd_nl upd_il upd_cvar upd_plc - in - compareTables cur_tbl upd_tbl + case tmp_nl of + OpFail _ -> cur_tbl + OpGood upd_nl -> + let tgt_idx = Instance.idx target + upd_cvar = compCV upd_nl + upd_il = Container.add tgt_idx new_inst ini_il + upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):ini_plc + upd_tbl = Table upd_nl upd_il upd_cvar upd_plc + in + compareTables cur_tbl upd_tbl -- | Given the status of the current secondary as a valid new node -- and the current candidate target node, @@ -690,7 +406,7 @@ tryAlloc nl _ inst 1 = in return sols tryAlloc _ _ _ reqn = fail $ "Unsupported number of alllocation \ - \destinations required (" ++ (show reqn) ++ + \destinations required (" ++ show reqn ++ "), only two supported" -- | Try to allocate an instance on the cluster. @@ -704,7 +420,7 @@ tryReloc :: (Monad m) => tryReloc nl il xid 1 ex_idx = let all_nodes = getOnline nl inst = Container.find xid il - ex_idx' = (Instance.pnode inst):ex_idx + ex_idx' = Instance.pnode inst:ex_idx valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes valid_idxes = map Node.idx valid_nodes sols1 = map (\x -> let (mnl, i, _, _) = @@ -714,7 +430,7 @@ tryReloc nl il xid 1 ex_idx = in return sols1 tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \ - \destinations required (" ++ (show reqn) ++ + \destinations required (" ++ show reqn ++ "), only one supported" -- * Formatting functions @@ -730,38 +446,29 @@ computeMoves :: String -- ^ The instance name -- either @/f/@ for failover or @/r:name/@ for replace -- secondary, while the command list holds gnt-instance -- commands (without that prefix), e.g \"@failover instance1@\" -computeMoves i a b c d = - if c == a then {- Same primary -} - if d == b then {- Same sec??! -} - ("-", []) +computeMoves i a b c d + -- same primary + | c == a = + if d == b + then {- Same sec??! -} ("-", []) else {- Change of secondary -} - (printf "r:%s" d, - [printf "replace-disks -n %s %s" d i]) - else - if c == b then {- Failover and ... -} - if d == a then {- that's all -} - ("f", [printf "migrate -f %s" i]) - else - (printf "f r:%s" d, - [printf "migrate -f %s" i, - printf "replace-disks -n %s %s" d i]) - else - if d == a then {- ... and keep primary as secondary -} - (printf "r:%s f" c, - [printf "replace-disks -n %s %s" c i, - printf "migrate -f %s" i]) - else - if d == b then {- ... keep same secondary -} - (printf "f r:%s f" c, - [printf "migrate -f %s" i, - printf "replace-disks -n %s %s" c i, - printf "migrate -f %s" i]) - - else {- Nothing in common -} - (printf "r:%s f r:%s" c d, - [printf "replace-disks -n %s %s" c i, - printf "migrate -f %s" i, - printf "replace-disks -n %s %s" d i]) + (printf "r:%s" d, [rep d]) + -- failover and ... + | c == b = + if d == a + then {- that's all -} ("f", [mig]) + else (printf "f r:%s" d, [mig, rep d]) + -- ... and keep primary as secondary + | d == a = + (printf "r:%s f" c, [rep c, mig]) + -- ... keep same secondary + | d == b = + (printf "f r:%s f" c, [mig, rep c, mig]) + -- nothing in common - + | otherwise = + (printf "r:%s f r:%s" c d, [rep c, mig, rep d]) + where mig = printf "migrate -f %s" i::String + rep n = printf "replace-disks -n %s %s" n i -- | Converts a placement to string format. printSolutionLine :: Node.List -- ^ The node list @@ -783,8 +490,8 @@ printSolutionLine nl il nmlen imlen plc pos = opri = Container.nameOf nl $ Instance.pnode inst osec = Container.nameOf nl $ Instance.snode inst (moves, cmds) = computeMoves inam opri osec npri nsec - ostr = (printf "%s:%s" opri osec)::String - nstr = (printf "%s:%s" npri nsec)::String + ostr = printf "%s:%s" opri osec::String + nstr = printf "%s:%s" npri nsec::String in (printf " %3d. %-*s %-*s => %-*s %.8f a=%s" pos imlen inam pmlen ostr @@ -794,13 +501,14 @@ printSolutionLine nl il nmlen imlen plc pos = -- | Given a list of commands, prefix them with @gnt-instance@ and -- also beautify the display a little. formatCmds :: [[String]] -> String -formatCmds cmd_strs = - unlines $ - concat $ map (\(a, b) -> - (printf "echo step %d" (a::Int)): - (printf "check"): - (map ("gnt-instance " ++) b)) $ - zip [1..] cmd_strs +formatCmds = + unlines . + concatMap (\(a, b) -> + printf "echo step %d" (a::Int): + printf "check": + map ("gnt-instance " ++) b + ) . + zip [1..] -- | Converts a solution to string format. printSolution :: Node.List @@ -812,8 +520,7 @@ printSolution nl il sol = nmlen = Container.maxNameLen nl imlen = Container.maxNameLen il in - unzip $ map (uncurry $ printSolutionLine nl il nmlen imlen) $ - zip sol [1..] + unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..] -- | Print the node list. printNodes :: Node.List -> String @@ -821,14 +528,14 @@ printNodes nl = let snl = sortBy (compare `on` Node.idx) (Container.elems nl) m_name = maximum . map (length . Node.name) $ snl helper = Node.list m_name - header = (printf - "%2s %-*s %5s %5s %5s %5s %5s %5s %5s %5s %4s %4s \ - \%3s %3s %6s %6s %5s" - " F" m_name "Name" - "t_mem" "n_mem" "i_mem" "x_mem" "f_mem" "r_mem" - "t_dsk" "f_dsk" "pcpu" "vcpu" - "pri" "sec" "p_fmem" "p_fdsk" "r_cpu")::String - in unlines $ (header:map helper snl) + header = printf + "%2s %-*s %5s %5s %5s %5s %5s %5s %5s %5s %4s %4s \ + \%3s %3s %6s %6s %5s" + " F" m_name "Name" + "t_mem" "n_mem" "i_mem" "x_mem" "f_mem" "r_mem" + "t_dsk" "f_dsk" "pcpu" "vcpu" + "pri" "sec" "p_fmem" "p_fdsk" "r_cpu"::String + in unlines (header:map helper snl) -- | Shows statistics for a given node list. printStats :: Node.List -> String