1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the "Main" module for the individual binaries.
8 module Ganeti.HTools.Cluster
19 -- * Generic functions
21 -- * First phase functions
23 -- * Second phase functions
30 -- * Balacing functions
37 import Data.Maybe (isNothing, fromJust)
38 import Text.Printf (printf)
42 import qualified Ganeti.HTools.Container as Container
43 import qualified Ganeti.HTools.Instance as Instance
44 import qualified Ganeti.HTools.Node as Node
45 import Ganeti.HTools.Types
46 import Ganeti.HTools.Utils
48 -- | A separate name for the cluster score type
51 -- | The description of an instance placement.
52 type Placement = (Int, Int, Int, Score)
54 {- | A cluster solution described as the solution delta and the list
58 data Solution = Solution Int [Placement]
59 deriving (Eq, Ord, Show)
61 -- | Returns the delta of a solution or -1 for Nothing
62 solutionDelta :: Maybe Solution -> Int
63 solutionDelta sol = case sol of
64 Just (Solution d _) -> d
68 data Removal = Removal NodeList [Instance.Instance]
70 -- | An instance move definition
71 data IMove = Failover -- ^ Failover the instance (f)
72 | ReplacePrimary Int -- ^ Replace primary (f, r:np, f)
73 | ReplaceSecondary Int -- ^ Replace secondary (r:ns)
74 | ReplaceAndFailover Int -- ^ Replace secondary, failover (r:np, f)
75 | FailoverAndReplace Int -- ^ Failover, replace secondary (f, r:ns)
78 -- | The complete state for the balancing solution
79 data Table = Table NodeList InstanceList Score [Placement]
84 -- | Cap the removal list if needed.
85 capRemovals :: [a] -> Int -> [a]
86 capRemovals removals max_removals =
87 if max_removals > 0 then
88 take max_removals removals
92 -- | Check if the given node list fails the N+1 check.
93 verifyN1Check :: [Node.Node] -> Bool
94 verifyN1Check nl = any Node.failN1 nl
96 -- | Verifies the N+1 status and return the affected nodes.
97 verifyN1 :: [Node.Node] -> [Node.Node]
98 verifyN1 nl = filter Node.failN1 nl
100 {-| Add an instance and return the new node and instance maps. -}
101 addInstance :: NodeList -> Instance.Instance ->
102 Node.Node -> Node.Node -> Maybe NodeList
103 addInstance nl idata pri sec =
104 let pdx = Node.idx pri
107 pnode <- Node.addPri pri idata
108 snode <- Node.addSec sec idata pdx
109 new_nl <- return $ Container.addTwo sdx snode
113 -- | Remove an instance and return the new node and instance maps.
114 removeInstance :: NodeList -> Instance.Instance -> NodeList
115 removeInstance nl idata =
116 let pnode = Instance.pnode idata
117 snode = Instance.snode idata
118 pn = Container.find pnode nl
119 sn = Container.find snode nl
120 new_nl = Container.addTwo
121 pnode (Node.removePri pn idata)
122 snode (Node.removeSec sn idata) nl in
125 -- | Remove an instance and return the new node map.
126 removeInstances :: NodeList -> [Instance.Instance] -> NodeList
127 removeInstances = foldl' removeInstance
129 -- | Compute the total free disk and memory in the cluster.
130 totalResources :: Container.Container Node.Node -> (Int, Int)
133 (\ (mem, dsk) node -> (mem + (Node.f_mem node),
134 dsk + (Node.f_dsk node)))
135 (0, 0) (Container.elems nl)
137 {- | Compute a new version of a cluster given a solution.
139 This is not used for computing the solutions, but for applying a
140 (known-good) solution to the original cluster for final display.
142 It first removes the relocated instances after which it places them on
146 applySolution :: NodeList -> InstanceList -> [Placement] -> NodeList
147 applySolution nl il sol =
148 let odxes = map (\ (a, b, c, _) -> (Container.find a il,
149 Node.idx (Container.find b nl),
150 Node.idx (Container.find c nl))
152 idxes = (\ (x, _, _) -> x) (unzip3 odxes)
153 nc = removeInstances nl idxes
155 foldl' (\ nz (a, b, c) ->
156 let new_p = Container.find b nz
157 new_s = Container.find c nz in
158 fromJust (addInstance nz a new_p new_s)
162 -- First phase functions
164 {- | Given a list 1,2,3..n build a list of pairs [(1, [2..n]), (2,
168 genParts :: [a] -> Int -> [(a, [a])]
173 if length l < count then
176 (x, xs) : (genParts xs count)
178 -- | Generates combinations of count items from the names list.
179 genNames :: Int -> [b] -> [[b]]
180 genNames count1 names1 =
181 let aux_fn count names current =
186 (\ (x, xs) -> aux_fn (count - 1) xs (x:current))
187 (genParts names count)
189 aux_fn count1 names1 []
191 {- | Computes the pair of bad nodes and instances.
193 The bad node list is computed via a simple 'verifyN1' check, and the
194 bad instance list is the list of primary and secondary instances of
198 computeBadItems :: NodeList -> InstanceList ->
199 ([Node.Node], [Instance.Instance])
200 computeBadItems nl il =
201 let bad_nodes = verifyN1 $ filter (not . Node.offline) $ Container.elems nl
202 bad_instances = map (\idx -> Container.find idx il) $
203 sort $ nub $ concat $
204 map (\ n -> (Node.slist n) ++ (Node.plist n)) bad_nodes
206 (bad_nodes, bad_instances)
209 {- | Checks if removal of instances results in N+1 pass.
211 Note: the check removal cannot optimize by scanning only the affected
212 nodes, since the cluster is known to be not healthy; only the check
213 placement can make this shortcut.
216 checkRemoval :: NodeList -> [Instance.Instance] -> Maybe Removal
217 checkRemoval nl victims =
218 let nx = removeInstances nl victims
219 failN1 = verifyN1Check (Container.elems nx)
224 Just $ Removal nx victims
227 -- | Computes the removals list for a given depth
228 computeRemovals :: NodeList
229 -> [Instance.Instance]
232 computeRemovals nl bad_instances depth =
233 map (checkRemoval nl) $ genNames depth bad_instances
235 -- Second phase functions
237 -- | Single-node relocation cost
238 nodeDelta :: Int -> Int -> Int -> Int
240 if i == p || i == s then
245 {-| Compute best solution.
247 This function compares two solutions, choosing the minimum valid
250 compareSolutions :: Maybe Solution -> Maybe Solution -> Maybe Solution
251 compareSolutions a b = case (a, b) of
256 -- | Compute best table. Note that the ordering of the arguments is important.
257 compareTables :: Table -> Table -> Table
258 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
259 if a_cv > b_cv then b else a
261 -- | Check if a given delta is worse then an existing solution.
262 tooHighDelta :: Maybe Solution -> Int -> Int -> Bool
263 tooHighDelta sol new_delta max_delta =
264 if new_delta > max_delta && max_delta >=0 then
269 Just (Solution old_delta _) -> old_delta <= new_delta
271 {-| Check if placement of instances still keeps the cluster N+1 compliant.
273 This is the workhorse of the allocation algorithm: given the
274 current node and instance maps, the list of instances to be
275 placed, and the current solution, this will return all possible
276 solution by recursing until all target instances are placed.
279 checkPlacement :: NodeList -- ^ The current node list
280 -> [Instance.Instance] -- ^ List of instances still to place
281 -> [Placement] -- ^ Partial solution until now
282 -> Int -- ^ The delta of the partial solution
283 -> Maybe Solution -- ^ The previous solution
284 -> Int -- ^ Abort if the we go above this delta
285 -> Maybe Solution -- ^ The new solution
286 checkPlacement nl victims current current_delta prev_sol max_delta =
287 let target = head victims
288 opdx = Instance.pnode target
289 osdx = Instance.snode target
291 have_tail = (length vtail) > 0
292 nodes = Container.elems nl
293 iidx = Instance.idx target
298 pri_idx = Node.idx pri
299 upri_delta = current_delta + nodeDelta pri_idx opdx osdx
300 new_pri = Node.addPri pri target
301 fail_delta1 = tooHighDelta accu_p upri_delta max_delta
303 if fail_delta1 || isNothing(new_pri) then accu_p
304 else let pri_nl = Container.add pri_idx (fromJust new_pri) nl in
308 sec_idx = Node.idx sec
309 upd_delta = upri_delta +
310 nodeDelta sec_idx opdx osdx
311 fail_delta2 = tooHighDelta accu upd_delta max_delta
312 new_sec = Node.addSec sec target pri_idx
314 if sec_idx == pri_idx || fail_delta2 ||
315 isNothing new_sec then accu
317 nx = Container.add sec_idx (fromJust new_sec) pri_nl
319 plc = (iidx, pri_idx, sec_idx, upd_cv)
323 checkPlacement nx vtail c2 upd_delta
326 Just (Solution upd_delta c2)
327 in compareSolutions accu result
332 applyMove :: NodeList -> Instance.Instance
333 -> IMove -> (Maybe NodeList, Instance.Instance, Int, Int)
335 applyMove nl inst Failover =
336 let old_pdx = Instance.pnode inst
337 old_sdx = Instance.snode inst
338 old_p = Container.find old_pdx nl
339 old_s = Container.find old_sdx nl
340 int_p = Node.removePri old_p inst
341 int_s = Node.removeSec old_s inst
342 new_nl = do -- Maybe monad
343 new_p <- Node.addPri int_s inst
344 new_s <- Node.addSec int_p inst old_sdx
345 return $ Container.addTwo old_pdx new_s old_sdx new_p nl
346 in (new_nl, Instance.setBoth inst old_sdx old_pdx, old_sdx, old_pdx)
348 -- Replace the primary (f:, r:np, f)
349 applyMove nl inst (ReplacePrimary new_pdx) =
350 let old_pdx = Instance.pnode inst
351 old_sdx = Instance.snode inst
352 old_p = Container.find old_pdx nl
353 old_s = Container.find old_sdx nl
354 tgt_n = Container.find new_pdx nl
355 int_p = Node.removePri old_p inst
356 int_s = Node.removeSec old_s inst
357 new_nl = do -- Maybe monad
358 new_p <- Node.addPri tgt_n inst
359 new_s <- Node.addSec int_s inst new_pdx
360 return $ Container.add new_pdx new_p $
361 Container.addTwo old_pdx int_p old_sdx new_s nl
362 in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx)
364 -- Replace the secondary (r:ns)
365 applyMove nl inst (ReplaceSecondary new_sdx) =
366 let old_pdx = Instance.pnode inst
367 old_sdx = Instance.snode inst
368 old_s = Container.find old_sdx nl
369 tgt_n = Container.find new_sdx nl
370 int_s = Node.removeSec old_s inst
371 new_nl = Node.addSec tgt_n inst old_pdx >>=
372 \new_s -> return $ Container.addTwo new_sdx
373 new_s old_sdx int_s nl
374 in (new_nl, Instance.setSec inst new_sdx, old_pdx, new_sdx)
376 -- Replace the secondary and failover (r:np, f)
377 applyMove nl inst (ReplaceAndFailover new_pdx) =
378 let old_pdx = Instance.pnode inst
379 old_sdx = Instance.snode inst
380 old_p = Container.find old_pdx nl
381 old_s = Container.find old_sdx nl
382 tgt_n = Container.find new_pdx nl
383 int_p = Node.removePri old_p inst
384 int_s = Node.removeSec old_s inst
385 new_nl = do -- Maybe monad
386 new_p <- Node.addPri tgt_n inst
387 new_s <- Node.addSec int_p inst new_pdx
388 return $ Container.add new_pdx new_p $
389 Container.addTwo old_pdx new_s old_sdx int_s nl
390 in (new_nl, Instance.setBoth inst new_pdx old_pdx, new_pdx, old_pdx)
392 -- Failver and replace the secondary (f, r:ns)
393 applyMove nl inst (FailoverAndReplace new_sdx) =
394 let old_pdx = Instance.pnode inst
395 old_sdx = Instance.snode inst
396 old_p = Container.find old_pdx nl
397 old_s = Container.find old_sdx nl
398 tgt_n = Container.find new_sdx nl
399 int_p = Node.removePri old_p inst
400 int_s = Node.removeSec old_s inst
401 new_nl = do -- Maybe monad
402 new_p <- Node.addPri int_s inst
403 new_s <- Node.addSec tgt_n inst old_sdx
404 return $ Container.add new_sdx new_s $
405 Container.addTwo old_sdx new_p old_pdx int_p nl
406 in (new_nl, Instance.setBoth inst old_sdx new_sdx, old_sdx, new_sdx)
408 checkSingleStep :: Table -- ^ The original table
409 -> Instance.Instance -- ^ The instance to move
410 -> Table -- ^ The current best table
411 -> IMove -- ^ The move to apply
412 -> Table -- ^ The final best table
413 checkSingleStep ini_tbl target cur_tbl move =
415 Table ini_nl ini_il _ ini_plc = ini_tbl
416 (tmp_nl, new_inst, pri_idx, sec_idx) = applyMove ini_nl target move
418 if isNothing tmp_nl then cur_tbl
420 let tgt_idx = Instance.idx target
421 upd_nl = fromJust tmp_nl
422 upd_cvar = compCV upd_nl
423 upd_il = Container.add tgt_idx new_inst ini_il
424 upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):ini_plc
425 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
427 compareTables cur_tbl upd_tbl
429 -- | Given the status of the current secondary as a valid new node
430 -- and the current candidate target node,
431 -- generate the possible moves for a instance.
432 possibleMoves :: Bool -> Int -> [IMove]
433 possibleMoves True tdx =
434 [ReplaceSecondary tdx,
435 ReplaceAndFailover tdx,
437 FailoverAndReplace tdx]
439 possibleMoves False tdx =
440 [ReplaceSecondary tdx,
441 ReplaceAndFailover tdx]
443 -- | Compute the best move for a given instance.
444 checkInstanceMove :: [Int] -- Allowed target node indices
445 -> Table -- Original table
446 -> Instance.Instance -- Instance to move
447 -> Table -- Best new table for this instance
448 checkInstanceMove nodes_idx ini_tbl target =
450 opdx = Instance.pnode target
451 osdx = Instance.snode target
452 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
453 use_secondary = elem osdx nodes_idx
454 aft_failover = if use_secondary -- if allowed to failover
455 then checkSingleStep ini_tbl target ini_tbl Failover
457 all_moves = concatMap (possibleMoves use_secondary) nodes
459 -- iterate over the possible nodes for this instance
460 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
462 -- | Compute the best next move.
463 checkMove :: [Int] -- ^ Allowed target node indices
464 -> Table -- ^ The current solution
465 -> [Instance.Instance] -- ^ List of instances still to move
466 -> Table -- ^ The new solution
467 checkMove nodes_idx ini_tbl victims =
468 let Table _ _ _ ini_plc = ini_tbl
469 -- iterate over all instances, computing the best move
473 if Instance.snode elem == Node.noSecondary then step_tbl
474 else compareTables step_tbl $
475 checkInstanceMove nodes_idx ini_tbl elem)
477 Table _ _ _ best_plc = best_tbl
479 if length best_plc == length ini_plc then -- no advancement
484 {- | Auxiliary function for solution computation.
486 We write this in an explicit recursive fashion in order to control
487 early-abort in case we have met the min delta. We can't use foldr
488 instead of explicit recursion since we need the accumulator for the
492 advanceSolution :: [Maybe Removal] -- ^ The removal to process
493 -> Int -- ^ Minimum delta parameter
494 -> Int -- ^ Maximum delta parameter
495 -> Maybe Solution -- ^ Current best solution
496 -> Maybe Solution -- ^ New best solution
497 advanceSolution [] _ _ sol = sol
498 advanceSolution (Nothing:xs) m n sol = advanceSolution xs m n sol
499 advanceSolution ((Just (Removal nx removed)):xs) min_d max_d prev_sol =
500 let new_sol = checkPlacement nx removed [] 0 prev_sol max_d
501 new_delta = solutionDelta $! new_sol
503 if new_delta >= 0 && new_delta <= min_d then
506 advanceSolution xs min_d max_d new_sol
508 -- | Computes the placement solution.
509 solutionFromRemovals :: [Maybe Removal] -- ^ The list of (possible) removals
510 -> Int -- ^ Minimum delta parameter
511 -> Int -- ^ Maximum delta parameter
512 -> Maybe Solution -- ^ The best solution found
513 solutionFromRemovals removals min_delta max_delta =
514 advanceSolution removals min_delta max_delta Nothing
516 {- | Computes the solution at the given depth.
518 This is a wrapper over both computeRemovals and
519 solutionFromRemovals. In case we have no solution, we return Nothing.
522 computeSolution :: NodeList -- ^ The original node data
523 -> [Instance.Instance] -- ^ The list of /bad/ instances
524 -> Int -- ^ The /depth/ of removals
525 -> Int -- ^ Maximum number of removals to process
526 -> Int -- ^ Minimum delta parameter
527 -> Int -- ^ Maximum delta parameter
528 -> Maybe Solution -- ^ The best solution found (or Nothing)
529 computeSolution nl bad_instances depth max_removals min_delta max_delta =
531 removals = computeRemovals nl bad_instances depth
532 removals' = capRemovals removals max_removals
534 solutionFromRemovals removals' min_delta max_delta
536 -- Solution display functions (pure)
538 -- | Given the original and final nodes, computes the relocation description.
539 computeMoves :: String -- ^ The instance name
540 -> String -- ^ Original primary
541 -> String -- ^ Original secondary
542 -> String -- ^ New primary
543 -> String -- ^ New secondary
544 -> (String, [String])
545 -- ^ Tuple of moves and commands list; moves is containing
546 -- either @/f/@ for failover or @/r:name/@ for replace
547 -- secondary, while the command list holds gnt-instance
548 -- commands (without that prefix), e.g \"@failover instance1@\"
549 computeMoves i a b c d =
550 if c == a then {- Same primary -}
551 if d == b then {- Same sec??! -}
553 else {- Change of secondary -}
555 [printf "replace-disks -n %s %s" d i])
557 if c == b then {- Failover and ... -}
558 if d == a then {- that's all -}
559 ("f", [printf "migrate -f %s" i])
562 [printf "migrate -f %s" i,
563 printf "replace-disks -n %s %s" d i])
565 if d == a then {- ... and keep primary as secondary -}
567 [printf "replace-disks -n %s %s" c i,
568 printf "migrate -f %s" i])
570 if d == b then {- ... keep same secondary -}
571 (printf "f r:%s f" c,
572 [printf "migrate -f %s" i,
573 printf "replace-disks -n %s %s" c i,
574 printf "migrate -f %s" i])
576 else {- Nothing in common -}
577 (printf "r:%s f r:%s" c d,
578 [printf "replace-disks -n %s %s" c i,
579 printf "migrate -f %s" i,
580 printf "replace-disks -n %s %s" d i])
582 {-| Converts a placement to string format -}
583 printSolutionLine :: InstanceList
590 -> (String, [String])
591 printSolutionLine il ktn kti nmlen imlen plc pos =
593 pmlen = (2*nmlen + 1)
595 inst = Container.find i il
596 inam = fromJust $ lookup (Instance.idx inst) kti
597 npri = fromJust $ lookup p ktn
598 nsec = fromJust $ lookup s ktn
599 opri = fromJust $ lookup (Instance.pnode inst) ktn
600 osec = fromJust $ lookup (Instance.snode inst) ktn
601 (moves, cmds) = computeMoves inam opri osec npri nsec
602 ostr = (printf "%s:%s" opri osec)::String
603 nstr = (printf "%s:%s" npri nsec)::String
605 (printf " %3d. %-*s %-*s => %-*s %.8f a=%s"
606 pos imlen inam pmlen ostr
610 formatCmds :: [[String]] -> String
611 formatCmds cmd_strs =
613 concat $ map (\(a, b) ->
614 (printf "echo step %d" (a::Int)):
616 (map ("gnt-instance " ++) b)) $
619 {-| Converts a solution to string format -}
620 printSolution :: InstanceList
624 -> ([String], [[String]])
625 printSolution il ktn kti sol =
627 mlen_fn = maximum . (map length) . snd . unzip
631 unzip $ map (uncurry $ printSolutionLine il ktn kti nmlen imlen) $
634 -- | Print the node list.
635 printNodes :: NameList -> NodeList -> String
637 let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
638 snl' = map (\ n -> ((fromJust $ lookup (Node.idx n) ktn), n)) snl
639 m_name = maximum . (map length) . fst . unzip $ snl'
640 helper = Node.list m_name
642 "%2s %-*s %5s %5s %5s %5s %5s %5s %5s %5s %3s %3s %7s %7s"
644 "t_mem" "n_mem" "i_mem" "x_mem" "f_mem" "r_mem"
646 "pri" "sec" "p_fmem" "p_fdsk"
647 in unlines $ (header:map (uncurry helper) snl')
649 -- | Compute the mem and disk covariance.
650 compDetailedCV :: NodeList -> (Double, Double, Double, Double, Double)
653 all_nodes = Container.elems nl
654 (offline, nodes) = partition Node.offline all_nodes
655 mem_l = map Node.p_mem nodes
656 dsk_l = map Node.p_dsk nodes
657 mem_cv = varianceCoeff mem_l
658 dsk_cv = varianceCoeff dsk_l
659 n1_l = length $ filter Node.failN1 nodes
660 n1_score = (fromIntegral n1_l) / (fromIntegral $ length nodes)
661 res_l = map Node.p_rem nodes
662 res_cv = varianceCoeff res_l
663 offline_inst = sum . map (\n -> (length . Node.plist $ n) +
664 (length . Node.slist $ n)) $ offline
665 online_inst = sum . map (\n -> (length . Node.plist $ n) +
666 (length . Node.slist $ n)) $ nodes
667 off_score = (fromIntegral offline_inst) /
668 (fromIntegral $ online_inst + offline_inst)
669 in (mem_cv, dsk_cv, n1_score, res_cv, off_score)
671 -- | Compute the 'total' variance.
672 compCV :: NodeList -> Double
674 let (mem_cv, dsk_cv, n1_score, res_cv, off_score) = compDetailedCV nl
675 in mem_cv + dsk_cv + n1_score + res_cv + off_score
677 printStats :: NodeList -> String
679 let (mem_cv, dsk_cv, n1_score, res_cv, off_score) = compDetailedCV nl
680 in printf "f_mem=%.8f, r_mem=%.8f, f_dsk=%.8f, n1=%.3f, uf=%.3f"
681 mem_cv res_cv dsk_cv n1_score off_score