1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the "Main" module for the individual binaries.
8 module Ganeti.HTools.Cluster
20 -- * Generic functions
22 -- * First phase functions
24 -- * Second phase functions
31 -- * Balacing functions
36 -- * IAllocator functions
42 import Data.Maybe (isNothing, fromJust)
43 import Text.Printf (printf)
47 import qualified Ganeti.HTools.Container as Container
48 import qualified Ganeti.HTools.Instance as Instance
49 import qualified Ganeti.HTools.Node as Node
50 import Ganeti.HTools.Types
51 import Ganeti.HTools.Utils
53 -- | A separate name for the cluster score type
56 -- | The description of an instance placement.
57 type Placement = (Int, Int, Int, Score)
59 {- | A cluster solution described as the solution delta and the list
63 data Solution = Solution Int [Placement]
64 deriving (Eq, Ord, Show)
66 -- | Returns the delta of a solution or -1 for Nothing
67 solutionDelta :: Maybe Solution -> Int
68 solutionDelta sol = case sol of
69 Just (Solution d _) -> d
73 data Removal = Removal NodeList [Instance.Instance]
75 -- | An instance move definition
76 data IMove = Failover -- ^ Failover the instance (f)
77 | ReplacePrimary Int -- ^ Replace primary (f, r:np, f)
78 | ReplaceSecondary Int -- ^ Replace secondary (r:ns)
79 | ReplaceAndFailover Int -- ^ Replace secondary, failover (r:np, f)
80 | FailoverAndReplace Int -- ^ Failover, replace secondary (f, r:ns)
83 -- | The complete state for the balancing solution
84 data Table = Table NodeList InstanceList Score [Placement]
89 -- | Cap the removal list if needed.
90 capRemovals :: [a] -> Int -> [a]
91 capRemovals removals max_removals =
92 if max_removals > 0 then
93 take max_removals removals
97 -- | Check if the given node list fails the N+1 check.
98 verifyN1Check :: [Node.Node] -> Bool
99 verifyN1Check nl = any Node.failN1 nl
101 -- | Verifies the N+1 status and return the affected nodes.
102 verifyN1 :: [Node.Node] -> [Node.Node]
103 verifyN1 nl = filter Node.failN1 nl
105 {-| Add an instance and return the new node and instance maps. -}
106 addInstance :: NodeList -> Instance.Instance ->
107 Node.Node -> Node.Node -> Maybe NodeList
108 addInstance nl idata pri sec =
109 let pdx = Node.idx pri
112 pnode <- Node.addPri pri idata
113 snode <- Node.addSec sec idata pdx
114 new_nl <- return $ Container.addTwo sdx snode
118 -- | Remove an instance and return the new node and instance maps.
119 removeInstance :: NodeList -> Instance.Instance -> NodeList
120 removeInstance nl idata =
121 let pnode = Instance.pnode idata
122 snode = Instance.snode idata
123 pn = Container.find pnode nl
124 sn = Container.find snode nl
125 new_nl = Container.addTwo
126 pnode (Node.removePri pn idata)
127 snode (Node.removeSec sn idata) nl in
130 -- | Remove an instance and return the new node map.
131 removeInstances :: NodeList -> [Instance.Instance] -> NodeList
132 removeInstances = foldl' removeInstance
134 -- | Compute the total free disk and memory in the cluster.
135 totalResources :: Container.Container Node.Node -> (Int, Int)
138 (\ (mem, dsk) node -> (mem + (Node.f_mem node),
139 dsk + (Node.f_dsk node)))
140 (0, 0) (Container.elems nl)
142 {- | Compute a new version of a cluster given a solution.
144 This is not used for computing the solutions, but for applying a
145 (known-good) solution to the original cluster for final display.
147 It first removes the relocated instances after which it places them on
151 applySolution :: NodeList -> InstanceList -> [Placement] -> NodeList
152 applySolution nl il sol =
153 let odxes = map (\ (a, b, c, _) -> (Container.find a il,
154 Node.idx (Container.find b nl),
155 Node.idx (Container.find c nl))
157 idxes = (\ (x, _, _) -> x) (unzip3 odxes)
158 nc = removeInstances nl idxes
160 foldl' (\ nz (a, b, c) ->
161 let new_p = Container.find b nz
162 new_s = Container.find c nz in
163 fromJust (addInstance nz a new_p new_s)
167 -- First phase functions
169 {- | Given a list 1,2,3..n build a list of pairs [(1, [2..n]), (2,
173 genParts :: [a] -> Int -> [(a, [a])]
178 if length l < count then
181 (x, xs) : (genParts xs count)
183 -- | Generates combinations of count items from the names list.
184 genNames :: Int -> [b] -> [[b]]
185 genNames count1 names1 =
186 let aux_fn count names current =
191 (\ (x, xs) -> aux_fn (count - 1) xs (x:current))
192 (genParts names count)
194 aux_fn count1 names1 []
196 {- | Computes the pair of bad nodes and instances.
198 The bad node list is computed via a simple 'verifyN1' check, and the
199 bad instance list is the list of primary and secondary instances of
203 computeBadItems :: NodeList -> InstanceList ->
204 ([Node.Node], [Instance.Instance])
205 computeBadItems nl il =
206 let bad_nodes = verifyN1 $ filter (not . Node.offline) $ Container.elems nl
207 bad_instances = map (\idx -> Container.find idx il) $
208 sort $ nub $ concat $
209 map (\ n -> (Node.slist n) ++ (Node.plist n)) bad_nodes
211 (bad_nodes, bad_instances)
214 {- | Checks if removal of instances results in N+1 pass.
216 Note: the check removal cannot optimize by scanning only the affected
217 nodes, since the cluster is known to be not healthy; only the check
218 placement can make this shortcut.
221 checkRemoval :: NodeList -> [Instance.Instance] -> Maybe Removal
222 checkRemoval nl victims =
223 let nx = removeInstances nl victims
224 failN1 = verifyN1Check (Container.elems nx)
229 Just $ Removal nx victims
232 -- | Computes the removals list for a given depth
233 computeRemovals :: NodeList
234 -> [Instance.Instance]
237 computeRemovals nl bad_instances depth =
238 map (checkRemoval nl) $ genNames depth bad_instances
240 -- Second phase functions
242 -- | Single-node relocation cost
243 nodeDelta :: Int -> Int -> Int -> Int
245 if i == p || i == s then
250 {-| Compute best solution.
252 This function compares two solutions, choosing the minimum valid
255 compareSolutions :: Maybe Solution -> Maybe Solution -> Maybe Solution
256 compareSolutions a b = case (a, b) of
261 -- | Compute best table. Note that the ordering of the arguments is important.
262 compareTables :: Table -> Table -> Table
263 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
264 if a_cv > b_cv then b else a
266 -- | Check if a given delta is worse then an existing solution.
267 tooHighDelta :: Maybe Solution -> Int -> Int -> Bool
268 tooHighDelta sol new_delta max_delta =
269 if new_delta > max_delta && max_delta >=0 then
274 Just (Solution old_delta _) -> old_delta <= new_delta
276 {-| Check if placement of instances still keeps the cluster N+1 compliant.
278 This is the workhorse of the allocation algorithm: given the
279 current node and instance maps, the list of instances to be
280 placed, and the current solution, this will return all possible
281 solution by recursing until all target instances are placed.
284 checkPlacement :: NodeList -- ^ The current node list
285 -> [Instance.Instance] -- ^ List of instances still to place
286 -> [Placement] -- ^ Partial solution until now
287 -> Int -- ^ The delta of the partial solution
288 -> Maybe Solution -- ^ The previous solution
289 -> Int -- ^ Abort if the we go above this delta
290 -> Maybe Solution -- ^ The new solution
291 checkPlacement nl victims current current_delta prev_sol max_delta =
292 let target = head victims
293 opdx = Instance.pnode target
294 osdx = Instance.snode target
296 have_tail = (length vtail) > 0
297 nodes = Container.elems nl
298 iidx = Instance.idx target
303 pri_idx = Node.idx pri
304 upri_delta = current_delta + nodeDelta pri_idx opdx osdx
305 new_pri = Node.addPri pri target
306 fail_delta1 = tooHighDelta accu_p upri_delta max_delta
308 if fail_delta1 || isNothing(new_pri) then accu_p
309 else let pri_nl = Container.add pri_idx (fromJust new_pri) nl in
313 sec_idx = Node.idx sec
314 upd_delta = upri_delta +
315 nodeDelta sec_idx opdx osdx
316 fail_delta2 = tooHighDelta accu upd_delta max_delta
317 new_sec = Node.addSec sec target pri_idx
319 if sec_idx == pri_idx || fail_delta2 ||
320 isNothing new_sec then accu
322 nx = Container.add sec_idx (fromJust new_sec) pri_nl
324 plc = (iidx, pri_idx, sec_idx, upd_cv)
328 checkPlacement nx vtail c2 upd_delta
331 Just (Solution upd_delta c2)
332 in compareSolutions accu result
337 applyMove :: NodeList -> Instance.Instance
338 -> IMove -> (Maybe NodeList, Instance.Instance, Int, Int)
340 applyMove nl inst Failover =
341 let old_pdx = Instance.pnode inst
342 old_sdx = Instance.snode inst
343 old_p = Container.find old_pdx nl
344 old_s = Container.find old_sdx nl
345 int_p = Node.removePri old_p inst
346 int_s = Node.removeSec old_s inst
347 new_nl = do -- Maybe monad
348 new_p <- Node.addPri int_s inst
349 new_s <- Node.addSec int_p inst old_sdx
350 return $ Container.addTwo old_pdx new_s old_sdx new_p nl
351 in (new_nl, Instance.setBoth inst old_sdx old_pdx, old_sdx, old_pdx)
353 -- Replace the primary (f:, r:np, f)
354 applyMove nl inst (ReplacePrimary new_pdx) =
355 let old_pdx = Instance.pnode inst
356 old_sdx = Instance.snode inst
357 old_p = Container.find old_pdx nl
358 old_s = Container.find old_sdx nl
359 tgt_n = Container.find new_pdx nl
360 int_p = Node.removePri old_p inst
361 int_s = Node.removeSec old_s inst
362 new_nl = do -- Maybe monad
363 new_p <- Node.addPri tgt_n inst
364 new_s <- Node.addSec int_s inst new_pdx
365 return $ Container.add new_pdx new_p $
366 Container.addTwo old_pdx int_p old_sdx new_s nl
367 in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx)
369 -- Replace the secondary (r:ns)
370 applyMove nl inst (ReplaceSecondary new_sdx) =
371 let old_pdx = Instance.pnode inst
372 old_sdx = Instance.snode inst
373 old_s = Container.find old_sdx nl
374 tgt_n = Container.find new_sdx nl
375 int_s = Node.removeSec old_s inst
376 new_nl = Node.addSec tgt_n inst old_pdx >>=
377 \new_s -> return $ Container.addTwo new_sdx
378 new_s old_sdx int_s nl
379 in (new_nl, Instance.setSec inst new_sdx, old_pdx, new_sdx)
381 -- Replace the secondary and failover (r:np, f)
382 applyMove nl inst (ReplaceAndFailover new_pdx) =
383 let old_pdx = Instance.pnode inst
384 old_sdx = Instance.snode inst
385 old_p = Container.find old_pdx nl
386 old_s = Container.find old_sdx nl
387 tgt_n = Container.find new_pdx nl
388 int_p = Node.removePri old_p inst
389 int_s = Node.removeSec old_s inst
390 new_nl = do -- Maybe monad
391 new_p <- Node.addPri tgt_n inst
392 new_s <- Node.addSec int_p inst new_pdx
393 return $ Container.add new_pdx new_p $
394 Container.addTwo old_pdx new_s old_sdx int_s nl
395 in (new_nl, Instance.setBoth inst new_pdx old_pdx, new_pdx, old_pdx)
397 -- Failver and replace the secondary (f, r:ns)
398 applyMove nl inst (FailoverAndReplace new_sdx) =
399 let old_pdx = Instance.pnode inst
400 old_sdx = Instance.snode inst
401 old_p = Container.find old_pdx nl
402 old_s = Container.find old_sdx nl
403 tgt_n = Container.find new_sdx nl
404 int_p = Node.removePri old_p inst
405 int_s = Node.removeSec old_s inst
406 new_nl = do -- Maybe monad
407 new_p <- Node.addPri int_s inst
408 new_s <- Node.addSec tgt_n inst old_sdx
409 return $ Container.add new_sdx new_s $
410 Container.addTwo old_sdx new_p old_pdx int_p nl
411 in (new_nl, Instance.setBoth inst old_sdx new_sdx, old_sdx, new_sdx)
413 allocateOnSingle :: NodeList -> Instance.Instance -> Node.Node
414 -> (Maybe NodeList, Instance.Instance)
415 allocateOnSingle nl inst p =
416 let new_pdx = Node.idx p
417 new_nl = Node.addPri p inst >>= \new_p ->
418 return $ Container.add new_pdx new_p nl
419 in (new_nl, Instance.setBoth inst new_pdx Node.noSecondary)
421 allocateOnPair :: NodeList -> Instance.Instance -> Node.Node -> Node.Node
422 -> (Maybe NodeList, Instance.Instance)
423 allocateOnPair nl inst tgt_p tgt_s =
424 let new_pdx = Node.idx tgt_p
425 new_sdx = Node.idx tgt_s
426 new_nl = do -- Maybe monad
427 new_p <- Node.addPri tgt_p inst
428 new_s <- Node.addSec tgt_s inst new_pdx
429 return $ Container.addTwo new_pdx new_p new_sdx new_s nl
430 in (new_nl, Instance.setBoth inst new_pdx new_sdx)
432 checkSingleStep :: Table -- ^ The original table
433 -> Instance.Instance -- ^ The instance to move
434 -> Table -- ^ The current best table
435 -> IMove -- ^ The move to apply
436 -> Table -- ^ The final best table
437 checkSingleStep ini_tbl target cur_tbl move =
439 Table ini_nl ini_il _ ini_plc = ini_tbl
440 (tmp_nl, new_inst, pri_idx, sec_idx) = applyMove ini_nl target move
442 if isNothing tmp_nl then cur_tbl
444 let tgt_idx = Instance.idx target
445 upd_nl = fromJust tmp_nl
446 upd_cvar = compCV upd_nl
447 upd_il = Container.add tgt_idx new_inst ini_il
448 upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):ini_plc
449 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
451 compareTables cur_tbl upd_tbl
453 -- | Given the status of the current secondary as a valid new node
454 -- and the current candidate target node,
455 -- generate the possible moves for a instance.
456 possibleMoves :: Bool -> Int -> [IMove]
457 possibleMoves True tdx =
458 [ReplaceSecondary tdx,
459 ReplaceAndFailover tdx,
461 FailoverAndReplace tdx]
463 possibleMoves False tdx =
464 [ReplaceSecondary tdx,
465 ReplaceAndFailover tdx]
467 -- | Compute the best move for a given instance.
468 checkInstanceMove :: [Int] -- Allowed target node indices
469 -> Table -- Original table
470 -> Instance.Instance -- Instance to move
471 -> Table -- Best new table for this instance
472 checkInstanceMove nodes_idx ini_tbl target =
474 opdx = Instance.pnode target
475 osdx = Instance.snode target
476 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
477 use_secondary = elem osdx nodes_idx
478 aft_failover = if use_secondary -- if allowed to failover
479 then checkSingleStep ini_tbl target ini_tbl Failover
481 all_moves = concatMap (possibleMoves use_secondary) nodes
483 -- iterate over the possible nodes for this instance
484 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
486 -- | Compute the best next move.
487 checkMove :: [Int] -- ^ Allowed target node indices
488 -> Table -- ^ The current solution
489 -> [Instance.Instance] -- ^ List of instances still to move
490 -> Table -- ^ The new solution
491 checkMove nodes_idx ini_tbl victims =
492 let Table _ _ _ ini_plc = ini_tbl
493 -- iterate over all instances, computing the best move
497 if Instance.snode elem == Node.noSecondary then step_tbl
498 else compareTables step_tbl $
499 checkInstanceMove nodes_idx ini_tbl elem)
501 Table _ _ _ best_plc = best_tbl
503 if length best_plc == length ini_plc then -- no advancement
508 {- | Auxiliary function for solution computation.
510 We write this in an explicit recursive fashion in order to control
511 early-abort in case we have met the min delta. We can't use foldr
512 instead of explicit recursion since we need the accumulator for the
516 advanceSolution :: [Maybe Removal] -- ^ The removal to process
517 -> Int -- ^ Minimum delta parameter
518 -> Int -- ^ Maximum delta parameter
519 -> Maybe Solution -- ^ Current best solution
520 -> Maybe Solution -- ^ New best solution
521 advanceSolution [] _ _ sol = sol
522 advanceSolution (Nothing:xs) m n sol = advanceSolution xs m n sol
523 advanceSolution ((Just (Removal nx removed)):xs) min_d max_d prev_sol =
524 let new_sol = checkPlacement nx removed [] 0 prev_sol max_d
525 new_delta = solutionDelta $! new_sol
527 if new_delta >= 0 && new_delta <= min_d then
530 advanceSolution xs min_d max_d new_sol
532 -- | Computes the placement solution.
533 solutionFromRemovals :: [Maybe Removal] -- ^ The list of (possible) removals
534 -> Int -- ^ Minimum delta parameter
535 -> Int -- ^ Maximum delta parameter
536 -> Maybe Solution -- ^ The best solution found
537 solutionFromRemovals removals min_delta max_delta =
538 advanceSolution removals min_delta max_delta Nothing
540 {- | Computes the solution at the given depth.
542 This is a wrapper over both computeRemovals and
543 solutionFromRemovals. In case we have no solution, we return Nothing.
546 computeSolution :: NodeList -- ^ The original node data
547 -> [Instance.Instance] -- ^ The list of /bad/ instances
548 -> Int -- ^ The /depth/ of removals
549 -> Int -- ^ Maximum number of removals to process
550 -> Int -- ^ Minimum delta parameter
551 -> Int -- ^ Maximum delta parameter
552 -> Maybe Solution -- ^ The best solution found (or Nothing)
553 computeSolution nl bad_instances depth max_removals min_delta max_delta =
555 removals = computeRemovals nl bad_instances depth
556 removals' = capRemovals removals max_removals
558 solutionFromRemovals removals' min_delta max_delta
560 -- Solution display functions (pure)
562 -- | Given the original and final nodes, computes the relocation description.
563 computeMoves :: String -- ^ The instance name
564 -> String -- ^ Original primary
565 -> String -- ^ Original secondary
566 -> String -- ^ New primary
567 -> String -- ^ New secondary
568 -> (String, [String])
569 -- ^ Tuple of moves and commands list; moves is containing
570 -- either @/f/@ for failover or @/r:name/@ for replace
571 -- secondary, while the command list holds gnt-instance
572 -- commands (without that prefix), e.g \"@failover instance1@\"
573 computeMoves i a b c d =
574 if c == a then {- Same primary -}
575 if d == b then {- Same sec??! -}
577 else {- Change of secondary -}
579 [printf "replace-disks -n %s %s" d i])
581 if c == b then {- Failover and ... -}
582 if d == a then {- that's all -}
583 ("f", [printf "migrate -f %s" i])
586 [printf "migrate -f %s" i,
587 printf "replace-disks -n %s %s" d i])
589 if d == a then {- ... and keep primary as secondary -}
591 [printf "replace-disks -n %s %s" c i,
592 printf "migrate -f %s" i])
594 if d == b then {- ... keep same secondary -}
595 (printf "f r:%s f" c,
596 [printf "migrate -f %s" i,
597 printf "replace-disks -n %s %s" c i,
598 printf "migrate -f %s" i])
600 else {- Nothing in common -}
601 (printf "r:%s f r:%s" c d,
602 [printf "replace-disks -n %s %s" c i,
603 printf "migrate -f %s" i,
604 printf "replace-disks -n %s %s" d i])
606 {-| Converts a placement to string format -}
607 printSolutionLine :: NodeList
613 -> (String, [String])
614 printSolutionLine nl il nmlen imlen plc pos =
616 pmlen = (2*nmlen + 1)
618 inst = Container.find i il
619 inam = Instance.name inst
622 opri = cNameOf nl $ Instance.pnode inst
623 osec = cNameOf nl $ Instance.snode inst
624 (moves, cmds) = computeMoves inam opri osec npri nsec
625 ostr = (printf "%s:%s" opri osec)::String
626 nstr = (printf "%s:%s" npri nsec)::String
628 (printf " %3d. %-*s %-*s => %-*s %.8f a=%s"
629 pos imlen inam pmlen ostr
633 formatCmds :: [[String]] -> String
634 formatCmds cmd_strs =
636 concat $ map (\(a, b) ->
637 (printf "echo step %d" (a::Int)):
639 (map ("gnt-instance " ++) b)) $
642 {-| Converts a solution to string format -}
643 printSolution :: NodeList
646 -> ([String], [[String]])
647 printSolution nl il sol =
649 nmlen = cMaxNamelen nl
650 imlen = cMaxNamelen il
652 unzip $ map (uncurry $ printSolutionLine nl il nmlen imlen) $
655 -- | Print the node list.
656 printNodes :: NodeList -> String
658 let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
659 m_name = maximum . map (length . Node.name) $ snl
660 helper = Node.list m_name
662 "%2s %-*s %5s %5s %5s %5s %5s %5s %5s %5s %3s %3s %7s %7s"
664 "t_mem" "n_mem" "i_mem" "x_mem" "f_mem" "r_mem"
666 "pri" "sec" "p_fmem" "p_fdsk"
667 in unlines $ (header:map helper snl)
669 -- | Compute the mem and disk covariance.
670 compDetailedCV :: NodeList -> (Double, Double, Double, Double, Double)
673 all_nodes = Container.elems nl
674 (offline, nodes) = partition Node.offline all_nodes
675 mem_l = map Node.p_mem nodes
676 dsk_l = map Node.p_dsk nodes
677 mem_cv = varianceCoeff mem_l
678 dsk_cv = varianceCoeff dsk_l
679 n1_l = length $ filter Node.failN1 nodes
680 n1_score = (fromIntegral n1_l) / (fromIntegral $ length nodes)
681 res_l = map Node.p_rem nodes
682 res_cv = varianceCoeff res_l
683 offline_inst = sum . map (\n -> (length . Node.plist $ n) +
684 (length . Node.slist $ n)) $ offline
685 online_inst = sum . map (\n -> (length . Node.plist $ n) +
686 (length . Node.slist $ n)) $ nodes
687 off_score = (fromIntegral offline_inst) /
688 (fromIntegral $ online_inst + offline_inst)
689 in (mem_cv, dsk_cv, n1_score, res_cv, off_score)
691 -- | Compute the 'total' variance.
692 compCV :: NodeList -> Double
694 let (mem_cv, dsk_cv, n1_score, res_cv, off_score) = compDetailedCV nl
695 in mem_cv + dsk_cv + n1_score + res_cv + off_score
697 printStats :: NodeList -> String
699 let (mem_cv, dsk_cv, n1_score, res_cv, off_score) = compDetailedCV nl
700 in printf "f_mem=%.8f, r_mem=%.8f, f_dsk=%.8f, n1=%.3f, uf=%.3f"
701 mem_cv res_cv dsk_cv n1_score off_score