1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the "Main" module for the individual binaries.
8 module Ganeti.HTools.Cluster
17 -- * Generic functions
19 -- * First phase functions
21 -- * Second phase functions
28 -- * Balacing functions
33 -- * IAllocator functions
39 import Data.Maybe (isNothing, fromJust)
40 import Text.Printf (printf)
44 import qualified Ganeti.HTools.Container as Container
45 import qualified Ganeti.HTools.Instance as Instance
46 import qualified Ganeti.HTools.Node as Node
47 import Ganeti.HTools.Types
48 import Ganeti.HTools.Utils
50 -- | A separate name for the cluster score type
53 -- | The description of an instance placement.
54 type Placement = (Idx, Ndx, Ndx, Score)
56 {- | A cluster solution described as the solution delta and the list
60 data Solution = Solution Int [Placement]
61 deriving (Eq, Ord, Show)
63 -- | Returns the delta of a solution or -1 for Nothing
64 solutionDelta :: Maybe Solution -> Int
65 solutionDelta sol = case sol of
66 Just (Solution d _) -> d
70 data Removal = Removal Node.List [Instance.Instance]
72 -- | An instance move definition
73 data IMove = Failover -- ^ Failover the instance (f)
74 | ReplacePrimary Ndx -- ^ Replace primary (f, r:np, f)
75 | ReplaceSecondary Ndx -- ^ Replace secondary (r:ns)
76 | ReplaceAndFailover Ndx -- ^ Replace secondary, failover (r:np, f)
77 | FailoverAndReplace Ndx -- ^ Failover, replace secondary (f, r:ns)
80 -- | The complete state for the balancing solution
81 data Table = Table Node.List Instance.List Score [Placement]
86 -- | Cap the removal list if needed.
87 capRemovals :: [a] -> Int -> [a]
88 capRemovals removals max_removals =
89 if max_removals > 0 then
90 take max_removals removals
94 -- | Check if the given node list fails the N+1 check.
95 verifyN1Check :: [Node.Node] -> Bool
96 verifyN1Check nl = any Node.failN1 nl
98 -- | Verifies the N+1 status and return the affected nodes.
99 verifyN1 :: [Node.Node] -> [Node.Node]
100 verifyN1 nl = filter Node.failN1 nl
102 {-| Add an instance and return the new node and instance maps. -}
103 addInstance :: Node.List -> Instance.Instance ->
104 Node.Node -> Node.Node -> Maybe Node.List
105 addInstance nl idata pri sec =
106 let pdx = Node.idx pri
109 pnode <- Node.addPri pri idata
110 snode <- Node.addSec sec idata pdx
111 new_nl <- return $ Container.addTwo sdx snode
115 -- | Remove an instance and return the new node and instance maps.
116 removeInstance :: Node.List -> Instance.Instance -> Node.List
117 removeInstance nl idata =
118 let pnode = Instance.pnode idata
119 snode = Instance.snode idata
120 pn = Container.find pnode nl
121 sn = Container.find snode nl
122 new_nl = Container.addTwo
123 pnode (Node.removePri pn idata)
124 snode (Node.removeSec sn idata) nl in
127 -- | Remove an instance and return the new node map.
128 removeInstances :: Node.List -> [Instance.Instance] -> Node.List
129 removeInstances = foldl' removeInstance
131 -- | Compute the total free disk and memory in the cluster.
132 totalResources :: Container.Container Node.Node -> (Int, Int)
135 (\ (mem, dsk) node -> (mem + (Node.f_mem node),
136 dsk + (Node.f_dsk node)))
137 (0, 0) (Container.elems nl)
139 {- | Compute a new version of a cluster given a solution.
141 This is not used for computing the solutions, but for applying a
142 (known-good) solution to the original cluster for final display.
144 It first removes the relocated instances after which it places them on
148 applySolution :: Node.List -> Instance.List -> [Placement] -> Node.List
149 applySolution nl il sol =
150 let odxes = map (\ (a, b, c, _) -> (Container.find a il,
151 Node.idx (Container.find b nl),
152 Node.idx (Container.find c nl))
154 idxes = (\ (x, _, _) -> x) (unzip3 odxes)
155 nc = removeInstances nl idxes
157 foldl' (\ nz (a, b, c) ->
158 let new_p = Container.find b nz
159 new_s = Container.find c nz in
160 fromJust (addInstance nz a new_p new_s)
164 -- First phase functions
166 {- | Given a list 1,2,3..n build a list of pairs [(1, [2..n]), (2,
170 genParts :: [a] -> Int -> [(a, [a])]
175 if length l < count then
178 (x, xs) : (genParts xs count)
180 -- | Generates combinations of count items from the names list.
181 genNames :: Int -> [b] -> [[b]]
182 genNames count1 names1 =
183 let aux_fn count names current =
188 (\ (x, xs) -> aux_fn (count - 1) xs (x:current))
189 (genParts names count)
191 aux_fn count1 names1 []
193 {- | Computes the pair of bad nodes and instances.
195 The bad node list is computed via a simple 'verifyN1' check, and the
196 bad instance list is the list of primary and secondary instances of
200 computeBadItems :: Node.List -> Instance.List ->
201 ([Node.Node], [Instance.Instance])
202 computeBadItems nl il =
203 let bad_nodes = verifyN1 $ filter (not . Node.offline) $ Container.elems nl
204 bad_instances = map (\idx -> Container.find idx il) $
205 sort $ nub $ concat $
206 map (\ n -> (Node.slist n) ++ (Node.plist n)) bad_nodes
208 (bad_nodes, bad_instances)
211 {- | Checks if removal of instances results in N+1 pass.
213 Note: the check removal cannot optimize by scanning only the affected
214 nodes, since the cluster is known to be not healthy; only the check
215 placement can make this shortcut.
218 checkRemoval :: Node.List -> [Instance.Instance] -> Maybe Removal
219 checkRemoval nl victims =
220 let nx = removeInstances nl victims
221 failN1 = verifyN1Check (Container.elems nx)
226 Just $ Removal nx victims
229 -- | Computes the removals list for a given depth
230 computeRemovals :: Node.List
231 -> [Instance.Instance]
234 computeRemovals nl bad_instances depth =
235 map (checkRemoval nl) $ genNames depth bad_instances
237 -- Second phase functions
239 -- | Single-node relocation cost
240 nodeDelta :: Ndx -> Ndx -> Ndx -> Int
242 if i == p || i == s then
247 {-| Compute best solution.
249 This function compares two solutions, choosing the minimum valid
252 compareSolutions :: Maybe Solution -> Maybe Solution -> Maybe Solution
253 compareSolutions a b = case (a, b) of
258 -- | Compute best table. Note that the ordering of the arguments is important.
259 compareTables :: Table -> Table -> Table
260 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
261 if a_cv > b_cv then b else a
263 -- | Check if a given delta is worse then an existing solution.
264 tooHighDelta :: Maybe Solution -> Int -> Int -> Bool
265 tooHighDelta sol new_delta max_delta =
266 if new_delta > max_delta && max_delta >=0 then
271 Just (Solution old_delta _) -> old_delta <= new_delta
273 {-| Check if placement of instances still keeps the cluster N+1 compliant.
275 This is the workhorse of the allocation algorithm: given the
276 current node and instance maps, the list of instances to be
277 placed, and the current solution, this will return all possible
278 solution by recursing until all target instances are placed.
281 checkPlacement :: Node.List -- ^ The current node list
282 -> [Instance.Instance] -- ^ List of instances still to place
283 -> [Placement] -- ^ Partial solution until now
284 -> Int -- ^ The delta of the partial solution
285 -> Maybe Solution -- ^ The previous solution
286 -> Int -- ^ Abort if the we go above this delta
287 -> Maybe Solution -- ^ The new solution
288 checkPlacement nl victims current current_delta prev_sol max_delta =
289 let target = head victims
290 opdx = Instance.pnode target
291 osdx = Instance.snode target
293 have_tail = (length vtail) > 0
294 nodes = Container.elems nl
295 iidx = Instance.idx target
300 pri_idx = Node.idx pri
301 upri_delta = current_delta + nodeDelta pri_idx opdx osdx
302 new_pri = Node.addPri pri target
303 fail_delta1 = tooHighDelta accu_p upri_delta max_delta
305 if fail_delta1 || isNothing(new_pri) then accu_p
306 else let pri_nl = Container.add pri_idx (fromJust new_pri) nl in
310 sec_idx = Node.idx sec
311 upd_delta = upri_delta +
312 nodeDelta sec_idx opdx osdx
313 fail_delta2 = tooHighDelta accu upd_delta max_delta
314 new_sec = Node.addSec sec target pri_idx
316 if sec_idx == pri_idx || fail_delta2 ||
317 isNothing new_sec then accu
319 nx = Container.add sec_idx (fromJust new_sec) pri_nl
321 plc = (iidx, pri_idx, sec_idx, upd_cv)
325 checkPlacement nx vtail c2 upd_delta
328 Just (Solution upd_delta c2)
329 in compareSolutions accu result
334 applyMove :: Node.List -> Instance.Instance
335 -> IMove -> (Maybe Node.List, Instance.Instance, Ndx, Ndx)
337 applyMove nl inst Failover =
338 let old_pdx = Instance.pnode inst
339 old_sdx = Instance.snode inst
340 old_p = Container.find old_pdx nl
341 old_s = Container.find old_sdx nl
342 int_p = Node.removePri old_p inst
343 int_s = Node.removeSec old_s inst
344 new_nl = do -- Maybe monad
345 new_p <- Node.addPri int_s inst
346 new_s <- Node.addSec int_p inst old_sdx
347 return $ Container.addTwo old_pdx new_s old_sdx new_p nl
348 in (new_nl, Instance.setBoth inst old_sdx old_pdx, old_sdx, old_pdx)
350 -- Replace the primary (f:, r:np, f)
351 applyMove nl inst (ReplacePrimary new_pdx) =
352 let old_pdx = Instance.pnode inst
353 old_sdx = Instance.snode inst
354 old_p = Container.find old_pdx nl
355 old_s = Container.find old_sdx nl
356 tgt_n = Container.find new_pdx nl
357 int_p = Node.removePri old_p inst
358 int_s = Node.removeSec old_s inst
359 new_nl = do -- Maybe monad
360 new_p <- Node.addPri tgt_n inst
361 new_s <- Node.addSec int_s inst new_pdx
362 return $ Container.add new_pdx new_p $
363 Container.addTwo old_pdx int_p old_sdx new_s nl
364 in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx)
366 -- Replace the secondary (r:ns)
367 applyMove nl inst (ReplaceSecondary new_sdx) =
368 let old_pdx = Instance.pnode inst
369 old_sdx = Instance.snode inst
370 old_s = Container.find old_sdx nl
371 tgt_n = Container.find new_sdx nl
372 int_s = Node.removeSec old_s inst
373 new_nl = Node.addSec tgt_n inst old_pdx >>=
374 \new_s -> return $ Container.addTwo new_sdx
375 new_s old_sdx int_s nl
376 in (new_nl, Instance.setSec inst new_sdx, old_pdx, new_sdx)
378 -- Replace the secondary and failover (r:np, f)
379 applyMove nl inst (ReplaceAndFailover new_pdx) =
380 let old_pdx = Instance.pnode inst
381 old_sdx = Instance.snode inst
382 old_p = Container.find old_pdx nl
383 old_s = Container.find old_sdx nl
384 tgt_n = Container.find new_pdx nl
385 int_p = Node.removePri old_p inst
386 int_s = Node.removeSec old_s inst
387 new_nl = do -- Maybe monad
388 new_p <- Node.addPri tgt_n inst
389 new_s <- Node.addSec int_p inst new_pdx
390 return $ Container.add new_pdx new_p $
391 Container.addTwo old_pdx new_s old_sdx int_s nl
392 in (new_nl, Instance.setBoth inst new_pdx old_pdx, new_pdx, old_pdx)
394 -- Failver and replace the secondary (f, r:ns)
395 applyMove nl inst (FailoverAndReplace new_sdx) =
396 let old_pdx = Instance.pnode inst
397 old_sdx = Instance.snode inst
398 old_p = Container.find old_pdx nl
399 old_s = Container.find old_sdx nl
400 tgt_n = Container.find new_sdx nl
401 int_p = Node.removePri old_p inst
402 int_s = Node.removeSec old_s inst
403 new_nl = do -- Maybe monad
404 new_p <- Node.addPri int_s inst
405 new_s <- Node.addSec tgt_n inst old_sdx
406 return $ Container.add new_sdx new_s $
407 Container.addTwo old_sdx new_p old_pdx int_p nl
408 in (new_nl, Instance.setBoth inst old_sdx new_sdx, old_sdx, new_sdx)
410 allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
411 -> (Maybe Node.List, Instance.Instance)
412 allocateOnSingle nl inst p =
413 let new_pdx = Node.idx p
414 new_nl = Node.addPri p inst >>= \new_p ->
415 return $ Container.add new_pdx new_p nl
416 in (new_nl, Instance.setBoth inst new_pdx Node.noSecondary)
418 allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
419 -> (Maybe Node.List, Instance.Instance)
420 allocateOnPair nl inst tgt_p tgt_s =
421 let new_pdx = Node.idx tgt_p
422 new_sdx = Node.idx tgt_s
423 new_nl = do -- Maybe monad
424 new_p <- Node.addPri tgt_p inst
425 new_s <- Node.addSec tgt_s inst new_pdx
426 return $ Container.addTwo new_pdx new_p new_sdx new_s nl
427 in (new_nl, Instance.setBoth inst new_pdx new_sdx)
429 checkSingleStep :: Table -- ^ The original table
430 -> Instance.Instance -- ^ The instance to move
431 -> Table -- ^ The current best table
432 -> IMove -- ^ The move to apply
433 -> Table -- ^ The final best table
434 checkSingleStep ini_tbl target cur_tbl move =
436 Table ini_nl ini_il _ ini_plc = ini_tbl
437 (tmp_nl, new_inst, pri_idx, sec_idx) = applyMove ini_nl target move
439 if isNothing tmp_nl then cur_tbl
441 let tgt_idx = Instance.idx target
442 upd_nl = fromJust tmp_nl
443 upd_cvar = compCV upd_nl
444 upd_il = Container.add tgt_idx new_inst ini_il
445 upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):ini_plc
446 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
448 compareTables cur_tbl upd_tbl
450 -- | Given the status of the current secondary as a valid new node
451 -- and the current candidate target node,
452 -- generate the possible moves for a instance.
453 possibleMoves :: Bool -> Ndx -> [IMove]
454 possibleMoves True tdx =
455 [ReplaceSecondary tdx,
456 ReplaceAndFailover tdx,
458 FailoverAndReplace tdx]
460 possibleMoves False tdx =
461 [ReplaceSecondary tdx,
462 ReplaceAndFailover tdx]
464 -- | Compute the best move for a given instance.
465 checkInstanceMove :: [Ndx] -- Allowed target node indices
466 -> Table -- Original table
467 -> Instance.Instance -- Instance to move
468 -> Table -- Best new table for this instance
469 checkInstanceMove nodes_idx ini_tbl target =
471 opdx = Instance.pnode target
472 osdx = Instance.snode target
473 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
474 use_secondary = elem osdx nodes_idx
475 aft_failover = if use_secondary -- if allowed to failover
476 then checkSingleStep ini_tbl target ini_tbl Failover
478 all_moves = concatMap (possibleMoves use_secondary) nodes
480 -- iterate over the possible nodes for this instance
481 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
483 -- | Compute the best next move.
484 checkMove :: [Ndx] -- ^ Allowed target node indices
485 -> Table -- ^ The current solution
486 -> [Instance.Instance] -- ^ List of instances still to move
487 -> Table -- ^ The new solution
488 checkMove nodes_idx ini_tbl victims =
489 let Table _ _ _ ini_plc = ini_tbl
490 -- iterate over all instances, computing the best move
494 if Instance.snode elem == Node.noSecondary then step_tbl
495 else compareTables step_tbl $
496 checkInstanceMove nodes_idx ini_tbl elem)
498 Table _ _ _ best_plc = best_tbl
500 if length best_plc == length ini_plc then -- no advancement
505 {- | Auxiliary function for solution computation.
507 We write this in an explicit recursive fashion in order to control
508 early-abort in case we have met the min delta. We can't use foldr
509 instead of explicit recursion since we need the accumulator for the
513 advanceSolution :: [Maybe Removal] -- ^ The removal to process
514 -> Int -- ^ Minimum delta parameter
515 -> Int -- ^ Maximum delta parameter
516 -> Maybe Solution -- ^ Current best solution
517 -> Maybe Solution -- ^ New best solution
518 advanceSolution [] _ _ sol = sol
519 advanceSolution (Nothing:xs) m n sol = advanceSolution xs m n sol
520 advanceSolution ((Just (Removal nx removed)):xs) min_d max_d prev_sol =
521 let new_sol = checkPlacement nx removed [] 0 prev_sol max_d
522 new_delta = solutionDelta $! new_sol
524 if new_delta >= 0 && new_delta <= min_d then
527 advanceSolution xs min_d max_d new_sol
529 -- | Computes the placement solution.
530 solutionFromRemovals :: [Maybe Removal] -- ^ The list of (possible) removals
531 -> Int -- ^ Minimum delta parameter
532 -> Int -- ^ Maximum delta parameter
533 -> Maybe Solution -- ^ The best solution found
534 solutionFromRemovals removals min_delta max_delta =
535 advanceSolution removals min_delta max_delta Nothing
537 {- | Computes the solution at the given depth.
539 This is a wrapper over both computeRemovals and
540 solutionFromRemovals. In case we have no solution, we return Nothing.
543 computeSolution :: Node.List -- ^ The original node data
544 -> [Instance.Instance] -- ^ The list of /bad/ instances
545 -> Int -- ^ The /depth/ of removals
546 -> Int -- ^ Maximum number of removals to process
547 -> Int -- ^ Minimum delta parameter
548 -> Int -- ^ Maximum delta parameter
549 -> Maybe Solution -- ^ The best solution found (or Nothing)
550 computeSolution nl bad_instances depth max_removals min_delta max_delta =
552 removals = computeRemovals nl bad_instances depth
553 removals' = capRemovals removals max_removals
555 solutionFromRemovals removals' min_delta max_delta
557 -- Solution display functions (pure)
559 -- | Given the original and final nodes, computes the relocation description.
560 computeMoves :: String -- ^ The instance name
561 -> String -- ^ Original primary
562 -> String -- ^ Original secondary
563 -> String -- ^ New primary
564 -> String -- ^ New secondary
565 -> (String, [String])
566 -- ^ Tuple of moves and commands list; moves is containing
567 -- either @/f/@ for failover or @/r:name/@ for replace
568 -- secondary, while the command list holds gnt-instance
569 -- commands (without that prefix), e.g \"@failover instance1@\"
570 computeMoves i a b c d =
571 if c == a then {- Same primary -}
572 if d == b then {- Same sec??! -}
574 else {- Change of secondary -}
576 [printf "replace-disks -n %s %s" d i])
578 if c == b then {- Failover and ... -}
579 if d == a then {- that's all -}
580 ("f", [printf "migrate -f %s" i])
583 [printf "migrate -f %s" i,
584 printf "replace-disks -n %s %s" d i])
586 if d == a then {- ... and keep primary as secondary -}
588 [printf "replace-disks -n %s %s" c i,
589 printf "migrate -f %s" i])
591 if d == b then {- ... keep same secondary -}
592 (printf "f r:%s f" c,
593 [printf "migrate -f %s" i,
594 printf "replace-disks -n %s %s" c i,
595 printf "migrate -f %s" i])
597 else {- Nothing in common -}
598 (printf "r:%s f r:%s" c d,
599 [printf "replace-disks -n %s %s" c i,
600 printf "migrate -f %s" i,
601 printf "replace-disks -n %s %s" d i])
603 {-| Converts a placement to string format -}
604 printSolutionLine :: Node.List
610 -> (String, [String])
611 printSolutionLine nl il nmlen imlen plc pos =
613 pmlen = (2*nmlen + 1)
615 inst = Container.find i il
616 inam = Instance.name inst
617 npri = Container.nameOf nl p
618 nsec = Container.nameOf nl s
619 opri = Container.nameOf nl $ Instance.pnode inst
620 osec = Container.nameOf nl $ Instance.snode inst
621 (moves, cmds) = computeMoves inam opri osec npri nsec
622 ostr = (printf "%s:%s" opri osec)::String
623 nstr = (printf "%s:%s" npri nsec)::String
625 (printf " %3d. %-*s %-*s => %-*s %.8f a=%s"
626 pos imlen inam pmlen ostr
630 formatCmds :: [[String]] -> String
631 formatCmds cmd_strs =
633 concat $ map (\(a, b) ->
634 (printf "echo step %d" (a::Int)):
636 (map ("gnt-instance " ++) b)) $
639 {-| Converts a solution to string format -}
640 printSolution :: Node.List
643 -> ([String], [[String]])
644 printSolution nl il sol =
646 nmlen = Container.maxNameLen nl
647 imlen = Container.maxNameLen il
649 unzip $ map (uncurry $ printSolutionLine nl il nmlen imlen) $
652 -- | Print the node list.
653 printNodes :: Node.List -> String
655 let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
656 m_name = maximum . map (length . Node.name) $ snl
657 helper = Node.list m_name
659 "%2s %-*s %5s %5s %5s %5s %5s %5s %5s %5s %3s %3s %7s %7s"
661 "t_mem" "n_mem" "i_mem" "x_mem" "f_mem" "r_mem"
663 "pri" "sec" "p_fmem" "p_fdsk"
664 in unlines $ (header:map helper snl)
666 -- | Compute the mem and disk covariance.
667 compDetailedCV :: Node.List -> (Double, Double, Double, Double, Double)
670 all_nodes = Container.elems nl
671 (offline, nodes) = partition Node.offline all_nodes
672 mem_l = map Node.p_mem nodes
673 dsk_l = map Node.p_dsk nodes
674 mem_cv = varianceCoeff mem_l
675 dsk_cv = varianceCoeff dsk_l
676 n1_l = length $ filter Node.failN1 nodes
677 n1_score = (fromIntegral n1_l) / (fromIntegral $ length nodes)
678 res_l = map Node.p_rem nodes
679 res_cv = varianceCoeff res_l
680 offline_inst = sum . map (\n -> (length . Node.plist $ n) +
681 (length . Node.slist $ n)) $ offline
682 online_inst = sum . map (\n -> (length . Node.plist $ n) +
683 (length . Node.slist $ n)) $ nodes
684 off_score = (fromIntegral offline_inst) /
685 (fromIntegral $ online_inst + offline_inst)
686 in (mem_cv, dsk_cv, n1_score, res_cv, off_score)
688 -- | Compute the 'total' variance.
689 compCV :: Node.List -> Double
691 let (mem_cv, dsk_cv, n1_score, res_cv, off_score) = compDetailedCV nl
692 in mem_cv + dsk_cv + n1_score + res_cv + off_score
694 printStats :: Node.List -> String
696 let (mem_cv, dsk_cv, n1_score, res_cv, off_score) = compDetailedCV nl
697 in printf "f_mem=%.8f, r_mem=%.8f, f_dsk=%.8f, n1=%.3f, uf=%.3f"
698 mem_cv res_cv dsk_cv n1_score off_score