1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the "Main" module for the individual binaries.
8 module Ganeti.HTools.Cluster
18 -- * Generic functions
20 -- * First phase functions
22 -- * Second phase functions
29 -- * Balacing functions
34 -- * IAllocator functions
40 import Data.Maybe (isNothing, fromJust)
41 import Text.Printf (printf)
45 import qualified Ganeti.HTools.Container as Container
46 import qualified Ganeti.HTools.Instance as Instance
47 import qualified Ganeti.HTools.Node as Node
48 import Ganeti.HTools.Types
49 import Ganeti.HTools.Utils
51 -- | A separate name for the cluster score type
54 -- | The description of an instance placement.
55 type Placement = (Idx, Ndx, Ndx, Score)
57 {- | A cluster solution described as the solution delta and the list
61 data Solution = Solution Int [Placement]
62 deriving (Eq, Ord, Show)
64 -- | Returns the delta of a solution or -1 for Nothing
65 solutionDelta :: Maybe Solution -> Int
66 solutionDelta sol = case sol of
67 Just (Solution d _) -> d
71 data Removal = Removal Node.List [Instance.Instance]
73 -- | An instance move definition
74 data IMove = Failover -- ^ Failover the instance (f)
75 | ReplacePrimary Ndx -- ^ Replace primary (f, r:np, f)
76 | ReplaceSecondary Ndx -- ^ Replace secondary (r:ns)
77 | ReplaceAndFailover Ndx -- ^ Replace secondary, failover (r:np, f)
78 | FailoverAndReplace Ndx -- ^ Failover, replace secondary (f, r:ns)
81 -- | The complete state for the balancing solution
82 data Table = Table Node.List Instance.List Score [Placement]
87 -- | Cap the removal list if needed.
88 capRemovals :: [a] -> Int -> [a]
89 capRemovals removals max_removals =
90 if max_removals > 0 then
91 take max_removals removals
95 -- | Check if the given node list fails the N+1 check.
96 verifyN1Check :: [Node.Node] -> Bool
97 verifyN1Check nl = any Node.failN1 nl
99 -- | Verifies the N+1 status and return the affected nodes.
100 verifyN1 :: [Node.Node] -> [Node.Node]
101 verifyN1 nl = filter Node.failN1 nl
103 {-| Add an instance and return the new node and instance maps. -}
104 addInstance :: Node.List -> Instance.Instance ->
105 Node.Node -> Node.Node -> Maybe Node.List
106 addInstance nl idata pri sec =
107 let pdx = Node.idx pri
110 pnode <- Node.addPri pri idata
111 snode <- Node.addSec sec idata pdx
112 new_nl <- return $ Container.addTwo sdx snode
116 -- | Remove an instance and return the new node and instance maps.
117 removeInstance :: Node.List -> Instance.Instance -> Node.List
118 removeInstance nl idata =
119 let pnode = Instance.pnode idata
120 snode = Instance.snode idata
121 pn = Container.find pnode nl
122 sn = Container.find snode nl
123 new_nl = Container.addTwo
124 pnode (Node.removePri pn idata)
125 snode (Node.removeSec sn idata) nl in
128 -- | Remove an instance and return the new node map.
129 removeInstances :: Node.List -> [Instance.Instance] -> Node.List
130 removeInstances = foldl' removeInstance
132 -- | Compute the total free disk and memory in the cluster.
133 totalResources :: Container.Container Node.Node -> (Int, Int)
136 (\ (mem, dsk) node -> (mem + (Node.f_mem node),
137 dsk + (Node.f_dsk node)))
138 (0, 0) (Container.elems nl)
140 {- | Compute a new version of a cluster given a solution.
142 This is not used for computing the solutions, but for applying a
143 (known-good) solution to the original cluster for final display.
145 It first removes the relocated instances after which it places them on
149 applySolution :: Node.List -> Instance.List -> [Placement] -> Node.List
150 applySolution nl il sol =
151 let odxes = map (\ (a, b, c, _) -> (Container.find a il,
152 Node.idx (Container.find b nl),
153 Node.idx (Container.find c nl))
155 idxes = (\ (x, _, _) -> x) (unzip3 odxes)
156 nc = removeInstances nl idxes
158 foldl' (\ nz (a, b, c) ->
159 let new_p = Container.find b nz
160 new_s = Container.find c nz in
161 fromJust (addInstance nz a new_p new_s)
165 -- First phase functions
167 {- | Given a list 1,2,3..n build a list of pairs [(1, [2..n]), (2,
171 genParts :: [a] -> Int -> [(a, [a])]
176 if length l < count then
179 (x, xs) : (genParts xs count)
181 -- | Generates combinations of count items from the names list.
182 genNames :: Int -> [b] -> [[b]]
183 genNames count1 names1 =
184 let aux_fn count names current =
189 (\ (x, xs) -> aux_fn (count - 1) xs (x:current))
190 (genParts names count)
192 aux_fn count1 names1 []
194 {- | Computes the pair of bad nodes and instances.
196 The bad node list is computed via a simple 'verifyN1' check, and the
197 bad instance list is the list of primary and secondary instances of
201 computeBadItems :: Node.List -> Instance.List ->
202 ([Node.Node], [Instance.Instance])
203 computeBadItems nl il =
204 let bad_nodes = verifyN1 $ filter (not . Node.offline) $ Container.elems nl
205 bad_instances = map (\idx -> Container.find idx il) $
206 sort $ nub $ concat $
207 map (\ n -> (Node.slist n) ++ (Node.plist n)) bad_nodes
209 (bad_nodes, bad_instances)
212 {- | Checks if removal of instances results in N+1 pass.
214 Note: the check removal cannot optimize by scanning only the affected
215 nodes, since the cluster is known to be not healthy; only the check
216 placement can make this shortcut.
219 checkRemoval :: Node.List -> [Instance.Instance] -> Maybe Removal
220 checkRemoval nl victims =
221 let nx = removeInstances nl victims
222 failN1 = verifyN1Check (Container.elems nx)
227 Just $ Removal nx victims
230 -- | Computes the removals list for a given depth
231 computeRemovals :: Node.List
232 -> [Instance.Instance]
235 computeRemovals nl bad_instances depth =
236 map (checkRemoval nl) $ genNames depth bad_instances
238 -- Second phase functions
240 -- | Single-node relocation cost
241 nodeDelta :: Ndx -> Ndx -> Ndx -> Int
243 if i == p || i == s then
248 {-| Compute best solution.
250 This function compares two solutions, choosing the minimum valid
253 compareSolutions :: Maybe Solution -> Maybe Solution -> Maybe Solution
254 compareSolutions a b = case (a, b) of
259 -- | Compute best table. Note that the ordering of the arguments is important.
260 compareTables :: Table -> Table -> Table
261 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
262 if a_cv > b_cv then b else a
264 -- | Check if a given delta is worse then an existing solution.
265 tooHighDelta :: Maybe Solution -> Int -> Int -> Bool
266 tooHighDelta sol new_delta max_delta =
267 if new_delta > max_delta && max_delta >=0 then
272 Just (Solution old_delta _) -> old_delta <= new_delta
274 {-| Check if placement of instances still keeps the cluster N+1 compliant.
276 This is the workhorse of the allocation algorithm: given the
277 current node and instance maps, the list of instances to be
278 placed, and the current solution, this will return all possible
279 solution by recursing until all target instances are placed.
282 checkPlacement :: Node.List -- ^ The current node list
283 -> [Instance.Instance] -- ^ List of instances still to place
284 -> [Placement] -- ^ Partial solution until now
285 -> Int -- ^ The delta of the partial solution
286 -> Maybe Solution -- ^ The previous solution
287 -> Int -- ^ Abort if the we go above this delta
288 -> Maybe Solution -- ^ The new solution
289 checkPlacement nl victims current current_delta prev_sol max_delta =
290 let target = head victims
291 opdx = Instance.pnode target
292 osdx = Instance.snode target
294 have_tail = (length vtail) > 0
295 nodes = Container.elems nl
296 iidx = Instance.idx target
301 pri_idx = Node.idx pri
302 upri_delta = current_delta + nodeDelta pri_idx opdx osdx
303 new_pri = Node.addPri pri target
304 fail_delta1 = tooHighDelta accu_p upri_delta max_delta
306 if fail_delta1 || isNothing(new_pri) then accu_p
307 else let pri_nl = Container.add pri_idx (fromJust new_pri) nl in
311 sec_idx = Node.idx sec
312 upd_delta = upri_delta +
313 nodeDelta sec_idx opdx osdx
314 fail_delta2 = tooHighDelta accu upd_delta max_delta
315 new_sec = Node.addSec sec target pri_idx
317 if sec_idx == pri_idx || fail_delta2 ||
318 isNothing new_sec then accu
320 nx = Container.add sec_idx (fromJust new_sec) pri_nl
322 plc = (iidx, pri_idx, sec_idx, upd_cv)
326 checkPlacement nx vtail c2 upd_delta
329 Just (Solution upd_delta c2)
330 in compareSolutions accu result
335 applyMove :: Node.List -> Instance.Instance
336 -> IMove -> (Maybe Node.List, Instance.Instance, Ndx, Ndx)
338 applyMove nl inst Failover =
339 let old_pdx = Instance.pnode inst
340 old_sdx = Instance.snode inst
341 old_p = Container.find old_pdx nl
342 old_s = Container.find old_sdx nl
343 int_p = Node.removePri old_p inst
344 int_s = Node.removeSec old_s inst
345 new_nl = do -- Maybe monad
346 new_p <- Node.addPri int_s inst
347 new_s <- Node.addSec int_p inst old_sdx
348 return $ Container.addTwo old_pdx new_s old_sdx new_p nl
349 in (new_nl, Instance.setBoth inst old_sdx old_pdx, old_sdx, old_pdx)
351 -- Replace the primary (f:, r:np, f)
352 applyMove nl inst (ReplacePrimary new_pdx) =
353 let old_pdx = Instance.pnode inst
354 old_sdx = Instance.snode inst
355 old_p = Container.find old_pdx nl
356 old_s = Container.find old_sdx nl
357 tgt_n = Container.find new_pdx nl
358 int_p = Node.removePri old_p inst
359 int_s = Node.removeSec old_s inst
360 new_nl = do -- Maybe monad
361 new_p <- Node.addPri tgt_n inst
362 new_s <- Node.addSec int_s inst new_pdx
363 return $ Container.add new_pdx new_p $
364 Container.addTwo old_pdx int_p old_sdx new_s nl
365 in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx)
367 -- Replace the secondary (r:ns)
368 applyMove nl inst (ReplaceSecondary new_sdx) =
369 let old_pdx = Instance.pnode inst
370 old_sdx = Instance.snode inst
371 old_s = Container.find old_sdx nl
372 tgt_n = Container.find new_sdx nl
373 int_s = Node.removeSec old_s inst
374 new_nl = Node.addSec tgt_n inst old_pdx >>=
375 \new_s -> return $ Container.addTwo new_sdx
376 new_s old_sdx int_s nl
377 in (new_nl, Instance.setSec inst new_sdx, old_pdx, new_sdx)
379 -- Replace the secondary and failover (r:np, f)
380 applyMove nl inst (ReplaceAndFailover new_pdx) =
381 let old_pdx = Instance.pnode inst
382 old_sdx = Instance.snode inst
383 old_p = Container.find old_pdx nl
384 old_s = Container.find old_sdx nl
385 tgt_n = Container.find new_pdx nl
386 int_p = Node.removePri old_p inst
387 int_s = Node.removeSec old_s inst
388 new_nl = do -- Maybe monad
389 new_p <- Node.addPri tgt_n inst
390 new_s <- Node.addSec int_p inst new_pdx
391 return $ Container.add new_pdx new_p $
392 Container.addTwo old_pdx new_s old_sdx int_s nl
393 in (new_nl, Instance.setBoth inst new_pdx old_pdx, new_pdx, old_pdx)
395 -- Failver and replace the secondary (f, r:ns)
396 applyMove nl inst (FailoverAndReplace new_sdx) =
397 let old_pdx = Instance.pnode inst
398 old_sdx = Instance.snode inst
399 old_p = Container.find old_pdx nl
400 old_s = Container.find old_sdx nl
401 tgt_n = Container.find new_sdx nl
402 int_p = Node.removePri old_p inst
403 int_s = Node.removeSec old_s inst
404 new_nl = do -- Maybe monad
405 new_p <- Node.addPri int_s inst
406 new_s <- Node.addSec tgt_n inst old_sdx
407 return $ Container.add new_sdx new_s $
408 Container.addTwo old_sdx new_p old_pdx int_p nl
409 in (new_nl, Instance.setBoth inst old_sdx new_sdx, old_sdx, new_sdx)
411 allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
412 -> (Maybe Node.List, Instance.Instance)
413 allocateOnSingle nl inst p =
414 let new_pdx = Node.idx p
415 new_nl = Node.addPri p inst >>= \new_p ->
416 return $ Container.add new_pdx new_p nl
417 in (new_nl, Instance.setBoth inst new_pdx Node.noSecondary)
419 allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
420 -> (Maybe Node.List, Instance.Instance)
421 allocateOnPair nl inst tgt_p tgt_s =
422 let new_pdx = Node.idx tgt_p
423 new_sdx = Node.idx tgt_s
424 new_nl = do -- Maybe monad
425 new_p <- Node.addPri tgt_p inst
426 new_s <- Node.addSec tgt_s inst new_pdx
427 return $ Container.addTwo new_pdx new_p new_sdx new_s nl
428 in (new_nl, Instance.setBoth inst new_pdx new_sdx)
430 checkSingleStep :: Table -- ^ The original table
431 -> Instance.Instance -- ^ The instance to move
432 -> Table -- ^ The current best table
433 -> IMove -- ^ The move to apply
434 -> Table -- ^ The final best table
435 checkSingleStep ini_tbl target cur_tbl move =
437 Table ini_nl ini_il _ ini_plc = ini_tbl
438 (tmp_nl, new_inst, pri_idx, sec_idx) = applyMove ini_nl target move
440 if isNothing tmp_nl then cur_tbl
442 let tgt_idx = Instance.idx target
443 upd_nl = fromJust tmp_nl
444 upd_cvar = compCV upd_nl
445 upd_il = Container.add tgt_idx new_inst ini_il
446 upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):ini_plc
447 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
449 compareTables cur_tbl upd_tbl
451 -- | Given the status of the current secondary as a valid new node
452 -- and the current candidate target node,
453 -- generate the possible moves for a instance.
454 possibleMoves :: Bool -> Ndx -> [IMove]
455 possibleMoves True tdx =
456 [ReplaceSecondary tdx,
457 ReplaceAndFailover tdx,
459 FailoverAndReplace tdx]
461 possibleMoves False tdx =
462 [ReplaceSecondary tdx,
463 ReplaceAndFailover tdx]
465 -- | Compute the best move for a given instance.
466 checkInstanceMove :: [Ndx] -- Allowed target node indices
467 -> Table -- Original table
468 -> Instance.Instance -- Instance to move
469 -> Table -- Best new table for this instance
470 checkInstanceMove nodes_idx ini_tbl target =
472 opdx = Instance.pnode target
473 osdx = Instance.snode target
474 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
475 use_secondary = elem osdx nodes_idx
476 aft_failover = if use_secondary -- if allowed to failover
477 then checkSingleStep ini_tbl target ini_tbl Failover
479 all_moves = concatMap (possibleMoves use_secondary) nodes
481 -- iterate over the possible nodes for this instance
482 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
484 -- | Compute the best next move.
485 checkMove :: [Ndx] -- ^ Allowed target node indices
486 -> Table -- ^ The current solution
487 -> [Instance.Instance] -- ^ List of instances still to move
488 -> Table -- ^ The new solution
489 checkMove nodes_idx ini_tbl victims =
490 let Table _ _ _ ini_plc = ini_tbl
491 -- iterate over all instances, computing the best move
495 if Instance.snode elem == Node.noSecondary then step_tbl
496 else compareTables step_tbl $
497 checkInstanceMove nodes_idx ini_tbl elem)
499 Table _ _ _ best_plc = best_tbl
501 if length best_plc == length ini_plc then -- no advancement
506 {- | Auxiliary function for solution computation.
508 We write this in an explicit recursive fashion in order to control
509 early-abort in case we have met the min delta. We can't use foldr
510 instead of explicit recursion since we need the accumulator for the
514 advanceSolution :: [Maybe Removal] -- ^ The removal to process
515 -> Int -- ^ Minimum delta parameter
516 -> Int -- ^ Maximum delta parameter
517 -> Maybe Solution -- ^ Current best solution
518 -> Maybe Solution -- ^ New best solution
519 advanceSolution [] _ _ sol = sol
520 advanceSolution (Nothing:xs) m n sol = advanceSolution xs m n sol
521 advanceSolution ((Just (Removal nx removed)):xs) min_d max_d prev_sol =
522 let new_sol = checkPlacement nx removed [] 0 prev_sol max_d
523 new_delta = solutionDelta $! new_sol
525 if new_delta >= 0 && new_delta <= min_d then
528 advanceSolution xs min_d max_d new_sol
530 -- | Computes the placement solution.
531 solutionFromRemovals :: [Maybe Removal] -- ^ The list of (possible) removals
532 -> Int -- ^ Minimum delta parameter
533 -> Int -- ^ Maximum delta parameter
534 -> Maybe Solution -- ^ The best solution found
535 solutionFromRemovals removals min_delta max_delta =
536 advanceSolution removals min_delta max_delta Nothing
538 {- | Computes the solution at the given depth.
540 This is a wrapper over both computeRemovals and
541 solutionFromRemovals. In case we have no solution, we return Nothing.
544 computeSolution :: Node.List -- ^ The original node data
545 -> [Instance.Instance] -- ^ The list of /bad/ instances
546 -> Int -- ^ The /depth/ of removals
547 -> Int -- ^ Maximum number of removals to process
548 -> Int -- ^ Minimum delta parameter
549 -> Int -- ^ Maximum delta parameter
550 -> Maybe Solution -- ^ The best solution found (or Nothing)
551 computeSolution nl bad_instances depth max_removals min_delta max_delta =
553 removals = computeRemovals nl bad_instances depth
554 removals' = capRemovals removals max_removals
556 solutionFromRemovals removals' min_delta max_delta
558 -- Solution display functions (pure)
560 -- | Given the original and final nodes, computes the relocation description.
561 computeMoves :: String -- ^ The instance name
562 -> String -- ^ Original primary
563 -> String -- ^ Original secondary
564 -> String -- ^ New primary
565 -> String -- ^ New secondary
566 -> (String, [String])
567 -- ^ Tuple of moves and commands list; moves is containing
568 -- either @/f/@ for failover or @/r:name/@ for replace
569 -- secondary, while the command list holds gnt-instance
570 -- commands (without that prefix), e.g \"@failover instance1@\"
571 computeMoves i a b c d =
572 if c == a then {- Same primary -}
573 if d == b then {- Same sec??! -}
575 else {- Change of secondary -}
577 [printf "replace-disks -n %s %s" d i])
579 if c == b then {- Failover and ... -}
580 if d == a then {- that's all -}
581 ("f", [printf "migrate -f %s" i])
584 [printf "migrate -f %s" i,
585 printf "replace-disks -n %s %s" d i])
587 if d == a then {- ... and keep primary as secondary -}
589 [printf "replace-disks -n %s %s" c i,
590 printf "migrate -f %s" i])
592 if d == b then {- ... keep same secondary -}
593 (printf "f r:%s f" c,
594 [printf "migrate -f %s" i,
595 printf "replace-disks -n %s %s" c i,
596 printf "migrate -f %s" i])
598 else {- Nothing in common -}
599 (printf "r:%s f r:%s" c d,
600 [printf "replace-disks -n %s %s" c i,
601 printf "migrate -f %s" i,
602 printf "replace-disks -n %s %s" d i])
604 {-| Converts a placement to string format -}
605 printSolutionLine :: Node.List
611 -> (String, [String])
612 printSolutionLine nl il nmlen imlen plc pos =
614 pmlen = (2*nmlen + 1)
616 inst = Container.find i il
617 inam = Instance.name inst
618 npri = Container.nameOf nl p
619 nsec = Container.nameOf nl s
620 opri = Container.nameOf nl $ Instance.pnode inst
621 osec = Container.nameOf nl $ Instance.snode inst
622 (moves, cmds) = computeMoves inam opri osec npri nsec
623 ostr = (printf "%s:%s" opri osec)::String
624 nstr = (printf "%s:%s" npri nsec)::String
626 (printf " %3d. %-*s %-*s => %-*s %.8f a=%s"
627 pos imlen inam pmlen ostr
631 formatCmds :: [[String]] -> String
632 formatCmds cmd_strs =
634 concat $ map (\(a, b) ->
635 (printf "echo step %d" (a::Int)):
637 (map ("gnt-instance " ++) b)) $
640 {-| Converts a solution to string format -}
641 printSolution :: Node.List
644 -> ([String], [[String]])
645 printSolution nl il sol =
647 nmlen = Container.maxNameLen nl
648 imlen = Container.maxNameLen il
650 unzip $ map (uncurry $ printSolutionLine nl il nmlen imlen) $
653 -- | Print the node list.
654 printNodes :: Node.List -> String
656 let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
657 m_name = maximum . map (length . Node.name) $ snl
658 helper = Node.list m_name
660 "%2s %-*s %5s %5s %5s %5s %5s %5s %5s %5s %3s %3s %7s %7s"
662 "t_mem" "n_mem" "i_mem" "x_mem" "f_mem" "r_mem"
664 "pri" "sec" "p_fmem" "p_fdsk"
665 in unlines $ (header:map helper snl)
667 -- | Compute the mem and disk covariance.
668 compDetailedCV :: Node.List -> (Double, Double, Double, Double, Double)
671 all_nodes = Container.elems nl
672 (offline, nodes) = partition Node.offline all_nodes
673 mem_l = map Node.p_mem nodes
674 dsk_l = map Node.p_dsk nodes
675 mem_cv = varianceCoeff mem_l
676 dsk_cv = varianceCoeff dsk_l
677 n1_l = length $ filter Node.failN1 nodes
678 n1_score = (fromIntegral n1_l) / (fromIntegral $ length nodes)
679 res_l = map Node.p_rem nodes
680 res_cv = varianceCoeff res_l
681 offline_inst = sum . map (\n -> (length . Node.plist $ n) +
682 (length . Node.slist $ n)) $ offline
683 online_inst = sum . map (\n -> (length . Node.plist $ n) +
684 (length . Node.slist $ n)) $ nodes
685 off_score = (fromIntegral offline_inst) /
686 (fromIntegral $ online_inst + offline_inst)
687 in (mem_cv, dsk_cv, n1_score, res_cv, off_score)
689 -- | Compute the 'total' variance.
690 compCV :: Node.List -> Double
692 let (mem_cv, dsk_cv, n1_score, res_cv, off_score) = compDetailedCV nl
693 in mem_cv + dsk_cv + n1_score + res_cv + off_score
695 printStats :: Node.List -> String
697 let (mem_cv, dsk_cv, n1_score, res_cv, off_score) = compDetailedCV nl
698 in printf "f_mem=%.8f, r_mem=%.8f, f_dsk=%.8f, n1=%.3f, uf=%.3f"
699 mem_cv res_cv dsk_cv n1_score off_score