1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the "Main" module for the individual binaries.
8 module Ganeti.HTools.Cluster
20 -- * Generic functions
22 -- * First phase functions
24 -- * Second phase functions
31 -- * Balacing functions
36 -- * IAllocator functions
41 import Data.Maybe (isNothing, fromJust)
42 import Text.Printf (printf)
46 import qualified Ganeti.HTools.Container as Container
47 import qualified Ganeti.HTools.Instance as Instance
48 import qualified Ganeti.HTools.Node as Node
49 import Ganeti.HTools.Types
50 import Ganeti.HTools.Utils
52 -- | A separate name for the cluster score type
55 -- | The description of an instance placement.
56 type Placement = (Int, Int, Int, Score)
58 {- | A cluster solution described as the solution delta and the list
62 data Solution = Solution Int [Placement]
63 deriving (Eq, Ord, Show)
65 -- | Returns the delta of a solution or -1 for Nothing
66 solutionDelta :: Maybe Solution -> Int
67 solutionDelta sol = case sol of
68 Just (Solution d _) -> d
72 data Removal = Removal NodeList [Instance.Instance]
74 -- | An instance move definition
75 data IMove = Failover -- ^ Failover the instance (f)
76 | ReplacePrimary Int -- ^ Replace primary (f, r:np, f)
77 | ReplaceSecondary Int -- ^ Replace secondary (r:ns)
78 | ReplaceAndFailover Int -- ^ Replace secondary, failover (r:np, f)
79 | FailoverAndReplace Int -- ^ Failover, replace secondary (f, r:ns)
82 -- | The complete state for the balancing solution
83 data Table = Table NodeList InstanceList Score [Placement]
88 -- | Cap the removal list if needed.
89 capRemovals :: [a] -> Int -> [a]
90 capRemovals removals max_removals =
91 if max_removals > 0 then
92 take max_removals removals
96 -- | Check if the given node list fails the N+1 check.
97 verifyN1Check :: [Node.Node] -> Bool
98 verifyN1Check nl = any Node.failN1 nl
100 -- | Verifies the N+1 status and return the affected nodes.
101 verifyN1 :: [Node.Node] -> [Node.Node]
102 verifyN1 nl = filter Node.failN1 nl
104 {-| Add an instance and return the new node and instance maps. -}
105 addInstance :: NodeList -> Instance.Instance ->
106 Node.Node -> Node.Node -> Maybe NodeList
107 addInstance nl idata pri sec =
108 let pdx = Node.idx pri
111 pnode <- Node.addPri pri idata
112 snode <- Node.addSec sec idata pdx
113 new_nl <- return $ Container.addTwo sdx snode
117 -- | Remove an instance and return the new node and instance maps.
118 removeInstance :: NodeList -> Instance.Instance -> NodeList
119 removeInstance nl idata =
120 let pnode = Instance.pnode idata
121 snode = Instance.snode idata
122 pn = Container.find pnode nl
123 sn = Container.find snode nl
124 new_nl = Container.addTwo
125 pnode (Node.removePri pn idata)
126 snode (Node.removeSec sn idata) nl in
129 -- | Remove an instance and return the new node map.
130 removeInstances :: NodeList -> [Instance.Instance] -> NodeList
131 removeInstances = foldl' removeInstance
133 -- | Compute the total free disk and memory in the cluster.
134 totalResources :: Container.Container Node.Node -> (Int, Int)
137 (\ (mem, dsk) node -> (mem + (Node.f_mem node),
138 dsk + (Node.f_dsk node)))
139 (0, 0) (Container.elems nl)
141 {- | Compute a new version of a cluster given a solution.
143 This is not used for computing the solutions, but for applying a
144 (known-good) solution to the original cluster for final display.
146 It first removes the relocated instances after which it places them on
150 applySolution :: NodeList -> InstanceList -> [Placement] -> NodeList
151 applySolution nl il sol =
152 let odxes = map (\ (a, b, c, _) -> (Container.find a il,
153 Node.idx (Container.find b nl),
154 Node.idx (Container.find c nl))
156 idxes = (\ (x, _, _) -> x) (unzip3 odxes)
157 nc = removeInstances nl idxes
159 foldl' (\ nz (a, b, c) ->
160 let new_p = Container.find b nz
161 new_s = Container.find c nz in
162 fromJust (addInstance nz a new_p new_s)
166 -- First phase functions
168 {- | Given a list 1,2,3..n build a list of pairs [(1, [2..n]), (2,
172 genParts :: [a] -> Int -> [(a, [a])]
177 if length l < count then
180 (x, xs) : (genParts xs count)
182 -- | Generates combinations of count items from the names list.
183 genNames :: Int -> [b] -> [[b]]
184 genNames count1 names1 =
185 let aux_fn count names current =
190 (\ (x, xs) -> aux_fn (count - 1) xs (x:current))
191 (genParts names count)
193 aux_fn count1 names1 []
195 {- | Computes the pair of bad nodes and instances.
197 The bad node list is computed via a simple 'verifyN1' check, and the
198 bad instance list is the list of primary and secondary instances of
202 computeBadItems :: NodeList -> InstanceList ->
203 ([Node.Node], [Instance.Instance])
204 computeBadItems nl il =
205 let bad_nodes = verifyN1 $ filter (not . Node.offline) $ Container.elems nl
206 bad_instances = map (\idx -> Container.find idx il) $
207 sort $ nub $ concat $
208 map (\ n -> (Node.slist n) ++ (Node.plist n)) bad_nodes
210 (bad_nodes, bad_instances)
213 {- | Checks if removal of instances results in N+1 pass.
215 Note: the check removal cannot optimize by scanning only the affected
216 nodes, since the cluster is known to be not healthy; only the check
217 placement can make this shortcut.
220 checkRemoval :: NodeList -> [Instance.Instance] -> Maybe Removal
221 checkRemoval nl victims =
222 let nx = removeInstances nl victims
223 failN1 = verifyN1Check (Container.elems nx)
228 Just $ Removal nx victims
231 -- | Computes the removals list for a given depth
232 computeRemovals :: NodeList
233 -> [Instance.Instance]
236 computeRemovals nl bad_instances depth =
237 map (checkRemoval nl) $ genNames depth bad_instances
239 -- Second phase functions
241 -- | Single-node relocation cost
242 nodeDelta :: Int -> Int -> Int -> Int
244 if i == p || i == s then
249 {-| Compute best solution.
251 This function compares two solutions, choosing the minimum valid
254 compareSolutions :: Maybe Solution -> Maybe Solution -> Maybe Solution
255 compareSolutions a b = case (a, b) of
260 -- | Compute best table. Note that the ordering of the arguments is important.
261 compareTables :: Table -> Table -> Table
262 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
263 if a_cv > b_cv then b else a
265 -- | Check if a given delta is worse then an existing solution.
266 tooHighDelta :: Maybe Solution -> Int -> Int -> Bool
267 tooHighDelta sol new_delta max_delta =
268 if new_delta > max_delta && max_delta >=0 then
273 Just (Solution old_delta _) -> old_delta <= new_delta
275 {-| Check if placement of instances still keeps the cluster N+1 compliant.
277 This is the workhorse of the allocation algorithm: given the
278 current node and instance maps, the list of instances to be
279 placed, and the current solution, this will return all possible
280 solution by recursing until all target instances are placed.
283 checkPlacement :: NodeList -- ^ The current node list
284 -> [Instance.Instance] -- ^ List of instances still to place
285 -> [Placement] -- ^ Partial solution until now
286 -> Int -- ^ The delta of the partial solution
287 -> Maybe Solution -- ^ The previous solution
288 -> Int -- ^ Abort if the we go above this delta
289 -> Maybe Solution -- ^ The new solution
290 checkPlacement nl victims current current_delta prev_sol max_delta =
291 let target = head victims
292 opdx = Instance.pnode target
293 osdx = Instance.snode target
295 have_tail = (length vtail) > 0
296 nodes = Container.elems nl
297 iidx = Instance.idx target
302 pri_idx = Node.idx pri
303 upri_delta = current_delta + nodeDelta pri_idx opdx osdx
304 new_pri = Node.addPri pri target
305 fail_delta1 = tooHighDelta accu_p upri_delta max_delta
307 if fail_delta1 || isNothing(new_pri) then accu_p
308 else let pri_nl = Container.add pri_idx (fromJust new_pri) nl in
312 sec_idx = Node.idx sec
313 upd_delta = upri_delta +
314 nodeDelta sec_idx opdx osdx
315 fail_delta2 = tooHighDelta accu upd_delta max_delta
316 new_sec = Node.addSec sec target pri_idx
318 if sec_idx == pri_idx || fail_delta2 ||
319 isNothing new_sec then accu
321 nx = Container.add sec_idx (fromJust new_sec) pri_nl
323 plc = (iidx, pri_idx, sec_idx, upd_cv)
327 checkPlacement nx vtail c2 upd_delta
330 Just (Solution upd_delta c2)
331 in compareSolutions accu result
336 applyMove :: NodeList -> Instance.Instance
337 -> IMove -> (Maybe NodeList, Instance.Instance, Int, Int)
339 applyMove nl inst Failover =
340 let old_pdx = Instance.pnode inst
341 old_sdx = Instance.snode inst
342 old_p = Container.find old_pdx nl
343 old_s = Container.find old_sdx nl
344 int_p = Node.removePri old_p inst
345 int_s = Node.removeSec old_s inst
346 new_nl = do -- Maybe monad
347 new_p <- Node.addPri int_s inst
348 new_s <- Node.addSec int_p inst old_sdx
349 return $ Container.addTwo old_pdx new_s old_sdx new_p nl
350 in (new_nl, Instance.setBoth inst old_sdx old_pdx, old_sdx, old_pdx)
352 -- Replace the primary (f:, r:np, f)
353 applyMove nl inst (ReplacePrimary new_pdx) =
354 let old_pdx = Instance.pnode inst
355 old_sdx = Instance.snode inst
356 old_p = Container.find old_pdx nl
357 old_s = Container.find old_sdx nl
358 tgt_n = Container.find new_pdx nl
359 int_p = Node.removePri old_p inst
360 int_s = Node.removeSec old_s inst
361 new_nl = do -- Maybe monad
362 new_p <- Node.addPri tgt_n inst
363 new_s <- Node.addSec int_s inst new_pdx
364 return $ Container.add new_pdx new_p $
365 Container.addTwo old_pdx int_p old_sdx new_s nl
366 in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx)
368 -- Replace the secondary (r:ns)
369 applyMove nl inst (ReplaceSecondary new_sdx) =
370 let old_pdx = Instance.pnode inst
371 old_sdx = Instance.snode inst
372 old_s = Container.find old_sdx nl
373 tgt_n = Container.find new_sdx nl
374 int_s = Node.removeSec old_s inst
375 new_nl = Node.addSec tgt_n inst old_pdx >>=
376 \new_s -> return $ Container.addTwo new_sdx
377 new_s old_sdx int_s nl
378 in (new_nl, Instance.setSec inst new_sdx, old_pdx, new_sdx)
380 -- Replace the secondary and failover (r:np, f)
381 applyMove nl inst (ReplaceAndFailover new_pdx) =
382 let old_pdx = Instance.pnode inst
383 old_sdx = Instance.snode inst
384 old_p = Container.find old_pdx nl
385 old_s = Container.find old_sdx nl
386 tgt_n = Container.find new_pdx nl
387 int_p = Node.removePri old_p inst
388 int_s = Node.removeSec old_s inst
389 new_nl = do -- Maybe monad
390 new_p <- Node.addPri tgt_n inst
391 new_s <- Node.addSec int_p inst new_pdx
392 return $ Container.add new_pdx new_p $
393 Container.addTwo old_pdx new_s old_sdx int_s nl
394 in (new_nl, Instance.setBoth inst new_pdx old_pdx, new_pdx, old_pdx)
396 -- Failver and replace the secondary (f, r:ns)
397 applyMove nl inst (FailoverAndReplace new_sdx) =
398 let old_pdx = Instance.pnode inst
399 old_sdx = Instance.snode inst
400 old_p = Container.find old_pdx nl
401 old_s = Container.find old_sdx nl
402 tgt_n = Container.find new_sdx nl
403 int_p = Node.removePri old_p inst
404 int_s = Node.removeSec old_s inst
405 new_nl = do -- Maybe monad
406 new_p <- Node.addPri int_s inst
407 new_s <- Node.addSec tgt_n inst old_sdx
408 return $ Container.add new_sdx new_s $
409 Container.addTwo old_sdx new_p old_pdx int_p nl
410 in (new_nl, Instance.setBoth inst old_sdx new_sdx, old_sdx, new_sdx)
412 allocateOn nl inst new_pdx new_sdx =
414 tgt_p = Container.find new_pdx nl
415 tgt_s = Container.find new_sdx nl
416 new_nl = do -- Maybe monad
417 new_p <- Node.addPri tgt_p inst
418 new_s <- Node.addSec tgt_s inst new_pdx
419 return $ Container.addTwo new_pdx new_p new_sdx new_s nl
420 in (new_nl, Instance.setBoth inst new_pdx new_sdx)
422 checkSingleStep :: Table -- ^ The original table
423 -> Instance.Instance -- ^ The instance to move
424 -> Table -- ^ The current best table
425 -> IMove -- ^ The move to apply
426 -> Table -- ^ The final best table
427 checkSingleStep ini_tbl target cur_tbl move =
429 Table ini_nl ini_il _ ini_plc = ini_tbl
430 (tmp_nl, new_inst, pri_idx, sec_idx) = applyMove ini_nl target move
432 if isNothing tmp_nl then cur_tbl
434 let tgt_idx = Instance.idx target
435 upd_nl = fromJust tmp_nl
436 upd_cvar = compCV upd_nl
437 upd_il = Container.add tgt_idx new_inst ini_il
438 upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):ini_plc
439 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
441 compareTables cur_tbl upd_tbl
443 -- | Given the status of the current secondary as a valid new node
444 -- and the current candidate target node,
445 -- generate the possible moves for a instance.
446 possibleMoves :: Bool -> Int -> [IMove]
447 possibleMoves True tdx =
448 [ReplaceSecondary tdx,
449 ReplaceAndFailover tdx,
451 FailoverAndReplace tdx]
453 possibleMoves False tdx =
454 [ReplaceSecondary tdx,
455 ReplaceAndFailover tdx]
457 -- | Compute the best move for a given instance.
458 checkInstanceMove :: [Int] -- Allowed target node indices
459 -> Table -- Original table
460 -> Instance.Instance -- Instance to move
461 -> Table -- Best new table for this instance
462 checkInstanceMove nodes_idx ini_tbl target =
464 opdx = Instance.pnode target
465 osdx = Instance.snode target
466 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
467 use_secondary = elem osdx nodes_idx
468 aft_failover = if use_secondary -- if allowed to failover
469 then checkSingleStep ini_tbl target ini_tbl Failover
471 all_moves = concatMap (possibleMoves use_secondary) nodes
473 -- iterate over the possible nodes for this instance
474 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
476 -- | Compute the best next move.
477 checkMove :: [Int] -- ^ Allowed target node indices
478 -> Table -- ^ The current solution
479 -> [Instance.Instance] -- ^ List of instances still to move
480 -> Table -- ^ The new solution
481 checkMove nodes_idx ini_tbl victims =
482 let Table _ _ _ ini_plc = ini_tbl
483 -- iterate over all instances, computing the best move
487 if Instance.snode elem == Node.noSecondary then step_tbl
488 else compareTables step_tbl $
489 checkInstanceMove nodes_idx ini_tbl elem)
491 Table _ _ _ best_plc = best_tbl
493 if length best_plc == length ini_plc then -- no advancement
498 {- | Auxiliary function for solution computation.
500 We write this in an explicit recursive fashion in order to control
501 early-abort in case we have met the min delta. We can't use foldr
502 instead of explicit recursion since we need the accumulator for the
506 advanceSolution :: [Maybe Removal] -- ^ The removal to process
507 -> Int -- ^ Minimum delta parameter
508 -> Int -- ^ Maximum delta parameter
509 -> Maybe Solution -- ^ Current best solution
510 -> Maybe Solution -- ^ New best solution
511 advanceSolution [] _ _ sol = sol
512 advanceSolution (Nothing:xs) m n sol = advanceSolution xs m n sol
513 advanceSolution ((Just (Removal nx removed)):xs) min_d max_d prev_sol =
514 let new_sol = checkPlacement nx removed [] 0 prev_sol max_d
515 new_delta = solutionDelta $! new_sol
517 if new_delta >= 0 && new_delta <= min_d then
520 advanceSolution xs min_d max_d new_sol
522 -- | Computes the placement solution.
523 solutionFromRemovals :: [Maybe Removal] -- ^ The list of (possible) removals
524 -> Int -- ^ Minimum delta parameter
525 -> Int -- ^ Maximum delta parameter
526 -> Maybe Solution -- ^ The best solution found
527 solutionFromRemovals removals min_delta max_delta =
528 advanceSolution removals min_delta max_delta Nothing
530 {- | Computes the solution at the given depth.
532 This is a wrapper over both computeRemovals and
533 solutionFromRemovals. In case we have no solution, we return Nothing.
536 computeSolution :: NodeList -- ^ The original node data
537 -> [Instance.Instance] -- ^ The list of /bad/ instances
538 -> Int -- ^ The /depth/ of removals
539 -> Int -- ^ Maximum number of removals to process
540 -> Int -- ^ Minimum delta parameter
541 -> Int -- ^ Maximum delta parameter
542 -> Maybe Solution -- ^ The best solution found (or Nothing)
543 computeSolution nl bad_instances depth max_removals min_delta max_delta =
545 removals = computeRemovals nl bad_instances depth
546 removals' = capRemovals removals max_removals
548 solutionFromRemovals removals' min_delta max_delta
550 -- Solution display functions (pure)
552 -- | Given the original and final nodes, computes the relocation description.
553 computeMoves :: String -- ^ The instance name
554 -> String -- ^ Original primary
555 -> String -- ^ Original secondary
556 -> String -- ^ New primary
557 -> String -- ^ New secondary
558 -> (String, [String])
559 -- ^ Tuple of moves and commands list; moves is containing
560 -- either @/f/@ for failover or @/r:name/@ for replace
561 -- secondary, while the command list holds gnt-instance
562 -- commands (without that prefix), e.g \"@failover instance1@\"
563 computeMoves i a b c d =
564 if c == a then {- Same primary -}
565 if d == b then {- Same sec??! -}
567 else {- Change of secondary -}
569 [printf "replace-disks -n %s %s" d i])
571 if c == b then {- Failover and ... -}
572 if d == a then {- that's all -}
573 ("f", [printf "migrate -f %s" i])
576 [printf "migrate -f %s" i,
577 printf "replace-disks -n %s %s" d i])
579 if d == a then {- ... and keep primary as secondary -}
581 [printf "replace-disks -n %s %s" c i,
582 printf "migrate -f %s" i])
584 if d == b then {- ... keep same secondary -}
585 (printf "f r:%s f" c,
586 [printf "migrate -f %s" i,
587 printf "replace-disks -n %s %s" c i,
588 printf "migrate -f %s" i])
590 else {- Nothing in common -}
591 (printf "r:%s f r:%s" c d,
592 [printf "replace-disks -n %s %s" c i,
593 printf "migrate -f %s" i,
594 printf "replace-disks -n %s %s" d i])
596 {-| Converts a placement to string format -}
597 printSolutionLine :: NodeList
603 -> (String, [String])
604 printSolutionLine nl il nmlen imlen plc pos =
606 pmlen = (2*nmlen + 1)
608 inst = Container.find i il
609 inam = Instance.name inst
612 opri = cNameOf nl $ Instance.pnode inst
613 osec = cNameOf nl $ Instance.snode inst
614 (moves, cmds) = computeMoves inam opri osec npri nsec
615 ostr = (printf "%s:%s" opri osec)::String
616 nstr = (printf "%s:%s" npri nsec)::String
618 (printf " %3d. %-*s %-*s => %-*s %.8f a=%s"
619 pos imlen inam pmlen ostr
623 formatCmds :: [[String]] -> String
624 formatCmds cmd_strs =
626 concat $ map (\(a, b) ->
627 (printf "echo step %d" (a::Int)):
629 (map ("gnt-instance " ++) b)) $
632 {-| Converts a solution to string format -}
633 printSolution :: NodeList
636 -> ([String], [[String]])
637 printSolution nl il sol =
639 nmlen = cMaxNamelen nl
640 imlen = cMaxNamelen il
642 unzip $ map (uncurry $ printSolutionLine nl il nmlen imlen) $
645 -- | Print the node list.
646 printNodes :: NodeList -> String
648 let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
649 m_name = maximum . map (length . Node.name) $ snl
650 helper = Node.list m_name
652 "%2s %-*s %5s %5s %5s %5s %5s %5s %5s %5s %3s %3s %7s %7s"
654 "t_mem" "n_mem" "i_mem" "x_mem" "f_mem" "r_mem"
656 "pri" "sec" "p_fmem" "p_fdsk"
657 in unlines $ (header:map helper snl)
659 -- | Compute the mem and disk covariance.
660 compDetailedCV :: NodeList -> (Double, Double, Double, Double, Double)
663 all_nodes = Container.elems nl
664 (offline, nodes) = partition Node.offline all_nodes
665 mem_l = map Node.p_mem nodes
666 dsk_l = map Node.p_dsk nodes
667 mem_cv = varianceCoeff mem_l
668 dsk_cv = varianceCoeff dsk_l
669 n1_l = length $ filter Node.failN1 nodes
670 n1_score = (fromIntegral n1_l) / (fromIntegral $ length nodes)
671 res_l = map Node.p_rem nodes
672 res_cv = varianceCoeff res_l
673 offline_inst = sum . map (\n -> (length . Node.plist $ n) +
674 (length . Node.slist $ n)) $ offline
675 online_inst = sum . map (\n -> (length . Node.plist $ n) +
676 (length . Node.slist $ n)) $ nodes
677 off_score = (fromIntegral offline_inst) /
678 (fromIntegral $ online_inst + offline_inst)
679 in (mem_cv, dsk_cv, n1_score, res_cv, off_score)
681 -- | Compute the 'total' variance.
682 compCV :: NodeList -> Double
684 let (mem_cv, dsk_cv, n1_score, res_cv, off_score) = compDetailedCV nl
685 in mem_cv + dsk_cv + n1_score + res_cv + off_score
687 printStats :: NodeList -> String
689 let (mem_cv, dsk_cv, n1_score, res_cv, off_score) = compDetailedCV nl
690 in printf "f_mem=%.8f, r_mem=%.8f, f_dsk=%.8f, n1=%.3f, uf=%.3f"
691 mem_cv res_cv dsk_cv n1_score off_score