1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the "Main" module.
17 -- * Generic functions
19 -- * First phase functions
21 -- * Second phase functions
27 -- * Balacing functions
31 -- * Loading functions
36 import Data.Maybe (isNothing, fromJust)
37 import Text.Printf (printf)
40 import qualified Container
41 import qualified Instance
45 type NodeList = Container.Container Node.Node
46 type InstanceList = Container.Container Instance.Instance
49 -- | The description of an instance placement.
50 type Placement = (Int, Int, Int, Score)
52 {- | A cluster solution described as the solution delta and the list
56 data Solution = Solution Int [Placement]
57 deriving (Eq, Ord, Show)
59 -- | Returns the delta of a solution or -1 for Nothing
60 solutionDelta :: Maybe Solution -> Int
61 solutionDelta sol = case sol of
62 Just (Solution d _) -> d
66 data Removal = Removal NodeList [Instance.Instance]
68 -- | An instance move definition
71 | ReplaceSecondary Int
72 | ReplaceAndFailover Int
75 -- | The complete state for the balancing solution
76 data Table = Table NodeList InstanceList Score [Placement]
81 -- | Cap the removal list if needed.
82 capRemovals :: [a] -> Int -> [a]
83 capRemovals removals max_removals =
84 if max_removals > 0 then
85 take max_removals removals
89 -- | Check if the given node list fails the N+1 check.
90 verifyN1Check :: [Node.Node] -> Bool
91 verifyN1Check nl = any Node.failN1 nl
93 -- | Verifies the N+1 status and return the affected nodes.
94 verifyN1 :: [Node.Node] -> [Node.Node]
95 verifyN1 nl = filter Node.failN1 nl
97 {-| Add an instance and return the new node and instance maps. -}
98 addInstance :: NodeList -> Instance.Instance ->
99 Node.Node -> Node.Node -> Maybe NodeList
100 addInstance nl idata pri sec =
101 let pdx = Node.idx pri
104 pnode <- Node.addPri pri idata
105 snode <- Node.addSec sec idata pdx
106 new_nl <- return $ Container.addTwo sdx snode
110 -- | Remove an instance and return the new node and instance maps.
111 removeInstance :: NodeList -> Instance.Instance -> NodeList
112 removeInstance nl idata =
113 let pnode = Instance.pnode idata
114 snode = Instance.snode idata
115 pn = Container.find pnode nl
116 sn = Container.find snode nl
117 new_nl = Container.addTwo
118 pnode (Node.removePri pn idata)
119 snode (Node.removeSec sn idata) nl in
122 -- | Remove an instance and return the new node map.
123 removeInstances :: NodeList -> [Instance.Instance] -> NodeList
124 removeInstances = foldl' removeInstance
126 -- | Compute the total free disk and memory in the cluster.
127 totalResources :: Container.Container Node.Node -> (Int, Int)
130 (\ (mem, dsk) node -> (mem + (Node.f_mem node),
131 dsk + (Node.f_dsk node)))
132 (0, 0) (Container.elems nl)
134 {- | Compute a new version of a cluster given a solution.
136 This is not used for computing the solutions, but for applying a
137 (known-good) solution to the original cluster for final display.
139 It first removes the relocated instances after which it places them on
143 applySolution :: NodeList -> InstanceList -> [Placement] -> NodeList
144 applySolution nl il sol =
145 let odxes = map (\ (a, b, c, _) -> (Container.find a il,
146 Node.idx (Container.find b nl),
147 Node.idx (Container.find c nl))
149 idxes = (\ (x, _, _) -> x) (unzip3 odxes)
150 nc = removeInstances nl idxes
152 foldl' (\ nz (a, b, c) ->
153 let new_p = Container.find b nz
154 new_s = Container.find c nz in
155 fromJust (addInstance nz a new_p new_s)
159 -- First phase functions
161 {- | Given a list 1,2,3..n build a list of pairs [(1, [2..n]), (2,
165 genParts :: [a] -> Int -> [(a, [a])]
170 if length l < count then
173 (x, xs) : (genParts xs count)
175 -- | Generates combinations of count items from the names list.
176 genNames :: Int -> [b] -> [[b]]
177 genNames count1 names1 =
178 let aux_fn count names current =
183 (\ (x, xs) -> aux_fn (count - 1) xs (x:current))
184 (genParts names count)
186 aux_fn count1 names1 []
188 {- | Computes the pair of bad nodes and instances.
190 The bad node list is computed via a simple 'verifyN1' check, and the
191 bad instance list is the list of primary and secondary instances of
195 computeBadItems :: NodeList -> InstanceList ->
196 ([Node.Node], [Instance.Instance])
197 computeBadItems nl il =
198 let bad_nodes = verifyN1 $ Container.elems nl
199 bad_instances = map (\idx -> Container.find idx il) $
200 sort $ nub $ concat $
201 map (\ n -> (Node.slist n) ++ (Node.plist n)) bad_nodes
203 (bad_nodes, bad_instances)
206 {- | Checks if removal of instances results in N+1 pass.
208 Note: the check removal cannot optimize by scanning only the affected
209 nodes, since the cluster is known to be not healthy; only the check
210 placement can make this shortcut.
213 checkRemoval :: NodeList -> [Instance.Instance] -> Maybe Removal
214 checkRemoval nl victims =
215 let nx = removeInstances nl victims
216 failN1 = verifyN1Check (Container.elems nx)
221 Just $ Removal nx victims
224 -- | Computes the removals list for a given depth
225 computeRemovals :: Cluster.NodeList
226 -> [Instance.Instance]
228 -> [Maybe Cluster.Removal]
229 computeRemovals nl bad_instances depth =
230 map (checkRemoval nl) $ genNames depth bad_instances
232 -- Second phase functions
234 -- | Single-node relocation cost
235 nodeDelta :: Int -> Int -> Int -> Int
237 if i == p || i == s then
242 {-| Compute best solution.
244 This function compares two solutions, choosing the minimum valid
247 compareSolutions :: Maybe Solution -> Maybe Solution -> Maybe Solution
248 compareSolutions a b = case (a, b) of
253 -- | Compute best table. Note that the ordering of the arguments is important.
254 compareTables :: Table -> Table -> Table
255 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
256 if a_cv > b_cv then b else a
258 -- | Check if a given delta is worse then an existing solution.
259 tooHighDelta :: Maybe Solution -> Int -> Int -> Bool
260 tooHighDelta sol new_delta max_delta =
261 if new_delta > max_delta && max_delta >=0 then
266 Just (Solution old_delta _) -> old_delta <= new_delta
268 {-| Check if placement of instances still keeps the cluster N+1 compliant.
270 This is the workhorse of the allocation algorithm: given the
271 current node and instance maps, the list of instances to be
272 placed, and the current solution, this will return all possible
273 solution by recursing until all target instances are placed.
276 checkPlacement :: NodeList -- ^ The current node list
277 -> [Instance.Instance] -- ^ List of instances still to place
278 -> [Placement] -- ^ Partial solution until now
279 -> Int -- ^ The delta of the partial solution
280 -> Maybe Solution -- ^ The previous solution
281 -> Int -- ^ Abort if the we go above this delta
282 -> Maybe Solution -- ^ The new solution
283 checkPlacement nl victims current current_delta prev_sol max_delta =
284 let target = head victims
285 opdx = Instance.pnode target
286 osdx = Instance.snode target
288 have_tail = (length vtail) > 0
289 nodes = Container.elems nl
290 iidx = Instance.idx target
295 pri_idx = Node.idx pri
296 upri_delta = current_delta + nodeDelta pri_idx opdx osdx
297 new_pri = Node.addPri pri target
298 fail_delta1 = tooHighDelta accu_p upri_delta max_delta
300 if fail_delta1 || isNothing(new_pri) then accu_p
301 else let pri_nl = Container.add pri_idx (fromJust new_pri) nl in
305 sec_idx = Node.idx sec
306 upd_delta = upri_delta +
307 nodeDelta sec_idx opdx osdx
308 fail_delta2 = tooHighDelta accu upd_delta max_delta
309 new_sec = Node.addSec sec target pri_idx
311 if sec_idx == pri_idx || fail_delta2 ||
312 isNothing new_sec then accu
314 nx = Container.add sec_idx (fromJust new_sec) pri_nl
316 plc = (iidx, pri_idx, sec_idx, upd_cv)
320 checkPlacement nx vtail c2 upd_delta
323 Just (Solution upd_delta c2)
324 in compareSolutions accu result
329 applyMove :: NodeList -> Instance.Instance
330 -> IMove -> (Maybe NodeList, Instance.Instance, Int, Int)
331 applyMove nl inst Failover =
332 let old_pdx = Instance.pnode inst
333 old_sdx = Instance.snode inst
334 old_p = Container.find old_pdx nl
335 old_s = Container.find old_sdx nl
336 int_p = Node.removePri old_p inst
337 int_s = Node.removeSec old_s inst
338 new_p = Node.addPri int_s inst
339 new_s = Node.addSec int_p inst old_sdx
340 new_nl = if isNothing(new_p) || isNothing(new_s) then Nothing
341 else Just $ Container.addTwo old_pdx (fromJust new_s)
342 old_sdx (fromJust new_p) nl
343 in (new_nl, Instance.setBoth inst old_sdx old_pdx, old_sdx, old_pdx)
345 applyMove nl inst (ReplacePrimary new_pdx) =
346 let old_pdx = Instance.pnode inst
347 old_sdx = Instance.snode inst
348 old_p = Container.find old_pdx nl
349 old_s = Container.find old_sdx nl
350 tgt_n = Container.find new_pdx nl
351 int_p = Node.removePri old_p inst
352 int_s = Node.removeSec old_s inst
353 new_p = Node.addPri tgt_n inst
354 new_s = Node.addSec int_s inst new_pdx
355 new_nl = if isNothing(new_p) || isNothing(new_s) then Nothing
356 else Just $ Container.add new_pdx (fromJust new_p) $
357 Container.addTwo old_pdx int_p
358 old_sdx (fromJust new_s) nl
359 in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx)
361 applyMove nl inst (ReplaceSecondary new_sdx) =
362 let old_pdx = Instance.pnode inst
363 old_sdx = Instance.snode inst
364 old_s = Container.find old_sdx nl
365 tgt_n = Container.find new_sdx nl
366 int_s = Node.removeSec old_s inst
367 new_s = Node.addSec tgt_n inst old_pdx
368 new_nl = if isNothing(new_s) then Nothing
369 else Just $ Container.addTwo new_sdx (fromJust new_s)
371 in (new_nl, Instance.setSec inst new_sdx, old_pdx, new_sdx)
373 applyMove nl inst (ReplaceAndFailover new_pdx) =
374 let old_pdx = Instance.pnode inst
375 old_sdx = Instance.snode inst
376 old_p = Container.find old_pdx nl
377 old_s = Container.find old_sdx nl
378 tgt_n = Container.find new_pdx nl
379 int_p = Node.removePri old_p inst
380 int_s = Node.removeSec old_s inst
381 new_p = Node.addPri tgt_n inst
382 new_s = Node.addSec int_p inst new_pdx
383 new_nl = if isNothing(new_p) || isNothing(new_s) then Nothing
384 else Just $ Container.add new_pdx (fromJust new_p) $
385 Container.addTwo old_pdx (fromJust new_s)
387 in (new_nl, Instance.setBoth inst new_pdx old_pdx, new_pdx, old_pdx)
389 checkSingleStep :: Table -- ^ The original table
390 -> Instance.Instance -- ^ The instance to move
391 -> Table -- ^ The current best table
392 -> IMove -- ^ The move to apply
393 -> Table -- ^ The final best table
394 checkSingleStep ini_tbl target cur_tbl move =
396 Table ini_nl ini_il _ ini_plc = ini_tbl
397 (tmp_nl, new_inst, pri_idx, sec_idx) = applyMove ini_nl target move
399 if isNothing tmp_nl then cur_tbl
401 let tgt_idx = Instance.idx target
402 upd_nl = fromJust tmp_nl
403 upd_cvar = compCV upd_nl
404 upd_il = Container.add tgt_idx new_inst ini_il
405 upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):ini_plc
406 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
408 compareTables cur_tbl upd_tbl
410 checkInstanceMove :: [Int] -- Allowed target node indices
411 -> Table -- Original table
412 -> Instance.Instance -- Instance to move
413 -> Table -- Best new table for this instance
414 checkInstanceMove nodes_idx ini_tbl target =
416 opdx = Instance.pnode target
417 osdx = Instance.snode target
418 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
419 aft_failover = checkSingleStep ini_tbl target ini_tbl Failover
420 all_moves = concatMap (\idx -> [ReplacePrimary idx,
421 ReplaceSecondary idx,
422 ReplaceAndFailover idx]) nodes
424 -- iterate over the possible nodes for this instance
425 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
427 -- | Compute the best next move.
428 checkMove :: [Int] -- ^ Allowed target node indices
429 -> Table -- ^ The current solution
430 -> [Instance.Instance] -- ^ List of instances still to move
431 -> Table -- ^ The new solution
432 checkMove nodes_idx ini_tbl victims =
433 let Table _ _ _ ini_plc = ini_tbl
434 -- iterate over all instances, computing the best move
437 (\ step_tbl elem -> compareTables step_tbl $
438 checkInstanceMove nodes_idx ini_tbl elem)
440 Table _ _ _ best_plc = best_tbl
442 if length best_plc == length ini_plc then -- no advancement
447 {- | Auxiliary function for solution computation.
449 We write this in an explicit recursive fashion in order to control
450 early-abort in case we have met the min delta. We can't use foldr
451 instead of explicit recursion since we need the accumulator for the
455 advanceSolution :: [Maybe Removal] -- ^ The removal to process
456 -> Int -- ^ Minimum delta parameter
457 -> Int -- ^ Maximum delta parameter
458 -> Maybe Solution -- ^ Current best solution
459 -> Maybe Solution -- ^ New best solution
460 advanceSolution [] _ _ sol = sol
461 advanceSolution (Nothing:xs) m n sol = advanceSolution xs m n sol
462 advanceSolution ((Just (Removal nx removed)):xs) min_d max_d prev_sol =
463 let new_sol = checkPlacement nx removed [] 0 prev_sol max_d
464 new_delta = solutionDelta $! new_sol
466 if new_delta >= 0 && new_delta <= min_d then
469 advanceSolution xs min_d max_d new_sol
471 -- | Computes the placement solution.
472 solutionFromRemovals :: [Maybe Removal] -- ^ The list of (possible) removals
473 -> Int -- ^ Minimum delta parameter
474 -> Int -- ^ Maximum delta parameter
475 -> Maybe Solution -- ^ The best solution found
476 solutionFromRemovals removals min_delta max_delta =
477 advanceSolution removals min_delta max_delta Nothing
479 {- | Computes the solution at the given depth.
481 This is a wrapper over both computeRemovals and
482 solutionFromRemovals. In case we have no solution, we return Nothing.
485 computeSolution :: NodeList -- ^ The original node data
486 -> [Instance.Instance] -- ^ The list of /bad/ instances
487 -> Int -- ^ The /depth/ of removals
488 -> Int -- ^ Maximum number of removals to process
489 -> Int -- ^ Minimum delta parameter
490 -> Int -- ^ Maximum delta parameter
491 -> Maybe Solution -- ^ The best solution found (or Nothing)
492 computeSolution nl bad_instances depth max_removals min_delta max_delta =
494 removals = computeRemovals nl bad_instances depth
495 removals' = capRemovals removals max_removals
497 solutionFromRemovals removals' min_delta max_delta
499 -- Solution display functions (pure)
501 -- | Given the original and final nodes, computes the relocation description.
502 computeMoves :: String -- ^ The instance name
503 -> String -- ^ Original primary
504 -> String -- ^ Original secondary
505 -> String -- ^ New primary
506 -> String -- ^ New secondary
507 -> (String, [String])
508 -- ^ Tuple of moves and commands list; moves is containing
509 -- either @/f/@ for failover or @/r:name/@ for replace
510 -- secondary, while the command list holds gnt-instance
511 -- commands (without that prefix), e.g \"@failover instance1@\"
512 computeMoves i a b c d =
513 if c == a then {- Same primary -}
514 if d == b then {- Same sec??! -}
516 else {- Change of secondary -}
518 [printf "replace-disks -n %s %s" d i])
520 if c == b then {- Failover and ... -}
521 if d == a then {- that's all -}
522 ("f", [printf "failover %s" i])
525 [printf "failover %s" i,
526 printf "replace-disks -n %s %s" d i])
528 if d == a then {- ... and keep primary as secondary -}
530 [printf "replace-disks -n %s %s" c i,
531 printf "failover %s" i])
533 if d == b then {- ... keep same secondary -}
534 (printf "f r:%s f" c,
535 [printf "failover %s" i,
536 printf "replace-disks -n %s %s" c i,
537 printf "failover %s" i])
539 else {- Nothing in common -}
540 (printf "r:%s f r:%s" c d,
541 [printf "replace-disks -n %s %s" c i,
542 printf "failover %s" i,
543 printf "replace-disks -n %s %s" d i])
545 {-| Converts a placement to string format -}
546 printSolutionLine :: InstanceList
552 -> (String, [String])
553 printSolutionLine il ktn kti nmlen imlen plc =
555 pmlen = (2*nmlen + 1)
557 inst = Container.find i il
558 inam = fromJust $ lookup (Instance.idx inst) kti
559 npri = fromJust $ lookup p ktn
560 nsec = fromJust $ lookup s ktn
561 opri = fromJust $ lookup (Instance.pnode inst) ktn
562 osec = fromJust $ lookup (Instance.snode inst) ktn
563 (moves, cmds) = computeMoves inam opri osec npri nsec
564 ostr = (printf "%s:%s" opri osec)::String
565 nstr = (printf "%s:%s" npri nsec)::String
567 (printf " %-*s %-*s => %-*s %.8f a=%s"
568 imlen inam pmlen ostr
572 {-| Converts a solution to string format -}
573 printSolution :: InstanceList
577 -> ([String], [[String]])
578 printSolution il ktn kti sol =
580 mlen_fn = maximum . (map length) . snd . unzip
584 unzip $ map (printSolutionLine il ktn kti nmlen imlen) sol
586 -- | Print the node list.
587 printNodes :: [(Int, String)] -> NodeList -> String
589 let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
590 snl' = map (\ n -> ((fromJust $ lookup (Node.idx n) ktn), n)) snl
591 m_name = maximum . (map length) . fst . unzip $ snl'
592 helper = Node.list m_name
593 header = printf "%2s %-*s %5s %5s %5s %5s %5s %3s %3s %7s %7s"
594 "N1" m_name "Name" "t_mem" "f_mem" "r_mem"
596 "pri" "sec" "p_fmem" "p_fdsk"
597 in unlines $ (header:map (uncurry helper) snl')
599 -- | Compute the mem and disk covariance.
600 compDetailedCV :: NodeList -> (Double, Double, Double, Double)
603 nodes = Container.elems nl
604 mem_l = map Node.p_mem nodes
605 dsk_l = map Node.p_dsk nodes
606 mem_cv = varianceCoeff mem_l
607 dsk_cv = varianceCoeff dsk_l
608 n1_l = length $ filter Node.failN1 nodes
609 n1_score = (fromIntegral n1_l) / (fromIntegral $ length nodes)
610 res_l = map Node.p_rem nodes
611 res_cv = varianceCoeff res_l
612 in (mem_cv, dsk_cv, n1_score, res_cv)
614 -- | Compute the 'total' variance.
615 compCV :: NodeList -> Double
617 let (mem_cv, dsk_cv, n1_score, res_cv) = compDetailedCV nl
618 in mem_cv + dsk_cv + n1_score + res_cv
620 printStats :: NodeList -> String
622 let (mem_cv, dsk_cv, n1_score, res_cv) = compDetailedCV nl
623 in printf "f_mem=%.8f, r_mem=%.8f, f_dsk=%.8f, n1=%.3f"
624 mem_cv res_cv dsk_cv n1_score
626 -- Balancing functions
630 {- | Convert newline and delimiter-separated text.
632 This function converts a text in tabular format as generated by
633 @gnt-instance list@ and @gnt-node list@ to a list of objects using a
634 supplied conversion function.
637 loadTabular :: String -> ([String] -> (String, a))
638 -> (a -> Int -> a) -> ([(String, Int)], [(Int, a)])
639 loadTabular text_data convert_fn set_fn =
640 let lines_data = lines text_data
641 rows = map (sepSplit '|') lines_data
642 kerows = (map convert_fn rows)
643 idxrows = map (\ (idx, (k, v)) -> ((k, idx), (idx, set_fn v idx)))
647 -- | For each instance, add its index to its primary and secondary nodes
648 fixNodes :: [(Int, Node.Node)]
649 -> [(Int, Instance.Instance)]
650 -> [(Int, Node.Node)]
652 foldl' (\accu (idx, inst) ->
654 assocEqual = (\ (i, _) (j, _) -> i == j)
655 pdx = Instance.pnode inst
656 sdx = Instance.snode inst
657 pold = fromJust $ lookup pdx accu
658 sold = fromJust $ lookup sdx accu
659 pnew = Node.setPri pold idx
660 snew = Node.setSec sold idx
661 ac1 = deleteBy assocEqual (pdx, pold) accu
662 ac2 = deleteBy assocEqual (sdx, sold) ac1
663 ac3 = (pdx, pnew):(sdx, snew):ac2
666 -- | Compute the longest common suffix of a [(Int, String)] list that
667 -- | starts with a dot
668 longestDomain :: [(Int, String)] -> String
669 longestDomain [] = ""
670 longestDomain ((_,x):xs) =
672 onlyStrings = snd $ unzip xs
674 foldr (\ suffix accu -> if all (isSuffixOf suffix) onlyStrings
677 "" $ filter (isPrefixOf ".") (tails x)
679 -- | Remove tails from the (Int, String) lists
680 stripSuffix :: String -> [(Int, String)] -> [(Int, String)]
681 stripSuffix suffix lst =
682 let sflen = length suffix in
683 map (\ (key, name) -> (key, take ((length name) - sflen) name)) lst
685 {-| Initializer function that loads the data from a node and list file
686 and massages it into the correct format. -}
687 loadData :: String -- ^ Node data in text format
688 -> String -- ^ Instance data in text format
689 -> (Container.Container Node.Node,
690 Container.Container Instance.Instance,
691 String, [(Int, String)], [(Int, String)])
692 loadData ndata idata =
694 {- node file: name mem disk -}
695 (ktn, nl) = loadTabular ndata
696 (\ (i:jt:jf:kt:kf:[]) -> (i, Node.create jt jf kt kf))
698 {- instance file: name mem disk -}
699 (kti, il) = loadTabular idata
700 (\ (i:j:k:l:m:[]) -> (i,
702 (fromJust $ lookup l ktn)
703 (fromJust $ lookup m ktn)))
706 il3 = Container.fromAssocList il
707 nl3 = Container.fromAssocList
708 (map (\ (k, v) -> (k, Node.buildPeers v il3 (length nl2))) nl2)
711 common_suffix = longestDomain (xti ++ xtn)
712 stn = stripSuffix common_suffix xtn
713 sti = stripSuffix common_suffix xti
715 (nl3, il3, common_suffix, stn, sti)