1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the "Main" module.
17 -- * Generic functions
19 -- * First phase functions
21 -- * Second phase functions
26 -- * Balacing functions
30 -- * Loading functions
35 import Data.Maybe (isNothing, fromJust)
36 import Text.Printf (printf)
39 import qualified Container
40 import qualified Instance
44 type NodeList = Container.Container Node.Node
45 type InstanceList = Container.Container Instance.Instance
48 -- | The description of an instance placement.
49 type Placement = (Int, Int, Int)
51 {- | A cluster solution described as the solution delta and the list
55 data Solution = Solution Int [Placement]
56 deriving (Eq, Ord, Show)
58 -- | Returns the delta of a solution or -1 for Nothing
59 solutionDelta :: Maybe Solution -> Int
60 solutionDelta sol = case sol of
61 Just (Solution d _) -> d
65 data Removal = Removal NodeList [Instance.Instance]
67 -- | An instance move definition
70 | ReplaceSecondary Int
73 -- | The complete state for the balancing solution
74 data Table = Table NodeList InstanceList Score [Placement]
79 -- | Cap the removal list if needed.
80 capRemovals :: [a] -> Int -> [a]
81 capRemovals removals max_removals =
82 if max_removals > 0 then
83 take max_removals removals
87 -- | Check if the given node list fails the N+1 check.
88 verifyN1Check :: [Node.Node] -> Bool
89 verifyN1Check nl = any Node.failN1 nl
91 -- | Verifies the N+1 status and return the affected nodes.
92 verifyN1 :: [Node.Node] -> [Node.Node]
93 verifyN1 nl = filter Node.failN1 nl
95 {-| Add an instance and return the new node and instance maps. -}
96 addInstance :: NodeList -> Instance.Instance ->
97 Node.Node -> Node.Node -> Maybe NodeList
98 addInstance nl idata pri sec =
99 let pdx = Node.idx pri
102 pnode <- Node.addPri pri idata
103 snode <- Node.addSec sec idata pdx
104 new_nl <- return $ Container.addTwo sdx snode
108 -- | Remove an instance and return the new node and instance maps.
109 removeInstance :: NodeList -> Instance.Instance -> NodeList
110 removeInstance nl idata =
111 let pnode = Instance.pnode idata
112 snode = Instance.snode idata
113 pn = Container.find pnode nl
114 sn = Container.find snode nl
115 new_nl = Container.addTwo
116 pnode (Node.removePri pn idata)
117 snode (Node.removeSec sn idata) nl in
120 -- | Remove an instance and return the new node map.
121 removeInstances :: NodeList -> [Instance.Instance] -> NodeList
122 removeInstances = foldl' removeInstance
124 -- | Compute the total free disk and memory in the cluster.
125 totalResources :: Container.Container Node.Node -> (Int, Int)
128 (\ (mem, dsk) node -> (mem + (Node.f_mem node),
129 dsk + (Node.f_dsk node)))
130 (0, 0) (Container.elems nl)
132 {- | Compute a new version of a cluster given a solution.
134 This is not used for computing the solutions, but for applying a
135 (known-good) solution to the original cluster for final display.
137 It first removes the relocated instances after which it places them on
141 applySolution :: NodeList -> InstanceList -> [Placement] -> NodeList
142 applySolution nl il sol =
143 let odxes = map (\ (a, b, c) -> (Container.find a il,
144 Node.idx (Container.find b nl),
145 Node.idx (Container.find c nl))
147 idxes = (\ (x, _, _) -> x) (unzip3 odxes)
148 nc = removeInstances nl idxes
150 foldl' (\ nz (a, b, c) ->
151 let new_p = Container.find b nz
152 new_s = Container.find c nz in
153 fromJust (addInstance nz a new_p new_s)
157 -- First phase functions
159 {- | Given a list 1,2,3..n build a list of pairs [(1, [2..n]), (2,
163 genParts :: [a] -> Int -> [(a, [a])]
168 if length l < count then
171 (x, xs) : (genParts xs count)
173 -- | Generates combinations of count items from the names list.
174 genNames :: Int -> [b] -> [[b]]
175 genNames count1 names1 =
176 let aux_fn count names current =
181 (\ (x, xs) -> aux_fn (count - 1) xs (x:current))
182 (genParts names count)
184 aux_fn count1 names1 []
186 {- | Computes the pair of bad nodes and instances.
188 The bad node list is computed via a simple 'verifyN1' check, and the
189 bad instance list is the list of primary and secondary instances of
193 computeBadItems :: NodeList -> InstanceList ->
194 ([Node.Node], [Instance.Instance])
195 computeBadItems nl il =
196 let bad_nodes = verifyN1 $ Container.elems nl
197 bad_instances = map (\idx -> Container.find idx il) $
198 sort $ nub $ concat $
199 map (\ n -> (Node.slist n) ++ (Node.plist n)) bad_nodes
201 (bad_nodes, bad_instances)
204 {- | Checks if removal of instances results in N+1 pass.
206 Note: the check removal cannot optimize by scanning only the affected
207 nodes, since the cluster is known to be not healthy; only the check
208 placement can make this shortcut.
211 checkRemoval :: NodeList -> [Instance.Instance] -> Maybe Removal
212 checkRemoval nl victims =
213 let nx = removeInstances nl victims
214 failN1 = verifyN1Check (Container.elems nx)
219 Just $ Removal nx victims
222 -- | Computes the removals list for a given depth
223 computeRemovals :: Cluster.NodeList
224 -> [Instance.Instance]
226 -> [Maybe Cluster.Removal]
227 computeRemovals nl bad_instances depth =
228 map (checkRemoval nl) $ genNames depth bad_instances
230 -- Second phase functions
232 -- | Single-node relocation cost
233 nodeDelta :: Int -> Int -> Int -> Int
235 if i == p || i == s then
240 {-| Compute best solution.
242 This function compares two solutions, choosing the minimum valid
245 compareSolutions :: Maybe Solution -> Maybe Solution -> Maybe Solution
246 compareSolutions a b = case (a, b) of
251 -- | Compute best table. Note that the ordering of the arguments is important.
252 compareTables :: Table -> Table -> Table
253 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
254 if a_cv > b_cv then b else a
256 -- | Check if a given delta is worse then an existing solution.
257 tooHighDelta :: Maybe Solution -> Int -> Int -> Bool
258 tooHighDelta sol new_delta max_delta =
259 if new_delta > max_delta && max_delta >=0 then
264 Just (Solution old_delta _) -> old_delta <= new_delta
266 {-| Check if placement of instances still keeps the cluster N+1 compliant.
268 This is the workhorse of the allocation algorithm: given the
269 current node and instance maps, the list of instances to be
270 placed, and the current solution, this will return all possible
271 solution by recursing until all target instances are placed.
274 checkPlacement :: NodeList -- ^ The current node list
275 -> [Instance.Instance] -- ^ List of instances still to place
276 -> [Placement] -- ^ Partial solution until now
277 -> Int -- ^ The delta of the partial solution
278 -> Maybe Solution -- ^ The previous solution
279 -> Int -- ^ Abort if the we go above this delta
280 -> Maybe Solution -- ^ The new solution
281 checkPlacement nl victims current current_delta prev_sol max_delta =
282 let target = head victims
283 opdx = Instance.pnode target
284 osdx = Instance.snode target
286 have_tail = (length vtail) > 0
287 nodes = Container.elems nl
292 pri_idx = Node.idx pri
293 upri_delta = current_delta + nodeDelta pri_idx opdx osdx
294 new_pri = Node.addPri pri target
295 fail_delta1 = tooHighDelta accu_p upri_delta max_delta
297 if fail_delta1 || isNothing(new_pri) then accu_p
298 else let pri_nl = Container.add pri_idx (fromJust new_pri) nl in
302 sec_idx = Node.idx sec
303 upd_delta = upri_delta +
304 nodeDelta sec_idx opdx osdx
305 fail_delta2 = tooHighDelta accu upd_delta max_delta
306 new_sec = Node.addSec sec target pri_idx
308 if sec_idx == pri_idx || fail_delta2 ||
309 isNothing new_sec then accu
311 nx = Container.add sec_idx (fromJust new_sec) pri_nl
312 plc = (Instance.idx target, pri_idx, sec_idx)
316 checkPlacement nx vtail c2 upd_delta
319 Just (Solution upd_delta c2)
320 in compareSolutions accu result
325 applyMove :: NodeList -> Instance.Instance
326 -> IMove -> (Maybe NodeList, Instance.Instance, Int, Int)
327 applyMove nl inst Failover =
328 let old_pdx = Instance.pnode inst
329 old_sdx = Instance.snode inst
330 old_p = Container.find old_pdx nl
331 old_s = Container.find old_sdx nl
332 int_p = Node.removePri old_p inst
333 int_s = Node.removeSec old_s inst
334 new_p = Node.addPri int_s inst
335 new_s = Node.addSec int_p inst old_sdx
336 new_nl = if isNothing(new_p) || isNothing(new_s) then Nothing
337 else Just $ Container.addTwo old_pdx (fromJust new_s)
338 old_sdx (fromJust new_p) nl
339 in (new_nl, Instance.setBoth inst old_sdx old_pdx, old_sdx, old_pdx)
341 applyMove nl inst (ReplacePrimary new_pdx) =
342 let old_pdx = Instance.pnode inst
343 old_sdx = Instance.snode inst
344 old_p = Container.find old_pdx nl
345 old_s = Container.find old_sdx nl
346 tgt_n = Container.find new_pdx nl
347 int_p = Node.removePri old_p inst
348 int_s = Node.removeSec old_s inst
349 new_p = Node.addPri tgt_n inst
350 new_s = Node.addSec int_s inst new_pdx
351 new_nl = if isNothing(new_p) || isNothing(new_s) then Nothing
352 else Just $ Container.add new_pdx (fromJust new_p) $
353 Container.addTwo old_pdx int_p
354 old_sdx (fromJust new_s) nl
355 in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx)
357 applyMove nl inst (ReplaceSecondary new_sdx) =
358 let old_pdx = Instance.pnode inst
359 old_sdx = Instance.snode inst
360 old_s = Container.find old_sdx nl
361 tgt_n = Container.find new_sdx nl
362 int_s = Node.removeSec old_s inst
363 new_s = Node.addSec tgt_n inst old_pdx
364 new_nl = if isNothing(new_s) then Nothing
365 else Just $ Container.addTwo new_sdx (fromJust new_s)
367 in (new_nl, Instance.setSec inst new_sdx, old_pdx, new_sdx)
369 checkSingleStep :: Table -- ^ The original table
370 -> Instance.Instance -- ^ The instance to move
371 -> Table -- ^ The current best table
372 -> IMove -- ^ The move to apply
373 -> Table -- ^ The final best table
374 checkSingleStep ini_tbl target cur_tbl move =
376 Table ini_nl ini_il _ ini_plc = ini_tbl
377 (tmp_nl, new_inst, pri_idx, sec_idx) =
378 applyMove ini_nl target move
380 if isNothing tmp_nl then cur_tbl
382 let tgt_idx = Instance.idx target
383 upd_nl = fromJust tmp_nl
384 upd_cvar = compCV upd_nl
385 upd_il = Container.add tgt_idx new_inst ini_il
386 tmp_plc = filter (\ (t, _, _) -> t /= tgt_idx) ini_plc
387 upd_plc = (tgt_idx, pri_idx, sec_idx):tmp_plc
388 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
390 compareTables cur_tbl upd_tbl
392 checkInstanceMove :: [Int] -- Allowed target node indices
393 -> Table -- Original table
394 -> Instance.Instance -- Instance to move
395 -> Table -- Best new table for this instance
396 checkInstanceMove nodes_idx ini_tbl target =
398 opdx = Instance.pnode target
399 osdx = Instance.snode target
400 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
401 aft_failover = checkSingleStep ini_tbl target ini_tbl Failover
402 all_moves = concatMap (\idx -> [ReplacePrimary idx,
403 ReplaceSecondary idx]) nodes
405 -- iterate over the possible nodes for this instance
406 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
408 -- | Compute the best next move.
409 checkMove :: [Int] -- ^ Allowed target node indices
410 -> Table -- ^ The current solution
411 -> [Instance.Instance] -- ^ List of instances still to move
412 -> Table -- ^ The new solution
413 checkMove nodes_idx ini_tbl victims =
414 let Table _ _ _ ini_plc = ini_tbl
415 -- iterate over all instances, computing the best move
418 (\ step_tbl elem -> compareTables step_tbl $
419 checkInstanceMove nodes_idx ini_tbl elem)
422 Table _ _ _ best_plc = best_tbl
423 (target, _, _) = head best_plc
424 -- remove the last placed instance from the victims list, it will
425 -- get another chance the next round
426 vtail = filter (\inst -> Instance.idx inst /= target) victims
428 if length best_plc == length ini_plc then -- no advancement
431 if null vtail then best_tbl
432 else checkMove nodes_idx best_tbl vtail
434 {- | Auxiliary function for solution computation.
436 We write this in an explicit recursive fashion in order to control
437 early-abort in case we have met the min delta. We can't use foldr
438 instead of explicit recursion since we need the accumulator for the
442 advanceSolution :: [Maybe Removal] -- ^ The removal to process
443 -> Int -- ^ Minimum delta parameter
444 -> Int -- ^ Maximum delta parameter
445 -> Maybe Solution -- ^ Current best solution
446 -> Maybe Solution -- ^ New best solution
447 advanceSolution [] _ _ sol = sol
448 advanceSolution (Nothing:xs) m n sol = advanceSolution xs m n sol
449 advanceSolution ((Just (Removal nx removed)):xs) min_d max_d prev_sol =
450 let new_sol = checkPlacement nx removed [] 0 prev_sol max_d
451 new_delta = solutionDelta $! new_sol
453 if new_delta >= 0 && new_delta <= min_d then
456 advanceSolution xs min_d max_d new_sol
458 -- | Computes the placement solution.
459 solutionFromRemovals :: [Maybe Removal] -- ^ The list of (possible) removals
460 -> Int -- ^ Minimum delta parameter
461 -> Int -- ^ Maximum delta parameter
462 -> Maybe Solution -- ^ The best solution found
463 solutionFromRemovals removals min_delta max_delta =
464 advanceSolution removals min_delta max_delta Nothing
466 {- | Computes the solution at the given depth.
468 This is a wrapper over both computeRemovals and
469 solutionFromRemovals. In case we have no solution, we return Nothing.
472 computeSolution :: NodeList -- ^ The original node data
473 -> [Instance.Instance] -- ^ The list of /bad/ instances
474 -> Int -- ^ The /depth/ of removals
475 -> Int -- ^ Maximum number of removals to process
476 -> Int -- ^ Minimum delta parameter
477 -> Int -- ^ Maximum delta parameter
478 -> Maybe Solution -- ^ The best solution found (or Nothing)
479 computeSolution nl bad_instances depth max_removals min_delta max_delta =
481 removals = computeRemovals nl bad_instances depth
482 removals' = capRemovals removals max_removals
484 solutionFromRemovals removals' min_delta max_delta
486 -- Solution display functions (pure)
488 -- | Given the original and final nodes, computes the relocation description.
489 computeMoves :: String -- ^ The instance name
490 -> String -- ^ Original primary
491 -> String -- ^ Original secondary
492 -> String -- ^ New primary
493 -> String -- ^ New secondary
494 -> (String, [String])
495 -- ^ Tuple of moves and commands list; moves is containing
496 -- either @/f/@ for failover or @/r:name/@ for replace
497 -- secondary, while the command list holds gnt-instance
498 -- commands (without that prefix), e.g \"@failover instance1@\"
499 computeMoves i a b c d =
500 if c == a then {- Same primary -}
501 if d == b then {- Same sec??! -}
503 else {- Change of secondary -}
505 [printf "replace-disks -n %s %s" d i])
507 if c == b then {- Failover and ... -}
508 if d == a then {- that's all -}
509 ("f", [printf "failover %s" i])
512 [printf "failover %s" i,
513 printf "replace-disks -n %s %s" d i])
515 if d == a then {- ... and keep primary as secondary -}
517 [printf "replace-disks -n %s %s" c i,
518 printf "failover %s" i])
520 if d == b then {- ... keep same secondary -}
521 (printf "f r:%s f" c,
522 [printf "failover %s" i,
523 printf "replace-disks -n %s %s" c i,
524 printf "failover %s" i])
526 else {- Nothing in common -}
527 (printf "r:%s f r:%s" c d,
528 [printf "replace-disks -n %s %s" c i,
529 printf "failover %s" i,
530 printf "replace-disks -n %s %s" d i])
532 {-| Converts a solution to string format -}
533 printSolution :: InstanceList
537 -> ([String], [[String]])
538 printSolution il ktn kti sol =
541 let inst = Container.find i il
542 inam = fromJust $ lookup (Instance.idx inst) kti
543 npri = fromJust $ lookup p ktn
544 nsec = fromJust $ lookup s ktn
545 opri = fromJust $ lookup (Instance.pnode inst) ktn
546 osec = fromJust $ lookup (Instance.snode inst) ktn
547 (moves, cmds) = computeMoves inam opri osec npri nsec
550 (printf " I: %s\to: %s+>%s\tn: %s+>%s\ta: %s"
551 inam opri osec npri nsec moves,
555 -- | Print the node list.
556 printNodes :: [(Int, String)] -> NodeList -> String
558 let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
559 snl' = map (\ n -> ((fromJust $ lookup (Node.idx n) ktn), n)) snl
560 in unlines $ map (uncurry Node.list) snl'
562 -- | Compute the mem and disk covariance.
563 compDetailedCV :: NodeList -> (Double, Double)
566 nodes = Container.elems nl
567 mem_l = map Node.p_mem nodes
568 dsk_l = map Node.p_dsk nodes
569 mem_cv = varianceCoeff mem_l
570 dsk_cv = varianceCoeff dsk_l
573 -- | Compute the 'total' variance.
574 compCV :: NodeList -> Double
576 let (mem_cv, dsk_cv) = compDetailedCV nl
579 printStats :: NodeList -> String
581 let (mem_cv, dsk_cv) = compDetailedCV nl
582 in printf "mem=%.8f, dsk=%.8f" mem_cv dsk_cv
584 -- Balancing functions
588 {- | Convert newline and delimiter-separated text.
590 This function converts a text in tabular format as generated by
591 @gnt-instance list@ and @gnt-node list@ to a list of objects using a
592 supplied conversion function.
595 loadTabular :: String -> ([String] -> (String, a))
596 -> (a -> Int -> a) -> ([(String, Int)], [(Int, a)])
597 loadTabular text_data convert_fn set_fn =
598 let lines_data = lines text_data
599 rows = map (sepSplit '|') lines_data
600 kerows = (map convert_fn rows)
601 idxrows = map (\ (idx, (k, v)) -> ((k, idx), (idx, set_fn v idx)))
605 -- | For each instance, add its index to its primary and secondary nodes
606 fixNodes :: [(Int, Node.Node)]
607 -> [(Int, Instance.Instance)]
608 -> [(Int, Node.Node)]
610 foldl' (\accu (idx, inst) ->
612 assocEqual = (\ (i, _) (j, _) -> i == j)
613 pdx = Instance.pnode inst
614 sdx = Instance.snode inst
615 pold = fromJust $ lookup pdx accu
616 sold = fromJust $ lookup sdx accu
617 pnew = Node.setPri pold idx
618 snew = Node.setSec sold idx
619 ac1 = deleteBy assocEqual (pdx, pold) accu
620 ac2 = deleteBy assocEqual (sdx, sold) ac1
621 ac3 = (pdx, pnew):(sdx, snew):ac2
624 -- | Compute the longest common suffix of a [(Int, String)] list that
625 -- | starts with a dot
626 longestDomain :: [(Int, String)] -> String
627 longestDomain [] = ""
628 longestDomain ((_,x):xs) =
630 onlyStrings = snd $ unzip xs
632 foldr (\ suffix accu -> if all (isSuffixOf suffix) onlyStrings
635 "" $ filter (isPrefixOf ".") (tails x)
637 -- | Remove tails from the (Int, String) lists
638 stripSuffix :: String -> [(Int, String)] -> [(Int, String)]
639 stripSuffix suffix lst =
640 let sflen = length suffix in
641 map (\ (key, name) -> (key, take ((length name) - sflen) name)) lst
643 {-| Initializer function that loads the data from a node and list file
644 and massages it into the correct format. -}
645 loadData :: String -- ^ Node data in text format
646 -> String -- ^ Instance data in text format
647 -> (Container.Container Node.Node,
648 Container.Container Instance.Instance,
649 String, [(Int, String)], [(Int, String)])
650 loadData ndata idata =
652 {- node file: name mem disk -}
653 (ktn, nl) = loadTabular ndata
654 (\ (i:jt:jf:kt:kf:[]) -> (i, Node.create jt jf kt kf))
656 {- instance file: name mem disk -}
657 (kti, il) = loadTabular idata
658 (\ (i:j:k:l:m:[]) -> (i,
660 (fromJust $ lookup l ktn)
661 (fromJust $ lookup m ktn)))
664 il3 = Container.fromAssocList il
665 nl3 = Container.fromAssocList
666 (map (\ (k, v) -> (k, Node.buildPeers v il3 (length nl2))) nl2)
669 common_suffix = longestDomain (xti ++ xtn)
670 stn = stripSuffix common_suffix xtn
671 sti = stripSuffix common_suffix xti
673 (nl3, il3, common_suffix, stn, sti)