1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the "Main" module.
17 -- * Generic functions
19 -- * First phase functions
21 -- * Second phase functions
26 -- * Balacing functions
30 -- * Loading functions
35 import Data.Maybe (isNothing, fromJust)
36 import Text.Printf (printf)
39 import qualified Container
40 import qualified Instance
44 type NodeList = Container.Container Node.Node
45 type InstanceList = Container.Container Instance.Instance
48 -- | The description of an instance placement.
49 type Placement = (Int, Int, Int, Score)
51 {- | A cluster solution described as the solution delta and the list
55 data Solution = Solution Int [Placement]
56 deriving (Eq, Ord, Show)
58 -- | Returns the delta of a solution or -1 for Nothing
59 solutionDelta :: Maybe Solution -> Int
60 solutionDelta sol = case sol of
61 Just (Solution d _) -> d
65 data Removal = Removal NodeList [Instance.Instance]
67 -- | An instance move definition
70 | ReplaceSecondary Int
73 -- | The complete state for the balancing solution
74 data Table = Table NodeList InstanceList Score [Placement]
79 -- | Cap the removal list if needed.
80 capRemovals :: [a] -> Int -> [a]
81 capRemovals removals max_removals =
82 if max_removals > 0 then
83 take max_removals removals
87 -- | Check if the given node list fails the N+1 check.
88 verifyN1Check :: [Node.Node] -> Bool
89 verifyN1Check nl = any Node.failN1 nl
91 -- | Verifies the N+1 status and return the affected nodes.
92 verifyN1 :: [Node.Node] -> [Node.Node]
93 verifyN1 nl = filter Node.failN1 nl
95 {-| Add an instance and return the new node and instance maps. -}
96 addInstance :: NodeList -> Instance.Instance ->
97 Node.Node -> Node.Node -> Maybe NodeList
98 addInstance nl idata pri sec =
99 let pdx = Node.idx pri
102 pnode <- Node.addPri pri idata
103 snode <- Node.addSec sec idata pdx
104 new_nl <- return $ Container.addTwo sdx snode
108 -- | Remove an instance and return the new node and instance maps.
109 removeInstance :: NodeList -> Instance.Instance -> NodeList
110 removeInstance nl idata =
111 let pnode = Instance.pnode idata
112 snode = Instance.snode idata
113 pn = Container.find pnode nl
114 sn = Container.find snode nl
115 new_nl = Container.addTwo
116 pnode (Node.removePri pn idata)
117 snode (Node.removeSec sn idata) nl in
120 -- | Remove an instance and return the new node map.
121 removeInstances :: NodeList -> [Instance.Instance] -> NodeList
122 removeInstances = foldl' removeInstance
124 -- | Compute the total free disk and memory in the cluster.
125 totalResources :: Container.Container Node.Node -> (Int, Int)
128 (\ (mem, dsk) node -> (mem + (Node.f_mem node),
129 dsk + (Node.f_dsk node)))
130 (0, 0) (Container.elems nl)
132 {- | Compute a new version of a cluster given a solution.
134 This is not used for computing the solutions, but for applying a
135 (known-good) solution to the original cluster for final display.
137 It first removes the relocated instances after which it places them on
141 applySolution :: NodeList -> InstanceList -> [Placement] -> NodeList
142 applySolution nl il sol =
143 let odxes = map (\ (a, b, c, _) -> (Container.find a il,
144 Node.idx (Container.find b nl),
145 Node.idx (Container.find c nl))
147 idxes = (\ (x, _, _) -> x) (unzip3 odxes)
148 nc = removeInstances nl idxes
150 foldl' (\ nz (a, b, c) ->
151 let new_p = Container.find b nz
152 new_s = Container.find c nz in
153 fromJust (addInstance nz a new_p new_s)
157 -- First phase functions
159 {- | Given a list 1,2,3..n build a list of pairs [(1, [2..n]), (2,
163 genParts :: [a] -> Int -> [(a, [a])]
168 if length l < count then
171 (x, xs) : (genParts xs count)
173 -- | Generates combinations of count items from the names list.
174 genNames :: Int -> [b] -> [[b]]
175 genNames count1 names1 =
176 let aux_fn count names current =
181 (\ (x, xs) -> aux_fn (count - 1) xs (x:current))
182 (genParts names count)
184 aux_fn count1 names1 []
186 {- | Computes the pair of bad nodes and instances.
188 The bad node list is computed via a simple 'verifyN1' check, and the
189 bad instance list is the list of primary and secondary instances of
193 computeBadItems :: NodeList -> InstanceList ->
194 ([Node.Node], [Instance.Instance])
195 computeBadItems nl il =
196 let bad_nodes = verifyN1 $ Container.elems nl
197 bad_instances = map (\idx -> Container.find idx il) $
198 sort $ nub $ concat $
199 map (\ n -> (Node.slist n) ++ (Node.plist n)) bad_nodes
201 (bad_nodes, bad_instances)
204 {- | Checks if removal of instances results in N+1 pass.
206 Note: the check removal cannot optimize by scanning only the affected
207 nodes, since the cluster is known to be not healthy; only the check
208 placement can make this shortcut.
211 checkRemoval :: NodeList -> [Instance.Instance] -> Maybe Removal
212 checkRemoval nl victims =
213 let nx = removeInstances nl victims
214 failN1 = verifyN1Check (Container.elems nx)
219 Just $ Removal nx victims
222 -- | Computes the removals list for a given depth
223 computeRemovals :: Cluster.NodeList
224 -> [Instance.Instance]
226 -> [Maybe Cluster.Removal]
227 computeRemovals nl bad_instances depth =
228 map (checkRemoval nl) $ genNames depth bad_instances
230 -- Second phase functions
232 -- | Single-node relocation cost
233 nodeDelta :: Int -> Int -> Int -> Int
235 if i == p || i == s then
240 {-| Compute best solution.
242 This function compares two solutions, choosing the minimum valid
245 compareSolutions :: Maybe Solution -> Maybe Solution -> Maybe Solution
246 compareSolutions a b = case (a, b) of
251 -- | Compute best table. Note that the ordering of the arguments is important.
252 compareTables :: Table -> Table -> Table
253 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
254 if a_cv > b_cv then b else a
256 -- | Check if a given delta is worse then an existing solution.
257 tooHighDelta :: Maybe Solution -> Int -> Int -> Bool
258 tooHighDelta sol new_delta max_delta =
259 if new_delta > max_delta && max_delta >=0 then
264 Just (Solution old_delta _) -> old_delta <= new_delta
266 {-| Check if placement of instances still keeps the cluster N+1 compliant.
268 This is the workhorse of the allocation algorithm: given the
269 current node and instance maps, the list of instances to be
270 placed, and the current solution, this will return all possible
271 solution by recursing until all target instances are placed.
274 checkPlacement :: NodeList -- ^ The current node list
275 -> [Instance.Instance] -- ^ List of instances still to place
276 -> [Placement] -- ^ Partial solution until now
277 -> Int -- ^ The delta of the partial solution
278 -> Maybe Solution -- ^ The previous solution
279 -> Int -- ^ Abort if the we go above this delta
280 -> Maybe Solution -- ^ The new solution
281 checkPlacement nl victims current current_delta prev_sol max_delta =
282 let target = head victims
283 opdx = Instance.pnode target
284 osdx = Instance.snode target
286 have_tail = (length vtail) > 0
287 nodes = Container.elems nl
288 iidx = Instance.idx target
293 pri_idx = Node.idx pri
294 upri_delta = current_delta + nodeDelta pri_idx opdx osdx
295 new_pri = Node.addPri pri target
296 fail_delta1 = tooHighDelta accu_p upri_delta max_delta
298 if fail_delta1 || isNothing(new_pri) then accu_p
299 else let pri_nl = Container.add pri_idx (fromJust new_pri) nl in
303 sec_idx = Node.idx sec
304 upd_delta = upri_delta +
305 nodeDelta sec_idx opdx osdx
306 fail_delta2 = tooHighDelta accu upd_delta max_delta
307 new_sec = Node.addSec sec target pri_idx
309 if sec_idx == pri_idx || fail_delta2 ||
310 isNothing new_sec then accu
312 nx = Container.add sec_idx (fromJust new_sec) pri_nl
314 plc = (iidx, pri_idx, sec_idx, upd_cv)
318 checkPlacement nx vtail c2 upd_delta
321 Just (Solution upd_delta c2)
322 in compareSolutions accu result
327 applyMove :: NodeList -> Instance.Instance
328 -> IMove -> (Maybe NodeList, Instance.Instance, Int, Int)
329 applyMove nl inst Failover =
330 let old_pdx = Instance.pnode inst
331 old_sdx = Instance.snode inst
332 old_p = Container.find old_pdx nl
333 old_s = Container.find old_sdx nl
334 int_p = Node.removePri old_p inst
335 int_s = Node.removeSec old_s inst
336 new_p = Node.addPri int_s inst
337 new_s = Node.addSec int_p inst old_sdx
338 new_nl = if isNothing(new_p) || isNothing(new_s) then Nothing
339 else Just $ Container.addTwo old_pdx (fromJust new_s)
340 old_sdx (fromJust new_p) nl
341 in (new_nl, Instance.setBoth inst old_sdx old_pdx, old_sdx, old_pdx)
343 applyMove nl inst (ReplacePrimary new_pdx) =
344 let old_pdx = Instance.pnode inst
345 old_sdx = Instance.snode inst
346 old_p = Container.find old_pdx nl
347 old_s = Container.find old_sdx nl
348 tgt_n = Container.find new_pdx nl
349 int_p = Node.removePri old_p inst
350 int_s = Node.removeSec old_s inst
351 new_p = Node.addPri tgt_n inst
352 new_s = Node.addSec int_s inst new_pdx
353 new_nl = if isNothing(new_p) || isNothing(new_s) then Nothing
354 else Just $ Container.add new_pdx (fromJust new_p) $
355 Container.addTwo old_pdx int_p
356 old_sdx (fromJust new_s) nl
357 in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx)
359 applyMove nl inst (ReplaceSecondary new_sdx) =
360 let old_pdx = Instance.pnode inst
361 old_sdx = Instance.snode inst
362 old_s = Container.find old_sdx nl
363 tgt_n = Container.find new_sdx nl
364 int_s = Node.removeSec old_s inst
365 new_s = Node.addSec tgt_n inst old_pdx
366 new_nl = if isNothing(new_s) then Nothing
367 else Just $ Container.addTwo new_sdx (fromJust new_s)
369 in (new_nl, Instance.setSec inst new_sdx, old_pdx, new_sdx)
371 checkSingleStep :: Table -- ^ The original table
372 -> Instance.Instance -- ^ The instance to move
373 -> Table -- ^ The current best table
374 -> IMove -- ^ The move to apply
375 -> Table -- ^ The final best table
376 checkSingleStep ini_tbl target cur_tbl move =
378 Table ini_nl ini_il _ ini_plc = ini_tbl
379 (tmp_nl, new_inst, pri_idx, sec_idx) =
380 applyMove ini_nl target move
382 if isNothing tmp_nl then cur_tbl
384 let tgt_idx = Instance.idx target
385 upd_nl = fromJust tmp_nl
386 upd_cvar = compCV upd_nl
387 upd_il = Container.add tgt_idx new_inst ini_il
388 tmp_plc = filter (\ (t, _, _, _) -> t /= tgt_idx) ini_plc
389 upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):tmp_plc
390 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
392 compareTables cur_tbl upd_tbl
394 checkInstanceMove :: [Int] -- Allowed target node indices
395 -> Table -- Original table
396 -> Instance.Instance -- Instance to move
397 -> Table -- Best new table for this instance
398 checkInstanceMove nodes_idx ini_tbl target =
400 opdx = Instance.pnode target
401 osdx = Instance.snode target
402 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
403 aft_failover = checkSingleStep ini_tbl target ini_tbl Failover
404 all_moves = concatMap (\idx -> [ReplacePrimary idx,
405 ReplaceSecondary idx]) nodes
407 -- iterate over the possible nodes for this instance
408 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
410 -- | Compute the best next move.
411 checkMove :: [Int] -- ^ Allowed target node indices
412 -> Table -- ^ The current solution
413 -> [Instance.Instance] -- ^ List of instances still to move
414 -> Table -- ^ The new solution
415 checkMove nodes_idx ini_tbl victims =
416 let Table _ _ _ ini_plc = ini_tbl
417 -- iterate over all instances, computing the best move
420 (\ step_tbl elem -> compareTables step_tbl $
421 checkInstanceMove nodes_idx ini_tbl elem)
424 Table _ _ _ best_plc = best_tbl
425 (target, _, _, _) = head best_plc
426 -- remove the last placed instance from the victims list, it will
427 -- get another chance the next round
428 vtail = filter (\inst -> Instance.idx inst /= target) victims
430 if length best_plc == length ini_plc then -- no advancement
433 if null vtail then best_tbl
434 else checkMove nodes_idx best_tbl vtail
436 {- | Auxiliary function for solution computation.
438 We write this in an explicit recursive fashion in order to control
439 early-abort in case we have met the min delta. We can't use foldr
440 instead of explicit recursion since we need the accumulator for the
444 advanceSolution :: [Maybe Removal] -- ^ The removal to process
445 -> Int -- ^ Minimum delta parameter
446 -> Int -- ^ Maximum delta parameter
447 -> Maybe Solution -- ^ Current best solution
448 -> Maybe Solution -- ^ New best solution
449 advanceSolution [] _ _ sol = sol
450 advanceSolution (Nothing:xs) m n sol = advanceSolution xs m n sol
451 advanceSolution ((Just (Removal nx removed)):xs) min_d max_d prev_sol =
452 let new_sol = checkPlacement nx removed [] 0 prev_sol max_d
453 new_delta = solutionDelta $! new_sol
455 if new_delta >= 0 && new_delta <= min_d then
458 advanceSolution xs min_d max_d new_sol
460 -- | Computes the placement solution.
461 solutionFromRemovals :: [Maybe Removal] -- ^ The list of (possible) removals
462 -> Int -- ^ Minimum delta parameter
463 -> Int -- ^ Maximum delta parameter
464 -> Maybe Solution -- ^ The best solution found
465 solutionFromRemovals removals min_delta max_delta =
466 advanceSolution removals min_delta max_delta Nothing
468 {- | Computes the solution at the given depth.
470 This is a wrapper over both computeRemovals and
471 solutionFromRemovals. In case we have no solution, we return Nothing.
474 computeSolution :: NodeList -- ^ The original node data
475 -> [Instance.Instance] -- ^ The list of /bad/ instances
476 -> Int -- ^ The /depth/ of removals
477 -> Int -- ^ Maximum number of removals to process
478 -> Int -- ^ Minimum delta parameter
479 -> Int -- ^ Maximum delta parameter
480 -> Maybe Solution -- ^ The best solution found (or Nothing)
481 computeSolution nl bad_instances depth max_removals min_delta max_delta =
483 removals = computeRemovals nl bad_instances depth
484 removals' = capRemovals removals max_removals
486 solutionFromRemovals removals' min_delta max_delta
488 -- Solution display functions (pure)
490 -- | Given the original and final nodes, computes the relocation description.
491 computeMoves :: String -- ^ The instance name
492 -> String -- ^ Original primary
493 -> String -- ^ Original secondary
494 -> String -- ^ New primary
495 -> String -- ^ New secondary
496 -> (String, [String])
497 -- ^ Tuple of moves and commands list; moves is containing
498 -- either @/f/@ for failover or @/r:name/@ for replace
499 -- secondary, while the command list holds gnt-instance
500 -- commands (without that prefix), e.g \"@failover instance1@\"
501 computeMoves i a b c d =
502 if c == a then {- Same primary -}
503 if d == b then {- Same sec??! -}
505 else {- Change of secondary -}
507 [printf "replace-disks -n %s %s" d i])
509 if c == b then {- Failover and ... -}
510 if d == a then {- that's all -}
511 ("f", [printf "failover %s" i])
514 [printf "failover %s" i,
515 printf "replace-disks -n %s %s" d i])
517 if d == a then {- ... and keep primary as secondary -}
519 [printf "replace-disks -n %s %s" c i,
520 printf "failover %s" i])
522 if d == b then {- ... keep same secondary -}
523 (printf "f r:%s f" c,
524 [printf "failover %s" i,
525 printf "replace-disks -n %s %s" c i,
526 printf "failover %s" i])
528 else {- Nothing in common -}
529 (printf "r:%s f r:%s" c d,
530 [printf "replace-disks -n %s %s" c i,
531 printf "failover %s" i,
532 printf "replace-disks -n %s %s" d i])
534 {-| Converts a solution to string format -}
535 printSolution :: InstanceList
539 -> ([String], [[String]])
540 printSolution il ktn kti sol =
542 mlen_fn = maximum . (map length) . snd . unzip
545 pmlen = (2*nmlen + 1)
549 let inst = Container.find i il
550 inam = fromJust $ lookup (Instance.idx inst) kti
551 npri = fromJust $ lookup p ktn
552 nsec = fromJust $ lookup s ktn
553 opri = fromJust $ lookup (Instance.pnode inst) ktn
554 osec = fromJust $ lookup (Instance.snode inst) ktn
555 (moves, cmds) = computeMoves inam opri osec npri nsec
556 ostr = (printf "%s:%s" opri osec)::String
557 nstr = (printf "%s:%s" npri nsec)::String
559 (printf " %-*s %-*s => %-*s %.8f a=%s"
560 imlen inam pmlen ostr
565 -- | Print the node list.
566 printNodes :: [(Int, String)] -> NodeList -> String
568 let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
569 snl' = map (\ n -> ((fromJust $ lookup (Node.idx n) ktn), n)) snl
570 m_name = maximum . (map length) . fst . unzip $ snl'
571 helper = Node.list m_name
572 in unlines $ map (uncurry helper) snl'
574 -- | Compute the mem and disk covariance.
575 compDetailedCV :: NodeList -> (Double, Double)
578 nodes = Container.elems nl
579 mem_l = map Node.p_mem nodes
580 dsk_l = map Node.p_dsk nodes
581 mem_cv = varianceCoeff mem_l
582 dsk_cv = varianceCoeff dsk_l
585 -- | Compute the 'total' variance.
586 compCV :: NodeList -> Double
588 let (mem_cv, dsk_cv) = compDetailedCV nl
591 printStats :: NodeList -> String
593 let (mem_cv, dsk_cv) = compDetailedCV nl
594 in printf "mem=%.8f, dsk=%.8f" mem_cv dsk_cv
596 -- Balancing functions
600 {- | Convert newline and delimiter-separated text.
602 This function converts a text in tabular format as generated by
603 @gnt-instance list@ and @gnt-node list@ to a list of objects using a
604 supplied conversion function.
607 loadTabular :: String -> ([String] -> (String, a))
608 -> (a -> Int -> a) -> ([(String, Int)], [(Int, a)])
609 loadTabular text_data convert_fn set_fn =
610 let lines_data = lines text_data
611 rows = map (sepSplit '|') lines_data
612 kerows = (map convert_fn rows)
613 idxrows = map (\ (idx, (k, v)) -> ((k, idx), (idx, set_fn v idx)))
617 -- | For each instance, add its index to its primary and secondary nodes
618 fixNodes :: [(Int, Node.Node)]
619 -> [(Int, Instance.Instance)]
620 -> [(Int, Node.Node)]
622 foldl' (\accu (idx, inst) ->
624 assocEqual = (\ (i, _) (j, _) -> i == j)
625 pdx = Instance.pnode inst
626 sdx = Instance.snode inst
627 pold = fromJust $ lookup pdx accu
628 sold = fromJust $ lookup sdx accu
629 pnew = Node.setPri pold idx
630 snew = Node.setSec sold idx
631 ac1 = deleteBy assocEqual (pdx, pold) accu
632 ac2 = deleteBy assocEqual (sdx, sold) ac1
633 ac3 = (pdx, pnew):(sdx, snew):ac2
636 -- | Compute the longest common suffix of a [(Int, String)] list that
637 -- | starts with a dot
638 longestDomain :: [(Int, String)] -> String
639 longestDomain [] = ""
640 longestDomain ((_,x):xs) =
642 onlyStrings = snd $ unzip xs
644 foldr (\ suffix accu -> if all (isSuffixOf suffix) onlyStrings
647 "" $ filter (isPrefixOf ".") (tails x)
649 -- | Remove tails from the (Int, String) lists
650 stripSuffix :: String -> [(Int, String)] -> [(Int, String)]
651 stripSuffix suffix lst =
652 let sflen = length suffix in
653 map (\ (key, name) -> (key, take ((length name) - sflen) name)) lst
655 {-| Initializer function that loads the data from a node and list file
656 and massages it into the correct format. -}
657 loadData :: String -- ^ Node data in text format
658 -> String -- ^ Instance data in text format
659 -> (Container.Container Node.Node,
660 Container.Container Instance.Instance,
661 String, [(Int, String)], [(Int, String)])
662 loadData ndata idata =
664 {- node file: name mem disk -}
665 (ktn, nl) = loadTabular ndata
666 (\ (i:jt:jf:kt:kf:[]) -> (i, Node.create jt jf kt kf))
668 {- instance file: name mem disk -}
669 (kti, il) = loadTabular idata
670 (\ (i:j:k:l:m:[]) -> (i,
672 (fromJust $ lookup l ktn)
673 (fromJust $ lookup m ktn)))
676 il3 = Container.fromAssocList il
677 nl3 = Container.fromAssocList
678 (map (\ (k, v) -> (k, Node.buildPeers v il3 (length nl2))) nl2)
681 common_suffix = longestDomain (xti ++ xtn)
682 stn = stripSuffix common_suffix xtn
683 sti = stripSuffix common_suffix xti
685 (nl3, il3, common_suffix, stn, sti)