1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the "Main" module.
17 -- * Generic functions
19 -- * First phase functions
21 -- * Second phase functions
26 -- * Balacing functions
30 -- * Loading functions
35 import Data.Maybe (isNothing, fromJust)
36 import Text.Printf (printf)
39 import qualified Container
40 import qualified Instance
44 type NodeList = Container.Container Node.Node
45 type InstanceList = Container.Container Instance.Instance
48 -- | The description of an instance placement.
49 type Placement = (Int, Int, Int, Score)
51 {- | A cluster solution described as the solution delta and the list
55 data Solution = Solution Int [Placement]
56 deriving (Eq, Ord, Show)
58 -- | Returns the delta of a solution or -1 for Nothing
59 solutionDelta :: Maybe Solution -> Int
60 solutionDelta sol = case sol of
61 Just (Solution d _) -> d
65 data Removal = Removal NodeList [Instance.Instance]
67 -- | An instance move definition
70 | ReplaceSecondary Int
73 -- | The complete state for the balancing solution
74 data Table = Table NodeList InstanceList Score [Placement]
79 -- | Cap the removal list if needed.
80 capRemovals :: [a] -> Int -> [a]
81 capRemovals removals max_removals =
82 if max_removals > 0 then
83 take max_removals removals
87 -- | Check if the given node list fails the N+1 check.
88 verifyN1Check :: [Node.Node] -> Bool
89 verifyN1Check nl = any Node.failN1 nl
91 -- | Verifies the N+1 status and return the affected nodes.
92 verifyN1 :: [Node.Node] -> [Node.Node]
93 verifyN1 nl = filter Node.failN1 nl
95 {-| Add an instance and return the new node and instance maps. -}
96 addInstance :: NodeList -> Instance.Instance ->
97 Node.Node -> Node.Node -> Maybe NodeList
98 addInstance nl idata pri sec =
99 let pdx = Node.idx pri
102 pnode <- Node.addPri pri idata
103 snode <- Node.addSec sec idata pdx
104 new_nl <- return $ Container.addTwo sdx snode
108 -- | Remove an instance and return the new node and instance maps.
109 removeInstance :: NodeList -> Instance.Instance -> NodeList
110 removeInstance nl idata =
111 let pnode = Instance.pnode idata
112 snode = Instance.snode idata
113 pn = Container.find pnode nl
114 sn = Container.find snode nl
115 new_nl = Container.addTwo
116 pnode (Node.removePri pn idata)
117 snode (Node.removeSec sn idata) nl in
120 -- | Remove an instance and return the new node map.
121 removeInstances :: NodeList -> [Instance.Instance] -> NodeList
122 removeInstances = foldl' removeInstance
124 -- | Compute the total free disk and memory in the cluster.
125 totalResources :: Container.Container Node.Node -> (Int, Int)
128 (\ (mem, dsk) node -> (mem + (Node.f_mem node),
129 dsk + (Node.f_dsk node)))
130 (0, 0) (Container.elems nl)
132 {- | Compute a new version of a cluster given a solution.
134 This is not used for computing the solutions, but for applying a
135 (known-good) solution to the original cluster for final display.
137 It first removes the relocated instances after which it places them on
141 applySolution :: NodeList -> InstanceList -> [Placement] -> NodeList
142 applySolution nl il sol =
143 let odxes = map (\ (a, b, c, _) -> (Container.find a il,
144 Node.idx (Container.find b nl),
145 Node.idx (Container.find c nl))
147 idxes = (\ (x, _, _) -> x) (unzip3 odxes)
148 nc = removeInstances nl idxes
150 foldl' (\ nz (a, b, c) ->
151 let new_p = Container.find b nz
152 new_s = Container.find c nz in
153 fromJust (addInstance nz a new_p new_s)
157 -- First phase functions
159 {- | Given a list 1,2,3..n build a list of pairs [(1, [2..n]), (2,
163 genParts :: [a] -> Int -> [(a, [a])]
168 if length l < count then
171 (x, xs) : (genParts xs count)
173 -- | Generates combinations of count items from the names list.
174 genNames :: Int -> [b] -> [[b]]
175 genNames count1 names1 =
176 let aux_fn count names current =
181 (\ (x, xs) -> aux_fn (count - 1) xs (x:current))
182 (genParts names count)
184 aux_fn count1 names1 []
186 {- | Computes the pair of bad nodes and instances.
188 The bad node list is computed via a simple 'verifyN1' check, and the
189 bad instance list is the list of primary and secondary instances of
193 computeBadItems :: NodeList -> InstanceList ->
194 ([Node.Node], [Instance.Instance])
195 computeBadItems nl il =
196 let bad_nodes = verifyN1 $ Container.elems nl
197 bad_instances = map (\idx -> Container.find idx il) $
198 sort $ nub $ concat $
199 map (\ n -> (Node.slist n) ++ (Node.plist n)) bad_nodes
201 (bad_nodes, bad_instances)
204 {- | Checks if removal of instances results in N+1 pass.
206 Note: the check removal cannot optimize by scanning only the affected
207 nodes, since the cluster is known to be not healthy; only the check
208 placement can make this shortcut.
211 checkRemoval :: NodeList -> [Instance.Instance] -> Maybe Removal
212 checkRemoval nl victims =
213 let nx = removeInstances nl victims
214 failN1 = verifyN1Check (Container.elems nx)
219 Just $ Removal nx victims
222 -- | Computes the removals list for a given depth
223 computeRemovals :: Cluster.NodeList
224 -> [Instance.Instance]
226 -> [Maybe Cluster.Removal]
227 computeRemovals nl bad_instances depth =
228 map (checkRemoval nl) $ genNames depth bad_instances
230 -- Second phase functions
232 -- | Single-node relocation cost
233 nodeDelta :: Int -> Int -> Int -> Int
235 if i == p || i == s then
240 {-| Compute best solution.
242 This function compares two solutions, choosing the minimum valid
245 compareSolutions :: Maybe Solution -> Maybe Solution -> Maybe Solution
246 compareSolutions a b = case (a, b) of
251 -- | Compute best table. Note that the ordering of the arguments is important.
252 compareTables :: Table -> Table -> Table
253 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
254 if a_cv > b_cv then b else a
256 -- | Check if a given delta is worse then an existing solution.
257 tooHighDelta :: Maybe Solution -> Int -> Int -> Bool
258 tooHighDelta sol new_delta max_delta =
259 if new_delta > max_delta && max_delta >=0 then
264 Just (Solution old_delta _) -> old_delta <= new_delta
266 {-| Check if placement of instances still keeps the cluster N+1 compliant.
268 This is the workhorse of the allocation algorithm: given the
269 current node and instance maps, the list of instances to be
270 placed, and the current solution, this will return all possible
271 solution by recursing until all target instances are placed.
274 checkPlacement :: NodeList -- ^ The current node list
275 -> [Instance.Instance] -- ^ List of instances still to place
276 -> [Placement] -- ^ Partial solution until now
277 -> Int -- ^ The delta of the partial solution
278 -> Maybe Solution -- ^ The previous solution
279 -> Int -- ^ Abort if the we go above this delta
280 -> Maybe Solution -- ^ The new solution
281 checkPlacement nl victims current current_delta prev_sol max_delta =
282 let target = head victims
283 opdx = Instance.pnode target
284 osdx = Instance.snode target
286 have_tail = (length vtail) > 0
287 nodes = Container.elems nl
288 iidx = Instance.idx target
293 pri_idx = Node.idx pri
294 upri_delta = current_delta + nodeDelta pri_idx opdx osdx
295 new_pri = Node.addPri pri target
296 fail_delta1 = tooHighDelta accu_p upri_delta max_delta
298 if fail_delta1 || isNothing(new_pri) then accu_p
299 else let pri_nl = Container.add pri_idx (fromJust new_pri) nl in
303 sec_idx = Node.idx sec
304 upd_delta = upri_delta +
305 nodeDelta sec_idx opdx osdx
306 fail_delta2 = tooHighDelta accu upd_delta max_delta
307 new_sec = Node.addSec sec target pri_idx
309 if sec_idx == pri_idx || fail_delta2 ||
310 isNothing new_sec then accu
312 nx = Container.add sec_idx (fromJust new_sec) pri_nl
314 plc = (iidx, pri_idx, sec_idx, upd_cv)
318 checkPlacement nx vtail c2 upd_delta
321 Just (Solution upd_delta c2)
322 in compareSolutions accu result
327 applyMove :: NodeList -> Instance.Instance
328 -> IMove -> (Maybe NodeList, Instance.Instance, Int, Int)
329 applyMove nl inst Failover =
330 let old_pdx = Instance.pnode inst
331 old_sdx = Instance.snode inst
332 old_p = Container.find old_pdx nl
333 old_s = Container.find old_sdx nl
334 int_p = Node.removePri old_p inst
335 int_s = Node.removeSec old_s inst
336 new_p = Node.addPri int_s inst
337 new_s = Node.addSec int_p inst old_sdx
338 new_nl = if isNothing(new_p) || isNothing(new_s) then Nothing
339 else Just $ Container.addTwo old_pdx (fromJust new_s)
340 old_sdx (fromJust new_p) nl
341 in (new_nl, Instance.setBoth inst old_sdx old_pdx, old_sdx, old_pdx)
343 applyMove nl inst (ReplacePrimary new_pdx) =
344 let old_pdx = Instance.pnode inst
345 old_sdx = Instance.snode inst
346 old_p = Container.find old_pdx nl
347 old_s = Container.find old_sdx nl
348 tgt_n = Container.find new_pdx nl
349 int_p = Node.removePri old_p inst
350 int_s = Node.removeSec old_s inst
351 new_p = Node.addPri tgt_n inst
352 new_s = Node.addSec int_s inst new_pdx
353 new_nl = if isNothing(new_p) || isNothing(new_s) then Nothing
354 else Just $ Container.add new_pdx (fromJust new_p) $
355 Container.addTwo old_pdx int_p
356 old_sdx (fromJust new_s) nl
357 in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx)
359 applyMove nl inst (ReplaceSecondary new_sdx) =
360 let old_pdx = Instance.pnode inst
361 old_sdx = Instance.snode inst
362 old_s = Container.find old_sdx nl
363 tgt_n = Container.find new_sdx nl
364 int_s = Node.removeSec old_s inst
365 new_s = Node.addSec tgt_n inst old_pdx
366 new_nl = if isNothing(new_s) then Nothing
367 else Just $ Container.addTwo new_sdx (fromJust new_s)
369 in (new_nl, Instance.setSec inst new_sdx, old_pdx, new_sdx)
371 checkSingleStep :: Table -- ^ The original table
372 -> Instance.Instance -- ^ The instance to move
373 -> Table -- ^ The current best table
374 -> IMove -- ^ The move to apply
375 -> Table -- ^ The final best table
376 checkSingleStep ini_tbl target cur_tbl move =
378 Table ini_nl ini_il _ ini_plc = ini_tbl
379 (tmp_nl, new_inst, pri_idx, sec_idx) = applyMove ini_nl target move
381 if isNothing tmp_nl then cur_tbl
383 let tgt_idx = Instance.idx target
384 upd_nl = fromJust tmp_nl
385 upd_cvar = compCV upd_nl
386 upd_il = Container.add tgt_idx new_inst ini_il
387 upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):ini_plc
388 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
390 compareTables cur_tbl upd_tbl
392 checkInstanceMove :: [Int] -- Allowed target node indices
393 -> Table -- Original table
394 -> Instance.Instance -- Instance to move
395 -> Table -- Best new table for this instance
396 checkInstanceMove nodes_idx ini_tbl target =
398 opdx = Instance.pnode target
399 osdx = Instance.snode target
400 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
401 aft_failover = checkSingleStep ini_tbl target ini_tbl Failover
402 all_moves = concatMap (\idx -> [ReplacePrimary idx,
403 ReplaceSecondary idx]) nodes
405 -- iterate over the possible nodes for this instance
406 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
408 -- | Compute the best next move.
409 checkMove :: [Int] -- ^ Allowed target node indices
410 -> Table -- ^ The current solution
411 -> [Instance.Instance] -- ^ List of instances still to move
412 -> Table -- ^ The new solution
413 checkMove nodes_idx ini_tbl victims =
414 let Table _ _ _ ini_plc = ini_tbl
415 -- iterate over all instances, computing the best move
418 (\ step_tbl elem -> compareTables step_tbl $
419 checkInstanceMove nodes_idx ini_tbl elem)
421 Table _ _ _ best_plc = best_tbl
423 if length best_plc == length ini_plc then -- no advancement
426 -- FIXME: replace 100 with a real constant
427 if (length best_plc > 100) then best_tbl
428 else checkMove nodes_idx best_tbl victims
430 {- | Auxiliary function for solution computation.
432 We write this in an explicit recursive fashion in order to control
433 early-abort in case we have met the min delta. We can't use foldr
434 instead of explicit recursion since we need the accumulator for the
438 advanceSolution :: [Maybe Removal] -- ^ The removal to process
439 -> Int -- ^ Minimum delta parameter
440 -> Int -- ^ Maximum delta parameter
441 -> Maybe Solution -- ^ Current best solution
442 -> Maybe Solution -- ^ New best solution
443 advanceSolution [] _ _ sol = sol
444 advanceSolution (Nothing:xs) m n sol = advanceSolution xs m n sol
445 advanceSolution ((Just (Removal nx removed)):xs) min_d max_d prev_sol =
446 let new_sol = checkPlacement nx removed [] 0 prev_sol max_d
447 new_delta = solutionDelta $! new_sol
449 if new_delta >= 0 && new_delta <= min_d then
452 advanceSolution xs min_d max_d new_sol
454 -- | Computes the placement solution.
455 solutionFromRemovals :: [Maybe Removal] -- ^ The list of (possible) removals
456 -> Int -- ^ Minimum delta parameter
457 -> Int -- ^ Maximum delta parameter
458 -> Maybe Solution -- ^ The best solution found
459 solutionFromRemovals removals min_delta max_delta =
460 advanceSolution removals min_delta max_delta Nothing
462 {- | Computes the solution at the given depth.
464 This is a wrapper over both computeRemovals and
465 solutionFromRemovals. In case we have no solution, we return Nothing.
468 computeSolution :: NodeList -- ^ The original node data
469 -> [Instance.Instance] -- ^ The list of /bad/ instances
470 -> Int -- ^ The /depth/ of removals
471 -> Int -- ^ Maximum number of removals to process
472 -> Int -- ^ Minimum delta parameter
473 -> Int -- ^ Maximum delta parameter
474 -> Maybe Solution -- ^ The best solution found (or Nothing)
475 computeSolution nl bad_instances depth max_removals min_delta max_delta =
477 removals = computeRemovals nl bad_instances depth
478 removals' = capRemovals removals max_removals
480 solutionFromRemovals removals' min_delta max_delta
482 -- Solution display functions (pure)
484 -- | Given the original and final nodes, computes the relocation description.
485 computeMoves :: String -- ^ The instance name
486 -> String -- ^ Original primary
487 -> String -- ^ Original secondary
488 -> String -- ^ New primary
489 -> String -- ^ New secondary
490 -> (String, [String])
491 -- ^ Tuple of moves and commands list; moves is containing
492 -- either @/f/@ for failover or @/r:name/@ for replace
493 -- secondary, while the command list holds gnt-instance
494 -- commands (without that prefix), e.g \"@failover instance1@\"
495 computeMoves i a b c d =
496 if c == a then {- Same primary -}
497 if d == b then {- Same sec??! -}
499 else {- Change of secondary -}
501 [printf "replace-disks -n %s %s" d i])
503 if c == b then {- Failover and ... -}
504 if d == a then {- that's all -}
505 ("f", [printf "failover %s" i])
508 [printf "failover %s" i,
509 printf "replace-disks -n %s %s" d i])
511 if d == a then {- ... and keep primary as secondary -}
513 [printf "replace-disks -n %s %s" c i,
514 printf "failover %s" i])
516 if d == b then {- ... keep same secondary -}
517 (printf "f r:%s f" c,
518 [printf "failover %s" i,
519 printf "replace-disks -n %s %s" c i,
520 printf "failover %s" i])
522 else {- Nothing in common -}
523 (printf "r:%s f r:%s" c d,
524 [printf "replace-disks -n %s %s" c i,
525 printf "failover %s" i,
526 printf "replace-disks -n %s %s" d i])
528 {-| Converts a solution to string format -}
529 printSolution :: InstanceList
533 -> ([String], [[String]])
534 printSolution il ktn kti sol =
536 mlen_fn = maximum . (map length) . snd . unzip
539 pmlen = (2*nmlen + 1)
543 let inst = Container.find i il
544 inam = fromJust $ lookup (Instance.idx inst) kti
545 npri = fromJust $ lookup p ktn
546 nsec = fromJust $ lookup s ktn
547 opri = fromJust $ lookup (Instance.pnode inst) ktn
548 osec = fromJust $ lookup (Instance.snode inst) ktn
549 (moves, cmds) = computeMoves inam opri osec npri nsec
550 ostr = (printf "%s:%s" opri osec)::String
551 nstr = (printf "%s:%s" npri nsec)::String
553 (printf " %-*s %-*s => %-*s %.8f a=%s"
554 imlen inam pmlen ostr
559 -- | Print the node list.
560 printNodes :: [(Int, String)] -> NodeList -> String
562 let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
563 snl' = map (\ n -> ((fromJust $ lookup (Node.idx n) ktn), n)) snl
564 m_name = maximum . (map length) . fst . unzip $ snl'
565 helper = Node.list m_name
566 header = printf "%2s %-*s %5s %5s %5s %5s %5s %3s %3s %7s %7s"
567 "N1" m_name "Name" "t_mem" "f_mem" "r_mem"
569 "pri" "sec" "p_fmem" "p_fdsk"
570 in unlines $ (header:map (uncurry helper) snl')
572 -- | Compute the mem and disk covariance.
573 compDetailedCV :: NodeList -> (Double, Double, Double, Double)
576 nodes = Container.elems nl
577 mem_l = map Node.p_mem nodes
578 dsk_l = map Node.p_dsk nodes
579 mem_cv = varianceCoeff mem_l
580 dsk_cv = varianceCoeff dsk_l
581 n1_l = length $ filter Node.failN1 nodes
582 n1_score = (fromIntegral n1_l) / (fromIntegral $ length nodes)
583 res_l = map Node.p_rem nodes
584 res_cv = varianceCoeff res_l
585 in (mem_cv, dsk_cv, n1_score, res_cv)
587 -- | Compute the 'total' variance.
588 compCV :: NodeList -> Double
590 let (mem_cv, dsk_cv, n1_score, res_cv) = compDetailedCV nl
591 in mem_cv + dsk_cv + n1_score + res_cv
593 printStats :: NodeList -> String
595 let (mem_cv, dsk_cv, n1_score, res_cv) = compDetailedCV nl
596 in printf "f_mem=%.8f, r_mem=%.8f, f_dsk=%.8f, n1=%.3f"
597 mem_cv res_cv dsk_cv n1_score
599 -- Balancing functions
603 {- | Convert newline and delimiter-separated text.
605 This function converts a text in tabular format as generated by
606 @gnt-instance list@ and @gnt-node list@ to a list of objects using a
607 supplied conversion function.
610 loadTabular :: String -> ([String] -> (String, a))
611 -> (a -> Int -> a) -> ([(String, Int)], [(Int, a)])
612 loadTabular text_data convert_fn set_fn =
613 let lines_data = lines text_data
614 rows = map (sepSplit '|') lines_data
615 kerows = (map convert_fn rows)
616 idxrows = map (\ (idx, (k, v)) -> ((k, idx), (idx, set_fn v idx)))
620 -- | For each instance, add its index to its primary and secondary nodes
621 fixNodes :: [(Int, Node.Node)]
622 -> [(Int, Instance.Instance)]
623 -> [(Int, Node.Node)]
625 foldl' (\accu (idx, inst) ->
627 assocEqual = (\ (i, _) (j, _) -> i == j)
628 pdx = Instance.pnode inst
629 sdx = Instance.snode inst
630 pold = fromJust $ lookup pdx accu
631 sold = fromJust $ lookup sdx accu
632 pnew = Node.setPri pold idx
633 snew = Node.setSec sold idx
634 ac1 = deleteBy assocEqual (pdx, pold) accu
635 ac2 = deleteBy assocEqual (sdx, sold) ac1
636 ac3 = (pdx, pnew):(sdx, snew):ac2
639 -- | Compute the longest common suffix of a [(Int, String)] list that
640 -- | starts with a dot
641 longestDomain :: [(Int, String)] -> String
642 longestDomain [] = ""
643 longestDomain ((_,x):xs) =
645 onlyStrings = snd $ unzip xs
647 foldr (\ suffix accu -> if all (isSuffixOf suffix) onlyStrings
650 "" $ filter (isPrefixOf ".") (tails x)
652 -- | Remove tails from the (Int, String) lists
653 stripSuffix :: String -> [(Int, String)] -> [(Int, String)]
654 stripSuffix suffix lst =
655 let sflen = length suffix in
656 map (\ (key, name) -> (key, take ((length name) - sflen) name)) lst
658 {-| Initializer function that loads the data from a node and list file
659 and massages it into the correct format. -}
660 loadData :: String -- ^ Node data in text format
661 -> String -- ^ Instance data in text format
662 -> (Container.Container Node.Node,
663 Container.Container Instance.Instance,
664 String, [(Int, String)], [(Int, String)])
665 loadData ndata idata =
667 {- node file: name mem disk -}
668 (ktn, nl) = loadTabular ndata
669 (\ (i:jt:jf:kt:kf:[]) -> (i, Node.create jt jf kt kf))
671 {- instance file: name mem disk -}
672 (kti, il) = loadTabular idata
673 (\ (i:j:k:l:m:[]) -> (i,
675 (fromJust $ lookup l ktn)
676 (fromJust $ lookup m ktn)))
679 il3 = Container.fromAssocList il
680 nl3 = Container.fromAssocList
681 (map (\ (k, v) -> (k, Node.buildPeers v il3 (length nl2))) nl2)
684 common_suffix = longestDomain (xti ++ xtn)
685 stn = stripSuffix common_suffix xtn
686 sti = stripSuffix common_suffix xti
688 (nl3, il3, common_suffix, stn, sti)