1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the "Main" module for the individual binaries.
10 Copyright (C) 2009, 2010 Google Inc.
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 2 of the License, or
15 (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
29 module Ganeti.HTools.Cluster
36 -- * Generic functions
38 , computeAllocationDelta
39 -- * First phase functions
41 -- * Second phase functions
46 -- * Display functions
49 -- * Balacing functions
56 -- * IAllocator functions
62 -- * Allocation functions
71 import Data.Ord (comparing)
72 import Text.Printf (printf)
75 import qualified Ganeti.HTools.Container as Container
76 import qualified Ganeti.HTools.Instance as Instance
77 import qualified Ganeti.HTools.Node as Node
78 import Ganeti.HTools.Types
79 import Ganeti.HTools.Utils
80 import qualified Ganeti.OpCodes as OpCodes
84 -- | Allocation\/relocation solution.
85 data AllocSolution = AllocSolution
86 { asFailures :: [FailMode] -- ^ Failure counts
87 , asAllocs :: Int -- ^ Good allocation count
88 , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
89 -- of the list depends on the
90 -- allocation/relocation mode
91 , asLog :: [String] -- ^ A list of informational messages
94 -- | The empty solution we start with when computing allocations
95 emptySolution :: AllocSolution
96 emptySolution = AllocSolution { asFailures = [], asAllocs = 0
97 , asSolutions = [], asLog = [] }
99 -- | The complete state for the balancing solution
100 data Table = Table Node.List Instance.List Score [Placement]
103 data CStats = CStats { csFmem :: Int -- ^ Cluster free mem
104 , csFdsk :: Int -- ^ Cluster free disk
105 , csAmem :: Int -- ^ Cluster allocatable mem
106 , csAdsk :: Int -- ^ Cluster allocatable disk
107 , csAcpu :: Int -- ^ Cluster allocatable cpus
108 , csMmem :: Int -- ^ Max node allocatable mem
109 , csMdsk :: Int -- ^ Max node allocatable disk
110 , csMcpu :: Int -- ^ Max node allocatable cpu
111 , csImem :: Int -- ^ Instance used mem
112 , csIdsk :: Int -- ^ Instance used disk
113 , csIcpu :: Int -- ^ Instance used cpu
114 , csTmem :: Double -- ^ Cluster total mem
115 , csTdsk :: Double -- ^ Cluster total disk
116 , csTcpu :: Double -- ^ Cluster total cpus
117 , csVcpu :: Int -- ^ Cluster virtual cpus (if
118 -- node pCpu has been set,
120 , csXmem :: Int -- ^ Unnacounted for mem
121 , csNmem :: Int -- ^ Node own memory
122 , csScore :: Score -- ^ The cluster score
123 , csNinst :: Int -- ^ The total number of instances
127 -- | Currently used, possibly to allocate, unallocable
128 type AllocStats = (RSpec, RSpec, RSpec)
130 -- * Utility functions
132 -- | Verifies the N+1 status and return the affected nodes.
133 verifyN1 :: [Node.Node] -> [Node.Node]
134 verifyN1 = filter Node.failN1
136 {-| Computes the pair of bad nodes and instances.
138 The bad node list is computed via a simple 'verifyN1' check, and the
139 bad instance list is the list of primary and secondary instances of
143 computeBadItems :: Node.List -> Instance.List ->
144 ([Node.Node], [Instance.Instance])
145 computeBadItems nl il =
146 let bad_nodes = verifyN1 $ getOnline nl
147 bad_instances = map (`Container.find` il) .
149 concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
151 (bad_nodes, bad_instances)
153 -- | Zero-initializer for the CStats type
154 emptyCStats :: CStats
155 emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
157 -- | Update stats with data from a new node
158 updateCStats :: CStats -> Node.Node -> CStats
159 updateCStats cs node =
160 let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
161 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
162 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
163 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
164 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
166 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
169 inc_amem = Node.fMem node - Node.rMem node
170 inc_amem' = if inc_amem > 0 then inc_amem else 0
171 inc_adsk = Node.availDisk node
172 inc_imem = truncate (Node.tMem node) - Node.nMem node
173 - Node.xMem node - Node.fMem node
174 inc_icpu = Node.uCpu node
175 inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
176 inc_vcpu = Node.hiCpu node
178 in cs { csFmem = x_fmem + Node.fMem node
179 , csFdsk = x_fdsk + Node.fDsk node
180 , csAmem = x_amem + inc_amem'
181 , csAdsk = x_adsk + inc_adsk
183 , csMmem = max x_mmem inc_amem'
184 , csMdsk = max x_mdsk inc_adsk
186 , csImem = x_imem + inc_imem
187 , csIdsk = x_idsk + inc_idsk
188 , csIcpu = x_icpu + inc_icpu
189 , csTmem = x_tmem + Node.tMem node
190 , csTdsk = x_tdsk + Node.tDsk node
191 , csTcpu = x_tcpu + Node.tCpu node
192 , csVcpu = x_vcpu + inc_vcpu
193 , csXmem = x_xmem + Node.xMem node
194 , csNmem = x_nmem + Node.nMem node
195 , csNinst = x_ninst + length (Node.pList node)
198 -- | Compute the total free disk and memory in the cluster.
199 totalResources :: Node.List -> CStats
201 let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
202 in cs { csScore = compCV nl }
204 -- | Compute the delta between two cluster state.
206 -- This is used when doing allocations, to understand better the
207 -- available cluster resources. The return value is a triple of the
208 -- current used values, the delta that was still allocated, and what
209 -- was left unallocated.
210 computeAllocationDelta :: CStats -> CStats -> AllocStats
211 computeAllocationDelta cini cfin =
212 let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
213 CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
214 csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
215 rini = RSpec i_icpu i_imem i_idsk
216 rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
217 un_cpu = v_cpu - f_icpu
218 runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
219 in (rini, rfin, runa)
221 -- | The names and weights of the individual elements in the CV list
222 detailedCVInfo :: [(Double, String)]
223 detailedCVInfo = [ (1, "free_mem_cv")
224 , (1, "free_disk_cv")
226 , (1, "reserved_mem_cv")
227 , (4, "offline_all_cnt")
228 , (16, "offline_pri_cnt")
229 , (1, "vcpu_ratio_cv")
232 , (1, "disk_load_cv")
234 , (2, "pri_tags_score")
237 detailedCVWeights :: [Double]
238 detailedCVWeights = map fst detailedCVInfo
240 -- | Compute the mem and disk covariance.
241 compDetailedCV :: Node.List -> [Double]
244 all_nodes = Container.elems nl
245 (offline, nodes) = partition Node.offline all_nodes
246 mem_l = map Node.pMem nodes
247 dsk_l = map Node.pDsk nodes
248 -- metric: memory covariance
249 mem_cv = varianceCoeff mem_l
250 -- metric: disk covariance
251 dsk_cv = varianceCoeff dsk_l
252 -- metric: count of instances living on N1 failing nodes
253 n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
254 length (Node.pList n)) .
255 filter Node.failN1 $ nodes :: Double
256 res_l = map Node.pRem nodes
257 -- metric: reserved memory covariance
258 res_cv = varianceCoeff res_l
259 -- offline instances metrics
260 offline_ipri = sum . map (length . Node.pList) $ offline
261 offline_isec = sum . map (length . Node.sList) $ offline
262 -- metric: count of instances on offline nodes
263 off_score = fromIntegral (offline_ipri + offline_isec)::Double
264 -- metric: count of primary instances on offline nodes (this
265 -- helps with evacuation/failover of primary instances on
266 -- 2-node clusters with one node offline)
267 off_pri_score = fromIntegral offline_ipri::Double
268 cpu_l = map Node.pCpu nodes
269 -- metric: covariance of vcpu/pcpu ratio
270 cpu_cv = varianceCoeff cpu_l
271 -- metrics: covariance of cpu, memory, disk and network load
272 (c_load, m_load, d_load, n_load) = unzip4 $
274 let DynUtil c1 m1 d1 n1 = Node.utilLoad n
275 DynUtil c2 m2 d2 n2 = Node.utilPool n
276 in (c1/c2, m1/m2, d1/d2, n1/n2)
278 -- metric: conflicting instance count
279 pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
280 pri_tags_score = fromIntegral pri_tags_inst::Double
281 in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
282 , varianceCoeff c_load, varianceCoeff m_load
283 , varianceCoeff d_load, varianceCoeff n_load
286 -- | Compute the /total/ variance.
287 compCV :: Node.List -> Double
288 compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
290 -- | Compute online nodes from a Node.List
291 getOnline :: Node.List -> [Node.Node]
292 getOnline = filter (not . Node.offline) . Container.elems
296 -- | Compute best table. Note that the ordering of the arguments is important.
297 compareTables :: Table -> Table -> Table
298 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
299 if a_cv > b_cv then b else a
301 -- | Applies an instance move to a given node list and instance.
302 applyMove :: Node.List -> Instance.Instance
303 -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
305 applyMove nl inst Failover =
306 let old_pdx = Instance.pNode inst
307 old_sdx = Instance.sNode inst
308 old_p = Container.find old_pdx nl
309 old_s = Container.find old_sdx nl
310 int_p = Node.removePri old_p inst
311 int_s = Node.removeSec old_s inst
312 force_p = Node.offline old_p
313 new_nl = do -- Maybe monad
314 new_p <- Node.addPriEx force_p int_s inst
315 new_s <- Node.addSec int_p inst old_sdx
316 let new_inst = Instance.setBoth inst old_sdx old_pdx
317 return (Container.addTwo old_pdx new_s old_sdx new_p nl,
318 new_inst, old_sdx, old_pdx)
321 -- Replace the primary (f:, r:np, f)
322 applyMove nl inst (ReplacePrimary new_pdx) =
323 let old_pdx = Instance.pNode inst
324 old_sdx = Instance.sNode inst
325 old_p = Container.find old_pdx nl
326 old_s = Container.find old_sdx nl
327 tgt_n = Container.find new_pdx nl
328 int_p = Node.removePri old_p inst
329 int_s = Node.removeSec old_s inst
330 force_p = Node.offline old_p
331 new_nl = do -- Maybe monad
332 -- check that the current secondary can host the instance
333 -- during the migration
334 tmp_s <- Node.addPriEx force_p int_s inst
335 let tmp_s' = Node.removePri tmp_s inst
336 new_p <- Node.addPriEx force_p tgt_n inst
337 new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
338 let new_inst = Instance.setPri inst new_pdx
339 return (Container.add new_pdx new_p $
340 Container.addTwo old_pdx int_p old_sdx new_s nl,
341 new_inst, new_pdx, old_sdx)
344 -- Replace the secondary (r:ns)
345 applyMove nl inst (ReplaceSecondary new_sdx) =
346 let old_pdx = Instance.pNode inst
347 old_sdx = Instance.sNode inst
348 old_s = Container.find old_sdx nl
349 tgt_n = Container.find new_sdx nl
350 int_s = Node.removeSec old_s inst
351 force_s = Node.offline old_s
352 new_inst = Instance.setSec inst new_sdx
353 new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
354 \new_s -> return (Container.addTwo new_sdx
355 new_s old_sdx int_s nl,
356 new_inst, old_pdx, new_sdx)
359 -- Replace the secondary and failover (r:np, f)
360 applyMove nl inst (ReplaceAndFailover new_pdx) =
361 let old_pdx = Instance.pNode inst
362 old_sdx = Instance.sNode inst
363 old_p = Container.find old_pdx nl
364 old_s = Container.find old_sdx nl
365 tgt_n = Container.find new_pdx nl
366 int_p = Node.removePri old_p inst
367 int_s = Node.removeSec old_s inst
368 force_s = Node.offline old_s
369 new_nl = do -- Maybe monad
370 new_p <- Node.addPri tgt_n inst
371 new_s <- Node.addSecEx force_s int_p inst new_pdx
372 let new_inst = Instance.setBoth inst new_pdx old_pdx
373 return (Container.add new_pdx new_p $
374 Container.addTwo old_pdx new_s old_sdx int_s nl,
375 new_inst, new_pdx, old_pdx)
378 -- Failver and replace the secondary (f, r:ns)
379 applyMove nl inst (FailoverAndReplace new_sdx) =
380 let old_pdx = Instance.pNode inst
381 old_sdx = Instance.sNode inst
382 old_p = Container.find old_pdx nl
383 old_s = Container.find old_sdx nl
384 tgt_n = Container.find new_sdx nl
385 int_p = Node.removePri old_p inst
386 int_s = Node.removeSec old_s inst
387 force_p = Node.offline old_p
388 new_nl = do -- Maybe monad
389 new_p <- Node.addPriEx force_p int_s inst
390 new_s <- Node.addSecEx force_p tgt_n inst old_sdx
391 let new_inst = Instance.setBoth inst old_sdx new_sdx
392 return (Container.add new_sdx new_s $
393 Container.addTwo old_sdx new_p old_pdx int_p nl,
394 new_inst, old_sdx, new_sdx)
397 -- | Tries to allocate an instance on one given node.
398 allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
399 -> OpResult Node.AllocElement
400 allocateOnSingle nl inst p =
401 let new_pdx = Node.idx p
402 new_inst = Instance.setBoth inst new_pdx Node.noSecondary
403 in Node.addPri p inst >>= \new_p -> do
404 let new_nl = Container.add new_pdx new_p nl
405 new_score = compCV nl
406 return (new_nl, new_inst, [new_p], new_score)
408 -- | Tries to allocate an instance on a given pair of nodes.
409 allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
410 -> OpResult Node.AllocElement
411 allocateOnPair nl inst tgt_p tgt_s =
412 let new_pdx = Node.idx tgt_p
413 new_sdx = Node.idx tgt_s
415 new_p <- Node.addPri tgt_p inst
416 new_s <- Node.addSec tgt_s inst new_pdx
417 let new_inst = Instance.setBoth inst new_pdx new_sdx
418 new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
419 return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
421 -- | Tries to perform an instance move and returns the best table
422 -- between the original one and the new one.
423 checkSingleStep :: Table -- ^ The original table
424 -> Instance.Instance -- ^ The instance to move
425 -> Table -- ^ The current best table
426 -> IMove -- ^ The move to apply
427 -> Table -- ^ The final best table
428 checkSingleStep ini_tbl target cur_tbl move =
430 Table ini_nl ini_il _ ini_plc = ini_tbl
431 tmp_resu = applyMove ini_nl target move
435 OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
436 let tgt_idx = Instance.idx target
437 upd_cvar = compCV upd_nl
438 upd_il = Container.add tgt_idx new_inst ini_il
439 upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
440 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
442 compareTables cur_tbl upd_tbl
444 -- | Given the status of the current secondary as a valid new node and
445 -- the current candidate target node, generate the possible moves for
447 possibleMoves :: Bool -- ^ Whether the secondary node is a valid new node
448 -> Ndx -- ^ Target node candidate
449 -> [IMove] -- ^ List of valid result moves
450 possibleMoves True tdx =
451 [ReplaceSecondary tdx,
452 ReplaceAndFailover tdx,
454 FailoverAndReplace tdx]
456 possibleMoves False tdx =
457 [ReplaceSecondary tdx,
458 ReplaceAndFailover tdx]
460 -- | Compute the best move for a given instance.
461 checkInstanceMove :: [Ndx] -- ^ Allowed target node indices
462 -> Bool -- ^ Whether disk moves are allowed
463 -> Table -- ^ Original table
464 -> Instance.Instance -- ^ Instance to move
465 -> Table -- ^ Best new table for this instance
466 checkInstanceMove nodes_idx disk_moves ini_tbl target =
468 opdx = Instance.pNode target
469 osdx = Instance.sNode target
470 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
471 use_secondary = elem osdx nodes_idx
472 aft_failover = if use_secondary -- if allowed to failover
473 then checkSingleStep ini_tbl target ini_tbl Failover
475 all_moves = if disk_moves
476 then concatMap (possibleMoves use_secondary) nodes
479 -- iterate over the possible nodes for this instance
480 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
482 -- | Compute the best next move.
483 checkMove :: [Ndx] -- ^ Allowed target node indices
484 -> Bool -- ^ Whether disk moves are allowed
485 -> Table -- ^ The current solution
486 -> [Instance.Instance] -- ^ List of instances still to move
487 -> Table -- ^ The new solution
488 checkMove nodes_idx disk_moves ini_tbl victims =
489 let Table _ _ _ ini_plc = ini_tbl
490 -- iterate over all instances, computing the best move
494 compareTables step_tbl $
495 checkInstanceMove nodes_idx disk_moves ini_tbl em)
497 Table _ _ _ best_plc = best_tbl
498 in if length best_plc == length ini_plc
499 then ini_tbl -- no advancement
502 -- | Check if we are allowed to go deeper in the balancing
503 doNextBalance :: Table -- ^ The starting table
504 -> Int -- ^ Remaining length
505 -> Score -- ^ Score at which to stop
506 -> Bool -- ^ The resulting table and commands
507 doNextBalance ini_tbl max_rounds min_score =
508 let Table _ _ ini_cv ini_plc = ini_tbl
509 ini_plc_len = length ini_plc
510 in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
512 -- | Run a balance move
513 tryBalance :: Table -- ^ The starting table
514 -> Bool -- ^ Allow disk moves
515 -> Bool -- ^ Only evacuate moves
516 -> Score -- ^ Min gain threshold
517 -> Score -- ^ Min gain
518 -> Maybe Table -- ^ The resulting table and commands
519 tryBalance ini_tbl disk_moves evac_mode mg_limit min_gain =
520 let Table ini_nl ini_il ini_cv _ = ini_tbl
521 all_inst = Container.elems ini_il
522 all_inst' = if evac_mode
523 then let bad_nodes = map Node.idx . filter Node.offline $
524 Container.elems ini_nl
525 in filter (\e -> Instance.sNode e `elem` bad_nodes ||
526 Instance.pNode e `elem` bad_nodes)
529 reloc_inst = filter Instance.movable all_inst'
530 node_idx = map Node.idx . filter (not . Node.offline) $
531 Container.elems ini_nl
532 fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
533 (Table _ _ fin_cv _) = fin_tbl
535 if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
536 then Just fin_tbl -- this round made success, return the new table
539 -- * Allocation functions
541 -- | Build failure stats out of a list of failures
542 collapseFailures :: [FailMode] -> FailStats
543 collapseFailures flst =
544 map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
546 -- | Update current Allocation solution and failure stats with new
548 concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
549 concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
551 concatAllocs as (OpGood ns@(_, _, _, nscore)) =
552 let -- Choose the old or new solution, based on the cluster score
554 osols = asSolutions as
555 nsols = case osols of
557 (_, _, _, oscore):[] ->
561 -- FIXME: here we simply concat to lists with more
562 -- than one element; we should instead abort, since
563 -- this is not a valid usage of this function
566 -- Note: we force evaluation of nsols here in order to keep the
567 -- memory profile low - we know that we will need nsols for sure
568 -- in the next cycle, so we force evaluation of nsols, since the
569 -- foldl' in the caller will only evaluate the tuple, but not the
570 -- elements of the tuple
571 in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
573 -- | Given a solution, generates a reasonable description for it
574 describeSolution :: AllocSolution -> String
575 describeSolution as =
576 let fcnt = asFailures as
577 sols = asSolutions as
579 intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
580 filter ((> 0) . snd) . collapseFailures $ fcnt
582 then "No valid allocation solutions, failure reasons: " ++
584 then "unknown reasons"
586 else let (_, _, nodes, cv) = head sols
587 in printf ("score: %.8f, successes %d, failures %d (%s)" ++
588 " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
589 (intercalate "/" . map Node.name $ nodes)
591 -- | Annotates a solution with the appropriate string
592 annotateSolution :: AllocSolution -> AllocSolution
593 annotateSolution as = as { asLog = describeSolution as : asLog as }
595 -- | Try to allocate an instance on the cluster.
596 tryAlloc :: (Monad m) =>
597 Node.List -- ^ The node list
598 -> Instance.List -- ^ The instance list
599 -> Instance.Instance -- ^ The instance to allocate
600 -> Int -- ^ Required number of nodes
601 -> m AllocSolution -- ^ Possible solution list
602 tryAlloc nl _ inst 2 =
603 let all_nodes = getOnline nl
604 all_pairs = liftM2 (,) all_nodes all_nodes
605 ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
606 sols = foldl' (\cstate (p, s) ->
607 concatAllocs cstate $ allocateOnPair nl inst p s
608 ) emptySolution ok_pairs
610 in if null ok_pairs -- means we have just one node
611 then fail "Not enough online nodes"
612 else return $ annotateSolution sols
614 tryAlloc nl _ inst 1 =
615 let all_nodes = getOnline nl
616 sols = foldl' (\cstate ->
617 concatAllocs cstate . allocateOnSingle nl inst
618 ) emptySolution all_nodes
620 then fail "No online nodes"
621 else return $ annotateSolution sols
623 tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
624 \destinations required (" ++ show reqn ++
625 "), only two supported"
627 -- | Given a group/result, describe it as a nice (list of) messages
628 solutionDescription :: (Gdx, Result AllocSolution) -> [String]
629 solutionDescription (groupId, result) =
631 Ok solution -> map (printf "Group %d: %s" groupId) (asLog solution)
632 Bad message -> [printf "Group %d: error %s" groupId message]
634 -- | From a list of possibly bad and possibly empty solutions, filter
635 -- only the groups with a valid result
636 filterMGResults :: [(Gdx, Result AllocSolution)] ->
637 [(Gdx, AllocSolution)]
639 filter (not . null . asSolutions . snd) .
640 map (\(y, Ok x) -> (y, x)) .
643 -- | Try to allocate an instance on a multi-group cluster.
644 tryMGAlloc :: Node.List -- ^ The node list
645 -> Instance.List -- ^ The instance list
646 -> Instance.Instance -- ^ The instance to allocate
647 -> Int -- ^ Required number of nodes
648 -> Result AllocSolution -- ^ Possible solution list
649 tryMGAlloc mgnl mgil inst cnt =
650 let groups = splitCluster mgnl mgil
651 -- TODO: currently we consider all groups preferred
652 sols = map (\(gid, (nl, il)) ->
653 (gid, tryAlloc nl il inst cnt)) groups::
654 [(Gdx, Result AllocSolution)]
655 all_msgs = concatMap solutionDescription sols
656 goodSols = filterMGResults sols
657 extractScore = \(_, _, _, x) -> x
658 solScore = extractScore . head . asSolutions . snd
659 sortedSols = sortBy (comparing solScore) goodSols
660 in if null sortedSols
661 then Bad $ intercalate ", " all_msgs
662 else let (final_group, final_sol) = head sortedSols
663 selmsg = "Selected group: " ++ show final_group
664 in Ok $ final_sol { asLog = selmsg:all_msgs }
666 -- | Try to relocate an instance on the cluster.
667 tryReloc :: (Monad m) =>
668 Node.List -- ^ The node list
669 -> Instance.List -- ^ The instance list
670 -> Idx -- ^ The index of the instance to move
671 -> Int -- ^ The number of nodes required
672 -> [Ndx] -- ^ Nodes which should not be used
673 -> m AllocSolution -- ^ Solution list
674 tryReloc nl il xid 1 ex_idx =
675 let all_nodes = getOnline nl
676 inst = Container.find xid il
677 ex_idx' = Instance.pNode inst:ex_idx
678 valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
679 valid_idxes = map Node.idx valid_nodes
680 sols1 = foldl' (\cstate x ->
683 applyMove nl inst (ReplaceSecondary x)
684 return (mnl, i, [Container.find x mnl],
686 in concatAllocs cstate em
687 ) emptySolution valid_idxes
690 tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \
691 \destinations required (" ++ show reqn ++
692 "), only one supported"
694 -- | Try to evacuate a list of nodes.
695 tryEvac :: (Monad m) =>
696 Node.List -- ^ The node list
697 -> Instance.List -- ^ The instance list
698 -> [Ndx] -- ^ Nodes to be evacuated
699 -> m AllocSolution -- ^ Solution list
700 tryEvac nl il ex_ndx =
701 let ex_nodes = map (`Container.find` nl) ex_ndx
702 all_insts = nub . concatMap Node.sList $ ex_nodes
704 (_, sol) <- foldM (\(nl', old_as) idx -> do
705 -- FIXME: hardcoded one node here
707 new_as <- tryReloc nl' il idx 1 ex_ndx
708 case asSolutions new_as of
709 csol@(nl'', _, _, _):_ ->
710 -- an individual relocation succeeded,
711 -- we kind of compose the data from
714 new_as { asSolutions =
715 csol:asSolutions old_as })
716 -- this relocation failed, so we fail
718 _ -> fail $ "Can't evacuate instance " ++
719 Instance.name (Container.find idx il) ++
720 ": " ++ describeSolution new_as
721 ) (nl, emptySolution) all_insts
722 return $ annotateSolution sol
724 -- | Recursively place instances on the cluster until we're out of space
725 iterateAlloc :: Node.List
729 -> [Instance.Instance]
730 -> Result (FailStats, Node.List, Instance.List,
732 iterateAlloc nl il newinst nreq ixes =
733 let depth = length ixes
734 newname = printf "new-%d" depth::String
735 newidx = length (Container.elems il) + depth
736 newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
737 in case tryAlloc nl il newi2 nreq of
739 Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
741 [] -> Ok (collapseFailures errs, nl, il, ixes)
742 (xnl, xi, _, _):[] ->
743 iterateAlloc xnl (Container.add newidx xi il)
744 newinst nreq $! (xi:ixes)
745 _ -> Bad "Internal error: multiple solutions for single\
748 tieredAlloc :: Node.List
752 -> [Instance.Instance]
753 -> Result (FailStats, Node.List, Instance.List,
755 tieredAlloc nl il newinst nreq ixes =
756 case iterateAlloc nl il newinst nreq ixes of
758 Ok (errs, nl', il', ixes') ->
759 case Instance.shrinkByType newinst . fst . last $
760 sortBy (comparing snd) errs of
761 Bad _ -> Ok (errs, nl', il', ixes')
763 tieredAlloc nl' il' newinst' nreq ixes'
765 -- * Formatting functions
767 -- | Given the original and final nodes, computes the relocation description.
768 computeMoves :: Instance.Instance -- ^ The instance to be moved
769 -> String -- ^ The instance name
770 -> IMove -- ^ The move being performed
771 -> String -- ^ New primary
772 -> String -- ^ New secondary
773 -> (String, [String])
774 -- ^ Tuple of moves and commands list; moves is containing
775 -- either @/f/@ for failover or @/r:name/@ for replace
776 -- secondary, while the command list holds gnt-instance
777 -- commands (without that prefix), e.g \"@failover instance1@\"
778 computeMoves i inam mv c d =
780 Failover -> ("f", [mig])
781 FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
782 ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
783 ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
784 ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
785 where morf = if Instance.running i then "migrate" else "failover"
786 mig = printf "%s -f %s" morf inam::String
787 rep n = printf "replace-disks -n %s %s" n inam
789 -- | Converts a placement to string format.
790 printSolutionLine :: Node.List -- ^ The node list
791 -> Instance.List -- ^ The instance list
792 -> Int -- ^ Maximum node name length
793 -> Int -- ^ Maximum instance name length
794 -> Placement -- ^ The current placement
795 -> Int -- ^ The index of the placement in
797 -> (String, [String])
798 printSolutionLine nl il nmlen imlen plc pos =
800 pmlen = (2*nmlen + 1)
801 (i, p, s, mv, c) = plc
802 inst = Container.find i il
803 inam = Instance.alias inst
804 npri = Node.alias $ Container.find p nl
805 nsec = Node.alias $ Container.find s nl
806 opri = Node.alias $ Container.find (Instance.pNode inst) nl
807 osec = Node.alias $ Container.find (Instance.sNode inst) nl
808 (moves, cmds) = computeMoves inst inam mv npri nsec
809 ostr = printf "%s:%s" opri osec::String
810 nstr = printf "%s:%s" npri nsec::String
812 (printf " %3d. %-*s %-*s => %-*s %.8f a=%s"
813 pos imlen inam pmlen ostr
817 -- | Return the instance and involved nodes in an instance move.
818 involvedNodes :: Instance.List -> Placement -> [Ndx]
819 involvedNodes il plc =
820 let (i, np, ns, _, _) = plc
821 inst = Container.find i il
822 op = Instance.pNode inst
823 os = Instance.sNode inst
824 in nub [np, ns, op, os]
826 -- | Inner function for splitJobs, that either appends the next job to
827 -- the current jobset, or starts a new jobset.
828 mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
829 mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
830 mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
831 | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
832 | otherwise = ([n]:cjs, ndx)
834 -- | Break a list of moves into independent groups. Note that this
835 -- will reverse the order of jobs.
836 splitJobs :: [MoveJob] -> [JobSet]
837 splitJobs = fst . foldl mergeJobs ([], [])
839 -- | Given a list of commands, prefix them with @gnt-instance@ and
840 -- also beautify the display a little.
841 formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
842 formatJob jsn jsl (sn, (_, _, _, cmds)) =
844 printf " echo job %d/%d" jsn sn:
846 map (" gnt-instance " ++) cmds
848 then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
851 -- | Given a list of commands, prefix them with @gnt-instance@ and
852 -- also beautify the display a little.
853 formatCmds :: [JobSet] -> String
856 concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
860 -- | Print the node list.
861 printNodes :: Node.List -> [String] -> String
863 let fields = case fs of
864 [] -> Node.defaultFields
865 "+":rest -> Node.defaultFields ++ rest
867 snl = sortBy (comparing Node.idx) (Container.elems nl)
868 (header, isnum) = unzip $ map Node.showHeader fields
869 in unlines . map ((:) ' ' . intercalate " ") $
870 formatTable (header:map (Node.list fields) snl) isnum
872 -- | Print the instance list.
873 printInsts :: Node.List -> Instance.List -> String
875 let sil = sortBy (comparing Instance.idx) (Container.elems il)
876 helper inst = [ if Instance.running inst then "R" else " "
878 , Container.nameOf nl (Instance.pNode inst)
879 , let sdx = Instance.sNode inst
880 in if sdx == Node.noSecondary
882 else Container.nameOf nl sdx
883 , printf "%3d" $ Instance.vcpus inst
884 , printf "%5d" $ Instance.mem inst
885 , printf "%5d" $ Instance.dsk inst `div` 1024
891 where DynUtil lC lM lD lN = Instance.util inst
892 header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
893 , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
894 isnum = False:False:False:False:repeat True
895 in unlines . map ((:) ' ' . intercalate " ") $
896 formatTable (header:map helper sil) isnum
898 -- | Shows statistics for a given node list.
899 printStats :: Node.List -> String
901 let dcvs = compDetailedCV nl
902 (weights, names) = unzip detailedCVInfo
903 hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
904 formatted = map (\(w, header, val) ->
905 printf "%s=%.8f(x%.2f)" header val w::String) hd
906 in intercalate ", " formatted
908 -- | Convert a placement into a list of OpCodes (basically a job).
909 iMoveToJob :: Node.List -> Instance.List
910 -> Idx -> IMove -> [OpCodes.OpCode]
911 iMoveToJob nl il idx move =
912 let inst = Container.find idx il
913 iname = Instance.name inst
914 lookNode = Just . Container.nameOf nl
915 opF = if Instance.running inst
916 then OpCodes.OpMigrateInstance iname True False
917 else OpCodes.OpFailoverInstance iname False
918 opR n = OpCodes.OpReplaceDisks iname (lookNode n)
919 OpCodes.ReplaceNewSecondary [] Nothing
922 ReplacePrimary np -> [ opF, opR np, opF ]
923 ReplaceSecondary ns -> [ opR ns ]
924 ReplaceAndFailover np -> [ opR np, opF ]
925 FailoverAndReplace ns -> [ opF, opR ns ]
927 -- | Computes the group of an instance
928 instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
930 let sidx = Instance.sNode i
931 pnode = Container.find (Instance.pNode i) nl
932 snode = if sidx == Node.noSecondary
934 else Container.find sidx nl
935 pgroup = Node.group pnode
936 sgroup = Node.group snode
937 in if pgroup /= sgroup
938 then fail ("Instance placed accross two node groups, primary " ++
939 show pgroup ++ ", secondary " ++ show sgroup)
942 -- | Compute the list of badly allocated instances (split across node
944 findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
945 findSplitInstances nl il =
946 filter (not . isOk . instanceGroup nl) (Container.elems il)
948 -- | Splits a cluster into the component node groups
949 splitCluster :: Node.List -> Instance.List ->
950 [(Gdx, (Node.List, Instance.List))]
952 let ngroups = Node.computeGroups (Container.elems nl)
953 in map (\(guuid, nodes) ->
954 let nidxs = map Node.idx nodes
955 nodes' = zip nidxs nodes
956 instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
957 in (guuid, (Container.fromAssocList nodes', instances))) ngroups