1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the /Main/ module for the individual binaries.
10 Copyright (C) 2009, 2010, 2011 Google Inc.
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 2 of the License, or
15 (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
29 module Ganeti.HTools.Cluster
37 -- * Generic functions
39 , computeAllocationDelta
40 -- * First phase functions
42 -- * Second phase functions
47 -- * Display functions
50 -- * Balacing functions
59 -- * IAllocator functions
69 -- * Allocation functions
73 -- * Node group functions
79 import Data.Function (on)
80 import qualified Data.IntSet as IntSet
82 import Data.Maybe (fromJust)
83 import Data.Ord (comparing)
84 import Text.Printf (printf)
86 import Control.Parallel.Strategies
88 import qualified Ganeti.HTools.Container as Container
89 import qualified Ganeti.HTools.Instance as Instance
90 import qualified Ganeti.HTools.Node as Node
91 import qualified Ganeti.HTools.Group as Group
92 import Ganeti.HTools.Types
93 import Ganeti.HTools.Utils
94 import qualified Ganeti.OpCodes as OpCodes
98 -- | Allocation\/relocation solution.
99 data AllocSolution = AllocSolution
100 { asFailures :: [FailMode] -- ^ Failure counts
101 , asAllocs :: Int -- ^ Good allocation count
102 , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
103 -- of the list depends on the
104 -- allocation/relocation mode
105 , asLog :: [String] -- ^ A list of informational messages
108 -- | Node evacuation/group change iallocator result type. This result
109 -- type consists of actual opcodes (a restricted subset) that are
110 -- transmitted back to Ganeti.
111 data EvacSolution = EvacSolution
112 { esMoved :: [String] -- ^ Instance moved successfully
113 , esFailed :: [String] -- ^ Instance which were not
115 , esOpCodes :: [[[OpCodes.OpCode]]] -- ^ List of lists of jobs
118 -- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
119 type AllocResult = (FailStats, Node.List, Instance.List,
120 [Instance.Instance], [CStats])
122 -- | A type denoting the valid allocation mode/pairs.
124 -- For a one-node allocation, this will be a @Left ['Node.Node']@,
125 -- whereas for a two-node allocation, this will be a @Right
126 -- [('Node.Node', 'Node.Node')]@.
127 type AllocNodes = Either [Ndx] [(Ndx, Ndx)]
129 -- | The empty solution we start with when computing allocations.
130 emptyAllocSolution :: AllocSolution
131 emptyAllocSolution = AllocSolution { asFailures = [], asAllocs = 0
132 , asSolutions = [], asLog = [] }
134 -- | The empty evac solution.
135 emptyEvacSolution :: EvacSolution
136 emptyEvacSolution = EvacSolution { esMoved = []
141 -- | The complete state for the balancing solution.
142 data Table = Table Node.List Instance.List Score [Placement]
143 deriving (Show, Read)
145 data CStats = CStats { csFmem :: Integer -- ^ Cluster free mem
146 , csFdsk :: Integer -- ^ Cluster free disk
147 , csAmem :: Integer -- ^ Cluster allocatable mem
148 , csAdsk :: Integer -- ^ Cluster allocatable disk
149 , csAcpu :: Integer -- ^ Cluster allocatable cpus
150 , csMmem :: Integer -- ^ Max node allocatable mem
151 , csMdsk :: Integer -- ^ Max node allocatable disk
152 , csMcpu :: Integer -- ^ Max node allocatable cpu
153 , csImem :: Integer -- ^ Instance used mem
154 , csIdsk :: Integer -- ^ Instance used disk
155 , csIcpu :: Integer -- ^ Instance used cpu
156 , csTmem :: Double -- ^ Cluster total mem
157 , csTdsk :: Double -- ^ Cluster total disk
158 , csTcpu :: Double -- ^ Cluster total cpus
159 , csVcpu :: Integer -- ^ Cluster virtual cpus (if
160 -- node pCpu has been set,
162 , csXmem :: Integer -- ^ Unnacounted for mem
163 , csNmem :: Integer -- ^ Node own memory
164 , csScore :: Score -- ^ The cluster score
165 , csNinst :: Int -- ^ The total number of instances
167 deriving (Show, Read)
169 -- | Currently used, possibly to allocate, unallocable.
170 type AllocStats = (RSpec, RSpec, RSpec)
172 -- * Utility functions
174 -- | Verifies the N+1 status and return the affected nodes.
175 verifyN1 :: [Node.Node] -> [Node.Node]
176 verifyN1 = filter Node.failN1
178 {-| Computes the pair of bad nodes and instances.
180 The bad node list is computed via a simple 'verifyN1' check, and the
181 bad instance list is the list of primary and secondary instances of
185 computeBadItems :: Node.List -> Instance.List ->
186 ([Node.Node], [Instance.Instance])
187 computeBadItems nl il =
188 let bad_nodes = verifyN1 $ getOnline nl
189 bad_instances = map (`Container.find` il) .
191 concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
193 (bad_nodes, bad_instances)
195 -- | Zero-initializer for the CStats type.
196 emptyCStats :: CStats
197 emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
199 -- | Update stats with data from a new node.
200 updateCStats :: CStats -> Node.Node -> CStats
201 updateCStats cs node =
202 let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
203 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
204 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
205 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
206 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
208 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
211 inc_amem = Node.fMem node - Node.rMem node
212 inc_amem' = if inc_amem > 0 then inc_amem else 0
213 inc_adsk = Node.availDisk node
214 inc_imem = truncate (Node.tMem node) - Node.nMem node
215 - Node.xMem node - Node.fMem node
216 inc_icpu = Node.uCpu node
217 inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
218 inc_vcpu = Node.hiCpu node
219 inc_acpu = Node.availCpu node
221 in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
222 , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
223 , csAmem = x_amem + fromIntegral inc_amem'
224 , csAdsk = x_adsk + fromIntegral inc_adsk
225 , csAcpu = x_acpu + fromIntegral inc_acpu
226 , csMmem = max x_mmem (fromIntegral inc_amem')
227 , csMdsk = max x_mdsk (fromIntegral inc_adsk)
228 , csMcpu = max x_mcpu (fromIntegral inc_acpu)
229 , csImem = x_imem + fromIntegral inc_imem
230 , csIdsk = x_idsk + fromIntegral inc_idsk
231 , csIcpu = x_icpu + fromIntegral inc_icpu
232 , csTmem = x_tmem + Node.tMem node
233 , csTdsk = x_tdsk + Node.tDsk node
234 , csTcpu = x_tcpu + Node.tCpu node
235 , csVcpu = x_vcpu + fromIntegral inc_vcpu
236 , csXmem = x_xmem + fromIntegral (Node.xMem node)
237 , csNmem = x_nmem + fromIntegral (Node.nMem node)
238 , csNinst = x_ninst + length (Node.pList node)
241 -- | Compute the total free disk and memory in the cluster.
242 totalResources :: Node.List -> CStats
244 let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
245 in cs { csScore = compCV nl }
247 -- | Compute the delta between two cluster state.
249 -- This is used when doing allocations, to understand better the
250 -- available cluster resources. The return value is a triple of the
251 -- current used values, the delta that was still allocated, and what
252 -- was left unallocated.
253 computeAllocationDelta :: CStats -> CStats -> AllocStats
254 computeAllocationDelta cini cfin =
255 let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
256 CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
257 csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
258 rini = RSpec (fromIntegral i_icpu) (fromIntegral i_imem)
259 (fromIntegral i_idsk)
260 rfin = RSpec (fromIntegral (f_icpu - i_icpu))
261 (fromIntegral (f_imem - i_imem))
262 (fromIntegral (f_idsk - i_idsk))
263 un_cpu = fromIntegral (v_cpu - f_icpu)::Int
264 runa = RSpec un_cpu (truncate t_mem - fromIntegral f_imem)
265 (truncate t_dsk - fromIntegral f_idsk)
266 in (rini, rfin, runa)
268 -- | The names and weights of the individual elements in the CV list.
269 detailedCVInfo :: [(Double, String)]
270 detailedCVInfo = [ (1, "free_mem_cv")
271 , (1, "free_disk_cv")
273 , (1, "reserved_mem_cv")
274 , (4, "offline_all_cnt")
275 , (16, "offline_pri_cnt")
276 , (1, "vcpu_ratio_cv")
279 , (1, "disk_load_cv")
281 , (2, "pri_tags_score")
284 detailedCVWeights :: [Double]
285 detailedCVWeights = map fst detailedCVInfo
287 -- | Compute the mem and disk covariance.
288 compDetailedCV :: [Node.Node] -> [Double]
289 compDetailedCV all_nodes =
291 (offline, nodes) = partition Node.offline all_nodes
292 mem_l = map Node.pMem nodes
293 dsk_l = map Node.pDsk nodes
294 -- metric: memory covariance
295 mem_cv = stdDev mem_l
296 -- metric: disk covariance
297 dsk_cv = stdDev dsk_l
298 -- metric: count of instances living on N1 failing nodes
299 n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
300 length (Node.pList n)) .
301 filter Node.failN1 $ nodes :: Double
302 res_l = map Node.pRem nodes
303 -- metric: reserved memory covariance
304 res_cv = stdDev res_l
305 -- offline instances metrics
306 offline_ipri = sum . map (length . Node.pList) $ offline
307 offline_isec = sum . map (length . Node.sList) $ offline
308 -- metric: count of instances on offline nodes
309 off_score = fromIntegral (offline_ipri + offline_isec)::Double
310 -- metric: count of primary instances on offline nodes (this
311 -- helps with evacuation/failover of primary instances on
312 -- 2-node clusters with one node offline)
313 off_pri_score = fromIntegral offline_ipri::Double
314 cpu_l = map Node.pCpu nodes
315 -- metric: covariance of vcpu/pcpu ratio
316 cpu_cv = stdDev cpu_l
317 -- metrics: covariance of cpu, memory, disk and network load
318 (c_load, m_load, d_load, n_load) = unzip4 $
320 let DynUtil c1 m1 d1 n1 = Node.utilLoad n
321 DynUtil c2 m2 d2 n2 = Node.utilPool n
322 in (c1/c2, m1/m2, d1/d2, n1/n2)
324 -- metric: conflicting instance count
325 pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
326 pri_tags_score = fromIntegral pri_tags_inst::Double
327 in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
328 , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
331 -- | Compute the /total/ variance.
332 compCVNodes :: [Node.Node] -> Double
333 compCVNodes = sum . zipWith (*) detailedCVWeights . compDetailedCV
335 -- | Wrapper over 'compCVNodes' for callers that have a 'Node.List'.
336 compCV :: Node.List -> Double
337 compCV = compCVNodes . Container.elems
340 -- | Compute online nodes from a 'Node.List'.
341 getOnline :: Node.List -> [Node.Node]
342 getOnline = filter (not . Node.offline) . Container.elems
344 -- * Balancing functions
346 -- | Compute best table. Note that the ordering of the arguments is important.
347 compareTables :: Table -> Table -> Table
348 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
349 if a_cv > b_cv then b else a
351 -- | Applies an instance move to a given node list and instance.
352 applyMove :: Node.List -> Instance.Instance
353 -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
355 applyMove nl inst Failover =
356 let old_pdx = Instance.pNode inst
357 old_sdx = Instance.sNode inst
358 old_p = Container.find old_pdx nl
359 old_s = Container.find old_sdx nl
360 int_p = Node.removePri old_p inst
361 int_s = Node.removeSec old_s inst
362 force_p = Node.offline old_p
363 new_nl = do -- Maybe monad
364 new_p <- Node.addPriEx force_p int_s inst
365 new_s <- Node.addSec int_p inst old_sdx
366 let new_inst = Instance.setBoth inst old_sdx old_pdx
367 return (Container.addTwo old_pdx new_s old_sdx new_p nl,
368 new_inst, old_sdx, old_pdx)
371 -- Replace the primary (f:, r:np, f)
372 applyMove nl inst (ReplacePrimary new_pdx) =
373 let old_pdx = Instance.pNode inst
374 old_sdx = Instance.sNode inst
375 old_p = Container.find old_pdx nl
376 old_s = Container.find old_sdx nl
377 tgt_n = Container.find new_pdx nl
378 int_p = Node.removePri old_p inst
379 int_s = Node.removeSec old_s inst
380 force_p = Node.offline old_p
381 new_nl = do -- Maybe monad
382 -- check that the current secondary can host the instance
383 -- during the migration
384 tmp_s <- Node.addPriEx force_p int_s inst
385 let tmp_s' = Node.removePri tmp_s inst
386 new_p <- Node.addPriEx force_p tgt_n inst
387 new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
388 let new_inst = Instance.setPri inst new_pdx
389 return (Container.add new_pdx new_p $
390 Container.addTwo old_pdx int_p old_sdx new_s nl,
391 new_inst, new_pdx, old_sdx)
394 -- Replace the secondary (r:ns)
395 applyMove nl inst (ReplaceSecondary new_sdx) =
396 let old_pdx = Instance.pNode inst
397 old_sdx = Instance.sNode inst
398 old_s = Container.find old_sdx nl
399 tgt_n = Container.find new_sdx nl
400 int_s = Node.removeSec old_s inst
401 force_s = Node.offline old_s
402 new_inst = Instance.setSec inst new_sdx
403 new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
404 \new_s -> return (Container.addTwo new_sdx
405 new_s old_sdx int_s nl,
406 new_inst, old_pdx, new_sdx)
409 -- Replace the secondary and failover (r:np, f)
410 applyMove nl inst (ReplaceAndFailover new_pdx) =
411 let old_pdx = Instance.pNode inst
412 old_sdx = Instance.sNode inst
413 old_p = Container.find old_pdx nl
414 old_s = Container.find old_sdx nl
415 tgt_n = Container.find new_pdx nl
416 int_p = Node.removePri old_p inst
417 int_s = Node.removeSec old_s inst
418 force_s = Node.offline old_s
419 new_nl = do -- Maybe monad
420 new_p <- Node.addPri tgt_n inst
421 new_s <- Node.addSecEx force_s int_p inst new_pdx
422 let new_inst = Instance.setBoth inst new_pdx old_pdx
423 return (Container.add new_pdx new_p $
424 Container.addTwo old_pdx new_s old_sdx int_s nl,
425 new_inst, new_pdx, old_pdx)
428 -- Failver and replace the secondary (f, r:ns)
429 applyMove nl inst (FailoverAndReplace new_sdx) =
430 let old_pdx = Instance.pNode inst
431 old_sdx = Instance.sNode inst
432 old_p = Container.find old_pdx nl
433 old_s = Container.find old_sdx nl
434 tgt_n = Container.find new_sdx nl
435 int_p = Node.removePri old_p inst
436 int_s = Node.removeSec old_s inst
437 force_p = Node.offline old_p
438 new_nl = do -- Maybe monad
439 new_p <- Node.addPriEx force_p int_s inst
440 new_s <- Node.addSecEx force_p tgt_n inst old_sdx
441 let new_inst = Instance.setBoth inst old_sdx new_sdx
442 return (Container.add new_sdx new_s $
443 Container.addTwo old_sdx new_p old_pdx int_p nl,
444 new_inst, old_sdx, new_sdx)
447 -- | Tries to allocate an instance on one given node.
448 allocateOnSingle :: Node.List -> Instance.Instance -> Ndx
449 -> OpResult Node.AllocElement
450 allocateOnSingle nl inst new_pdx =
451 let p = Container.find new_pdx nl
452 new_inst = Instance.setBoth inst new_pdx Node.noSecondary
453 in Node.addPri p inst >>= \new_p -> do
454 let new_nl = Container.add new_pdx new_p nl
455 new_score = compCV nl
456 return (new_nl, new_inst, [new_p], new_score)
458 -- | Tries to allocate an instance on a given pair of nodes.
459 allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx
460 -> OpResult Node.AllocElement
461 allocateOnPair nl inst new_pdx new_sdx =
462 let tgt_p = Container.find new_pdx nl
463 tgt_s = Container.find new_sdx nl
465 new_p <- Node.addPri tgt_p inst
466 new_s <- Node.addSec tgt_s inst new_pdx
467 let new_inst = Instance.setBoth inst new_pdx new_sdx
468 new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
469 return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
471 -- | Tries to perform an instance move and returns the best table
472 -- between the original one and the new one.
473 checkSingleStep :: Table -- ^ The original table
474 -> Instance.Instance -- ^ The instance to move
475 -> Table -- ^ The current best table
476 -> IMove -- ^ The move to apply
477 -> Table -- ^ The final best table
478 checkSingleStep ini_tbl target cur_tbl move =
480 Table ini_nl ini_il _ ini_plc = ini_tbl
481 tmp_resu = applyMove ini_nl target move
485 OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
486 let tgt_idx = Instance.idx target
487 upd_cvar = compCV upd_nl
488 upd_il = Container.add tgt_idx new_inst ini_il
489 upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
490 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
492 compareTables cur_tbl upd_tbl
494 -- | Given the status of the current secondary as a valid new node and
495 -- the current candidate target node, generate the possible moves for
497 possibleMoves :: Bool -- ^ Whether the secondary node is a valid new node
498 -> Bool -- ^ Whether we can change the primary node
499 -> Ndx -- ^ Target node candidate
500 -> [IMove] -- ^ List of valid result moves
502 possibleMoves _ False tdx =
503 [ReplaceSecondary tdx]
505 possibleMoves True True tdx =
506 [ReplaceSecondary tdx,
507 ReplaceAndFailover tdx,
509 FailoverAndReplace tdx]
511 possibleMoves False True tdx =
512 [ReplaceSecondary tdx,
513 ReplaceAndFailover tdx]
515 -- | Compute the best move for a given instance.
516 checkInstanceMove :: [Ndx] -- ^ Allowed target node indices
517 -> Bool -- ^ Whether disk moves are allowed
518 -> Bool -- ^ Whether instance moves are allowed
519 -> Table -- ^ Original table
520 -> Instance.Instance -- ^ Instance to move
521 -> Table -- ^ Best new table for this instance
522 checkInstanceMove nodes_idx disk_moves inst_moves ini_tbl target =
524 opdx = Instance.pNode target
525 osdx = Instance.sNode target
526 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
527 use_secondary = elem osdx nodes_idx && inst_moves
528 aft_failover = if use_secondary -- if allowed to failover
529 then checkSingleStep ini_tbl target ini_tbl Failover
531 all_moves = if disk_moves
533 (possibleMoves use_secondary inst_moves) nodes
536 -- iterate over the possible nodes for this instance
537 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
539 -- | Compute the best next move.
540 checkMove :: [Ndx] -- ^ Allowed target node indices
541 -> Bool -- ^ Whether disk moves are allowed
542 -> Bool -- ^ Whether instance moves are allowed
543 -> Table -- ^ The current solution
544 -> [Instance.Instance] -- ^ List of instances still to move
545 -> Table -- ^ The new solution
546 checkMove nodes_idx disk_moves inst_moves ini_tbl victims =
547 let Table _ _ _ ini_plc = ini_tbl
548 -- we're using rwhnf from the Control.Parallel.Strategies
549 -- package; we don't need to use rnf as that would force too
550 -- much evaluation in single-threaded cases, and in
551 -- multi-threaded case the weak head normal form is enough to
552 -- spark the evaluation
553 tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves
556 -- iterate over all instances, computing the best move
557 best_tbl = foldl' compareTables ini_tbl tables
558 Table _ _ _ best_plc = best_tbl
559 in if length best_plc == length ini_plc
560 then ini_tbl -- no advancement
563 -- | Check if we are allowed to go deeper in the balancing.
564 doNextBalance :: Table -- ^ The starting table
565 -> Int -- ^ Remaining length
566 -> Score -- ^ Score at which to stop
567 -> Bool -- ^ The resulting table and commands
568 doNextBalance ini_tbl max_rounds min_score =
569 let Table _ _ ini_cv ini_plc = ini_tbl
570 ini_plc_len = length ini_plc
571 in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
573 -- | Run a balance move.
574 tryBalance :: Table -- ^ The starting table
575 -> Bool -- ^ Allow disk moves
576 -> Bool -- ^ Allow instance moves
577 -> Bool -- ^ Only evacuate moves
578 -> Score -- ^ Min gain threshold
579 -> Score -- ^ Min gain
580 -> Maybe Table -- ^ The resulting table and commands
581 tryBalance ini_tbl disk_moves inst_moves evac_mode mg_limit min_gain =
582 let Table ini_nl ini_il ini_cv _ = ini_tbl
583 all_inst = Container.elems ini_il
584 all_inst' = if evac_mode
585 then let bad_nodes = map Node.idx . filter Node.offline $
586 Container.elems ini_nl
587 in filter (\e -> Instance.sNode e `elem` bad_nodes ||
588 Instance.pNode e `elem` bad_nodes)
591 reloc_inst = filter Instance.movable all_inst'
592 node_idx = map Node.idx . filter (not . Node.offline) $
593 Container.elems ini_nl
594 fin_tbl = checkMove node_idx disk_moves inst_moves ini_tbl reloc_inst
595 (Table _ _ fin_cv _) = fin_tbl
597 if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
598 then Just fin_tbl -- this round made success, return the new table
601 -- * Allocation functions
603 -- | Build failure stats out of a list of failures.
604 collapseFailures :: [FailMode] -> FailStats
605 collapseFailures flst =
606 map (\k -> (k, foldl' (\a e -> if e == k then a + 1 else a) 0 flst))
609 -- | Update current Allocation solution and failure stats with new
611 concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
612 concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
614 concatAllocs as (OpGood ns@(_, _, _, nscore)) =
615 let -- Choose the old or new solution, based on the cluster score
617 osols = asSolutions as
618 nsols = case osols of
620 (_, _, _, oscore):[] ->
624 -- FIXME: here we simply concat to lists with more
625 -- than one element; we should instead abort, since
626 -- this is not a valid usage of this function
629 -- Note: we force evaluation of nsols here in order to keep the
630 -- memory profile low - we know that we will need nsols for sure
631 -- in the next cycle, so we force evaluation of nsols, since the
632 -- foldl' in the caller will only evaluate the tuple, but not the
633 -- elements of the tuple
634 in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
636 -- | Sums two allocation solutions (e.g. for two separate node groups).
637 sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
638 sumAllocs (AllocSolution af aa as al) (AllocSolution bf ba bs bl) =
639 AllocSolution (af ++ bf) (aa + ba) (as ++ bs) (al ++ bl)
641 -- | Given a solution, generates a reasonable description for it.
642 describeSolution :: AllocSolution -> String
643 describeSolution as =
644 let fcnt = asFailures as
645 sols = asSolutions as
647 intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
648 filter ((> 0) . snd) . collapseFailures $ fcnt
650 then "No valid allocation solutions, failure reasons: " ++
652 then "unknown reasons"
654 else let (_, _, nodes, cv) = head sols
655 in printf ("score: %.8f, successes %d, failures %d (%s)" ++
656 " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
657 (intercalate "/" . map Node.name $ nodes)
659 -- | Annotates a solution with the appropriate string.
660 annotateSolution :: AllocSolution -> AllocSolution
661 annotateSolution as = as { asLog = describeSolution as : asLog as }
663 -- | Reverses an evacuation solution.
665 -- Rationale: we always concat the results to the top of the lists, so
666 -- for proper jobset execution, we should reverse all lists.
667 reverseEvacSolution :: EvacSolution -> EvacSolution
668 reverseEvacSolution (EvacSolution f m o) =
669 EvacSolution (reverse f) (reverse m) (reverse o)
671 -- | Generate the valid node allocation singles or pairs for a new instance.
672 genAllocNodes :: Group.List -- ^ Group list
673 -> Node.List -- ^ The node map
674 -> Int -- ^ The number of nodes required
675 -> Bool -- ^ Whether to drop or not
677 -> Result AllocNodes -- ^ The (monadic) result
678 genAllocNodes gl nl count drop_unalloc =
679 let filter_fn = if drop_unalloc
680 then filter (Group.isAllocable .
681 flip Container.find gl . Node.group)
683 all_nodes = filter_fn $ getOnline nl
684 all_pairs = liftM2 (,) all_nodes all_nodes
685 ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y &&
686 Node.group x == Node.group y) all_pairs
688 1 -> Ok (Left (map Node.idx all_nodes))
689 2 -> Ok (Right (map (\(p, s) -> (Node.idx p, Node.idx s)) ok_pairs))
690 _ -> Bad "Unsupported number of nodes, only one or two supported"
692 -- | Try to allocate an instance on the cluster.
693 tryAlloc :: (Monad m) =>
694 Node.List -- ^ The node list
695 -> Instance.List -- ^ The instance list
696 -> Instance.Instance -- ^ The instance to allocate
697 -> AllocNodes -- ^ The allocation targets
698 -> m AllocSolution -- ^ Possible solution list
699 tryAlloc nl _ inst (Right ok_pairs) =
700 let sols = foldl' (\cstate (p, s) ->
701 concatAllocs cstate $ allocateOnPair nl inst p s
702 ) emptyAllocSolution ok_pairs
704 in if null ok_pairs -- means we have just one node
705 then fail "Not enough online nodes"
706 else return $ annotateSolution sols
708 tryAlloc nl _ inst (Left all_nodes) =
709 let sols = foldl' (\cstate ->
710 concatAllocs cstate . allocateOnSingle nl inst
711 ) emptyAllocSolution all_nodes
713 then fail "No online nodes"
714 else return $ annotateSolution sols
716 -- | Given a group/result, describe it as a nice (list of) messages.
717 solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
718 solutionDescription gl (groupId, result) =
720 Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution)
721 Bad message -> [printf "Group %s: error %s" gname message]
722 where grp = Container.find groupId gl
723 gname = Group.name grp
724 pol = apolToString (Group.allocPolicy grp)
726 -- | From a list of possibly bad and possibly empty solutions, filter
727 -- only the groups with a valid result. Note that the result will be
728 -- reversed compared to the original list.
729 filterMGResults :: Group.List
730 -> [(Gdx, Result AllocSolution)]
731 -> [(Gdx, AllocSolution)]
732 filterMGResults gl = foldl' fn []
733 where unallocable = not . Group.isAllocable . flip Container.find gl
734 fn accu (gdx, rasol) =
737 Ok sol | null (asSolutions sol) -> accu
738 | unallocable gdx -> accu
739 | otherwise -> (gdx, sol):accu
741 -- | Sort multigroup results based on policy and score.
742 sortMGResults :: Group.List
743 -> [(Gdx, AllocSolution)]
744 -> [(Gdx, AllocSolution)]
745 sortMGResults gl sols =
746 let extractScore (_, _, _, x) = x
747 solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
748 (extractScore . head . asSolutions) sol)
749 in sortBy (comparing solScore) sols
751 -- | Finds the best group for an instance on a multi-group cluster.
752 findBestAllocGroup :: Group.List -- ^ The group list
753 -> Node.List -- ^ The node list
754 -> Instance.List -- ^ The instance list
755 -> Instance.Instance -- ^ The instance to allocate
756 -> Int -- ^ Required number of nodes
757 -> Result (Gdx, AllocSolution, [String])
758 findBestAllocGroup mggl mgnl mgil inst cnt =
759 let groups = splitCluster mgnl mgil
760 sols = map (\(gid, (nl, il)) ->
761 (gid, genAllocNodes mggl nl cnt False >>=
762 tryAlloc nl il inst))
763 groups::[(Gdx, Result AllocSolution)]
764 all_msgs = concatMap (solutionDescription mggl) sols
765 goodSols = filterMGResults mggl sols
766 sortedSols = sortMGResults mggl goodSols
767 in if null sortedSols
768 then Bad $ intercalate ", " all_msgs
769 else let (final_group, final_sol) = head sortedSols
770 in return (final_group, final_sol, all_msgs)
772 -- | Try to allocate an instance on a multi-group cluster.
773 tryMGAlloc :: Group.List -- ^ The group list
774 -> Node.List -- ^ The node list
775 -> Instance.List -- ^ The instance list
776 -> Instance.Instance -- ^ The instance to allocate
777 -> Int -- ^ Required number of nodes
778 -> Result AllocSolution -- ^ Possible solution list
779 tryMGAlloc mggl mgnl mgil inst cnt = do
780 (best_group, solution, all_msgs) <-
781 findBestAllocGroup mggl mgnl mgil inst cnt
782 let group_name = Group.name $ Container.find best_group mggl
783 selmsg = "Selected group: " ++ group_name
784 return $ solution { asLog = selmsg:all_msgs }
786 -- | Try to relocate an instance on the cluster.
787 tryReloc :: (Monad m) =>
788 Node.List -- ^ The node list
789 -> Instance.List -- ^ The instance list
790 -> Idx -- ^ The index of the instance to move
791 -> Int -- ^ The number of nodes required
792 -> [Ndx] -- ^ Nodes which should not be used
793 -> m AllocSolution -- ^ Solution list
794 tryReloc nl il xid 1 ex_idx =
795 let all_nodes = getOnline nl
796 inst = Container.find xid il
797 ex_idx' = Instance.pNode inst:ex_idx
798 valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
799 valid_idxes = map Node.idx valid_nodes
800 sols1 = foldl' (\cstate x ->
803 applyMove nl inst (ReplaceSecondary x)
804 return (mnl, i, [Container.find x mnl],
806 in concatAllocs cstate em
807 ) emptyAllocSolution valid_idxes
810 tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \
811 \destinations required (" ++ show reqn ++
812 "), only one supported"
814 tryMGReloc :: (Monad m) =>
815 Group.List -- ^ The group list
816 -> Node.List -- ^ The node list
817 -> Instance.List -- ^ The instance list
818 -> Idx -- ^ The index of the instance to move
819 -> Int -- ^ The number of nodes required
820 -> [Ndx] -- ^ Nodes which should not be used
821 -> m AllocSolution -- ^ Solution list
822 tryMGReloc _ mgnl mgil xid ncount ex_ndx = do
823 let groups = splitCluster mgnl mgil
824 -- TODO: we only relocate inside the group for now
825 inst = Container.find xid mgil
826 (nl, il) <- case lookup (instancePriGroup mgnl inst) groups of
827 Nothing -> fail $ "Cannot find group for instance " ++
830 tryReloc nl il xid ncount ex_ndx
832 -- | Change an instance's secondary node.
833 evacInstance :: (Monad m) =>
834 [Ndx] -- ^ Excluded nodes
835 -> Instance.List -- ^ The current instance list
836 -> (Node.List, AllocSolution) -- ^ The current state
837 -> Idx -- ^ The instance to evacuate
838 -> m (Node.List, AllocSolution)
839 evacInstance ex_ndx il (nl, old_as) idx = do
840 -- FIXME: hardcoded one node here
842 -- Longer explanation: evacuation is currently hardcoded to DRBD
843 -- instances (which have one secondary); hence, even if the
844 -- IAllocator protocol can request N nodes for an instance, and all
845 -- the message parsing/loading pass this, this implementation only
846 -- supports one; this situation needs to be revisited if we ever
847 -- support more than one secondary, or if we change the storage
849 new_as <- tryReloc nl il idx 1 ex_ndx
850 case asSolutions new_as of
851 -- an individual relocation succeeded, we kind of compose the data
852 -- from the two solutions
853 csol@(nl', _, _, _):_ ->
854 return (nl', new_as { asSolutions = csol:asSolutions old_as })
855 -- this relocation failed, so we fail the entire evac
856 _ -> fail $ "Can't evacuate instance " ++
857 Instance.name (Container.find idx il) ++
858 ": " ++ describeSolution new_as
860 -- | Try to evacuate a list of nodes.
861 tryEvac :: (Monad m) =>
862 Node.List -- ^ The node list
863 -> Instance.List -- ^ The instance list
864 -> [Idx] -- ^ Instances to be evacuated
865 -> [Ndx] -- ^ Restricted nodes (the ones being evacuated)
866 -> m AllocSolution -- ^ Solution list
867 tryEvac nl il idxs ex_ndx = do
868 (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptyAllocSolution) idxs
871 -- | Multi-group evacuation of a list of nodes.
872 tryMGEvac :: (Monad m) =>
873 Group.List -- ^ The group list
874 -> Node.List -- ^ The node list
875 -> Instance.List -- ^ The instance list
876 -> [Ndx] -- ^ Nodes to be evacuated
877 -> m AllocSolution -- ^ Solution list
878 tryMGEvac _ nl il ex_ndx =
879 let ex_nodes = map (`Container.find` nl) ex_ndx
880 all_insts = nub . concatMap Node.sList $ ex_nodes
881 all_insts' = associateIdxs all_insts $ splitCluster nl il
883 results <- mapM (\(_, (gnl, gil, idxs)) -> tryEvac gnl gil idxs ex_ndx)
885 let sol = foldl' sumAllocs emptyAllocSolution results
886 return $ annotateSolution sol
888 -- | Function which fails if the requested mode is change secondary.
890 -- This is useful since except DRBD, no other disk template can
891 -- execute change secondary; thus, we can just call this function
892 -- instead of always checking for secondary mode. After the call to
893 -- this function, whatever mode we have is just a primary change.
894 failOnSecondaryChange :: (Monad m) => EvacMode -> DiskTemplate -> m ()
895 failOnSecondaryChange ChangeSecondary dt =
896 fail $ "Instances with disk template '" ++ dtToString dt ++
897 "' can't execute change secondary"
898 failOnSecondaryChange _ _ = return ()
900 -- | Run evacuation for a single instance.
901 nodeEvacInstance :: Node.List -- ^ The node list (cluster-wide)
902 -> Instance.List -- ^ Instance list (cluster-wide)
903 -> EvacMode -- ^ The evacuation mode
904 -> Instance.Instance -- ^ The instance to be evacuated
905 -> [Ndx] -- ^ The list of available nodes
907 -> Result (Node.List, Instance.List, [OpCodes.OpCode])
908 nodeEvacInstance _ _ mode (Instance.Instance
909 {Instance.diskTemplate = dt@DTDiskless}) _ =
910 failOnSecondaryChange mode dt >>
911 fail "Diskless relocations not implemented yet"
913 nodeEvacInstance _ _ _ (Instance.Instance
914 {Instance.diskTemplate = DTPlain}) _ =
915 fail "Instances of type plain cannot be relocated"
917 nodeEvacInstance _ _ _ (Instance.Instance
918 {Instance.diskTemplate = DTFile}) _ =
919 fail "Instances of type file cannot be relocated"
921 nodeEvacInstance _ _ mode (Instance.Instance
922 {Instance.diskTemplate = dt@DTSharedFile}) _ =
923 failOnSecondaryChange mode dt >>
924 fail "Shared file relocations not implemented yet"
926 nodeEvacInstance _ _ mode (Instance.Instance
927 {Instance.diskTemplate = dt@DTBlock}) _ =
928 failOnSecondaryChange mode dt >>
929 fail "Block device relocations not implemented yet"
931 nodeEvacInstance nl il ChangePrimary
932 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8}) _ =
934 (nl', inst', _, _) <- opToResult $ applyMove nl inst Failover
935 let idx = Instance.idx inst
936 il' = Container.add idx inst' il
937 ops = iMoveToJob nl' il' idx Failover
938 return (nl', il', ops)
940 nodeEvacInstance nl il ChangeSecondary
941 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
944 let gdx = instancePriGroup nl inst
945 (nl', inst', _, ndx) <- annotateResult "Can't find any good node" $
947 foldl' (evacDrbdSecondaryInner nl inst gdx)
948 (Left "no nodes available") avail_nodes
949 let idx = Instance.idx inst
950 il' = Container.add idx inst' il
951 ops = iMoveToJob nl' il' idx (ReplaceSecondary ndx)
952 return (nl', il', ops)
954 nodeEvacInstance nl il ChangeAll
955 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
958 let primary = Container.find (Instance.pNode inst) nl
959 idx = Instance.idx inst
960 gdx = instancePriGroup nl inst
961 no_nodes = Left "no nodes available"
962 -- if the primary is offline, then we first failover
963 (nl1, inst1, ops1) <-
964 if Node.offline primary
966 (nl', inst', _, _) <-
967 annotateResult "Failing over to the secondary" $
968 opToResult $ applyMove nl inst Failover
969 return (nl', inst', [Failover])
970 else return (nl, inst, [])
971 -- we now need to execute a replace secondary to the future
973 (nl2, inst2, _, new_pdx) <- annotateResult "Searching for a new primary" $
975 foldl' (evacDrbdSecondaryInner nl1 inst1 gdx)
977 let ops2 = ReplaceSecondary new_pdx:ops1
978 -- since we chose the new primary, we remove it from the list of
980 let avail_nodes_sec = new_pdx `delete` avail_nodes
981 -- we now execute another failover, the primary stays fixed now
982 (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
983 opToResult $ applyMove nl2 inst2 Failover
984 let ops3 = Failover:ops2
985 -- and finally another replace secondary, to the final secondary
986 (nl4, inst4, _, new_sdx) <-
987 annotateResult "Searching for a new secondary" $
989 foldl' (evacDrbdSecondaryInner nl3 inst3 gdx) no_nodes avail_nodes_sec
990 let ops4 = ReplaceSecondary new_sdx:ops3
991 il' = Container.add idx inst4 il
992 ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
993 return (nl4, il', ops)
995 -- | Inner fold function for changing secondary of a DRBD instance.
997 -- The "running" solution is either a @Left String@, which means we
998 -- don't have yet a working solution, or a @Right (...)@, which
999 -- represents a valid solution; it holds the modified node list, the
1000 -- modified instance (after evacuation), the score of that solution,
1001 -- and the new secondary node index.
1002 evacDrbdSecondaryInner :: Node.List -- ^ Cluster node list
1003 -> Instance.Instance -- ^ Instance being evacuated
1004 -> Gdx -- ^ The group index of the instance
1005 -> Either String ( Node.List
1008 , Ndx) -- ^ Current best solution
1009 -> Ndx -- ^ Node we're evaluating as new secondary
1010 -> Either String ( Node.List
1013 , Ndx) -- ^ New best solution
1014 evacDrbdSecondaryInner nl inst gdx accu ndx =
1015 case applyMove nl inst (ReplaceSecondary ndx) of
1019 Left _ -> Left $ "Node " ++ Container.nameOf nl ndx ++
1020 " failed: " ++ show fm
1021 OpGood (nl', inst', _, _) ->
1022 let nodes = Container.elems nl'
1023 -- The fromJust below is ugly (it can fail nastily), but
1024 -- at this point we should have any internal mismatches,
1025 -- and adding a monad here would be quite involved
1026 grpnodes = fromJust (gdx `lookup` (Node.computeGroups nodes))
1027 new_cv = compCVNodes grpnodes
1028 new_accu = Right (nl', inst', new_cv, ndx)
1031 Right (_, _, old_cv, _) ->
1036 -- | Computes the local nodes of a given instance which are available
1038 availableLocalNodes :: Node.List
1041 -> Instance.Instance
1043 availableLocalNodes nl group_nodes excl_ndx inst = do
1044 let gdx = instancePriGroup nl inst
1045 local_nodes <- maybe (Bad $ "Can't find group with index " ++ show gdx)
1046 Ok (lookup gdx group_nodes)
1047 let avail_nodes = filter (not . flip IntSet.member excl_ndx) local_nodes
1050 -- | Updates the evac solution with the results of an instance
1052 updateEvacSolution :: (Node.List, Instance.List, EvacSolution)
1053 -> Instance.Instance
1054 -> Result (Node.List, Instance.List, [OpCodes.OpCode])
1055 -> (Node.List, Instance.List, EvacSolution)
1056 updateEvacSolution (nl, il, es) inst (Bad msg) =
1057 (nl, il, es { esFailed = (Instance.name inst ++ ": " ++ msg):esFailed es})
1058 updateEvacSolution (_, _, es) inst (Ok (nl, il, opcodes)) =
1059 (nl, il, es { esMoved = Instance.name inst:esMoved es
1060 , esOpCodes = [opcodes]:esOpCodes es })
1062 -- | Node-evacuation IAllocator mode main function.
1063 tryNodeEvac :: Group.List -- ^ The cluster groups
1064 -> Node.List -- ^ The node list (cluster-wide, not per group)
1065 -> Instance.List -- ^ Instance list (cluster-wide)
1066 -> EvacMode -- ^ The evacuation mode
1067 -> [Idx] -- ^ List of instance (indices) to be evacuated
1068 -> Result EvacSolution
1069 tryNodeEvac _ ini_nl ini_il mode idxs =
1070 let evac_ndx = nodesToEvacuate ini_il mode idxs
1071 offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
1072 excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline
1073 group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
1074 (Container.elems nl))) $
1075 splitCluster ini_nl ini_il
1077 foldl' (\state@(nl, il, _) inst ->
1078 updateEvacSolution state inst $
1079 availableLocalNodes nl group_ndx excl_ndx inst >>=
1080 nodeEvacInstance nl il mode inst
1082 (ini_nl, ini_il, emptyEvacSolution)
1083 (map (`Container.find` ini_il) idxs)
1084 in return $ reverseEvacSolution esol
1086 -- | Recursively place instances on the cluster until we're out of space.
1087 iterateAlloc :: Node.List
1090 -> Instance.Instance
1092 -> [Instance.Instance]
1094 -> Result AllocResult
1095 iterateAlloc nl il limit newinst allocnodes ixes cstats =
1096 let depth = length ixes
1097 newname = printf "new-%d" depth::String
1098 newidx = length (Container.elems il) + depth
1099 newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
1100 newlimit = fmap (flip (-) 1) limit
1101 in case tryAlloc nl il newi2 allocnodes of
1103 Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
1104 let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in
1107 (xnl, xi, _, _):[] ->
1110 else iterateAlloc xnl (Container.add newidx xi il)
1111 newlimit newinst allocnodes (xi:ixes)
1112 (totalResources xnl:cstats)
1113 _ -> Bad "Internal error: multiple solutions for single\
1116 -- | The core of the tiered allocation mode.
1117 tieredAlloc :: Node.List
1120 -> Instance.Instance
1122 -> [Instance.Instance]
1124 -> Result AllocResult
1125 tieredAlloc nl il limit newinst allocnodes ixes cstats =
1126 case iterateAlloc nl il limit newinst allocnodes ixes cstats of
1128 Ok (errs, nl', il', ixes', cstats') ->
1129 let newsol = Ok (errs, nl', il', ixes', cstats')
1130 ixes_cnt = length ixes'
1131 (stop, newlimit) = case limit of
1132 Nothing -> (False, Nothing)
1133 Just n -> (n <= ixes_cnt,
1134 Just (n - ixes_cnt)) in
1135 if stop then newsol else
1136 case Instance.shrinkByType newinst . fst . last $
1137 sortBy (comparing snd) errs of
1139 Ok newinst' -> tieredAlloc nl' il' newlimit
1140 newinst' allocnodes ixes' cstats'
1142 -- | Compute the tiered spec string description from a list of
1143 -- allocated instances.
1144 tieredSpecMap :: [Instance.Instance]
1146 tieredSpecMap trl_ixes =
1147 let fin_trl_ixes = reverse trl_ixes
1148 ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
1149 spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
1151 in map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
1152 (rspecDsk spec) (rspecCpu spec) cnt) spec_map
1154 -- * Formatting functions
1156 -- | Given the original and final nodes, computes the relocation description.
1157 computeMoves :: Instance.Instance -- ^ The instance to be moved
1158 -> String -- ^ The instance name
1159 -> IMove -- ^ The move being performed
1160 -> String -- ^ New primary
1161 -> String -- ^ New secondary
1162 -> (String, [String])
1163 -- ^ Tuple of moves and commands list; moves is containing
1164 -- either @/f/@ for failover or @/r:name/@ for replace
1165 -- secondary, while the command list holds gnt-instance
1166 -- commands (without that prefix), e.g \"@failover instance1@\"
1167 computeMoves i inam mv c d =
1169 Failover -> ("f", [mig])
1170 FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
1171 ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
1172 ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
1173 ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
1174 where morf = if Instance.running i then "migrate" else "failover"
1175 mig = printf "%s -f %s" morf inam::String
1176 rep n = printf "replace-disks -n %s %s" n inam
1178 -- | Converts a placement to string format.
1179 printSolutionLine :: Node.List -- ^ The node list
1180 -> Instance.List -- ^ The instance list
1181 -> Int -- ^ Maximum node name length
1182 -> Int -- ^ Maximum instance name length
1183 -> Placement -- ^ The current placement
1184 -> Int -- ^ The index of the placement in
1186 -> (String, [String])
1187 printSolutionLine nl il nmlen imlen plc pos =
1189 pmlen = (2*nmlen + 1)
1190 (i, p, s, mv, c) = plc
1191 inst = Container.find i il
1192 inam = Instance.alias inst
1193 npri = Node.alias $ Container.find p nl
1194 nsec = Node.alias $ Container.find s nl
1195 opri = Node.alias $ Container.find (Instance.pNode inst) nl
1196 osec = Node.alias $ Container.find (Instance.sNode inst) nl
1197 (moves, cmds) = computeMoves inst inam mv npri nsec
1198 ostr = printf "%s:%s" opri osec::String
1199 nstr = printf "%s:%s" npri nsec::String
1201 (printf " %3d. %-*s %-*s => %-*s %.8f a=%s"
1202 pos imlen inam pmlen ostr
1206 -- | Return the instance and involved nodes in an instance move.
1207 involvedNodes :: Instance.List -> Placement -> [Ndx]
1208 involvedNodes il plc =
1209 let (i, np, ns, _, _) = plc
1210 inst = Container.find i il
1211 op = Instance.pNode inst
1212 os = Instance.sNode inst
1213 in nub [np, ns, op, os]
1215 -- | Inner function for splitJobs, that either appends the next job to
1216 -- the current jobset, or starts a new jobset.
1217 mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
1218 mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
1219 mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
1220 | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
1221 | otherwise = ([n]:cjs, ndx)
1223 -- | Break a list of moves into independent groups. Note that this
1224 -- will reverse the order of jobs.
1225 splitJobs :: [MoveJob] -> [JobSet]
1226 splitJobs = fst . foldl mergeJobs ([], [])
1228 -- | Given a list of commands, prefix them with @gnt-instance@ and
1229 -- also beautify the display a little.
1230 formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
1231 formatJob jsn jsl (sn, (_, _, _, cmds)) =
1233 printf " echo job %d/%d" jsn sn:
1235 map (" gnt-instance " ++) cmds
1237 then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
1240 -- | Given a list of commands, prefix them with @gnt-instance@ and
1241 -- also beautify the display a little.
1242 formatCmds :: [JobSet] -> String
1245 concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
1249 -- | Print the node list.
1250 printNodes :: Node.List -> [String] -> String
1252 let fields = case fs of
1253 [] -> Node.defaultFields
1254 "+":rest -> Node.defaultFields ++ rest
1256 snl = sortBy (comparing Node.idx) (Container.elems nl)
1257 (header, isnum) = unzip $ map Node.showHeader fields
1258 in unlines . map ((:) ' ' . intercalate " ") $
1259 formatTable (header:map (Node.list fields) snl) isnum
1261 -- | Print the instance list.
1262 printInsts :: Node.List -> Instance.List -> String
1264 let sil = sortBy (comparing Instance.idx) (Container.elems il)
1265 helper inst = [ if Instance.running inst then "R" else " "
1266 , Instance.name inst
1267 , Container.nameOf nl (Instance.pNode inst)
1268 , let sdx = Instance.sNode inst
1269 in if sdx == Node.noSecondary
1271 else Container.nameOf nl sdx
1272 , if Instance.autoBalance inst then "Y" else "N"
1273 , printf "%3d" $ Instance.vcpus inst
1274 , printf "%5d" $ Instance.mem inst
1275 , printf "%5d" $ Instance.dsk inst `div` 1024
1281 where DynUtil lC lM lD lN = Instance.util inst
1282 header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
1283 , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
1284 isnum = False:False:False:False:False:repeat True
1285 in unlines . map ((:) ' ' . intercalate " ") $
1286 formatTable (header:map helper sil) isnum
1288 -- | Shows statistics for a given node list.
1289 printStats :: Node.List -> String
1291 let dcvs = compDetailedCV $ Container.elems nl
1292 (weights, names) = unzip detailedCVInfo
1293 hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
1294 formatted = map (\(w, header, val) ->
1295 printf "%s=%.8f(x%.2f)" header val w::String) hd
1296 in intercalate ", " formatted
1298 -- | Convert a placement into a list of OpCodes (basically a job).
1299 iMoveToJob :: Node.List -> Instance.List
1300 -> Idx -> IMove -> [OpCodes.OpCode]
1301 iMoveToJob nl il idx move =
1302 let inst = Container.find idx il
1303 iname = Instance.name inst
1304 lookNode = Just . Container.nameOf nl
1305 opF = OpCodes.OpInstanceMigrate iname True False True
1306 opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
1307 OpCodes.ReplaceNewSecondary [] Nothing
1310 ReplacePrimary np -> [ opF, opR np, opF ]
1311 ReplaceSecondary ns -> [ opR ns ]
1312 ReplaceAndFailover np -> [ opR np, opF ]
1313 FailoverAndReplace ns -> [ opF, opR ns ]
1315 -- * Node group functions
1317 -- | Computes the group of an instance.
1318 instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
1319 instanceGroup nl i =
1320 let sidx = Instance.sNode i
1321 pnode = Container.find (Instance.pNode i) nl
1322 snode = if sidx == Node.noSecondary
1324 else Container.find sidx nl
1325 pgroup = Node.group pnode
1326 sgroup = Node.group snode
1327 in if pgroup /= sgroup
1328 then fail ("Instance placed accross two node groups, primary " ++
1329 show pgroup ++ ", secondary " ++ show sgroup)
1332 -- | Computes the group of an instance per the primary node.
1333 instancePriGroup :: Node.List -> Instance.Instance -> Gdx
1334 instancePriGroup nl i =
1335 let pnode = Container.find (Instance.pNode i) nl
1338 -- | Compute the list of badly allocated instances (split across node
1340 findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
1341 findSplitInstances nl =
1342 filter (not . isOk . instanceGroup nl) . Container.elems
1344 -- | Splits a cluster into the component node groups.
1345 splitCluster :: Node.List -> Instance.List ->
1346 [(Gdx, (Node.List, Instance.List))]
1347 splitCluster nl il =
1348 let ngroups = Node.computeGroups (Container.elems nl)
1349 in map (\(guuid, nodes) ->
1350 let nidxs = map Node.idx nodes
1351 nodes' = zip nidxs nodes
1352 instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
1353 in (guuid, (Container.fromList nodes', instances))) ngroups
1355 -- | Split a global instance index map into per-group, and associate
1356 -- it with the group/node/instance lists.
1357 associateIdxs :: [Idx] -- ^ Instance indices to be split/associated
1358 -> [(Gdx, (Node.List, Instance.List))] -- ^ Input groups
1359 -> [(Gdx, (Node.List, Instance.List, [Idx]))] -- ^ Result
1360 associateIdxs idxs =
1361 map (\(gdx, (nl, il)) ->
1362 (gdx, (nl, il, filter (`Container.member` il) idxs)))
1364 -- | Compute the list of nodes that are to be evacuated, given a list
1365 -- of instances and an evacuation mode.
1366 nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list
1367 -> EvacMode -- ^ The evacuation mode we're using
1368 -> [Idx] -- ^ List of instance indices being evacuated
1369 -> IntSet.IntSet -- ^ Set of node indices
1370 nodesToEvacuate il mode =
1371 IntSet.delete Node.noSecondary .
1373 let i = Container.find idx il
1374 pdx = Instance.pNode i
1375 sdx = Instance.sNode i
1376 dt = Instance.diskTemplate i
1377 withSecondary = case dt of
1378 DTDrbd8 -> IntSet.insert sdx ns
1381 ChangePrimary -> IntSet.insert pdx ns
1382 ChangeSecondary -> withSecondary
1383 ChangeAll -> IntSet.insert pdx withSecondary