1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the /Main/ module for the individual binaries.
10 Copyright (C) 2009, 2010, 2011 Google Inc.
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 2 of the License, or
15 (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
29 module Ganeti.HTools.Cluster
37 -- * Generic functions
39 , computeAllocationDelta
40 -- * First phase functions
42 -- * Second phase functions
47 -- * Display functions
50 -- * Balacing functions
59 -- * IAllocator functions
69 -- * Allocation functions
73 -- * Node group functions
79 import Data.Function (on)
80 import qualified Data.IntSet as IntSet
82 import Data.Ord (comparing)
83 import Text.Printf (printf)
85 import Control.Parallel.Strategies
87 import qualified Ganeti.HTools.Container as Container
88 import qualified Ganeti.HTools.Instance as Instance
89 import qualified Ganeti.HTools.Node as Node
90 import qualified Ganeti.HTools.Group as Group
91 import Ganeti.HTools.Types
92 import Ganeti.HTools.Utils
93 import qualified Ganeti.OpCodes as OpCodes
97 -- | Allocation\/relocation solution.
98 data AllocSolution = AllocSolution
99 { asFailures :: [FailMode] -- ^ Failure counts
100 , asAllocs :: Int -- ^ Good allocation count
101 , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
102 -- of the list depends on the
103 -- allocation/relocation mode
104 , asLog :: [String] -- ^ A list of informational messages
107 -- | Node evacuation/group change iallocator result type. This result
108 -- type consists of actual opcodes (a restricted subset) that are
109 -- transmitted back to Ganeti.
110 data EvacSolution = EvacSolution
111 { esMoved :: [String] -- ^ Instance moved successfully
112 , esFailed :: [String] -- ^ Instance which were not
114 , esOpCodes :: [[[OpCodes.OpCode]]] -- ^ List of lists of jobs
117 -- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
118 type AllocResult = (FailStats, Node.List, Instance.List,
119 [Instance.Instance], [CStats])
121 -- | A type denoting the valid allocation mode/pairs.
123 -- For a one-node allocation, this will be a @Left ['Node.Node']@,
124 -- whereas for a two-node allocation, this will be a @Right
125 -- [('Node.Node', 'Node.Node')]@.
126 type AllocNodes = Either [Ndx] [(Ndx, Ndx)]
128 -- | The empty solution we start with when computing allocations.
129 emptyAllocSolution :: AllocSolution
130 emptyAllocSolution = AllocSolution { asFailures = [], asAllocs = 0
131 , asSolutions = [], asLog = [] }
133 -- | The empty evac solution.
134 emptyEvacSolution :: EvacSolution
135 emptyEvacSolution = EvacSolution { esMoved = []
140 -- | The complete state for the balancing solution.
141 data Table = Table Node.List Instance.List Score [Placement]
142 deriving (Show, Read)
144 data CStats = CStats { csFmem :: Integer -- ^ Cluster free mem
145 , csFdsk :: Integer -- ^ Cluster free disk
146 , csAmem :: Integer -- ^ Cluster allocatable mem
147 , csAdsk :: Integer -- ^ Cluster allocatable disk
148 , csAcpu :: Integer -- ^ Cluster allocatable cpus
149 , csMmem :: Integer -- ^ Max node allocatable mem
150 , csMdsk :: Integer -- ^ Max node allocatable disk
151 , csMcpu :: Integer -- ^ Max node allocatable cpu
152 , csImem :: Integer -- ^ Instance used mem
153 , csIdsk :: Integer -- ^ Instance used disk
154 , csIcpu :: Integer -- ^ Instance used cpu
155 , csTmem :: Double -- ^ Cluster total mem
156 , csTdsk :: Double -- ^ Cluster total disk
157 , csTcpu :: Double -- ^ Cluster total cpus
158 , csVcpu :: Integer -- ^ Cluster virtual cpus (if
159 -- node pCpu has been set,
161 , csXmem :: Integer -- ^ Unnacounted for mem
162 , csNmem :: Integer -- ^ Node own memory
163 , csScore :: Score -- ^ The cluster score
164 , csNinst :: Int -- ^ The total number of instances
166 deriving (Show, Read)
168 -- | Currently used, possibly to allocate, unallocable.
169 type AllocStats = (RSpec, RSpec, RSpec)
171 -- * Utility functions
173 -- | Verifies the N+1 status and return the affected nodes.
174 verifyN1 :: [Node.Node] -> [Node.Node]
175 verifyN1 = filter Node.failN1
177 {-| Computes the pair of bad nodes and instances.
179 The bad node list is computed via a simple 'verifyN1' check, and the
180 bad instance list is the list of primary and secondary instances of
184 computeBadItems :: Node.List -> Instance.List ->
185 ([Node.Node], [Instance.Instance])
186 computeBadItems nl il =
187 let bad_nodes = verifyN1 $ getOnline nl
188 bad_instances = map (`Container.find` il) .
190 concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
192 (bad_nodes, bad_instances)
194 -- | Zero-initializer for the CStats type.
195 emptyCStats :: CStats
196 emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
198 -- | Update stats with data from a new node.
199 updateCStats :: CStats -> Node.Node -> CStats
200 updateCStats cs node =
201 let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
202 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
203 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
204 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
205 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
207 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
210 inc_amem = Node.fMem node - Node.rMem node
211 inc_amem' = if inc_amem > 0 then inc_amem else 0
212 inc_adsk = Node.availDisk node
213 inc_imem = truncate (Node.tMem node) - Node.nMem node
214 - Node.xMem node - Node.fMem node
215 inc_icpu = Node.uCpu node
216 inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
217 inc_vcpu = Node.hiCpu node
218 inc_acpu = Node.availCpu node
220 in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
221 , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
222 , csAmem = x_amem + fromIntegral inc_amem'
223 , csAdsk = x_adsk + fromIntegral inc_adsk
224 , csAcpu = x_acpu + fromIntegral inc_acpu
225 , csMmem = max x_mmem (fromIntegral inc_amem')
226 , csMdsk = max x_mdsk (fromIntegral inc_adsk)
227 , csMcpu = max x_mcpu (fromIntegral inc_acpu)
228 , csImem = x_imem + fromIntegral inc_imem
229 , csIdsk = x_idsk + fromIntegral inc_idsk
230 , csIcpu = x_icpu + fromIntegral inc_icpu
231 , csTmem = x_tmem + Node.tMem node
232 , csTdsk = x_tdsk + Node.tDsk node
233 , csTcpu = x_tcpu + Node.tCpu node
234 , csVcpu = x_vcpu + fromIntegral inc_vcpu
235 , csXmem = x_xmem + fromIntegral (Node.xMem node)
236 , csNmem = x_nmem + fromIntegral (Node.nMem node)
237 , csNinst = x_ninst + length (Node.pList node)
240 -- | Compute the total free disk and memory in the cluster.
241 totalResources :: Node.List -> CStats
243 let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
244 in cs { csScore = compCV nl }
246 -- | Compute the delta between two cluster state.
248 -- This is used when doing allocations, to understand better the
249 -- available cluster resources. The return value is a triple of the
250 -- current used values, the delta that was still allocated, and what
251 -- was left unallocated.
252 computeAllocationDelta :: CStats -> CStats -> AllocStats
253 computeAllocationDelta cini cfin =
254 let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
255 CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
256 csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
257 rini = RSpec (fromIntegral i_icpu) (fromIntegral i_imem)
258 (fromIntegral i_idsk)
259 rfin = RSpec (fromIntegral (f_icpu - i_icpu))
260 (fromIntegral (f_imem - i_imem))
261 (fromIntegral (f_idsk - i_idsk))
262 un_cpu = fromIntegral (v_cpu - f_icpu)::Int
263 runa = RSpec un_cpu (truncate t_mem - fromIntegral f_imem)
264 (truncate t_dsk - fromIntegral f_idsk)
265 in (rini, rfin, runa)
267 -- | The names and weights of the individual elements in the CV list.
268 detailedCVInfo :: [(Double, String)]
269 detailedCVInfo = [ (1, "free_mem_cv")
270 , (1, "free_disk_cv")
272 , (1, "reserved_mem_cv")
273 , (4, "offline_all_cnt")
274 , (16, "offline_pri_cnt")
275 , (1, "vcpu_ratio_cv")
278 , (1, "disk_load_cv")
280 , (2, "pri_tags_score")
283 detailedCVWeights :: [Double]
284 detailedCVWeights = map fst detailedCVInfo
286 -- | Compute the mem and disk covariance.
287 compDetailedCV :: [Node.Node] -> [Double]
288 compDetailedCV all_nodes =
290 (offline, nodes) = partition Node.offline all_nodes
291 mem_l = map Node.pMem nodes
292 dsk_l = map Node.pDsk nodes
293 -- metric: memory covariance
294 mem_cv = stdDev mem_l
295 -- metric: disk covariance
296 dsk_cv = stdDev dsk_l
297 -- metric: count of instances living on N1 failing nodes
298 n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
299 length (Node.pList n)) .
300 filter Node.failN1 $ nodes :: Double
301 res_l = map Node.pRem nodes
302 -- metric: reserved memory covariance
303 res_cv = stdDev res_l
304 -- offline instances metrics
305 offline_ipri = sum . map (length . Node.pList) $ offline
306 offline_isec = sum . map (length . Node.sList) $ offline
307 -- metric: count of instances on offline nodes
308 off_score = fromIntegral (offline_ipri + offline_isec)::Double
309 -- metric: count of primary instances on offline nodes (this
310 -- helps with evacuation/failover of primary instances on
311 -- 2-node clusters with one node offline)
312 off_pri_score = fromIntegral offline_ipri::Double
313 cpu_l = map Node.pCpu nodes
314 -- metric: covariance of vcpu/pcpu ratio
315 cpu_cv = stdDev cpu_l
316 -- metrics: covariance of cpu, memory, disk and network load
317 (c_load, m_load, d_load, n_load) = unzip4 $
319 let DynUtil c1 m1 d1 n1 = Node.utilLoad n
320 DynUtil c2 m2 d2 n2 = Node.utilPool n
321 in (c1/c2, m1/m2, d1/d2, n1/n2)
323 -- metric: conflicting instance count
324 pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
325 pri_tags_score = fromIntegral pri_tags_inst::Double
326 in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
327 , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
330 -- | Compute the /total/ variance.
331 compCVNodes :: [Node.Node] -> Double
332 compCVNodes = sum . zipWith (*) detailedCVWeights . compDetailedCV
334 -- | Wrapper over 'compCVNodes' for callers that have a 'Node.List'.
335 compCV :: Node.List -> Double
336 compCV = compCVNodes . Container.elems
339 -- | Compute online nodes from a 'Node.List'.
340 getOnline :: Node.List -> [Node.Node]
341 getOnline = filter (not . Node.offline) . Container.elems
343 -- * Balancing functions
345 -- | Compute best table. Note that the ordering of the arguments is important.
346 compareTables :: Table -> Table -> Table
347 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
348 if a_cv > b_cv then b else a
350 -- | Applies an instance move to a given node list and instance.
351 applyMove :: Node.List -> Instance.Instance
352 -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
354 applyMove nl inst Failover =
355 let old_pdx = Instance.pNode inst
356 old_sdx = Instance.sNode inst
357 old_p = Container.find old_pdx nl
358 old_s = Container.find old_sdx nl
359 int_p = Node.removePri old_p inst
360 int_s = Node.removeSec old_s inst
361 force_p = Node.offline old_p
362 new_nl = do -- Maybe monad
363 new_p <- Node.addPriEx force_p int_s inst
364 new_s <- Node.addSec int_p inst old_sdx
365 let new_inst = Instance.setBoth inst old_sdx old_pdx
366 return (Container.addTwo old_pdx new_s old_sdx new_p nl,
367 new_inst, old_sdx, old_pdx)
370 -- Replace the primary (f:, r:np, f)
371 applyMove nl inst (ReplacePrimary new_pdx) =
372 let old_pdx = Instance.pNode inst
373 old_sdx = Instance.sNode inst
374 old_p = Container.find old_pdx nl
375 old_s = Container.find old_sdx nl
376 tgt_n = Container.find new_pdx nl
377 int_p = Node.removePri old_p inst
378 int_s = Node.removeSec old_s inst
379 force_p = Node.offline old_p
380 new_nl = do -- Maybe monad
381 -- check that the current secondary can host the instance
382 -- during the migration
383 tmp_s <- Node.addPriEx force_p int_s inst
384 let tmp_s' = Node.removePri tmp_s inst
385 new_p <- Node.addPriEx force_p tgt_n inst
386 new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
387 let new_inst = Instance.setPri inst new_pdx
388 return (Container.add new_pdx new_p $
389 Container.addTwo old_pdx int_p old_sdx new_s nl,
390 new_inst, new_pdx, old_sdx)
393 -- Replace the secondary (r:ns)
394 applyMove nl inst (ReplaceSecondary new_sdx) =
395 let old_pdx = Instance.pNode inst
396 old_sdx = Instance.sNode inst
397 old_s = Container.find old_sdx nl
398 tgt_n = Container.find new_sdx nl
399 int_s = Node.removeSec old_s inst
400 force_s = Node.offline old_s
401 new_inst = Instance.setSec inst new_sdx
402 new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
403 \new_s -> return (Container.addTwo new_sdx
404 new_s old_sdx int_s nl,
405 new_inst, old_pdx, new_sdx)
408 -- Replace the secondary and failover (r:np, f)
409 applyMove nl inst (ReplaceAndFailover new_pdx) =
410 let old_pdx = Instance.pNode inst
411 old_sdx = Instance.sNode inst
412 old_p = Container.find old_pdx nl
413 old_s = Container.find old_sdx nl
414 tgt_n = Container.find new_pdx nl
415 int_p = Node.removePri old_p inst
416 int_s = Node.removeSec old_s inst
417 force_s = Node.offline old_s
418 new_nl = do -- Maybe monad
419 new_p <- Node.addPri tgt_n inst
420 new_s <- Node.addSecEx force_s int_p inst new_pdx
421 let new_inst = Instance.setBoth inst new_pdx old_pdx
422 return (Container.add new_pdx new_p $
423 Container.addTwo old_pdx new_s old_sdx int_s nl,
424 new_inst, new_pdx, old_pdx)
427 -- Failver and replace the secondary (f, r:ns)
428 applyMove nl inst (FailoverAndReplace new_sdx) =
429 let old_pdx = Instance.pNode inst
430 old_sdx = Instance.sNode inst
431 old_p = Container.find old_pdx nl
432 old_s = Container.find old_sdx nl
433 tgt_n = Container.find new_sdx nl
434 int_p = Node.removePri old_p inst
435 int_s = Node.removeSec old_s inst
436 force_p = Node.offline old_p
437 new_nl = do -- Maybe monad
438 new_p <- Node.addPriEx force_p int_s inst
439 new_s <- Node.addSecEx force_p tgt_n inst old_sdx
440 let new_inst = Instance.setBoth inst old_sdx new_sdx
441 return (Container.add new_sdx new_s $
442 Container.addTwo old_sdx new_p old_pdx int_p nl,
443 new_inst, old_sdx, new_sdx)
446 -- | Tries to allocate an instance on one given node.
447 allocateOnSingle :: Node.List -> Instance.Instance -> Ndx
448 -> OpResult Node.AllocElement
449 allocateOnSingle nl inst new_pdx =
450 let p = Container.find new_pdx nl
451 new_inst = Instance.setBoth inst new_pdx Node.noSecondary
452 in Node.addPri p inst >>= \new_p -> do
453 let new_nl = Container.add new_pdx new_p nl
454 new_score = compCV nl
455 return (new_nl, new_inst, [new_p], new_score)
457 -- | Tries to allocate an instance on a given pair of nodes.
458 allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx
459 -> OpResult Node.AllocElement
460 allocateOnPair nl inst new_pdx new_sdx =
461 let tgt_p = Container.find new_pdx nl
462 tgt_s = Container.find new_sdx nl
464 new_p <- Node.addPri tgt_p inst
465 new_s <- Node.addSec tgt_s inst new_pdx
466 let new_inst = Instance.setBoth inst new_pdx new_sdx
467 new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
468 return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
470 -- | Tries to perform an instance move and returns the best table
471 -- between the original one and the new one.
472 checkSingleStep :: Table -- ^ The original table
473 -> Instance.Instance -- ^ The instance to move
474 -> Table -- ^ The current best table
475 -> IMove -- ^ The move to apply
476 -> Table -- ^ The final best table
477 checkSingleStep ini_tbl target cur_tbl move =
479 Table ini_nl ini_il _ ini_plc = ini_tbl
480 tmp_resu = applyMove ini_nl target move
484 OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
485 let tgt_idx = Instance.idx target
486 upd_cvar = compCV upd_nl
487 upd_il = Container.add tgt_idx new_inst ini_il
488 upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
489 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
491 compareTables cur_tbl upd_tbl
493 -- | Given the status of the current secondary as a valid new node and
494 -- the current candidate target node, generate the possible moves for
496 possibleMoves :: Bool -- ^ Whether the secondary node is a valid new node
497 -> Bool -- ^ Whether we can change the primary node
498 -> Ndx -- ^ Target node candidate
499 -> [IMove] -- ^ List of valid result moves
501 possibleMoves _ False tdx =
502 [ReplaceSecondary tdx]
504 possibleMoves True True tdx =
505 [ReplaceSecondary tdx,
506 ReplaceAndFailover tdx,
508 FailoverAndReplace tdx]
510 possibleMoves False True tdx =
511 [ReplaceSecondary tdx,
512 ReplaceAndFailover tdx]
514 -- | Compute the best move for a given instance.
515 checkInstanceMove :: [Ndx] -- ^ Allowed target node indices
516 -> Bool -- ^ Whether disk moves are allowed
517 -> Bool -- ^ Whether instance moves are allowed
518 -> Table -- ^ Original table
519 -> Instance.Instance -- ^ Instance to move
520 -> Table -- ^ Best new table for this instance
521 checkInstanceMove nodes_idx disk_moves inst_moves ini_tbl target =
523 opdx = Instance.pNode target
524 osdx = Instance.sNode target
525 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
526 use_secondary = elem osdx nodes_idx && inst_moves
527 aft_failover = if use_secondary -- if allowed to failover
528 then checkSingleStep ini_tbl target ini_tbl Failover
530 all_moves = if disk_moves
532 (possibleMoves use_secondary inst_moves) nodes
535 -- iterate over the possible nodes for this instance
536 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
538 -- | Compute the best next move.
539 checkMove :: [Ndx] -- ^ Allowed target node indices
540 -> Bool -- ^ Whether disk moves are allowed
541 -> Bool -- ^ Whether instance moves are allowed
542 -> Table -- ^ The current solution
543 -> [Instance.Instance] -- ^ List of instances still to move
544 -> Table -- ^ The new solution
545 checkMove nodes_idx disk_moves inst_moves ini_tbl victims =
546 let Table _ _ _ ini_plc = ini_tbl
547 -- we're using rwhnf from the Control.Parallel.Strategies
548 -- package; we don't need to use rnf as that would force too
549 -- much evaluation in single-threaded cases, and in
550 -- multi-threaded case the weak head normal form is enough to
551 -- spark the evaluation
552 tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves
555 -- iterate over all instances, computing the best move
556 best_tbl = foldl' compareTables ini_tbl tables
557 Table _ _ _ best_plc = best_tbl
558 in if length best_plc == length ini_plc
559 then ini_tbl -- no advancement
562 -- | Check if we are allowed to go deeper in the balancing.
563 doNextBalance :: Table -- ^ The starting table
564 -> Int -- ^ Remaining length
565 -> Score -- ^ Score at which to stop
566 -> Bool -- ^ The resulting table and commands
567 doNextBalance ini_tbl max_rounds min_score =
568 let Table _ _ ini_cv ini_plc = ini_tbl
569 ini_plc_len = length ini_plc
570 in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
572 -- | Run a balance move.
573 tryBalance :: Table -- ^ The starting table
574 -> Bool -- ^ Allow disk moves
575 -> Bool -- ^ Allow instance moves
576 -> Bool -- ^ Only evacuate moves
577 -> Score -- ^ Min gain threshold
578 -> Score -- ^ Min gain
579 -> Maybe Table -- ^ The resulting table and commands
580 tryBalance ini_tbl disk_moves inst_moves evac_mode mg_limit min_gain =
581 let Table ini_nl ini_il ini_cv _ = ini_tbl
582 all_inst = Container.elems ini_il
583 all_inst' = if evac_mode
584 then let bad_nodes = map Node.idx . filter Node.offline $
585 Container.elems ini_nl
586 in filter (\e -> Instance.sNode e `elem` bad_nodes ||
587 Instance.pNode e `elem` bad_nodes)
590 reloc_inst = filter Instance.movable all_inst'
591 node_idx = map Node.idx . filter (not . Node.offline) $
592 Container.elems ini_nl
593 fin_tbl = checkMove node_idx disk_moves inst_moves ini_tbl reloc_inst
594 (Table _ _ fin_cv _) = fin_tbl
596 if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
597 then Just fin_tbl -- this round made success, return the new table
600 -- * Allocation functions
602 -- | Build failure stats out of a list of failures.
603 collapseFailures :: [FailMode] -> FailStats
604 collapseFailures flst =
605 map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
607 -- | Update current Allocation solution and failure stats with new
609 concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
610 concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
612 concatAllocs as (OpGood ns@(_, _, _, nscore)) =
613 let -- Choose the old or new solution, based on the cluster score
615 osols = asSolutions as
616 nsols = case osols of
618 (_, _, _, oscore):[] ->
622 -- FIXME: here we simply concat to lists with more
623 -- than one element; we should instead abort, since
624 -- this is not a valid usage of this function
627 -- Note: we force evaluation of nsols here in order to keep the
628 -- memory profile low - we know that we will need nsols for sure
629 -- in the next cycle, so we force evaluation of nsols, since the
630 -- foldl' in the caller will only evaluate the tuple, but not the
631 -- elements of the tuple
632 in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
634 -- | Sums two allocation solutions (e.g. for two separate node groups).
635 sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
636 sumAllocs (AllocSolution af aa as al) (AllocSolution bf ba bs bl) =
637 AllocSolution (af ++ bf) (aa + ba) (as ++ bs) (al ++ bl)
639 -- | Given a solution, generates a reasonable description for it.
640 describeSolution :: AllocSolution -> String
641 describeSolution as =
642 let fcnt = asFailures as
643 sols = asSolutions as
645 intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
646 filter ((> 0) . snd) . collapseFailures $ fcnt
648 then "No valid allocation solutions, failure reasons: " ++
650 then "unknown reasons"
652 else let (_, _, nodes, cv) = head sols
653 in printf ("score: %.8f, successes %d, failures %d (%s)" ++
654 " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
655 (intercalate "/" . map Node.name $ nodes)
657 -- | Annotates a solution with the appropriate string.
658 annotateSolution :: AllocSolution -> AllocSolution
659 annotateSolution as = as { asLog = describeSolution as : asLog as }
661 -- | Reverses an evacuation solution.
663 -- Rationale: we always concat the results to the top of the lists, so
664 -- for proper jobset execution, we should reverse all lists.
665 reverseEvacSolution :: EvacSolution -> EvacSolution
666 reverseEvacSolution (EvacSolution f m o) =
667 EvacSolution (reverse f) (reverse m) (reverse o)
669 -- | Generate the valid node allocation singles or pairs for a new instance.
670 genAllocNodes :: Group.List -- ^ Group list
671 -> Node.List -- ^ The node map
672 -> Int -- ^ The number of nodes required
673 -> Bool -- ^ Whether to drop or not
675 -> Result AllocNodes -- ^ The (monadic) result
676 genAllocNodes gl nl count drop_unalloc =
677 let filter_fn = if drop_unalloc
678 then filter (Group.isAllocable .
679 flip Container.find gl . Node.group)
681 all_nodes = filter_fn $ getOnline nl
682 all_pairs = liftM2 (,) all_nodes all_nodes
683 ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y &&
684 Node.group x == Node.group y) all_pairs
686 1 -> Ok (Left (map Node.idx all_nodes))
687 2 -> Ok (Right (map (\(p, s) -> (Node.idx p, Node.idx s)) ok_pairs))
688 _ -> Bad "Unsupported number of nodes, only one or two supported"
690 -- | Try to allocate an instance on the cluster.
691 tryAlloc :: (Monad m) =>
692 Node.List -- ^ The node list
693 -> Instance.List -- ^ The instance list
694 -> Instance.Instance -- ^ The instance to allocate
695 -> AllocNodes -- ^ The allocation targets
696 -> m AllocSolution -- ^ Possible solution list
697 tryAlloc nl _ inst (Right ok_pairs) =
698 let sols = foldl' (\cstate (p, s) ->
699 concatAllocs cstate $ allocateOnPair nl inst p s
700 ) emptyAllocSolution ok_pairs
702 in if null ok_pairs -- means we have just one node
703 then fail "Not enough online nodes"
704 else return $ annotateSolution sols
706 tryAlloc nl _ inst (Left all_nodes) =
707 let sols = foldl' (\cstate ->
708 concatAllocs cstate . allocateOnSingle nl inst
709 ) emptyAllocSolution all_nodes
711 then fail "No online nodes"
712 else return $ annotateSolution sols
714 -- | Given a group/result, describe it as a nice (list of) messages.
715 solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
716 solutionDescription gl (groupId, result) =
718 Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution)
719 Bad message -> [printf "Group %s: error %s" gname message]
720 where grp = Container.find groupId gl
721 gname = Group.name grp
722 pol = apolToString (Group.allocPolicy grp)
724 -- | From a list of possibly bad and possibly empty solutions, filter
725 -- only the groups with a valid result. Note that the result will be
726 -- reversed compared to the original list.
727 filterMGResults :: Group.List
728 -> [(Gdx, Result AllocSolution)]
729 -> [(Gdx, AllocSolution)]
730 filterMGResults gl = foldl' fn []
731 where unallocable = not . Group.isAllocable . flip Container.find gl
732 fn accu (gdx, rasol) =
735 Ok sol | null (asSolutions sol) -> accu
736 | unallocable gdx -> accu
737 | otherwise -> (gdx, sol):accu
739 -- | Sort multigroup results based on policy and score.
740 sortMGResults :: Group.List
741 -> [(Gdx, AllocSolution)]
742 -> [(Gdx, AllocSolution)]
743 sortMGResults gl sols =
744 let extractScore (_, _, _, x) = x
745 solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
746 (extractScore . head . asSolutions) sol)
747 in sortBy (comparing solScore) sols
749 -- | Try to allocate an instance on a multi-group cluster.
750 tryMGAlloc :: Group.List -- ^ The group list
751 -> Node.List -- ^ The node list
752 -> Instance.List -- ^ The instance list
753 -> Instance.Instance -- ^ The instance to allocate
754 -> Int -- ^ Required number of nodes
755 -> Result AllocSolution -- ^ Possible solution list
756 tryMGAlloc mggl mgnl mgil inst cnt =
757 let groups = splitCluster mgnl mgil
758 sols = map (\(gid, (nl, il)) ->
759 (gid, genAllocNodes mggl nl cnt False >>=
760 tryAlloc nl il inst))
761 groups::[(Gdx, Result AllocSolution)]
762 all_msgs = concatMap (solutionDescription mggl) sols
763 goodSols = filterMGResults mggl sols
764 sortedSols = sortMGResults mggl goodSols
765 in if null sortedSols
766 then Bad $ intercalate ", " all_msgs
767 else let (final_group, final_sol) = head sortedSols
768 final_name = Group.name $ Container.find final_group mggl
769 selmsg = "Selected group: " ++ final_name
770 in Ok $ final_sol { asLog = selmsg:all_msgs }
772 -- | Try to relocate an instance on the cluster.
773 tryReloc :: (Monad m) =>
774 Node.List -- ^ The node list
775 -> Instance.List -- ^ The instance list
776 -> Idx -- ^ The index of the instance to move
777 -> Int -- ^ The number of nodes required
778 -> [Ndx] -- ^ Nodes which should not be used
779 -> m AllocSolution -- ^ Solution list
780 tryReloc nl il xid 1 ex_idx =
781 let all_nodes = getOnline nl
782 inst = Container.find xid il
783 ex_idx' = Instance.pNode inst:ex_idx
784 valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
785 valid_idxes = map Node.idx valid_nodes
786 sols1 = foldl' (\cstate x ->
789 applyMove nl inst (ReplaceSecondary x)
790 return (mnl, i, [Container.find x mnl],
792 in concatAllocs cstate em
793 ) emptyAllocSolution valid_idxes
796 tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \
797 \destinations required (" ++ show reqn ++
798 "), only one supported"
800 tryMGReloc :: (Monad m) =>
801 Group.List -- ^ The group list
802 -> Node.List -- ^ The node list
803 -> Instance.List -- ^ The instance list
804 -> Idx -- ^ The index of the instance to move
805 -> Int -- ^ The number of nodes required
806 -> [Ndx] -- ^ Nodes which should not be used
807 -> m AllocSolution -- ^ Solution list
808 tryMGReloc _ mgnl mgil xid ncount ex_ndx = do
809 let groups = splitCluster mgnl mgil
810 -- TODO: we only relocate inside the group for now
811 inst = Container.find xid mgil
812 (nl, il) <- case lookup (instancePriGroup mgnl inst) groups of
813 Nothing -> fail $ "Cannot find group for instance " ++
816 tryReloc nl il xid ncount ex_ndx
818 -- | Change an instance's secondary node.
819 evacInstance :: (Monad m) =>
820 [Ndx] -- ^ Excluded nodes
821 -> Instance.List -- ^ The current instance list
822 -> (Node.List, AllocSolution) -- ^ The current state
823 -> Idx -- ^ The instance to evacuate
824 -> m (Node.List, AllocSolution)
825 evacInstance ex_ndx il (nl, old_as) idx = do
826 -- FIXME: hardcoded one node here
828 -- Longer explanation: evacuation is currently hardcoded to DRBD
829 -- instances (which have one secondary); hence, even if the
830 -- IAllocator protocol can request N nodes for an instance, and all
831 -- the message parsing/loading pass this, this implementation only
832 -- supports one; this situation needs to be revisited if we ever
833 -- support more than one secondary, or if we change the storage
835 new_as <- tryReloc nl il idx 1 ex_ndx
836 case asSolutions new_as of
837 -- an individual relocation succeeded, we kind of compose the data
838 -- from the two solutions
839 csol@(nl', _, _, _):_ ->
840 return (nl', new_as { asSolutions = csol:asSolutions old_as })
841 -- this relocation failed, so we fail the entire evac
842 _ -> fail $ "Can't evacuate instance " ++
843 Instance.name (Container.find idx il) ++
844 ": " ++ describeSolution new_as
846 -- | Try to evacuate a list of nodes.
847 tryEvac :: (Monad m) =>
848 Node.List -- ^ The node list
849 -> Instance.List -- ^ The instance list
850 -> [Idx] -- ^ Instances to be evacuated
851 -> [Ndx] -- ^ Restricted nodes (the ones being evacuated)
852 -> m AllocSolution -- ^ Solution list
853 tryEvac nl il idxs ex_ndx = do
854 (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptyAllocSolution) idxs
857 -- | Multi-group evacuation of a list of nodes.
858 tryMGEvac :: (Monad m) =>
859 Group.List -- ^ The group list
860 -> Node.List -- ^ The node list
861 -> Instance.List -- ^ The instance list
862 -> [Ndx] -- ^ Nodes to be evacuated
863 -> m AllocSolution -- ^ Solution list
864 tryMGEvac _ nl il ex_ndx =
865 let ex_nodes = map (`Container.find` nl) ex_ndx
866 all_insts = nub . concatMap Node.sList $ ex_nodes
867 all_insts' = associateIdxs all_insts $ splitCluster nl il
869 results <- mapM (\(_, (gnl, gil, idxs)) -> tryEvac gnl gil idxs ex_ndx)
871 let sol = foldl' sumAllocs emptyAllocSolution results
872 return $ annotateSolution sol
874 -- | Function which fails if the requested mode is change secondary.
876 -- This is useful since except DRBD, no other disk template can
877 -- execute change secondary; thus, we can just call this function
878 -- instead of always checking for secondary mode. After the call to
879 -- this function, whatever mode we have is just a primary change.
880 failOnSecondaryChange :: (Monad m) => EvacMode -> DiskTemplate -> m ()
881 failOnSecondaryChange ChangeSecondary dt =
882 fail $ "Instances with disk template '" ++ dtToString dt ++
883 "' can't execute change secondary"
884 failOnSecondaryChange _ _ = return ()
886 -- | Run evacuation for a single instance.
887 nodeEvacInstance :: Node.List -- ^ The node list (cluster-wide)
888 -> Instance.List -- ^ Instance list (cluster-wide)
889 -> EvacMode -- ^ The evacuation mode
890 -> Instance.Instance -- ^ The instance to be evacuated
891 -> [Ndx] -- ^ The list of available nodes
893 -> Result (Node.List, Instance.List, [OpCodes.OpCode])
894 nodeEvacInstance _ _ mode (Instance.Instance
895 {Instance.diskTemplate = dt@DTDiskless}) _ =
896 failOnSecondaryChange mode dt >>
897 fail "Diskless relocations not implemented yet"
899 nodeEvacInstance _ _ _ (Instance.Instance
900 {Instance.diskTemplate = DTPlain}) _ =
901 fail "Instances of type plain cannot be relocated"
903 nodeEvacInstance _ _ _ (Instance.Instance
904 {Instance.diskTemplate = DTFile}) _ =
905 fail "Instances of type file cannot be relocated"
907 nodeEvacInstance _ _ mode (Instance.Instance
908 {Instance.diskTemplate = dt@DTSharedFile}) _ =
909 failOnSecondaryChange mode dt >>
910 fail "Shared file relocations not implemented yet"
912 nodeEvacInstance _ _ mode (Instance.Instance
913 {Instance.diskTemplate = dt@DTBlock}) _ =
914 failOnSecondaryChange mode dt >>
915 fail "Block device relocations not implemented yet"
917 nodeEvacInstance nl il ChangePrimary
918 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8}) _ =
920 (nl', inst', _, _) <- opToResult $ applyMove nl inst Failover
921 let idx = Instance.idx inst
922 il' = Container.add idx inst' il
923 ops = iMoveToJob nl' il' idx Failover
924 return (nl', il', ops)
926 nodeEvacInstance _ _ _ (Instance.Instance
927 {Instance.diskTemplate = DTDrbd8}) _ =
928 fail "DRBD relocations not implemented yet"
930 -- | Computes the local nodes of a given instance which are available
932 availableLocalNodes :: Node.List
937 availableLocalNodes nl group_nodes excl_ndx inst = do
938 let gdx = instancePriGroup nl inst
939 local_nodes <- maybe (Bad $ "Can't find group with index " ++ show gdx)
940 Ok (lookup gdx group_nodes)
941 let avail_nodes = filter (not . flip IntSet.member excl_ndx) local_nodes
944 -- | Updates the evac solution with the results of an instance
946 updateEvacSolution :: (Node.List, Instance.List, EvacSolution)
948 -> Result (Node.List, Instance.List, [OpCodes.OpCode])
949 -> (Node.List, Instance.List, EvacSolution)
950 updateEvacSolution (nl, il, es) inst (Bad msg) =
951 (nl, il, es { esFailed = (Instance.name inst ++ ": " ++ msg):esFailed es})
952 updateEvacSolution (_, _, es) inst (Ok (nl, il, opcodes)) =
953 (nl, il, es { esMoved = Instance.name inst:esMoved es
954 , esOpCodes = [opcodes]:esOpCodes es })
956 -- | Node-evacuation IAllocator mode main function.
957 tryNodeEvac :: Group.List -- ^ The cluster groups
958 -> Node.List -- ^ The node list (cluster-wide, not per group)
959 -> Instance.List -- ^ Instance list (cluster-wide)
960 -> EvacMode -- ^ The evacuation mode
961 -> [Idx] -- ^ List of instance (indices) to be evacuated
962 -> Result EvacSolution
963 tryNodeEvac _ ini_nl ini_il mode idxs =
964 let evac_ndx = nodesToEvacuate ini_il mode idxs
965 offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
966 excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline
967 group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
968 (Container.elems nl))) $
969 splitCluster ini_nl ini_il
971 foldl' (\state@(nl, il, _) inst ->
972 updateEvacSolution state inst $
973 availableLocalNodes nl group_ndx excl_ndx inst >>=
974 nodeEvacInstance nl il mode inst
976 (ini_nl, ini_il, emptyEvacSolution)
977 (map (`Container.find` ini_il) idxs)
978 in return $ reverseEvacSolution esol
980 -- | Recursively place instances on the cluster until we're out of space.
981 iterateAlloc :: Node.List
985 -> [Instance.Instance]
987 -> Result AllocResult
988 iterateAlloc nl il newinst allocnodes ixes cstats =
989 let depth = length ixes
990 newname = printf "new-%d" depth::String
991 newidx = length (Container.elems il) + depth
992 newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
993 in case tryAlloc nl il newi2 allocnodes of
995 Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
997 [] -> Ok (collapseFailures errs, nl, il, ixes, cstats)
998 (xnl, xi, _, _):[] ->
999 iterateAlloc xnl (Container.add newidx xi il)
1000 newinst allocnodes (xi:ixes)
1001 (totalResources xnl:cstats)
1002 _ -> Bad "Internal error: multiple solutions for single\
1005 -- | The core of the tiered allocation mode.
1006 tieredAlloc :: Node.List
1008 -> Instance.Instance
1010 -> [Instance.Instance]
1012 -> Result AllocResult
1013 tieredAlloc nl il newinst allocnodes ixes cstats =
1014 case iterateAlloc nl il newinst allocnodes ixes cstats of
1016 Ok (errs, nl', il', ixes', cstats') ->
1017 case Instance.shrinkByType newinst . fst . last $
1018 sortBy (comparing snd) errs of
1019 Bad _ -> Ok (errs, nl', il', ixes', cstats')
1021 tieredAlloc nl' il' newinst' allocnodes ixes' cstats'
1023 -- | Compute the tiered spec string description from a list of
1024 -- allocated instances.
1025 tieredSpecMap :: [Instance.Instance]
1027 tieredSpecMap trl_ixes =
1028 let fin_trl_ixes = reverse trl_ixes
1029 ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
1030 spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
1032 in map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
1033 (rspecDsk spec) (rspecCpu spec) cnt) spec_map
1035 -- * Formatting functions
1037 -- | Given the original and final nodes, computes the relocation description.
1038 computeMoves :: Instance.Instance -- ^ The instance to be moved
1039 -> String -- ^ The instance name
1040 -> IMove -- ^ The move being performed
1041 -> String -- ^ New primary
1042 -> String -- ^ New secondary
1043 -> (String, [String])
1044 -- ^ Tuple of moves and commands list; moves is containing
1045 -- either @/f/@ for failover or @/r:name/@ for replace
1046 -- secondary, while the command list holds gnt-instance
1047 -- commands (without that prefix), e.g \"@failover instance1@\"
1048 computeMoves i inam mv c d =
1050 Failover -> ("f", [mig])
1051 FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
1052 ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
1053 ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
1054 ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
1055 where morf = if Instance.running i then "migrate" else "failover"
1056 mig = printf "%s -f %s" morf inam::String
1057 rep n = printf "replace-disks -n %s %s" n inam
1059 -- | Converts a placement to string format.
1060 printSolutionLine :: Node.List -- ^ The node list
1061 -> Instance.List -- ^ The instance list
1062 -> Int -- ^ Maximum node name length
1063 -> Int -- ^ Maximum instance name length
1064 -> Placement -- ^ The current placement
1065 -> Int -- ^ The index of the placement in
1067 -> (String, [String])
1068 printSolutionLine nl il nmlen imlen plc pos =
1070 pmlen = (2*nmlen + 1)
1071 (i, p, s, mv, c) = plc
1072 inst = Container.find i il
1073 inam = Instance.alias inst
1074 npri = Node.alias $ Container.find p nl
1075 nsec = Node.alias $ Container.find s nl
1076 opri = Node.alias $ Container.find (Instance.pNode inst) nl
1077 osec = Node.alias $ Container.find (Instance.sNode inst) nl
1078 (moves, cmds) = computeMoves inst inam mv npri nsec
1079 ostr = printf "%s:%s" opri osec::String
1080 nstr = printf "%s:%s" npri nsec::String
1082 (printf " %3d. %-*s %-*s => %-*s %.8f a=%s"
1083 pos imlen inam pmlen ostr
1087 -- | Return the instance and involved nodes in an instance move.
1088 involvedNodes :: Instance.List -> Placement -> [Ndx]
1089 involvedNodes il plc =
1090 let (i, np, ns, _, _) = plc
1091 inst = Container.find i il
1092 op = Instance.pNode inst
1093 os = Instance.sNode inst
1094 in nub [np, ns, op, os]
1096 -- | Inner function for splitJobs, that either appends the next job to
1097 -- the current jobset, or starts a new jobset.
1098 mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
1099 mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
1100 mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
1101 | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
1102 | otherwise = ([n]:cjs, ndx)
1104 -- | Break a list of moves into independent groups. Note that this
1105 -- will reverse the order of jobs.
1106 splitJobs :: [MoveJob] -> [JobSet]
1107 splitJobs = fst . foldl mergeJobs ([], [])
1109 -- | Given a list of commands, prefix them with @gnt-instance@ and
1110 -- also beautify the display a little.
1111 formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
1112 formatJob jsn jsl (sn, (_, _, _, cmds)) =
1114 printf " echo job %d/%d" jsn sn:
1116 map (" gnt-instance " ++) cmds
1118 then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
1121 -- | Given a list of commands, prefix them with @gnt-instance@ and
1122 -- also beautify the display a little.
1123 formatCmds :: [JobSet] -> String
1126 concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
1130 -- | Print the node list.
1131 printNodes :: Node.List -> [String] -> String
1133 let fields = case fs of
1134 [] -> Node.defaultFields
1135 "+":rest -> Node.defaultFields ++ rest
1137 snl = sortBy (comparing Node.idx) (Container.elems nl)
1138 (header, isnum) = unzip $ map Node.showHeader fields
1139 in unlines . map ((:) ' ' . intercalate " ") $
1140 formatTable (header:map (Node.list fields) snl) isnum
1142 -- | Print the instance list.
1143 printInsts :: Node.List -> Instance.List -> String
1145 let sil = sortBy (comparing Instance.idx) (Container.elems il)
1146 helper inst = [ if Instance.running inst then "R" else " "
1147 , Instance.name inst
1148 , Container.nameOf nl (Instance.pNode inst)
1149 , let sdx = Instance.sNode inst
1150 in if sdx == Node.noSecondary
1152 else Container.nameOf nl sdx
1153 , if Instance.autoBalance inst then "Y" else "N"
1154 , printf "%3d" $ Instance.vcpus inst
1155 , printf "%5d" $ Instance.mem inst
1156 , printf "%5d" $ Instance.dsk inst `div` 1024
1162 where DynUtil lC lM lD lN = Instance.util inst
1163 header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
1164 , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
1165 isnum = False:False:False:False:False:repeat True
1166 in unlines . map ((:) ' ' . intercalate " ") $
1167 formatTable (header:map helper sil) isnum
1169 -- | Shows statistics for a given node list.
1170 printStats :: Node.List -> String
1172 let dcvs = compDetailedCV $ Container.elems nl
1173 (weights, names) = unzip detailedCVInfo
1174 hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
1175 formatted = map (\(w, header, val) ->
1176 printf "%s=%.8f(x%.2f)" header val w::String) hd
1177 in intercalate ", " formatted
1179 -- | Convert a placement into a list of OpCodes (basically a job).
1180 iMoveToJob :: Node.List -> Instance.List
1181 -> Idx -> IMove -> [OpCodes.OpCode]
1182 iMoveToJob nl il idx move =
1183 let inst = Container.find idx il
1184 iname = Instance.name inst
1185 lookNode = Just . Container.nameOf nl
1186 opF = OpCodes.OpInstanceMigrate iname True False True
1187 opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
1188 OpCodes.ReplaceNewSecondary [] Nothing
1191 ReplacePrimary np -> [ opF, opR np, opF ]
1192 ReplaceSecondary ns -> [ opR ns ]
1193 ReplaceAndFailover np -> [ opR np, opF ]
1194 FailoverAndReplace ns -> [ opF, opR ns ]
1196 -- * Node group functions
1198 -- | Computes the group of an instance.
1199 instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
1200 instanceGroup nl i =
1201 let sidx = Instance.sNode i
1202 pnode = Container.find (Instance.pNode i) nl
1203 snode = if sidx == Node.noSecondary
1205 else Container.find sidx nl
1206 pgroup = Node.group pnode
1207 sgroup = Node.group snode
1208 in if pgroup /= sgroup
1209 then fail ("Instance placed accross two node groups, primary " ++
1210 show pgroup ++ ", secondary " ++ show sgroup)
1213 -- | Computes the group of an instance per the primary node.
1214 instancePriGroup :: Node.List -> Instance.Instance -> Gdx
1215 instancePriGroup nl i =
1216 let pnode = Container.find (Instance.pNode i) nl
1219 -- | Compute the list of badly allocated instances (split across node
1221 findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
1222 findSplitInstances nl =
1223 filter (not . isOk . instanceGroup nl) . Container.elems
1225 -- | Splits a cluster into the component node groups.
1226 splitCluster :: Node.List -> Instance.List ->
1227 [(Gdx, (Node.List, Instance.List))]
1228 splitCluster nl il =
1229 let ngroups = Node.computeGroups (Container.elems nl)
1230 in map (\(guuid, nodes) ->
1231 let nidxs = map Node.idx nodes
1232 nodes' = zip nidxs nodes
1233 instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
1234 in (guuid, (Container.fromList nodes', instances))) ngroups
1236 -- | Split a global instance index map into per-group, and associate
1237 -- it with the group/node/instance lists.
1238 associateIdxs :: [Idx] -- ^ Instance indices to be split/associated
1239 -> [(Gdx, (Node.List, Instance.List))] -- ^ Input groups
1240 -> [(Gdx, (Node.List, Instance.List, [Idx]))] -- ^ Result
1241 associateIdxs idxs =
1242 map (\(gdx, (nl, il)) ->
1243 (gdx, (nl, il, filter (`Container.member` il) idxs)))
1245 -- | Compute the list of nodes that are to be evacuated, given a list
1246 -- of instances and an evacuation mode.
1247 nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list
1248 -> EvacMode -- ^ The evacuation mode we're using
1249 -> [Idx] -- ^ List of instance indices being evacuated
1250 -> IntSet.IntSet -- ^ Set of node indices
1251 nodesToEvacuate il mode =
1252 IntSet.delete Node.noSecondary .
1254 let i = Container.find idx il
1255 pdx = Instance.pNode i
1256 sdx = Instance.sNode i
1257 dt = Instance.diskTemplate i
1258 withSecondary = case dt of
1259 DTDrbd8 -> IntSet.insert sdx ns
1262 ChangePrimary -> IntSet.insert pdx ns
1263 ChangeSecondary -> withSecondary
1264 ChangeAll -> IntSet.insert pdx withSecondary