1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the "Main" module for the individual binaries.
10 Copyright (C) 2009, 2010 Google Inc.
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 2 of the License, or
15 (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
29 module Ganeti.HTools.Cluster
36 -- * Generic functions
38 , computeAllocationDelta
39 -- * First phase functions
41 -- * Second phase functions
46 -- * Display functions
49 -- * Balacing functions
56 -- * IAllocator functions
61 -- * Allocation functions
70 import Data.Ord (comparing)
71 import Text.Printf (printf)
74 import qualified Ganeti.HTools.Container as Container
75 import qualified Ganeti.HTools.Instance as Instance
76 import qualified Ganeti.HTools.Node as Node
77 import Ganeti.HTools.Types
78 import Ganeti.HTools.Utils
79 import qualified Ganeti.OpCodes as OpCodes
83 -- | Allocation\/relocation solution.
84 data AllocSolution = AllocSolution
85 { asFailures :: [FailMode] -- ^ Failure counts
86 , asAllocs :: Int -- ^ Good allocation count
87 , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
88 -- of the list depends on the
89 -- allocation/relocation mode
93 -- | The empty solution we start with when computing allocations
94 emptySolution :: AllocSolution
95 emptySolution = AllocSolution { asFailures = [], asAllocs = 0
98 -- | The complete state for the balancing solution
99 data Table = Table Node.List Instance.List Score [Placement]
102 data CStats = CStats { csFmem :: Int -- ^ Cluster free mem
103 , csFdsk :: Int -- ^ Cluster free disk
104 , csAmem :: Int -- ^ Cluster allocatable mem
105 , csAdsk :: Int -- ^ Cluster allocatable disk
106 , csAcpu :: Int -- ^ Cluster allocatable cpus
107 , csMmem :: Int -- ^ Max node allocatable mem
108 , csMdsk :: Int -- ^ Max node allocatable disk
109 , csMcpu :: Int -- ^ Max node allocatable cpu
110 , csImem :: Int -- ^ Instance used mem
111 , csIdsk :: Int -- ^ Instance used disk
112 , csIcpu :: Int -- ^ Instance used cpu
113 , csTmem :: Double -- ^ Cluster total mem
114 , csTdsk :: Double -- ^ Cluster total disk
115 , csTcpu :: Double -- ^ Cluster total cpus
116 , csVcpu :: Int -- ^ Cluster virtual cpus (if
117 -- node pCpu has been set,
119 , csXmem :: Int -- ^ Unnacounted for mem
120 , csNmem :: Int -- ^ Node own memory
121 , csScore :: Score -- ^ The cluster score
122 , csNinst :: Int -- ^ The total number of instances
126 -- | Currently used, possibly to allocate, unallocable
127 type AllocStats = (RSpec, RSpec, RSpec)
129 -- * Utility functions
131 -- | Verifies the N+1 status and return the affected nodes.
132 verifyN1 :: [Node.Node] -> [Node.Node]
133 verifyN1 = filter Node.failN1
135 {-| Computes the pair of bad nodes and instances.
137 The bad node list is computed via a simple 'verifyN1' check, and the
138 bad instance list is the list of primary and secondary instances of
142 computeBadItems :: Node.List -> Instance.List ->
143 ([Node.Node], [Instance.Instance])
144 computeBadItems nl il =
145 let bad_nodes = verifyN1 $ getOnline nl
146 bad_instances = map (`Container.find` il) .
148 concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
150 (bad_nodes, bad_instances)
152 -- | Zero-initializer for the CStats type
153 emptyCStats :: CStats
154 emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
156 -- | Update stats with data from a new node
157 updateCStats :: CStats -> Node.Node -> CStats
158 updateCStats cs node =
159 let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
160 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
161 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
162 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
163 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
165 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
168 inc_amem = Node.fMem node - Node.rMem node
169 inc_amem' = if inc_amem > 0 then inc_amem else 0
170 inc_adsk = Node.availDisk node
171 inc_imem = truncate (Node.tMem node) - Node.nMem node
172 - Node.xMem node - Node.fMem node
173 inc_icpu = Node.uCpu node
174 inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
175 inc_vcpu = Node.hiCpu node
177 in cs { csFmem = x_fmem + Node.fMem node
178 , csFdsk = x_fdsk + Node.fDsk node
179 , csAmem = x_amem + inc_amem'
180 , csAdsk = x_adsk + inc_adsk
182 , csMmem = max x_mmem inc_amem'
183 , csMdsk = max x_mdsk inc_adsk
185 , csImem = x_imem + inc_imem
186 , csIdsk = x_idsk + inc_idsk
187 , csIcpu = x_icpu + inc_icpu
188 , csTmem = x_tmem + Node.tMem node
189 , csTdsk = x_tdsk + Node.tDsk node
190 , csTcpu = x_tcpu + Node.tCpu node
191 , csVcpu = x_vcpu + inc_vcpu
192 , csXmem = x_xmem + Node.xMem node
193 , csNmem = x_nmem + Node.nMem node
194 , csNinst = x_ninst + length (Node.pList node)
197 -- | Compute the total free disk and memory in the cluster.
198 totalResources :: Node.List -> CStats
200 let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
201 in cs { csScore = compCV nl }
203 -- | Compute the delta between two cluster state.
205 -- This is used when doing allocations, to understand better the
206 -- available cluster resources. The return value is a triple of the
207 -- current used values, the delta that was still allocated, and what
208 -- was left unallocated.
209 computeAllocationDelta :: CStats -> CStats -> AllocStats
210 computeAllocationDelta cini cfin =
211 let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
212 CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
213 csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
214 rini = RSpec i_icpu i_imem i_idsk
215 rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
216 un_cpu = v_cpu - f_icpu
217 runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
218 in (rini, rfin, runa)
220 -- | The names and weights of the individual elements in the CV list
221 detailedCVInfo :: [(Double, String)]
222 detailedCVInfo = [ (1, "free_mem_cv")
223 , (1, "free_disk_cv")
225 , (1, "reserved_mem_cv")
226 , (4, "offline_all_cnt")
227 , (16, "offline_pri_cnt")
228 , (1, "vcpu_ratio_cv")
231 , (1, "disk_load_cv")
233 , (2, "pri_tags_score")
236 detailedCVWeights :: [Double]
237 detailedCVWeights = map fst detailedCVInfo
239 -- | Compute the mem and disk covariance.
240 compDetailedCV :: Node.List -> [Double]
243 all_nodes = Container.elems nl
244 (offline, nodes) = partition Node.offline all_nodes
245 mem_l = map Node.pMem nodes
246 dsk_l = map Node.pDsk nodes
247 -- metric: memory covariance
248 mem_cv = varianceCoeff mem_l
249 -- metric: disk covariance
250 dsk_cv = varianceCoeff dsk_l
251 -- metric: count of instances living on N1 failing nodes
252 n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
253 length (Node.pList n)) .
254 filter Node.failN1 $ nodes :: Double
255 res_l = map Node.pRem nodes
256 -- metric: reserved memory covariance
257 res_cv = varianceCoeff res_l
258 -- offline instances metrics
259 offline_ipri = sum . map (length . Node.pList) $ offline
260 offline_isec = sum . map (length . Node.sList) $ offline
261 -- metric: count of instances on offline nodes
262 off_score = fromIntegral (offline_ipri + offline_isec)::Double
263 -- metric: count of primary instances on offline nodes (this
264 -- helps with evacuation/failover of primary instances on
265 -- 2-node clusters with one node offline)
266 off_pri_score = fromIntegral offline_ipri::Double
267 cpu_l = map Node.pCpu nodes
268 -- metric: covariance of vcpu/pcpu ratio
269 cpu_cv = varianceCoeff cpu_l
270 -- metrics: covariance of cpu, memory, disk and network load
271 (c_load, m_load, d_load, n_load) = unzip4 $
273 let DynUtil c1 m1 d1 n1 = Node.utilLoad n
274 DynUtil c2 m2 d2 n2 = Node.utilPool n
275 in (c1/c2, m1/m2, d1/d2, n1/n2)
277 -- metric: conflicting instance count
278 pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
279 pri_tags_score = fromIntegral pri_tags_inst::Double
280 in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
281 , varianceCoeff c_load, varianceCoeff m_load
282 , varianceCoeff d_load, varianceCoeff n_load
285 -- | Compute the /total/ variance.
286 compCV :: Node.List -> Double
287 compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
289 -- | Compute online nodes from a Node.List
290 getOnline :: Node.List -> [Node.Node]
291 getOnline = filter (not . Node.offline) . Container.elems
295 -- | Compute best table. Note that the ordering of the arguments is important.
296 compareTables :: Table -> Table -> Table
297 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
298 if a_cv > b_cv then b else a
300 -- | Applies an instance move to a given node list and instance.
301 applyMove :: Node.List -> Instance.Instance
302 -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
304 applyMove nl inst Failover =
305 let old_pdx = Instance.pNode inst
306 old_sdx = Instance.sNode inst
307 old_p = Container.find old_pdx nl
308 old_s = Container.find old_sdx nl
309 int_p = Node.removePri old_p inst
310 int_s = Node.removeSec old_s inst
311 force_p = Node.offline old_p
312 new_nl = do -- Maybe monad
313 new_p <- Node.addPriEx force_p int_s inst
314 new_s <- Node.addSec int_p inst old_sdx
315 let new_inst = Instance.setBoth inst old_sdx old_pdx
316 return (Container.addTwo old_pdx new_s old_sdx new_p nl,
317 new_inst, old_sdx, old_pdx)
320 -- Replace the primary (f:, r:np, f)
321 applyMove nl inst (ReplacePrimary new_pdx) =
322 let old_pdx = Instance.pNode inst
323 old_sdx = Instance.sNode inst
324 old_p = Container.find old_pdx nl
325 old_s = Container.find old_sdx nl
326 tgt_n = Container.find new_pdx nl
327 int_p = Node.removePri old_p inst
328 int_s = Node.removeSec old_s inst
329 force_p = Node.offline old_p
330 new_nl = do -- Maybe monad
331 -- check that the current secondary can host the instance
332 -- during the migration
333 tmp_s <- Node.addPriEx force_p int_s inst
334 let tmp_s' = Node.removePri tmp_s inst
335 new_p <- Node.addPriEx force_p tgt_n inst
336 new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
337 let new_inst = Instance.setPri inst new_pdx
338 return (Container.add new_pdx new_p $
339 Container.addTwo old_pdx int_p old_sdx new_s nl,
340 new_inst, new_pdx, old_sdx)
343 -- Replace the secondary (r:ns)
344 applyMove nl inst (ReplaceSecondary new_sdx) =
345 let old_pdx = Instance.pNode inst
346 old_sdx = Instance.sNode inst
347 old_s = Container.find old_sdx nl
348 tgt_n = Container.find new_sdx nl
349 int_s = Node.removeSec old_s inst
350 force_s = Node.offline old_s
351 new_inst = Instance.setSec inst new_sdx
352 new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
353 \new_s -> return (Container.addTwo new_sdx
354 new_s old_sdx int_s nl,
355 new_inst, old_pdx, new_sdx)
358 -- Replace the secondary and failover (r:np, f)
359 applyMove nl inst (ReplaceAndFailover new_pdx) =
360 let old_pdx = Instance.pNode inst
361 old_sdx = Instance.sNode inst
362 old_p = Container.find old_pdx nl
363 old_s = Container.find old_sdx nl
364 tgt_n = Container.find new_pdx nl
365 int_p = Node.removePri old_p inst
366 int_s = Node.removeSec old_s inst
367 force_s = Node.offline old_s
368 new_nl = do -- Maybe monad
369 new_p <- Node.addPri tgt_n inst
370 new_s <- Node.addSecEx force_s int_p inst new_pdx
371 let new_inst = Instance.setBoth inst new_pdx old_pdx
372 return (Container.add new_pdx new_p $
373 Container.addTwo old_pdx new_s old_sdx int_s nl,
374 new_inst, new_pdx, old_pdx)
377 -- Failver and replace the secondary (f, r:ns)
378 applyMove nl inst (FailoverAndReplace new_sdx) =
379 let old_pdx = Instance.pNode inst
380 old_sdx = Instance.sNode inst
381 old_p = Container.find old_pdx nl
382 old_s = Container.find old_sdx nl
383 tgt_n = Container.find new_sdx nl
384 int_p = Node.removePri old_p inst
385 int_s = Node.removeSec old_s inst
386 force_p = Node.offline old_p
387 new_nl = do -- Maybe monad
388 new_p <- Node.addPriEx force_p int_s inst
389 new_s <- Node.addSecEx force_p tgt_n inst old_sdx
390 let new_inst = Instance.setBoth inst old_sdx new_sdx
391 return (Container.add new_sdx new_s $
392 Container.addTwo old_sdx new_p old_pdx int_p nl,
393 new_inst, old_sdx, new_sdx)
396 -- | Tries to allocate an instance on one given node.
397 allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
398 -> OpResult Node.AllocElement
399 allocateOnSingle nl inst p =
400 let new_pdx = Node.idx p
401 new_inst = Instance.setBoth inst new_pdx Node.noSecondary
402 in Node.addPri p inst >>= \new_p -> do
403 let new_nl = Container.add new_pdx new_p nl
404 new_score = compCV nl
405 return (new_nl, new_inst, [new_p], new_score)
407 -- | Tries to allocate an instance on a given pair of nodes.
408 allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
409 -> OpResult Node.AllocElement
410 allocateOnPair nl inst tgt_p tgt_s =
411 let new_pdx = Node.idx tgt_p
412 new_sdx = Node.idx tgt_s
414 new_p <- Node.addPri tgt_p inst
415 new_s <- Node.addSec tgt_s inst new_pdx
416 let new_inst = Instance.setBoth inst new_pdx new_sdx
417 new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
418 return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
420 -- | Tries to perform an instance move and returns the best table
421 -- between the original one and the new one.
422 checkSingleStep :: Table -- ^ The original table
423 -> Instance.Instance -- ^ The instance to move
424 -> Table -- ^ The current best table
425 -> IMove -- ^ The move to apply
426 -> Table -- ^ The final best table
427 checkSingleStep ini_tbl target cur_tbl move =
429 Table ini_nl ini_il _ ini_plc = ini_tbl
430 tmp_resu = applyMove ini_nl target move
434 OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
435 let tgt_idx = Instance.idx target
436 upd_cvar = compCV upd_nl
437 upd_il = Container.add tgt_idx new_inst ini_il
438 upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
439 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
441 compareTables cur_tbl upd_tbl
443 -- | Given the status of the current secondary as a valid new node and
444 -- the current candidate target node, generate the possible moves for
446 possibleMoves :: Bool -- ^ Whether the secondary node is a valid new node
447 -> Ndx -- ^ Target node candidate
448 -> [IMove] -- ^ List of valid result moves
449 possibleMoves True tdx =
450 [ReplaceSecondary tdx,
451 ReplaceAndFailover tdx,
453 FailoverAndReplace tdx]
455 possibleMoves False tdx =
456 [ReplaceSecondary tdx,
457 ReplaceAndFailover tdx]
459 -- | Compute the best move for a given instance.
460 checkInstanceMove :: [Ndx] -- ^ Allowed target node indices
461 -> Bool -- ^ Whether disk moves are allowed
462 -> Table -- ^ Original table
463 -> Instance.Instance -- ^ Instance to move
464 -> Table -- ^ Best new table for this instance
465 checkInstanceMove nodes_idx disk_moves ini_tbl target =
467 opdx = Instance.pNode target
468 osdx = Instance.sNode target
469 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
470 use_secondary = elem osdx nodes_idx
471 aft_failover = if use_secondary -- if allowed to failover
472 then checkSingleStep ini_tbl target ini_tbl Failover
474 all_moves = if disk_moves
475 then concatMap (possibleMoves use_secondary) nodes
478 -- iterate over the possible nodes for this instance
479 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
481 -- | Compute the best next move.
482 checkMove :: [Ndx] -- ^ Allowed target node indices
483 -> Bool -- ^ Whether disk moves are allowed
484 -> Table -- ^ The current solution
485 -> [Instance.Instance] -- ^ List of instances still to move
486 -> Table -- ^ The new solution
487 checkMove nodes_idx disk_moves ini_tbl victims =
488 let Table _ _ _ ini_plc = ini_tbl
489 -- iterate over all instances, computing the best move
493 compareTables step_tbl $
494 checkInstanceMove nodes_idx disk_moves ini_tbl em)
496 Table _ _ _ best_plc = best_tbl
497 in if length best_plc == length ini_plc
498 then ini_tbl -- no advancement
501 -- | Check if we are allowed to go deeper in the balancing
502 doNextBalance :: Table -- ^ The starting table
503 -> Int -- ^ Remaining length
504 -> Score -- ^ Score at which to stop
505 -> Bool -- ^ The resulting table and commands
506 doNextBalance ini_tbl max_rounds min_score =
507 let Table _ _ ini_cv ini_plc = ini_tbl
508 ini_plc_len = length ini_plc
509 in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
511 -- | Run a balance move
512 tryBalance :: Table -- ^ The starting table
513 -> Bool -- ^ Allow disk moves
514 -> Bool -- ^ Only evacuate moves
515 -> Score -- ^ Min gain threshold
516 -> Score -- ^ Min gain
517 -> Maybe Table -- ^ The resulting table and commands
518 tryBalance ini_tbl disk_moves evac_mode mg_limit min_gain =
519 let Table ini_nl ini_il ini_cv _ = ini_tbl
520 all_inst = Container.elems ini_il
521 all_inst' = if evac_mode
522 then let bad_nodes = map Node.idx . filter Node.offline $
523 Container.elems ini_nl
524 in filter (\e -> Instance.sNode e `elem` bad_nodes ||
525 Instance.pNode e `elem` bad_nodes)
528 reloc_inst = filter Instance.movable all_inst'
529 node_idx = map Node.idx . filter (not . Node.offline) $
530 Container.elems ini_nl
531 fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
532 (Table _ _ fin_cv _) = fin_tbl
534 if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
535 then Just fin_tbl -- this round made success, return the new table
538 -- * Allocation functions
540 -- | Build failure stats out of a list of failures
541 collapseFailures :: [FailMode] -> FailStats
542 collapseFailures flst =
543 map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
545 -- | Update current Allocation solution and failure stats with new
547 concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
548 concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
550 concatAllocs as (OpGood ns@(_, _, _, nscore)) =
551 let -- Choose the old or new solution, based on the cluster score
553 osols = asSolutions as
554 nsols = case osols of
556 (_, _, _, oscore):[] ->
560 -- FIXME: here we simply concat to lists with more
561 -- than one element; we should instead abort, since
562 -- this is not a valid usage of this function
565 -- Note: we force evaluation of nsols here in order to keep the
566 -- memory profile low - we know that we will need nsols for sure
567 -- in the next cycle, so we force evaluation of nsols, since the
568 -- foldl' in the caller will only evaluate the tuple, but not the
569 -- elements of the tuple
570 in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
572 -- | Try to allocate an instance on the cluster.
573 tryAlloc :: (Monad m) =>
574 Node.List -- ^ The node list
575 -> Instance.List -- ^ The instance list
576 -> Instance.Instance -- ^ The instance to allocate
577 -> Int -- ^ Required number of nodes
578 -> m AllocSolution -- ^ Possible solution list
579 tryAlloc nl _ inst 2 =
580 let all_nodes = getOnline nl
581 all_pairs = liftM2 (,) all_nodes all_nodes
582 ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
583 sols = foldl' (\cstate (p, s) ->
584 concatAllocs cstate $ allocateOnPair nl inst p s
585 ) emptySolution ok_pairs
588 tryAlloc nl _ inst 1 =
589 let all_nodes = getOnline nl
590 sols = foldl' (\cstate ->
591 concatAllocs cstate . allocateOnSingle nl inst
592 ) emptySolution all_nodes
595 tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
596 \destinations required (" ++ show reqn ++
597 "), only two supported"
599 -- | Try to allocate an instance on the cluster.
600 tryReloc :: (Monad m) =>
601 Node.List -- ^ The node list
602 -> Instance.List -- ^ The instance list
603 -> Idx -- ^ The index of the instance to move
604 -> Int -- ^ The number of nodes required
605 -> [Ndx] -- ^ Nodes which should not be used
606 -> m AllocSolution -- ^ Solution list
607 tryReloc nl il xid 1 ex_idx =
608 let all_nodes = getOnline nl
609 inst = Container.find xid il
610 ex_idx' = Instance.pNode inst:ex_idx
611 valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
612 valid_idxes = map Node.idx valid_nodes
613 sols1 = foldl' (\cstate x ->
616 applyMove nl inst (ReplaceSecondary x)
617 return (mnl, i, [Container.find x mnl],
619 in concatAllocs cstate em
620 ) emptySolution valid_idxes
623 tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \
624 \destinations required (" ++ show reqn ++
625 "), only one supported"
627 -- | Try to evacuate a list of nodes.
628 tryEvac :: (Monad m) =>
629 Node.List -- ^ The node list
630 -> Instance.List -- ^ The instance list
631 -> [Ndx] -- ^ Nodes to be evacuated
632 -> m AllocSolution -- ^ Solution list
633 tryEvac nl il ex_ndx =
634 let ex_nodes = map (`Container.find` nl) ex_ndx
635 all_insts = nub . concatMap Node.sList $ ex_nodes
637 (_, sol) <- foldM (\(nl', old_as) idx -> do
638 -- FIXME: hardcoded one node here
640 new_as <- tryReloc nl' il idx 1 ex_ndx
641 case asSolutions new_as of
642 csol@(nl'', _, _, _):_ ->
643 -- an individual relocation succeeded,
644 -- we kind of compose the data from
647 new_as { asSolutions =
648 csol:asSolutions old_as })
649 -- this relocation failed, so we fail
651 _ -> fail $ "Can't evacuate instance " ++
652 Instance.name (Container.find idx il)
653 ) (nl, emptySolution) all_insts
656 -- | Recursively place instances on the cluster until we're out of space
657 iterateAlloc :: Node.List
661 -> [Instance.Instance]
662 -> Result (FailStats, Node.List, Instance.List,
664 iterateAlloc nl il newinst nreq ixes =
665 let depth = length ixes
666 newname = printf "new-%d" depth::String
667 newidx = length (Container.elems il) + depth
668 newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
669 in case tryAlloc nl il newi2 nreq of
671 Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
673 [] -> Ok (collapseFailures errs, nl, il, ixes)
674 (xnl, xi, _, _):[] ->
675 iterateAlloc xnl (Container.add newidx xi il)
676 newinst nreq $! (xi:ixes)
677 _ -> Bad "Internal error: multiple solutions for single\
680 tieredAlloc :: Node.List
684 -> [Instance.Instance]
685 -> Result (FailStats, Node.List, Instance.List,
687 tieredAlloc nl il newinst nreq ixes =
688 case iterateAlloc nl il newinst nreq ixes of
690 Ok (errs, nl', il', ixes') ->
691 case Instance.shrinkByType newinst . fst . last $
692 sortBy (comparing snd) errs of
693 Bad _ -> Ok (errs, nl', il', ixes')
695 tieredAlloc nl' il' newinst' nreq ixes'
697 -- * Formatting functions
699 -- | Given the original and final nodes, computes the relocation description.
700 computeMoves :: Instance.Instance -- ^ The instance to be moved
701 -> String -- ^ The instance name
702 -> IMove -- ^ The move being performed
703 -> String -- ^ New primary
704 -> String -- ^ New secondary
705 -> (String, [String])
706 -- ^ Tuple of moves and commands list; moves is containing
707 -- either @/f/@ for failover or @/r:name/@ for replace
708 -- secondary, while the command list holds gnt-instance
709 -- commands (without that prefix), e.g \"@failover instance1@\"
710 computeMoves i inam mv c d =
712 Failover -> ("f", [mig])
713 FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
714 ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
715 ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
716 ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
717 where morf = if Instance.running i then "migrate" else "failover"
718 mig = printf "%s -f %s" morf inam::String
719 rep n = printf "replace-disks -n %s %s" n inam
721 -- | Converts a placement to string format.
722 printSolutionLine :: Node.List -- ^ The node list
723 -> Instance.List -- ^ The instance list
724 -> Int -- ^ Maximum node name length
725 -> Int -- ^ Maximum instance name length
726 -> Placement -- ^ The current placement
727 -> Int -- ^ The index of the placement in
729 -> (String, [String])
730 printSolutionLine nl il nmlen imlen plc pos =
732 pmlen = (2*nmlen + 1)
733 (i, p, s, mv, c) = plc
734 inst = Container.find i il
735 inam = Instance.alias inst
736 npri = Node.alias $ Container.find p nl
737 nsec = Node.alias $ Container.find s nl
738 opri = Node.alias $ Container.find (Instance.pNode inst) nl
739 osec = Node.alias $ Container.find (Instance.sNode inst) nl
740 (moves, cmds) = computeMoves inst inam mv npri nsec
741 ostr = printf "%s:%s" opri osec::String
742 nstr = printf "%s:%s" npri nsec::String
744 (printf " %3d. %-*s %-*s => %-*s %.8f a=%s"
745 pos imlen inam pmlen ostr
749 -- | Return the instance and involved nodes in an instance move.
750 involvedNodes :: Instance.List -> Placement -> [Ndx]
751 involvedNodes il plc =
752 let (i, np, ns, _, _) = plc
753 inst = Container.find i il
754 op = Instance.pNode inst
755 os = Instance.sNode inst
756 in nub [np, ns, op, os]
758 -- | Inner function for splitJobs, that either appends the next job to
759 -- the current jobset, or starts a new jobset.
760 mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
761 mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
762 mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
763 | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
764 | otherwise = ([n]:cjs, ndx)
766 -- | Break a list of moves into independent groups. Note that this
767 -- will reverse the order of jobs.
768 splitJobs :: [MoveJob] -> [JobSet]
769 splitJobs = fst . foldl mergeJobs ([], [])
771 -- | Given a list of commands, prefix them with @gnt-instance@ and
772 -- also beautify the display a little.
773 formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
774 formatJob jsn jsl (sn, (_, _, _, cmds)) =
776 printf " echo job %d/%d" jsn sn:
778 map (" gnt-instance " ++) cmds
780 then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
783 -- | Given a list of commands, prefix them with @gnt-instance@ and
784 -- also beautify the display a little.
785 formatCmds :: [JobSet] -> String
788 concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
792 -- | Print the node list.
793 printNodes :: Node.List -> [String] -> String
795 let fields = case fs of
796 [] -> Node.defaultFields
797 "+":rest -> Node.defaultFields ++ rest
799 snl = sortBy (comparing Node.idx) (Container.elems nl)
800 (header, isnum) = unzip $ map Node.showHeader fields
801 in unlines . map ((:) ' ' . intercalate " ") $
802 formatTable (header:map (Node.list fields) snl) isnum
804 -- | Print the instance list.
805 printInsts :: Node.List -> Instance.List -> String
807 let sil = sortBy (comparing Instance.idx) (Container.elems il)
808 helper inst = [ if Instance.running inst then "R" else " "
810 , Container.nameOf nl (Instance.pNode inst)
811 , let sdx = Instance.sNode inst
812 in if sdx == Node.noSecondary
814 else Container.nameOf nl sdx
815 , printf "%3d" $ Instance.vcpus inst
816 , printf "%5d" $ Instance.mem inst
817 , printf "%5d" $ Instance.dsk inst `div` 1024
823 where DynUtil lC lM lD lN = Instance.util inst
824 header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
825 , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
826 isnum = False:False:False:False:repeat True
827 in unlines . map ((:) ' ' . intercalate " ") $
828 formatTable (header:map helper sil) isnum
830 -- | Shows statistics for a given node list.
831 printStats :: Node.List -> String
833 let dcvs = compDetailedCV nl
834 (weights, names) = unzip detailedCVInfo
835 hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
836 formatted = map (\(w, header, val) ->
837 printf "%s=%.8f(x%.2f)" header val w::String) hd
838 in intercalate ", " formatted
840 -- | Convert a placement into a list of OpCodes (basically a job).
841 iMoveToJob :: Node.List -> Instance.List
842 -> Idx -> IMove -> [OpCodes.OpCode]
843 iMoveToJob nl il idx move =
844 let inst = Container.find idx il
845 iname = Instance.name inst
846 lookNode = Just . Container.nameOf nl
847 opF = if Instance.running inst
848 then OpCodes.OpMigrateInstance iname True False
849 else OpCodes.OpFailoverInstance iname False
850 opR n = OpCodes.OpReplaceDisks iname (lookNode n)
851 OpCodes.ReplaceNewSecondary [] Nothing
854 ReplacePrimary np -> [ opF, opR np, opF ]
855 ReplaceSecondary ns -> [ opR ns ]
856 ReplaceAndFailover np -> [ opR np, opF ]
857 FailoverAndReplace ns -> [ opF, opR ns ]
859 -- | Computes the group of an instance
860 instanceGroup :: Node.List -> Instance.Instance -> Result GroupID
862 let sidx = Instance.sNode i
863 pnode = Container.find (Instance.pNode i) nl
864 snode = if sidx == Node.noSecondary
866 else Container.find sidx nl
867 puuid = Node.group pnode
868 suuid = Node.group snode
870 then fail ("Instance placed accross two node groups, primary " ++ puuid ++
871 ", secondary " ++ suuid)
874 -- | Compute the list of badly allocated instances (split across node
876 findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
877 findSplitInstances nl il =
878 filter (not . isOk . instanceGroup nl) (Container.elems il)
880 -- | Splits a cluster into the component node groups
881 splitCluster :: Node.List -> Instance.List ->
882 [(GroupID, (Node.List, Instance.List))]
884 let ngroups = Node.computeGroups (Container.elems nl)
885 in map (\(guuid, nodes) ->
886 let nidxs = map Node.idx nodes
887 nodes' = zip nidxs nodes
888 instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
889 in (guuid, (Container.fromAssocList nodes', instances))) ngroups