1 {-| Implementation of cluster-wide logic.
3 This module holds all pure cluster-logic; I\/O related functionality
4 goes into the "Main" module for the individual binaries.
10 Copyright (C) 2009, 2010 Google Inc.
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 2 of the License, or
15 (at your option) any later version.
17 This program is distributed in the hope that it will be useful, but
18 WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
29 module Ganeti.HTools.Cluster
36 -- * Generic functions
38 , computeAllocationDelta
39 -- * First phase functions
41 -- * Second phase functions
46 -- * Display functions
49 -- * Balacing functions
56 -- * IAllocator functions
61 -- * Allocation functions
70 import Data.Ord (comparing)
71 import Text.Printf (printf)
74 import qualified Ganeti.HTools.Container as Container
75 import qualified Ganeti.HTools.Instance as Instance
76 import qualified Ganeti.HTools.Node as Node
77 import Ganeti.HTools.Types
78 import Ganeti.HTools.Utils
79 import qualified Ganeti.OpCodes as OpCodes
83 -- | Allocation\/relocation solution.
84 type AllocSolution = ([FailMode], Int, [(Score, Node.AllocElement)])
86 -- | The complete state for the balancing solution
87 data Table = Table Node.List Instance.List Score [Placement]
90 data CStats = CStats { csFmem :: Int -- ^ Cluster free mem
91 , csFdsk :: Int -- ^ Cluster free disk
92 , csAmem :: Int -- ^ Cluster allocatable mem
93 , csAdsk :: Int -- ^ Cluster allocatable disk
94 , csAcpu :: Int -- ^ Cluster allocatable cpus
95 , csMmem :: Int -- ^ Max node allocatable mem
96 , csMdsk :: Int -- ^ Max node allocatable disk
97 , csMcpu :: Int -- ^ Max node allocatable cpu
98 , csImem :: Int -- ^ Instance used mem
99 , csIdsk :: Int -- ^ Instance used disk
100 , csIcpu :: Int -- ^ Instance used cpu
101 , csTmem :: Double -- ^ Cluster total mem
102 , csTdsk :: Double -- ^ Cluster total disk
103 , csTcpu :: Double -- ^ Cluster total cpus
104 , csVcpu :: Int -- ^ Cluster virtual cpus (if
105 -- node pCpu has been set,
107 , csXmem :: Int -- ^ Unnacounted for mem
108 , csNmem :: Int -- ^ Node own memory
109 , csScore :: Score -- ^ The cluster score
110 , csNinst :: Int -- ^ The total number of instances
114 -- | Currently used, possibly to allocate, unallocable
115 type AllocStats = (RSpec, RSpec, RSpec)
117 -- * Utility functions
119 -- | Verifies the N+1 status and return the affected nodes.
120 verifyN1 :: [Node.Node] -> [Node.Node]
121 verifyN1 = filter Node.failN1
123 {-| Computes the pair of bad nodes and instances.
125 The bad node list is computed via a simple 'verifyN1' check, and the
126 bad instance list is the list of primary and secondary instances of
130 computeBadItems :: Node.List -> Instance.List ->
131 ([Node.Node], [Instance.Instance])
132 computeBadItems nl il =
133 let bad_nodes = verifyN1 $ getOnline nl
134 bad_instances = map (`Container.find` il) .
136 concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
138 (bad_nodes, bad_instances)
140 -- | Zero-initializer for the CStats type
141 emptyCStats :: CStats
142 emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
144 -- | Update stats with data from a new node
145 updateCStats :: CStats -> Node.Node -> CStats
146 updateCStats cs node =
147 let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
148 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
149 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
150 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
151 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
153 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
156 inc_amem = Node.fMem node - Node.rMem node
157 inc_amem' = if inc_amem > 0 then inc_amem else 0
158 inc_adsk = Node.availDisk node
159 inc_imem = truncate (Node.tMem node) - Node.nMem node
160 - Node.xMem node - Node.fMem node
161 inc_icpu = Node.uCpu node
162 inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
163 inc_vcpu = Node.hiCpu node
165 in cs { csFmem = x_fmem + Node.fMem node
166 , csFdsk = x_fdsk + Node.fDsk node
167 , csAmem = x_amem + inc_amem'
168 , csAdsk = x_adsk + inc_adsk
170 , csMmem = max x_mmem inc_amem'
171 , csMdsk = max x_mdsk inc_adsk
173 , csImem = x_imem + inc_imem
174 , csIdsk = x_idsk + inc_idsk
175 , csIcpu = x_icpu + inc_icpu
176 , csTmem = x_tmem + Node.tMem node
177 , csTdsk = x_tdsk + Node.tDsk node
178 , csTcpu = x_tcpu + Node.tCpu node
179 , csVcpu = x_vcpu + inc_vcpu
180 , csXmem = x_xmem + Node.xMem node
181 , csNmem = x_nmem + Node.nMem node
182 , csNinst = x_ninst + length (Node.pList node)
185 -- | Compute the total free disk and memory in the cluster.
186 totalResources :: Node.List -> CStats
188 let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
189 in cs { csScore = compCV nl }
191 -- | Compute the delta between two cluster state.
193 -- This is used when doing allocations, to understand better the
194 -- available cluster resources. The return value is a triple of the
195 -- current used values, the delta that was still allocated, and what
196 -- was left unallocated.
197 computeAllocationDelta :: CStats -> CStats -> AllocStats
198 computeAllocationDelta cini cfin =
199 let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
200 CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
201 csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
202 rini = RSpec i_icpu i_imem i_idsk
203 rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
204 un_cpu = v_cpu - f_icpu
205 runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
206 in (rini, rfin, runa)
208 -- | The names and weights of the individual elements in the CV list
209 detailedCVInfo :: [(Double, String)]
210 detailedCVInfo = [ (1, "free_mem_cv")
211 , (1, "free_disk_cv")
213 , (1, "reserved_mem_cv")
214 , (4, "offline_all_cnt")
215 , (16, "offline_pri_cnt")
216 , (1, "vcpu_ratio_cv")
219 , (1, "disk_load_cv")
221 , (2, "pri_tags_score")
224 detailedCVWeights :: [Double]
225 detailedCVWeights = map fst detailedCVInfo
227 -- | Compute the mem and disk covariance.
228 compDetailedCV :: Node.List -> [Double]
231 all_nodes = Container.elems nl
232 (offline, nodes) = partition Node.offline all_nodes
233 mem_l = map Node.pMem nodes
234 dsk_l = map Node.pDsk nodes
235 -- metric: memory covariance
236 mem_cv = varianceCoeff mem_l
237 -- metric: disk covariance
238 dsk_cv = varianceCoeff dsk_l
239 -- metric: count of instances living on N1 failing nodes
240 n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
241 length (Node.pList n)) .
242 filter Node.failN1 $ nodes :: Double
243 res_l = map Node.pRem nodes
244 -- metric: reserved memory covariance
245 res_cv = varianceCoeff res_l
246 -- offline instances metrics
247 offline_ipri = sum . map (length . Node.pList) $ offline
248 offline_isec = sum . map (length . Node.sList) $ offline
249 -- metric: count of instances on offline nodes
250 off_score = fromIntegral (offline_ipri + offline_isec)::Double
251 -- metric: count of primary instances on offline nodes (this
252 -- helps with evacuation/failover of primary instances on
253 -- 2-node clusters with one node offline)
254 off_pri_score = fromIntegral offline_ipri::Double
255 cpu_l = map Node.pCpu nodes
256 -- metric: covariance of vcpu/pcpu ratio
257 cpu_cv = varianceCoeff cpu_l
258 -- metrics: covariance of cpu, memory, disk and network load
259 (c_load, m_load, d_load, n_load) = unzip4 $
261 let DynUtil c1 m1 d1 n1 = Node.utilLoad n
262 DynUtil c2 m2 d2 n2 = Node.utilPool n
263 in (c1/c2, m1/m2, d1/d2, n1/n2)
265 -- metric: conflicting instance count
266 pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
267 pri_tags_score = fromIntegral pri_tags_inst::Double
268 in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
269 , varianceCoeff c_load, varianceCoeff m_load
270 , varianceCoeff d_load, varianceCoeff n_load
273 -- | Compute the /total/ variance.
274 compCV :: Node.List -> Double
275 compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
277 -- | Compute online nodes from a Node.List
278 getOnline :: Node.List -> [Node.Node]
279 getOnline = filter (not . Node.offline) . Container.elems
283 -- | Compute best table. Note that the ordering of the arguments is important.
284 compareTables :: Table -> Table -> Table
285 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
286 if a_cv > b_cv then b else a
288 -- | Applies an instance move to a given node list and instance.
289 applyMove :: Node.List -> Instance.Instance
290 -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
292 applyMove nl inst Failover =
293 let old_pdx = Instance.pNode inst
294 old_sdx = Instance.sNode inst
295 old_p = Container.find old_pdx nl
296 old_s = Container.find old_sdx nl
297 int_p = Node.removePri old_p inst
298 int_s = Node.removeSec old_s inst
299 force_p = Node.offline old_p
300 new_nl = do -- Maybe monad
301 new_p <- Node.addPriEx force_p int_s inst
302 new_s <- Node.addSec int_p inst old_sdx
303 let new_inst = Instance.setBoth inst old_sdx old_pdx
304 return (Container.addTwo old_pdx new_s old_sdx new_p nl,
305 new_inst, old_sdx, old_pdx)
308 -- Replace the primary (f:, r:np, f)
309 applyMove nl inst (ReplacePrimary new_pdx) =
310 let old_pdx = Instance.pNode inst
311 old_sdx = Instance.sNode inst
312 old_p = Container.find old_pdx nl
313 old_s = Container.find old_sdx nl
314 tgt_n = Container.find new_pdx nl
315 int_p = Node.removePri old_p inst
316 int_s = Node.removeSec old_s inst
317 force_p = Node.offline old_p
318 new_nl = do -- Maybe monad
319 -- check that the current secondary can host the instance
320 -- during the migration
321 tmp_s <- Node.addPriEx force_p int_s inst
322 let tmp_s' = Node.removePri tmp_s inst
323 new_p <- Node.addPriEx force_p tgt_n inst
324 new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
325 let new_inst = Instance.setPri inst new_pdx
326 return (Container.add new_pdx new_p $
327 Container.addTwo old_pdx int_p old_sdx new_s nl,
328 new_inst, new_pdx, old_sdx)
331 -- Replace the secondary (r:ns)
332 applyMove nl inst (ReplaceSecondary new_sdx) =
333 let old_pdx = Instance.pNode inst
334 old_sdx = Instance.sNode inst
335 old_s = Container.find old_sdx nl
336 tgt_n = Container.find new_sdx nl
337 int_s = Node.removeSec old_s inst
338 force_s = Node.offline old_s
339 new_inst = Instance.setSec inst new_sdx
340 new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
341 \new_s -> return (Container.addTwo new_sdx
342 new_s old_sdx int_s nl,
343 new_inst, old_pdx, new_sdx)
346 -- Replace the secondary and failover (r:np, f)
347 applyMove nl inst (ReplaceAndFailover new_pdx) =
348 let old_pdx = Instance.pNode inst
349 old_sdx = Instance.sNode inst
350 old_p = Container.find old_pdx nl
351 old_s = Container.find old_sdx nl
352 tgt_n = Container.find new_pdx nl
353 int_p = Node.removePri old_p inst
354 int_s = Node.removeSec old_s inst
355 force_s = Node.offline old_s
356 new_nl = do -- Maybe monad
357 new_p <- Node.addPri tgt_n inst
358 new_s <- Node.addSecEx force_s int_p inst new_pdx
359 let new_inst = Instance.setBoth inst new_pdx old_pdx
360 return (Container.add new_pdx new_p $
361 Container.addTwo old_pdx new_s old_sdx int_s nl,
362 new_inst, new_pdx, old_pdx)
365 -- Failver and replace the secondary (f, r:ns)
366 applyMove nl inst (FailoverAndReplace new_sdx) =
367 let old_pdx = Instance.pNode inst
368 old_sdx = Instance.sNode inst
369 old_p = Container.find old_pdx nl
370 old_s = Container.find old_sdx nl
371 tgt_n = Container.find new_sdx nl
372 int_p = Node.removePri old_p inst
373 int_s = Node.removeSec old_s inst
374 force_p = Node.offline old_p
375 new_nl = do -- Maybe monad
376 new_p <- Node.addPriEx force_p int_s inst
377 new_s <- Node.addSecEx force_p tgt_n inst old_sdx
378 let new_inst = Instance.setBoth inst old_sdx new_sdx
379 return (Container.add new_sdx new_s $
380 Container.addTwo old_sdx new_p old_pdx int_p nl,
381 new_inst, old_sdx, new_sdx)
384 -- | Tries to allocate an instance on one given node.
385 allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
386 -> OpResult Node.AllocElement
387 allocateOnSingle nl inst p =
388 let new_pdx = Node.idx p
389 new_inst = Instance.setBoth inst new_pdx Node.noSecondary
390 new_nl = Node.addPri p inst >>= \new_p ->
391 return (Container.add new_pdx new_p nl, new_inst, [new_p])
394 -- | Tries to allocate an instance on a given pair of nodes.
395 allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
396 -> OpResult Node.AllocElement
397 allocateOnPair nl inst tgt_p tgt_s =
398 let new_pdx = Node.idx tgt_p
399 new_sdx = Node.idx tgt_s
400 new_nl = do -- Maybe monad
401 new_p <- Node.addPri tgt_p inst
402 new_s <- Node.addSec tgt_s inst new_pdx
403 let new_inst = Instance.setBoth inst new_pdx new_sdx
404 return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst,
408 -- | Tries to perform an instance move and returns the best table
409 -- between the original one and the new one.
410 checkSingleStep :: Table -- ^ The original table
411 -> Instance.Instance -- ^ The instance to move
412 -> Table -- ^ The current best table
413 -> IMove -- ^ The move to apply
414 -> Table -- ^ The final best table
415 checkSingleStep ini_tbl target cur_tbl move =
417 Table ini_nl ini_il _ ini_plc = ini_tbl
418 tmp_resu = applyMove ini_nl target move
422 OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
423 let tgt_idx = Instance.idx target
424 upd_cvar = compCV upd_nl
425 upd_il = Container.add tgt_idx new_inst ini_il
426 upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
427 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
429 compareTables cur_tbl upd_tbl
431 -- | Given the status of the current secondary as a valid new node and
432 -- the current candidate target node, generate the possible moves for
434 possibleMoves :: Bool -- ^ Whether the secondary node is a valid new node
435 -> Ndx -- ^ Target node candidate
436 -> [IMove] -- ^ List of valid result moves
437 possibleMoves True tdx =
438 [ReplaceSecondary tdx,
439 ReplaceAndFailover tdx,
441 FailoverAndReplace tdx]
443 possibleMoves False tdx =
444 [ReplaceSecondary tdx,
445 ReplaceAndFailover tdx]
447 -- | Compute the best move for a given instance.
448 checkInstanceMove :: [Ndx] -- ^ Allowed target node indices
449 -> Bool -- ^ Whether disk moves are allowed
450 -> Table -- ^ Original table
451 -> Instance.Instance -- ^ Instance to move
452 -> Table -- ^ Best new table for this instance
453 checkInstanceMove nodes_idx disk_moves ini_tbl target =
455 opdx = Instance.pNode target
456 osdx = Instance.sNode target
457 nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
458 use_secondary = elem osdx nodes_idx
459 aft_failover = if use_secondary -- if allowed to failover
460 then checkSingleStep ini_tbl target ini_tbl Failover
462 all_moves = if disk_moves
463 then concatMap (possibleMoves use_secondary) nodes
466 -- iterate over the possible nodes for this instance
467 foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
469 -- | Compute the best next move.
470 checkMove :: [Ndx] -- ^ Allowed target node indices
471 -> Bool -- ^ Whether disk moves are allowed
472 -> Table -- ^ The current solution
473 -> [Instance.Instance] -- ^ List of instances still to move
474 -> Table -- ^ The new solution
475 checkMove nodes_idx disk_moves ini_tbl victims =
476 let Table _ _ _ ini_plc = ini_tbl
477 -- iterate over all instances, computing the best move
481 compareTables step_tbl $
482 checkInstanceMove nodes_idx disk_moves ini_tbl em)
484 Table _ _ _ best_plc = best_tbl
485 in if length best_plc == length ini_plc
486 then ini_tbl -- no advancement
489 -- | Check if we are allowed to go deeper in the balancing
490 doNextBalance :: Table -- ^ The starting table
491 -> Int -- ^ Remaining length
492 -> Score -- ^ Score at which to stop
493 -> Bool -- ^ The resulting table and commands
494 doNextBalance ini_tbl max_rounds min_score =
495 let Table _ _ ini_cv ini_plc = ini_tbl
496 ini_plc_len = length ini_plc
497 in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
499 -- | Run a balance move
500 tryBalance :: Table -- ^ The starting table
501 -> Bool -- ^ Allow disk moves
502 -> Bool -- ^ Only evacuate moves
503 -> Score -- ^ Min gain threshold
504 -> Score -- ^ Min gain
505 -> Maybe Table -- ^ The resulting table and commands
506 tryBalance ini_tbl disk_moves evac_mode mg_limit min_gain =
507 let Table ini_nl ini_il ini_cv _ = ini_tbl
508 all_inst = Container.elems ini_il
509 all_inst' = if evac_mode
510 then let bad_nodes = map Node.idx . filter Node.offline $
511 Container.elems ini_nl
512 in filter (\e -> Instance.sNode e `elem` bad_nodes ||
513 Instance.pNode e `elem` bad_nodes)
516 reloc_inst = filter Instance.movable all_inst'
517 node_idx = map Node.idx . filter (not . Node.offline) $
518 Container.elems ini_nl
519 fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
520 (Table _ _ fin_cv _) = fin_tbl
522 if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
523 then Just fin_tbl -- this round made success, return the new table
526 -- * Allocation functions
528 -- | Build failure stats out of a list of failures
529 collapseFailures :: [FailMode] -> FailStats
530 collapseFailures flst =
531 map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
533 -- | Update current Allocation solution and failure stats with new
535 concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
536 concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols)
538 concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) =
539 let nscore = compCV nl
540 -- Choose the old or new solution, based on the cluster score
541 nsols = case osols of
547 -- FIXME: here we simply concat to lists with more
548 -- than one element; we should instead abort, since
549 -- this is not a valid usage of this function
550 xs -> (nscore, ns):xs
552 -- Note: we force evaluation of nsols here in order to keep the
553 -- memory profile low - we know that we will need nsols for sure
554 -- in the next cycle, so we force evaluation of nsols, since the
555 -- foldl' in the caller will only evaluate the tuple, but not the
556 -- elements of the tuple
557 in nsols `seq` nsuc `seq` (flst, nsuc, nsols)
559 -- | Try to allocate an instance on the cluster.
560 tryAlloc :: (Monad m) =>
561 Node.List -- ^ The node list
562 -> Instance.List -- ^ The instance list
563 -> Instance.Instance -- ^ The instance to allocate
564 -> Int -- ^ Required number of nodes
565 -> m AllocSolution -- ^ Possible solution list
566 tryAlloc nl _ inst 2 =
567 let all_nodes = getOnline nl
568 all_pairs = liftM2 (,) all_nodes all_nodes
569 ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
570 sols = foldl' (\cstate (p, s) ->
571 concatAllocs cstate $ allocateOnPair nl inst p s
572 ) ([], 0, []) ok_pairs
575 tryAlloc nl _ inst 1 =
576 let all_nodes = getOnline nl
577 sols = foldl' (\cstate ->
578 concatAllocs cstate . allocateOnSingle nl inst
579 ) ([], 0, []) all_nodes
582 tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
583 \destinations required (" ++ show reqn ++
584 "), only two supported"
586 -- | Try to allocate an instance on the cluster.
587 tryReloc :: (Monad m) =>
588 Node.List -- ^ The node list
589 -> Instance.List -- ^ The instance list
590 -> Idx -- ^ The index of the instance to move
591 -> Int -- ^ The number of nodes required
592 -> [Ndx] -- ^ Nodes which should not be used
593 -> m AllocSolution -- ^ Solution list
594 tryReloc nl il xid 1 ex_idx =
595 let all_nodes = getOnline nl
596 inst = Container.find xid il
597 ex_idx' = Instance.pNode inst:ex_idx
598 valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
599 valid_idxes = map Node.idx valid_nodes
600 sols1 = foldl' (\cstate x ->
603 applyMove nl inst (ReplaceSecondary x)
604 return (mnl, i, [Container.find x mnl])
605 in concatAllocs cstate em
606 ) ([], 0, []) valid_idxes
609 tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \
610 \destinations required (" ++ show reqn ++
611 "), only one supported"
613 -- | Try to evacuate a list of nodes.
614 tryEvac :: (Monad m) =>
615 Node.List -- ^ The node list
616 -> Instance.List -- ^ The instance list
617 -> [Ndx] -- ^ Nodes to be evacuated
618 -> m AllocSolution -- ^ Solution list
619 tryEvac nl il ex_ndx =
620 let ex_nodes = map (`Container.find` nl) ex_ndx
621 all_insts = nub . concatMap Node.sList $ ex_nodes
623 (_, sol) <- foldM (\(nl', (_, _, rsols)) idx -> do
624 -- FIXME: hardcoded one node here
625 (fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx
627 csol@(_, (nl'', _, _)):_ ->
628 return (nl'', (fm, cs, csol:rsols))
629 _ -> fail $ "Can't evacuate instance " ++
630 Instance.name (Container.find idx il)
631 ) (nl, ([], 0, [])) all_insts
634 -- | Recursively place instances on the cluster until we're out of space
635 iterateAlloc :: Node.List
639 -> [Instance.Instance]
640 -> Result (FailStats, Node.List, Instance.List,
642 iterateAlloc nl il newinst nreq ixes =
643 let depth = length ixes
644 newname = printf "new-%d" depth::String
645 newidx = length (Container.elems il) + depth
646 newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
647 in case tryAlloc nl il newi2 nreq of
649 Ok (errs, _, sols3) ->
651 [] -> Ok (collapseFailures errs, nl, il, ixes)
652 (_, (xnl, xi, _)):[] ->
653 iterateAlloc xnl (Container.add newidx xi il)
654 newinst nreq $! (xi:ixes)
655 _ -> Bad "Internal error: multiple solutions for single\
658 tieredAlloc :: Node.List
662 -> [Instance.Instance]
663 -> Result (FailStats, Node.List, Instance.List,
665 tieredAlloc nl il newinst nreq ixes =
666 case iterateAlloc nl il newinst nreq ixes of
668 Ok (errs, nl', il', ixes') ->
669 case Instance.shrinkByType newinst . fst . last $
670 sortBy (comparing snd) errs of
671 Bad _ -> Ok (errs, nl', il', ixes')
673 tieredAlloc nl' il' newinst' nreq ixes'
675 -- * Formatting functions
677 -- | Given the original and final nodes, computes the relocation description.
678 computeMoves :: Instance.Instance -- ^ The instance to be moved
679 -> String -- ^ The instance name
680 -> IMove -- ^ The move being performed
681 -> String -- ^ New primary
682 -> String -- ^ New secondary
683 -> (String, [String])
684 -- ^ Tuple of moves and commands list; moves is containing
685 -- either @/f/@ for failover or @/r:name/@ for replace
686 -- secondary, while the command list holds gnt-instance
687 -- commands (without that prefix), e.g \"@failover instance1@\"
688 computeMoves i inam mv c d =
690 Failover -> ("f", [mig])
691 FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
692 ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
693 ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
694 ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
695 where morf = if Instance.running i then "migrate" else "failover"
696 mig = printf "%s -f %s" morf inam::String
697 rep n = printf "replace-disks -n %s %s" n inam
699 -- | Converts a placement to string format.
700 printSolutionLine :: Node.List -- ^ The node list
701 -> Instance.List -- ^ The instance list
702 -> Int -- ^ Maximum node name length
703 -> Int -- ^ Maximum instance name length
704 -> Placement -- ^ The current placement
705 -> Int -- ^ The index of the placement in
707 -> (String, [String])
708 printSolutionLine nl il nmlen imlen plc pos =
710 pmlen = (2*nmlen + 1)
711 (i, p, s, mv, c) = plc
712 inst = Container.find i il
713 inam = Instance.alias inst
714 npri = Node.alias $ Container.find p nl
715 nsec = Node.alias $ Container.find s nl
716 opri = Node.alias $ Container.find (Instance.pNode inst) nl
717 osec = Node.alias $ Container.find (Instance.sNode inst) nl
718 (moves, cmds) = computeMoves inst inam mv npri nsec
719 ostr = printf "%s:%s" opri osec::String
720 nstr = printf "%s:%s" npri nsec::String
722 (printf " %3d. %-*s %-*s => %-*s %.8f a=%s"
723 pos imlen inam pmlen ostr
727 -- | Return the instance and involved nodes in an instance move.
728 involvedNodes :: Instance.List -> Placement -> [Ndx]
729 involvedNodes il plc =
730 let (i, np, ns, _, _) = plc
731 inst = Container.find i il
732 op = Instance.pNode inst
733 os = Instance.sNode inst
734 in nub [np, ns, op, os]
736 -- | Inner function for splitJobs, that either appends the next job to
737 -- the current jobset, or starts a new jobset.
738 mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
739 mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
740 mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
741 | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
742 | otherwise = ([n]:cjs, ndx)
744 -- | Break a list of moves into independent groups. Note that this
745 -- will reverse the order of jobs.
746 splitJobs :: [MoveJob] -> [JobSet]
747 splitJobs = fst . foldl mergeJobs ([], [])
749 -- | Given a list of commands, prefix them with @gnt-instance@ and
750 -- also beautify the display a little.
751 formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
752 formatJob jsn jsl (sn, (_, _, _, cmds)) =
754 printf " echo job %d/%d" jsn sn:
756 map (" gnt-instance " ++) cmds
758 then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
761 -- | Given a list of commands, prefix them with @gnt-instance@ and
762 -- also beautify the display a little.
763 formatCmds :: [JobSet] -> String
766 concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
770 -- | Print the node list.
771 printNodes :: Node.List -> [String] -> String
773 let fields = case fs of
774 [] -> Node.defaultFields
775 "+":rest -> Node.defaultFields ++ rest
777 snl = sortBy (comparing Node.idx) (Container.elems nl)
778 (header, isnum) = unzip $ map Node.showHeader fields
779 in unlines . map ((:) ' ' . intercalate " ") $
780 formatTable (header:map (Node.list fields) snl) isnum
782 -- | Print the instance list.
783 printInsts :: Node.List -> Instance.List -> String
785 let sil = sortBy (comparing Instance.idx) (Container.elems il)
786 helper inst = [ if Instance.running inst then "R" else " "
788 , Container.nameOf nl (Instance.pNode inst)
789 , let sdx = Instance.sNode inst
790 in if sdx == Node.noSecondary
792 else Container.nameOf nl sdx
793 , printf "%3d" $ Instance.vcpus inst
794 , printf "%5d" $ Instance.mem inst
795 , printf "%5d" $ Instance.dsk inst `div` 1024
801 where DynUtil lC lM lD lN = Instance.util inst
802 header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
803 , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
804 isnum = False:False:False:False:repeat True
805 in unlines . map ((:) ' ' . intercalate " ") $
806 formatTable (header:map helper sil) isnum
808 -- | Shows statistics for a given node list.
809 printStats :: Node.List -> String
811 let dcvs = compDetailedCV nl
812 (weights, names) = unzip detailedCVInfo
813 hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
814 formatted = map (\(w, header, val) ->
815 printf "%s=%.8f(x%.2f)" header val w::String) hd
816 in intercalate ", " formatted
818 -- | Convert a placement into a list of OpCodes (basically a job).
819 iMoveToJob :: Node.List -> Instance.List
820 -> Idx -> IMove -> [OpCodes.OpCode]
821 iMoveToJob nl il idx move =
822 let inst = Container.find idx il
823 iname = Instance.name inst
824 lookNode = Just . Container.nameOf nl
825 opF = if Instance.running inst
826 then OpCodes.OpMigrateInstance iname True False
827 else OpCodes.OpFailoverInstance iname False
828 opR n = OpCodes.OpReplaceDisks iname (lookNode n)
829 OpCodes.ReplaceNewSecondary [] Nothing
832 ReplacePrimary np -> [ opF, opR np, opF ]
833 ReplaceSecondary ns -> [ opR ns ]
834 ReplaceAndFailover np -> [ opR np, opF ]
835 FailoverAndReplace ns -> [ opF, opR ns ]
837 -- | Computes the group of an instance
838 instanceGroup :: Node.List -> Instance.Instance -> Result GroupID
840 let sidx = Instance.sNode i
841 pnode = Container.find (Instance.pNode i) nl
842 snode = if sidx == Node.noSecondary
844 else Container.find sidx nl
845 puuid = Node.group pnode
846 suuid = Node.group snode
848 then fail ("Instance placed accross two node groups, primary " ++ puuid ++
849 ", secondary " ++ suuid)
852 -- | Compute the list of badly allocated instances (split across node
854 findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
855 findSplitInstances nl il =
856 filter (not . isOk . instanceGroup nl) (Container.elems il)
857 where isOk x = case x of
861 -- | Splits a cluster into the component node groups
862 splitCluster :: Node.List -> Instance.List ->
863 [(GroupID, (Node.List, Instance.List))]
865 let ngroups = Node.computeGroups (Container.elems nl)
866 in map (\(guuid, nodes) ->
867 let nidxs = map Node.idx nodes
868 nodes' = zip nidxs nodes
869 instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
870 in (guuid, (Container.fromAssocList nodes', instances))) ngroups