{-
-Copyright (C) 2009, 2010, 2011 Google Inc.
+Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-}
module Ganeti.HTools.Cluster
- (
- -- * Types
- AllocSolution(..)
- , EvacSolution(..)
- , Table(..)
- , CStats(..)
- , AllocStats
- -- * Generic functions
- , totalResources
- , computeAllocationDelta
- -- * First phase functions
- , computeBadItems
- -- * Second phase functions
- , printSolutionLine
- , formatCmds
- , involvedNodes
- , splitJobs
- -- * Display functions
- , printNodes
- , printInsts
- -- * Balacing functions
- , checkMove
- , doNextBalance
- , tryBalance
- , compCV
- , compCVNodes
- , compDetailedCV
- , printStats
- , iMoveToJob
- -- * IAllocator functions
- , genAllocNodes
- , tryAlloc
- , tryMGAlloc
- , tryReloc
- , tryNodeEvac
- , tryChangeGroup
- , collapseFailures
- -- * Allocation functions
- , iterateAlloc
- , tieredAlloc
- -- * Node group functions
- , instanceGroup
- , findSplitInstances
- , splitCluster
- ) where
+ (
+ -- * Types
+ AllocSolution(..)
+ , EvacSolution(..)
+ , Table(..)
+ , CStats(..)
+ , AllocStats
+ , AllocResult
+ , AllocMethod
+ -- * Generic functions
+ , totalResources
+ , computeAllocationDelta
+ -- * First phase functions
+ , computeBadItems
+ -- * Second phase functions
+ , printSolutionLine
+ , formatCmds
+ , involvedNodes
+ , splitJobs
+ -- * Display functions
+ , printNodes
+ , printInsts
+ -- * Balacing functions
+ , checkMove
+ , doNextBalance
+ , tryBalance
+ , compCV
+ , compCVNodes
+ , compDetailedCV
+ , printStats
+ , iMoveToJob
+ -- * IAllocator functions
+ , genAllocNodes
+ , tryAlloc
+ , tryMGAlloc
+ , tryReloc
+ , tryNodeEvac
+ , tryChangeGroup
+ , collapseFailures
+ -- * Allocation functions
+ , iterateAlloc
+ , tieredAlloc
+ -- * Node group functions
+ , instanceGroup
+ , findSplitInstances
+ , splitCluster
+ ) where
import qualified Data.IntSet as IntSet
import Data.List
-- type consists of actual opcodes (a restricted subset) that are
-- transmitted back to Ganeti.
data EvacSolution = EvacSolution
- { esMoved :: [(Idx, Gdx, [Ndx])] -- ^ Instances moved successfully
- , esFailed :: [(Idx, String)] -- ^ Instances which were not
- -- relocated
- , esOpCodes :: [[OpCodes.OpCode]] -- ^ List of jobs
- }
+ { esMoved :: [(Idx, Gdx, [Ndx])] -- ^ Instances moved successfully
+ , esFailed :: [(Idx, String)] -- ^ Instances which were not
+ -- relocated
+ , esOpCodes :: [[OpCodes.OpCode]] -- ^ List of jobs
+ }
-- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
type AllocResult = (FailStats, Node.List, Instance.List,
-- | Currently used, possibly to allocate, unallocable.
type AllocStats = (RSpec, RSpec, RSpec)
+-- | A simple type for allocation functions.
+type AllocMethod = Node.List -- ^ Node list
+ -> Instance.List -- ^ Instance list
+ -> Maybe Int -- ^ Optional allocation limit
+ -> Instance.Instance -- ^ Instance spec for allocation
+ -> AllocNodes -- ^ Which nodes we should allocate on
+ -> [Instance.Instance] -- ^ Allocated instances
+ -> [CStats] -- ^ Running cluster stats
+ -> Result AllocResult -- ^ Allocation result
+
-- * Utility functions
-- | Verifies the N+1 status and return the affected nodes.
in
(bad_nodes, bad_instances)
+-- | Extracts the node pairs for an instance. This can fail if the
+-- instance is single-homed. FIXME: this needs to be improved,
+-- together with the general enhancement for handling non-DRBD moves.
+instanceNodes :: Node.List -> Instance.Instance ->
+ (Ndx, Ndx, Node.Node, Node.Node)
+instanceNodes nl inst =
+ let old_pdx = Instance.pNode inst
+ old_sdx = Instance.sNode inst
+ old_p = Container.find old_pdx nl
+ old_s = Container.find old_sdx nl
+ in (old_pdx, old_sdx, old_p, old_s)
+
-- | Zero-initializer for the CStats type.
emptyCStats :: CStats
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
-- | Update stats with data from a new node.
updateCStats :: CStats -> Node.Node -> CStats
updateCStats cs node =
- let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
- csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
- csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
- csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
- csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
- csVcpu = x_vcpu,
- csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
- }
- = cs
- inc_amem = Node.fMem node - Node.rMem node
- inc_amem' = if inc_amem > 0 then inc_amem else 0
- inc_adsk = Node.availDisk node
- inc_imem = truncate (Node.tMem node) - Node.nMem node
- - Node.xMem node - Node.fMem node
- inc_icpu = Node.uCpu node
- inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
- inc_vcpu = Node.hiCpu node
- inc_acpu = Node.availCpu node
-
- in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
- , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
- , csAmem = x_amem + fromIntegral inc_amem'
- , csAdsk = x_adsk + fromIntegral inc_adsk
- , csAcpu = x_acpu + fromIntegral inc_acpu
- , csMmem = max x_mmem (fromIntegral inc_amem')
- , csMdsk = max x_mdsk (fromIntegral inc_adsk)
- , csMcpu = max x_mcpu (fromIntegral inc_acpu)
- , csImem = x_imem + fromIntegral inc_imem
- , csIdsk = x_idsk + fromIntegral inc_idsk
- , csIcpu = x_icpu + fromIntegral inc_icpu
- , csTmem = x_tmem + Node.tMem node
- , csTdsk = x_tdsk + Node.tDsk node
- , csTcpu = x_tcpu + Node.tCpu node
- , csVcpu = x_vcpu + fromIntegral inc_vcpu
- , csXmem = x_xmem + fromIntegral (Node.xMem node)
- , csNmem = x_nmem + fromIntegral (Node.nMem node)
- , csNinst = x_ninst + length (Node.pList node)
- }
+ let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
+ csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
+ csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
+ csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
+ csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
+ csVcpu = x_vcpu,
+ csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
+ }
+ = cs
+ inc_amem = Node.fMem node - Node.rMem node
+ inc_amem' = if inc_amem > 0 then inc_amem else 0
+ inc_adsk = Node.availDisk node
+ inc_imem = truncate (Node.tMem node) - Node.nMem node
+ - Node.xMem node - Node.fMem node
+ inc_icpu = Node.uCpu node
+ inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
+ inc_vcpu = Node.hiCpu node
+ inc_acpu = Node.availCpu node
+ in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
+ , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
+ , csAmem = x_amem + fromIntegral inc_amem'
+ , csAdsk = x_adsk + fromIntegral inc_adsk
+ , csAcpu = x_acpu + fromIntegral inc_acpu
+ , csMmem = max x_mmem (fromIntegral inc_amem')
+ , csMdsk = max x_mdsk (fromIntegral inc_adsk)
+ , csMcpu = max x_mcpu (fromIntegral inc_acpu)
+ , csImem = x_imem + fromIntegral inc_imem
+ , csIdsk = x_idsk + fromIntegral inc_idsk
+ , csIcpu = x_icpu + fromIntegral inc_icpu
+ , csTmem = x_tmem + Node.tMem node
+ , csTdsk = x_tdsk + Node.tDsk node
+ , csTcpu = x_tcpu + Node.tCpu node
+ , csVcpu = x_vcpu + fromIntegral inc_vcpu
+ , csXmem = x_xmem + fromIntegral (Node.xMem node)
+ , csNmem = x_nmem + fromIntegral (Node.nMem node)
+ , csNinst = x_ninst + length (Node.pList node)
+ }
-- | Compute the total free disk and memory in the cluster.
totalResources :: Node.List -> CStats
totalResources nl =
- let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
- in cs { csScore = compCV nl }
+ let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
+ in cs { csScore = compCV nl }
-- | Compute the delta between two cluster state.
--
-- was left unallocated.
computeAllocationDelta :: CStats -> CStats -> AllocStats
computeAllocationDelta cini cfin =
- let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
- CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
- csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
- rini = RSpec (fromIntegral i_icpu) (fromIntegral i_imem)
- (fromIntegral i_idsk)
- rfin = RSpec (fromIntegral (f_icpu - i_icpu))
- (fromIntegral (f_imem - i_imem))
- (fromIntegral (f_idsk - i_idsk))
- un_cpu = fromIntegral (v_cpu - f_icpu)::Int
- runa = RSpec un_cpu (truncate t_mem - fromIntegral f_imem)
- (truncate t_dsk - fromIntegral f_idsk)
- in (rini, rfin, runa)
+ let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
+ CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
+ csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
+ rini = RSpec (fromIntegral i_icpu) (fromIntegral i_imem)
+ (fromIntegral i_idsk)
+ rfin = RSpec (fromIntegral (f_icpu - i_icpu))
+ (fromIntegral (f_imem - i_imem))
+ (fromIntegral (f_idsk - i_idsk))
+ un_cpu = fromIntegral (v_cpu - f_icpu)::Int
+ runa = RSpec un_cpu (truncate t_mem - fromIntegral f_imem)
+ (truncate t_dsk - fromIntegral f_idsk)
+ in (rini, rfin, runa)
-- | The names and weights of the individual elements in the CV list.
detailedCVInfo :: [(Double, String)]
-- | Compute the mem and disk covariance.
compDetailedCV :: [Node.Node] -> [Double]
compDetailedCV all_nodes =
- let
- (offline, nodes) = partition Node.offline all_nodes
- mem_l = map Node.pMem nodes
- dsk_l = map Node.pDsk nodes
- -- metric: memory covariance
- mem_cv = stdDev mem_l
- -- metric: disk covariance
- dsk_cv = stdDev dsk_l
- -- metric: count of instances living on N1 failing nodes
- n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
- length (Node.pList n)) .
- filter Node.failN1 $ nodes :: Double
- res_l = map Node.pRem nodes
- -- metric: reserved memory covariance
- res_cv = stdDev res_l
- -- offline instances metrics
- offline_ipri = sum . map (length . Node.pList) $ offline
- offline_isec = sum . map (length . Node.sList) $ offline
- -- metric: count of instances on offline nodes
- off_score = fromIntegral (offline_ipri + offline_isec)::Double
- -- metric: count of primary instances on offline nodes (this
- -- helps with evacuation/failover of primary instances on
- -- 2-node clusters with one node offline)
- off_pri_score = fromIntegral offline_ipri::Double
- cpu_l = map Node.pCpu nodes
- -- metric: covariance of vcpu/pcpu ratio
- cpu_cv = stdDev cpu_l
- -- metrics: covariance of cpu, memory, disk and network load
- (c_load, m_load, d_load, n_load) = unzip4 $
- map (\n ->
- let DynUtil c1 m1 d1 n1 = Node.utilLoad n
- DynUtil c2 m2 d2 n2 = Node.utilPool n
- in (c1/c2, m1/m2, d1/d2, n1/n2)
- ) nodes
- -- metric: conflicting instance count
- pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
- pri_tags_score = fromIntegral pri_tags_inst::Double
- in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
- , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
- , pri_tags_score ]
+ let (offline, nodes) = partition Node.offline all_nodes
+ mem_l = map Node.pMem nodes
+ dsk_l = map Node.pDsk nodes
+ -- metric: memory covariance
+ mem_cv = stdDev mem_l
+ -- metric: disk covariance
+ dsk_cv = stdDev dsk_l
+ -- metric: count of instances living on N1 failing nodes
+ n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
+ length (Node.pList n)) .
+ filter Node.failN1 $ nodes :: Double
+ res_l = map Node.pRem nodes
+ -- metric: reserved memory covariance
+ res_cv = stdDev res_l
+ -- offline instances metrics
+ offline_ipri = sum . map (length . Node.pList) $ offline
+ offline_isec = sum . map (length . Node.sList) $ offline
+ -- metric: count of instances on offline nodes
+ off_score = fromIntegral (offline_ipri + offline_isec)::Double
+ -- metric: count of primary instances on offline nodes (this
+ -- helps with evacuation/failover of primary instances on
+ -- 2-node clusters with one node offline)
+ off_pri_score = fromIntegral offline_ipri::Double
+ cpu_l = map Node.pCpu nodes
+ -- metric: covariance of vcpu/pcpu ratio
+ cpu_cv = stdDev cpu_l
+ -- metrics: covariance of cpu, memory, disk and network load
+ (c_load, m_load, d_load, n_load) =
+ unzip4 $ map (\n ->
+ let DynUtil c1 m1 d1 n1 = Node.utilLoad n
+ DynUtil c2 m2 d2 n2 = Node.utilPool n
+ in (c1/c2, m1/m2, d1/d2, n1/n2)) nodes
+ -- metric: conflicting instance count
+ pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
+ pri_tags_score = fromIntegral pri_tags_inst::Double
+ in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
+ , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
+ , pri_tags_score ]
-- | Compute the /total/ variance.
compCVNodes :: [Node.Node] -> Double
-- | Compute best table. Note that the ordering of the arguments is important.
compareTables :: Table -> Table -> Table
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
- if a_cv > b_cv then b else a
+ if a_cv > b_cv then b else a
-- | Applies an instance move to a given node list and instance.
applyMove :: Node.List -> Instance.Instance
-> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
-- Failover (f)
applyMove nl inst Failover =
- let old_pdx = Instance.pNode inst
- old_sdx = Instance.sNode inst
- old_p = Container.find old_pdx nl
- old_s = Container.find old_sdx nl
- int_p = Node.removePri old_p inst
- int_s = Node.removeSec old_s inst
- force_p = Node.offline old_p
- new_nl = do -- Maybe monad
- new_p <- Node.addPriEx force_p int_s inst
- new_s <- Node.addSec int_p inst old_sdx
- let new_inst = Instance.setBoth inst old_sdx old_pdx
- return (Container.addTwo old_pdx new_s old_sdx new_p nl,
- new_inst, old_sdx, old_pdx)
- in new_nl
+ let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
+ int_p = Node.removePri old_p inst
+ int_s = Node.removeSec old_s inst
+ new_nl = do -- Maybe monad
+ new_p <- Node.addPriEx (Node.offline old_p) int_s inst
+ new_s <- Node.addSec int_p inst old_sdx
+ let new_inst = Instance.setBoth inst old_sdx old_pdx
+ return (Container.addTwo old_pdx new_s old_sdx new_p nl,
+ new_inst, old_sdx, old_pdx)
+ in new_nl
-- Replace the primary (f:, r:np, f)
applyMove nl inst (ReplacePrimary new_pdx) =
- let old_pdx = Instance.pNode inst
- old_sdx = Instance.sNode inst
- old_p = Container.find old_pdx nl
- old_s = Container.find old_sdx nl
- tgt_n = Container.find new_pdx nl
- int_p = Node.removePri old_p inst
- int_s = Node.removeSec old_s inst
- force_p = Node.offline old_p
- new_nl = do -- Maybe monad
- -- check that the current secondary can host the instance
- -- during the migration
- tmp_s <- Node.addPriEx force_p int_s inst
- let tmp_s' = Node.removePri tmp_s inst
- new_p <- Node.addPriEx force_p tgt_n inst
- new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
- let new_inst = Instance.setPri inst new_pdx
- return (Container.add new_pdx new_p $
- Container.addTwo old_pdx int_p old_sdx new_s nl,
- new_inst, new_pdx, old_sdx)
- in new_nl
+ let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
+ tgt_n = Container.find new_pdx nl
+ int_p = Node.removePri old_p inst
+ int_s = Node.removeSec old_s inst
+ force_p = Node.offline old_p
+ new_nl = do -- Maybe monad
+ -- check that the current secondary can host the instance
+ -- during the migration
+ tmp_s <- Node.addPriEx force_p int_s inst
+ let tmp_s' = Node.removePri tmp_s inst
+ new_p <- Node.addPriEx force_p tgt_n inst
+ new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
+ let new_inst = Instance.setPri inst new_pdx
+ return (Container.add new_pdx new_p $
+ Container.addTwo old_pdx int_p old_sdx new_s nl,
+ new_inst, new_pdx, old_sdx)
+ in new_nl
-- Replace the secondary (r:ns)
applyMove nl inst (ReplaceSecondary new_sdx) =
- let old_pdx = Instance.pNode inst
- old_sdx = Instance.sNode inst
- old_s = Container.find old_sdx nl
- tgt_n = Container.find new_sdx nl
- int_s = Node.removeSec old_s inst
- force_s = Node.offline old_s
- new_inst = Instance.setSec inst new_sdx
- new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
- \new_s -> return (Container.addTwo new_sdx
- new_s old_sdx int_s nl,
- new_inst, old_pdx, new_sdx)
- in new_nl
+ let old_pdx = Instance.pNode inst
+ old_sdx = Instance.sNode inst
+ old_s = Container.find old_sdx nl
+ tgt_n = Container.find new_sdx nl
+ int_s = Node.removeSec old_s inst
+ force_s = Node.offline old_s
+ new_inst = Instance.setSec inst new_sdx
+ new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
+ \new_s -> return (Container.addTwo new_sdx
+ new_s old_sdx int_s nl,
+ new_inst, old_pdx, new_sdx)
+ in new_nl
-- Replace the secondary and failover (r:np, f)
applyMove nl inst (ReplaceAndFailover new_pdx) =
- let old_pdx = Instance.pNode inst
- old_sdx = Instance.sNode inst
- old_p = Container.find old_pdx nl
- old_s = Container.find old_sdx nl
- tgt_n = Container.find new_pdx nl
- int_p = Node.removePri old_p inst
- int_s = Node.removeSec old_s inst
- force_s = Node.offline old_s
- new_nl = do -- Maybe monad
- new_p <- Node.addPri tgt_n inst
- new_s <- Node.addSecEx force_s int_p inst new_pdx
- let new_inst = Instance.setBoth inst new_pdx old_pdx
- return (Container.add new_pdx new_p $
- Container.addTwo old_pdx new_s old_sdx int_s nl,
- new_inst, new_pdx, old_pdx)
- in new_nl
+ let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
+ tgt_n = Container.find new_pdx nl
+ int_p = Node.removePri old_p inst
+ int_s = Node.removeSec old_s inst
+ force_s = Node.offline old_s
+ new_nl = do -- Maybe monad
+ new_p <- Node.addPri tgt_n inst
+ new_s <- Node.addSecEx force_s int_p inst new_pdx
+ let new_inst = Instance.setBoth inst new_pdx old_pdx
+ return (Container.add new_pdx new_p $
+ Container.addTwo old_pdx new_s old_sdx int_s nl,
+ new_inst, new_pdx, old_pdx)
+ in new_nl
-- Failver and replace the secondary (f, r:ns)
applyMove nl inst (FailoverAndReplace new_sdx) =
- let old_pdx = Instance.pNode inst
- old_sdx = Instance.sNode inst
- old_p = Container.find old_pdx nl
- old_s = Container.find old_sdx nl
- tgt_n = Container.find new_sdx nl
- int_p = Node.removePri old_p inst
- int_s = Node.removeSec old_s inst
- force_p = Node.offline old_p
- new_nl = do -- Maybe monad
- new_p <- Node.addPriEx force_p int_s inst
- new_s <- Node.addSecEx force_p tgt_n inst old_sdx
- let new_inst = Instance.setBoth inst old_sdx new_sdx
- return (Container.add new_sdx new_s $
- Container.addTwo old_sdx new_p old_pdx int_p nl,
- new_inst, old_sdx, new_sdx)
- in new_nl
+ let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
+ tgt_n = Container.find new_sdx nl
+ int_p = Node.removePri old_p inst
+ int_s = Node.removeSec old_s inst
+ force_p = Node.offline old_p
+ new_nl = do -- Maybe monad
+ new_p <- Node.addPriEx force_p int_s inst
+ new_s <- Node.addSecEx force_p tgt_n inst old_sdx
+ let new_inst = Instance.setBoth inst old_sdx new_sdx
+ return (Container.add new_sdx new_s $
+ Container.addTwo old_sdx new_p old_pdx int_p nl,
+ new_inst, old_sdx, new_sdx)
+ in new_nl
-- | Tries to allocate an instance on one given node.
allocateOnSingle :: Node.List -> Instance.Instance -> Ndx
-> OpResult Node.AllocElement
allocateOnSingle nl inst new_pdx =
- let p = Container.find new_pdx nl
- new_inst = Instance.setBoth inst new_pdx Node.noSecondary
- in Node.addPri p inst >>= \new_p -> do
- let new_nl = Container.add new_pdx new_p nl
- new_score = compCV nl
- return (new_nl, new_inst, [new_p], new_score)
+ let p = Container.find new_pdx nl
+ new_inst = Instance.setBoth inst new_pdx Node.noSecondary
+ in do
+ Instance.instMatchesPolicy inst (Node.iPolicy p)
+ new_p <- Node.addPri p inst
+ let new_nl = Container.add new_pdx new_p nl
+ new_score = compCV nl
+ return (new_nl, new_inst, [new_p], new_score)
-- | Tries to allocate an instance on a given pair of nodes.
allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx
-> OpResult Node.AllocElement
allocateOnPair nl inst new_pdx new_sdx =
- let tgt_p = Container.find new_pdx nl
- tgt_s = Container.find new_sdx nl
- in do
- new_p <- Node.addPri tgt_p inst
- new_s <- Node.addSec tgt_s inst new_pdx
- let new_inst = Instance.setBoth inst new_pdx new_sdx
- new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
- return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
+ let tgt_p = Container.find new_pdx nl
+ tgt_s = Container.find new_sdx nl
+ in do
+ Instance.instMatchesPolicy inst (Node.iPolicy tgt_p)
+ new_p <- Node.addPri tgt_p inst
+ new_s <- Node.addSec tgt_s inst new_pdx
+ let new_inst = Instance.setBoth inst new_pdx new_sdx
+ new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
+ return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
-- | Tries to perform an instance move and returns the best table
-- between the original one and the new one.
-> IMove -- ^ The move to apply
-> Table -- ^ The final best table
checkSingleStep ini_tbl target cur_tbl move =
- let
- Table ini_nl ini_il _ ini_plc = ini_tbl
- tmp_resu = applyMove ini_nl target move
- in
- case tmp_resu of
- OpFail _ -> cur_tbl
- OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
- let tgt_idx = Instance.idx target
- upd_cvar = compCV upd_nl
- upd_il = Container.add tgt_idx new_inst ini_il
- upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
- upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
- in
- compareTables cur_tbl upd_tbl
+ let Table ini_nl ini_il _ ini_plc = ini_tbl
+ tmp_resu = applyMove ini_nl target move
+ in case tmp_resu of
+ OpFail _ -> cur_tbl
+ OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
+ let tgt_idx = Instance.idx target
+ upd_cvar = compCV upd_nl
+ upd_il = Container.add tgt_idx new_inst ini_il
+ upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
+ upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
+ in compareTables cur_tbl upd_tbl
-- | Given the status of the current secondary as a valid new node and
-- the current candidate target node, generate the possible moves for
-> [IMove] -- ^ List of valid result moves
possibleMoves _ False tdx =
- [ReplaceSecondary tdx]
+ [ReplaceSecondary tdx]
possibleMoves True True tdx =
- [ReplaceSecondary tdx,
- ReplaceAndFailover tdx,
- ReplacePrimary tdx,
- FailoverAndReplace tdx]
+ [ ReplaceSecondary tdx
+ , ReplaceAndFailover tdx
+ , ReplacePrimary tdx
+ , FailoverAndReplace tdx
+ ]
possibleMoves False True tdx =
- [ReplaceSecondary tdx,
- ReplaceAndFailover tdx]
+ [ ReplaceSecondary tdx
+ , ReplaceAndFailover tdx
+ ]
-- | Compute the best move for a given instance.
checkInstanceMove :: [Ndx] -- ^ Allowed target node indices
-> Instance.Instance -- ^ Instance to move
-> Table -- ^ Best new table for this instance
checkInstanceMove nodes_idx disk_moves inst_moves ini_tbl target =
- let
- opdx = Instance.pNode target
- osdx = Instance.sNode target
- nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
- use_secondary = elem osdx nodes_idx && inst_moves
- aft_failover = if use_secondary -- if allowed to failover
+ let opdx = Instance.pNode target
+ osdx = Instance.sNode target
+ bad_nodes = [opdx, osdx]
+ nodes = filter (`notElem` bad_nodes) nodes_idx
+ use_secondary = elem osdx nodes_idx && inst_moves
+ aft_failover = if use_secondary -- if allowed to failover
then checkSingleStep ini_tbl target ini_tbl Failover
else ini_tbl
- all_moves = if disk_moves
+ all_moves = if disk_moves
then concatMap
- (possibleMoves use_secondary inst_moves) nodes
+ (possibleMoves use_secondary inst_moves) nodes
else []
in
-- iterate over the possible nodes for this instance
-> [Instance.Instance] -- ^ List of instances still to move
-> Table -- ^ The new solution
checkMove nodes_idx disk_moves inst_moves ini_tbl victims =
- let Table _ _ _ ini_plc = ini_tbl
- -- we're using rwhnf from the Control.Parallel.Strategies
- -- package; we don't need to use rnf as that would force too
- -- much evaluation in single-threaded cases, and in
- -- multi-threaded case the weak head normal form is enough to
- -- spark the evaluation
- tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves
- inst_moves ini_tbl)
- victims
- -- iterate over all instances, computing the best move
- best_tbl = foldl' compareTables ini_tbl tables
- Table _ _ _ best_plc = best_tbl
- in if length best_plc == length ini_plc
+ let Table _ _ _ ini_plc = ini_tbl
+ -- we're using rwhnf from the Control.Parallel.Strategies
+ -- package; we don't need to use rnf as that would force too
+ -- much evaluation in single-threaded cases, and in
+ -- multi-threaded case the weak head normal form is enough to
+ -- spark the evaluation
+ tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves
+ inst_moves ini_tbl)
+ victims
+ -- iterate over all instances, computing the best move
+ best_tbl = foldl' compareTables ini_tbl tables
+ Table _ _ _ best_plc = best_tbl
+ in if length best_plc == length ini_plc
then ini_tbl -- no advancement
else best_tbl
-> Score -- ^ Score at which to stop
-> Bool -- ^ The resulting table and commands
doNextBalance ini_tbl max_rounds min_score =
- let Table _ _ ini_cv ini_plc = ini_tbl
- ini_plc_len = length ini_plc
- in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
+ let Table _ _ ini_cv ini_plc = ini_tbl
+ ini_plc_len = length ini_plc
+ in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
-- | Run a balance move.
tryBalance :: Table -- ^ The starting table
bestAllocElement a Nothing = a
bestAllocElement Nothing b = b
bestAllocElement a@(Just (_, _, _, ascore)) b@(Just (_, _, _, bscore)) =
- if ascore < bscore then a else b
+ if ascore < bscore then a else b
-- | Update current Allocation solution and failure stats with new
-- elements.
concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
concatAllocs as (OpGood ns) =
- let -- Choose the old or new solution, based on the cluster score
- cntok = asAllocs as
- osols = asSolution as
- nsols = bestAllocElement osols (Just ns)
- nsuc = cntok + 1
+ let -- Choose the old or new solution, based on the cluster score
+ cntok = asAllocs as
+ osols = asSolution as
+ nsols = bestAllocElement osols (Just ns)
+ nsuc = cntok + 1
-- Note: we force evaluation of nsols here in order to keep the
-- memory profile low - we know that we will need nsols for sure
-- in the next cycle, so we force evaluation of nsols, since the
-- foldl' in the caller will only evaluate the tuple, but not the
-- elements of the tuple
- in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolution = nsols }
+ in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolution = nsols }
-- | Sums two 'AllocSolution' structures.
sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
sumAllocs (AllocSolution aFails aAllocs aSols aLog)
(AllocSolution bFails bAllocs bSols bLog) =
- -- note: we add b first, since usually it will be smaller; when
- -- fold'ing, a will grow and grow whereas b is the per-group
- -- result, hence smaller
- let nFails = bFails ++ aFails
- nAllocs = aAllocs + bAllocs
- nSols = bestAllocElement aSols bSols
- nLog = bLog ++ aLog
- in AllocSolution nFails nAllocs nSols nLog
+ -- note: we add b first, since usually it will be smaller; when
+ -- fold'ing, a will grow and grow whereas b is the per-group
+ -- result, hence smaller
+ let nFails = bFails ++ aFails
+ nAllocs = aAllocs + bAllocs
+ nSols = bestAllocElement aSols bSols
+ nLog = bLog ++ aLog
+ in AllocSolution nFails nAllocs nSols nLog
-- | Given a solution, generates a reasonable description for it.
describeSolution :: AllocSolution -> String
-- for proper jobset execution, we should reverse all lists.
reverseEvacSolution :: EvacSolution -> EvacSolution
reverseEvacSolution (EvacSolution f m o) =
- EvacSolution (reverse f) (reverse m) (reverse o)
+ EvacSolution (reverse f) (reverse m) (reverse o)
-- | Generate the valid node allocation singles or pairs for a new instance.
genAllocNodes :: Group.List -- ^ Group list
-- unallocable nodes
-> Result AllocNodes -- ^ The (monadic) result
genAllocNodes gl nl count drop_unalloc =
- let filter_fn = if drop_unalloc
+ let filter_fn = if drop_unalloc
then filter (Group.isAllocable .
flip Container.find gl . Node.group)
else id
- all_nodes = filter_fn $ getOnline nl
- all_pairs = [(Node.idx p,
- [Node.idx s | s <- all_nodes,
- Node.idx p /= Node.idx s,
- Node.group p == Node.group s]) |
- p <- all_nodes]
- in case count of
- 1 -> Ok (Left (map Node.idx all_nodes))
- 2 -> Ok (Right (filter (not . null . snd) all_pairs))
- _ -> Bad "Unsupported number of nodes, only one or two supported"
+ all_nodes = filter_fn $ getOnline nl
+ all_pairs = [(Node.idx p,
+ [Node.idx s | s <- all_nodes,
+ Node.idx p /= Node.idx s,
+ Node.group p == Node.group s]) |
+ p <- all_nodes]
+ in case count of
+ 1 -> Ok (Left (map Node.idx all_nodes))
+ 2 -> Ok (Right (filter (not . null . snd) all_pairs))
+ _ -> Bad "Unsupported number of nodes, only one or two supported"
-- | Try to allocate an instance on the cluster.
tryAlloc :: (Monad m) =>
-> m AllocSolution -- ^ Possible solution list
tryAlloc _ _ _ (Right []) = fail "Not enough online nodes"
tryAlloc nl _ inst (Right ok_pairs) =
- let psols = parMap rwhnf (\(p, ss) ->
- foldl' (\cstate ->
- concatAllocs cstate .
- allocateOnPair nl inst p)
- emptyAllocSolution ss) ok_pairs
- sols = foldl' sumAllocs emptyAllocSolution psols
- in return $ annotateSolution sols
+ let psols = parMap rwhnf (\(p, ss) ->
+ foldl' (\cstate ->
+ concatAllocs cstate .
+ allocateOnPair nl inst p)
+ emptyAllocSolution ss) ok_pairs
+ sols = foldl' sumAllocs emptyAllocSolution psols
+ in return $ annotateSolution sols
tryAlloc _ _ _ (Left []) = fail "No online nodes"
tryAlloc nl _ inst (Left all_nodes) =
- let sols = foldl' (\cstate ->
- concatAllocs cstate . allocateOnSingle nl inst
- ) emptyAllocSolution all_nodes
- in return $ annotateSolution sols
+ let sols = foldl' (\cstate ->
+ concatAllocs cstate . allocateOnSingle nl inst
+ ) emptyAllocSolution all_nodes
+ in return $ annotateSolution sols
-- | Given a group/result, describe it as a nice (list of) messages.
solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
Bad message -> [printf "Group %s: error %s" gname message]
where grp = Container.find groupId gl
gname = Group.name grp
- pol = allocPolicyToString (Group.allocPolicy grp)
+ pol = allocPolicyToRaw (Group.allocPolicy grp)
-- | From a list of possibly bad and possibly empty solutions, filter
-- only the groups with a valid result. Note that the result will be
-> [(Gdx, Result AllocSolution)]
-> [(Gdx, AllocSolution)]
filterMGResults gl = foldl' fn []
- where unallocable = not . Group.isAllocable . flip Container.find gl
- fn accu (gdx, rasol) =
- case rasol of
- Bad _ -> accu
- Ok sol | isNothing (asSolution sol) -> accu
- | unallocable gdx -> accu
- | otherwise -> (gdx, sol):accu
+ where unallocable = not . Group.isAllocable . flip Container.find gl
+ fn accu (gdx, rasol) =
+ case rasol of
+ Bad _ -> accu
+ Ok sol | isNothing (asSolution sol) -> accu
+ | unallocable gdx -> accu
+ | otherwise -> (gdx, sol):accu
-- | Sort multigroup results based on policy and score.
sortMGResults :: Group.List
-> [(Gdx, AllocSolution)]
-> [(Gdx, AllocSolution)]
sortMGResults gl sols =
- let extractScore (_, _, _, x) = x
- solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
- (extractScore . fromJust . asSolution) sol)
- in sortBy (comparing solScore) sols
+ let extractScore (_, _, _, x) = x
+ solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
+ (extractScore . fromJust . asSolution) sol)
+ in sortBy (comparing solScore) sols
-- | Finds the best group for an instance on a multi-group cluster.
--
goodSols = filterMGResults mggl sols
sortedSols = sortMGResults mggl goodSols
in if null sortedSols
- then Bad $ intercalate ", " all_msgs
- else let (final_group, final_sol) = head sortedSols
- in return (final_group, final_sol, all_msgs)
+ then Bad $ intercalate ", " all_msgs
+ else let (final_group, final_sol) = head sortedSols
+ in return (final_group, final_sol, all_msgs)
-- | Try to allocate an instance on a multi-group cluster.
tryMGAlloc :: Group.List -- ^ The group list
-> [Ndx] -- ^ Nodes which should not be used
-> m AllocSolution -- ^ Solution list
tryReloc nl il xid 1 ex_idx =
- let all_nodes = getOnline nl
- inst = Container.find xid il
- ex_idx' = Instance.pNode inst:ex_idx
- valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
- valid_idxes = map Node.idx valid_nodes
- sols1 = foldl' (\cstate x ->
- let em = do
- (mnl, i, _, _) <-
- applyMove nl inst (ReplaceSecondary x)
- return (mnl, i, [Container.find x mnl],
- compCV mnl)
- in concatAllocs cstate em
- ) emptyAllocSolution valid_idxes
- in return sols1
+ let all_nodes = getOnline nl
+ inst = Container.find xid il
+ ex_idx' = Instance.pNode inst:ex_idx
+ valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
+ valid_idxes = map Node.idx valid_nodes
+ sols1 = foldl' (\cstate x ->
+ let em = do
+ (mnl, i, _, _) <-
+ applyMove nl inst (ReplaceSecondary x)
+ return (mnl, i, [Container.find x mnl],
+ compCV mnl)
+ in concatAllocs cstate em
+ ) emptyAllocSolution valid_idxes
+ in return sols1
tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \
\destinations required (" ++ show reqn ++
-- this function, whatever mode we have is just a primary change.
failOnSecondaryChange :: (Monad m) => EvacMode -> DiskTemplate -> m ()
failOnSecondaryChange ChangeSecondary dt =
- fail $ "Instances with disk template '" ++ diskTemplateToString dt ++
+ fail $ "Instances with disk template '" ++ diskTemplateToRaw dt ++
"' can't execute change secondary"
failOnSecondaryChange _ _ = return ()
, Score
, Ndx) -- ^ New best solution
evacDrbdSecondaryInner nl inst gdx accu ndx =
- case applyMove nl inst (ReplaceSecondary ndx) of
- OpFail fm ->
- case accu of
- Right _ -> accu
- Left _ -> Left $ "Node " ++ Container.nameOf nl ndx ++
- " failed: " ++ show fm
- OpGood (nl', inst', _, _) ->
- let nodes = Container.elems nl'
- -- The fromJust below is ugly (it can fail nastily), but
- -- at this point we should have any internal mismatches,
- -- and adding a monad here would be quite involved
- grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
- new_cv = compCVNodes grpnodes
- new_accu = Right (nl', inst', new_cv, ndx)
- in case accu of
- Left _ -> new_accu
- Right (_, _, old_cv, _) ->
- if old_cv < new_cv
- then accu
- else new_accu
+ case applyMove nl inst (ReplaceSecondary ndx) of
+ OpFail fm ->
+ case accu of
+ Right _ -> accu
+ Left _ -> Left $ "Node " ++ Container.nameOf nl ndx ++
+ " failed: " ++ show fm
+ OpGood (nl', inst', _, _) ->
+ let nodes = Container.elems nl'
+ -- The fromJust below is ugly (it can fail nastily), but
+ -- at this point we should have any internal mismatches,
+ -- and adding a monad here would be quite involved
+ grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
+ new_cv = compCVNodes grpnodes
+ new_accu = Right (nl', inst', new_cv, ndx)
+ in case accu of
+ Left _ -> new_accu
+ Right (_, _, old_cv, _) ->
+ if old_cv < new_cv
+ then accu
+ else new_accu
-- | Compute result of changing all nodes of a DRBD instance.
--
-> (Ndx, Ndx) -- ^ Tuple of new
-- primary\/secondary nodes
-> Result (Node.List, Instance.List, [OpCodes.OpCode], Score)
-evacDrbdAllInner nl il inst gdx (t_pdx, t_sdx) =
- do
- let primary = Container.find (Instance.pNode inst) nl
- idx = Instance.idx inst
- -- if the primary is offline, then we first failover
- (nl1, inst1, ops1) <-
- if Node.offline primary
- then do
- (nl', inst', _, _) <-
- annotateResult "Failing over to the secondary" $
- opToResult $ applyMove nl inst Failover
- return (nl', inst', [Failover])
- else return (nl, inst, [])
- let (o1, o2, o3) = (ReplaceSecondary t_pdx,
- Failover,
- ReplaceSecondary t_sdx)
- -- we now need to execute a replace secondary to the future
- -- primary node
- (nl2, inst2, _, _) <-
- annotateResult "Changing secondary to new primary" $
- opToResult $
- applyMove nl1 inst1 o1
- let ops2 = o1:ops1
- -- we now execute another failover, the primary stays fixed now
- (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
- opToResult $ applyMove nl2 inst2 o2
- let ops3 = o2:ops2
- -- and finally another replace secondary, to the final secondary
- (nl4, inst4, _, _) <-
- annotateResult "Changing secondary to final secondary" $
- opToResult $
- applyMove nl3 inst3 o3
- let ops4 = o3:ops3
- il' = Container.add idx inst4 il
- ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
- let nodes = Container.elems nl4
- -- The fromJust below is ugly (it can fail nastily), but
- -- at this point we should have any internal mismatches,
- -- and adding a monad here would be quite involved
- grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
- new_cv = compCVNodes grpnodes
- return (nl4, il', ops, new_cv)
+evacDrbdAllInner nl il inst gdx (t_pdx, t_sdx) = do
+ let primary = Container.find (Instance.pNode inst) nl
+ idx = Instance.idx inst
+ -- if the primary is offline, then we first failover
+ (nl1, inst1, ops1) <-
+ if Node.offline primary
+ then do
+ (nl', inst', _, _) <-
+ annotateResult "Failing over to the secondary" $
+ opToResult $ applyMove nl inst Failover
+ return (nl', inst', [Failover])
+ else return (nl, inst, [])
+ let (o1, o2, o3) = (ReplaceSecondary t_pdx,
+ Failover,
+ ReplaceSecondary t_sdx)
+ -- we now need to execute a replace secondary to the future
+ -- primary node
+ (nl2, inst2, _, _) <-
+ annotateResult "Changing secondary to new primary" $
+ opToResult $
+ applyMove nl1 inst1 o1
+ let ops2 = o1:ops1
+ -- we now execute another failover, the primary stays fixed now
+ (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
+ opToResult $ applyMove nl2 inst2 o2
+ let ops3 = o2:ops2
+ -- and finally another replace secondary, to the final secondary
+ (nl4, inst4, _, _) <-
+ annotateResult "Changing secondary to final secondary" $
+ opToResult $
+ applyMove nl3 inst3 o3
+ let ops4 = o3:ops3
+ il' = Container.add idx inst4 il
+ ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
+ let nodes = Container.elems nl4
+ -- The fromJust below is ugly (it can fail nastily), but
+ -- at this point we should have any internal mismatches,
+ -- and adding a monad here would be quite involved
+ grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
+ new_cv = compCVNodes grpnodes
+ return (nl4, il', ops, new_cv)
-- | Computes the nodes in a given group which are available for
-- allocation.
-> Result (Node.List, Instance.List, [OpCodes.OpCode])
-> (Node.List, Instance.List, EvacSolution)
updateEvacSolution (nl, il, es) idx (Bad msg) =
- (nl, il, es { esFailed = (idx, msg):esFailed es})
+ (nl, il, es { esFailed = (idx, msg):esFailed es})
updateEvacSolution (_, _, es) idx (Ok (nl, il, opcodes)) =
- (nl, il, es { esMoved = new_elem:esMoved es
- , esOpCodes = opcodes:esOpCodes es })
- where inst = Container.find idx il
- new_elem = (idx,
- instancePriGroup nl inst,
- Instance.allNodes inst)
+ (nl, il, es { esMoved = new_elem:esMoved es
+ , esOpCodes = opcodes:esOpCodes es })
+ where inst = Container.find idx il
+ new_elem = (idx,
+ instancePriGroup nl inst,
+ Instance.allNodes inst)
-- | Node-evacuation IAllocator mode main function.
tryNodeEvac :: Group.List -- ^ The cluster groups
-> [Idx] -- ^ List of instance (indices) to be evacuated
-> Result (Node.List, Instance.List, EvacSolution)
tryNodeEvac _ ini_nl ini_il mode idxs =
- let evac_ndx = nodesToEvacuate ini_il mode idxs
- offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
- excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline
- group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
- (Container.elems nl))) $
- splitCluster ini_nl ini_il
- (fin_nl, fin_il, esol) =
- foldl' (\state@(nl, il, _) inst ->
- let gdx = instancePriGroup nl inst
- pdx = Instance.pNode inst in
- updateEvacSolution state (Instance.idx inst) $
- availableGroupNodes group_ndx
- (IntSet.insert pdx excl_ndx) gdx >>=
- nodeEvacInstance nl il mode inst gdx
- )
- (ini_nl, ini_il, emptyEvacSolution)
- (map (`Container.find` ini_il) idxs)
- in return (fin_nl, fin_il, reverseEvacSolution esol)
+ let evac_ndx = nodesToEvacuate ini_il mode idxs
+ offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
+ excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline
+ group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
+ (Container.elems nl))) $
+ splitCluster ini_nl ini_il
+ (fin_nl, fin_il, esol) =
+ foldl' (\state@(nl, il, _) inst ->
+ let gdx = instancePriGroup nl inst
+ pdx = Instance.pNode inst in
+ updateEvacSolution state (Instance.idx inst) $
+ availableGroupNodes group_ndx
+ (IntSet.insert pdx excl_ndx) gdx >>=
+ nodeEvacInstance nl il mode inst gdx
+ )
+ (ini_nl, ini_il, emptyEvacSolution)
+ (map (`Container.find` ini_il) idxs)
+ in return (fin_nl, fin_il, reverseEvacSolution esol)
-- | Change-group IAllocator mode main function.
--
-> [Idx] -- ^ List of instance (indices) to be evacuated
-> Result (Node.List, Instance.List, EvacSolution)
tryChangeGroup gl ini_nl ini_il gdxs idxs =
- let evac_gdxs = nub $ map (instancePriGroup ini_nl .
- flip Container.find ini_il) idxs
- target_gdxs = (if null gdxs
+ let evac_gdxs = nub $ map (instancePriGroup ini_nl .
+ flip Container.find ini_il) idxs
+ target_gdxs = (if null gdxs
then Container.keys gl
else gdxs) \\ evac_gdxs
- offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
- excl_ndx = foldl' (flip IntSet.insert) IntSet.empty offline
- group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
- (Container.elems nl))) $
- splitCluster ini_nl ini_il
- (fin_nl, fin_il, esol) =
- foldl' (\state@(nl, il, _) inst ->
- let solution = do
- let ncnt = Instance.requiredNodes $
- Instance.diskTemplate inst
- (gdx, _, _) <- findBestAllocGroup gl nl il
- (Just target_gdxs) inst ncnt
- av_nodes <- availableGroupNodes group_ndx
- excl_ndx gdx
- nodeEvacInstance nl il ChangeAll inst
- gdx av_nodes
- in updateEvacSolution state
- (Instance.idx inst) solution
- )
- (ini_nl, ini_il, emptyEvacSolution)
- (map (`Container.find` ini_il) idxs)
- in return (fin_nl, fin_il, reverseEvacSolution esol)
-
--- | Recursively place instances on the cluster until we're out of space.
-iterateAlloc :: Node.List
- -> Instance.List
- -> Maybe Int
- -> Instance.Instance
- -> AllocNodes
- -> [Instance.Instance]
- -> [CStats]
- -> Result AllocResult
+ offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
+ excl_ndx = foldl' (flip IntSet.insert) IntSet.empty offline
+ group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
+ (Container.elems nl))) $
+ splitCluster ini_nl ini_il
+ (fin_nl, fin_il, esol) =
+ foldl' (\state@(nl, il, _) inst ->
+ let solution = do
+ let ncnt = Instance.requiredNodes $
+ Instance.diskTemplate inst
+ (gdx, _, _) <- findBestAllocGroup gl nl il
+ (Just target_gdxs) inst ncnt
+ av_nodes <- availableGroupNodes group_ndx
+ excl_ndx gdx
+ nodeEvacInstance nl il ChangeAll inst gdx av_nodes
+ in updateEvacSolution state (Instance.idx inst) solution
+ )
+ (ini_nl, ini_il, emptyEvacSolution)
+ (map (`Container.find` ini_il) idxs)
+ in return (fin_nl, fin_il, reverseEvacSolution esol)
+
+-- | Standard-sized allocation method.
+--
+-- This places instances of the same size on the cluster until we're
+-- out of space. The result will be a list of identically-sized
+-- instances.
+iterateAlloc :: AllocMethod
iterateAlloc nl il limit newinst allocnodes ixes cstats =
- let depth = length ixes
- newname = printf "new-%d" depth::String
- newidx = length (Container.elems il) + depth
- newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
- newlimit = fmap (flip (-) 1) limit
- in case tryAlloc nl il newi2 allocnodes of
- Bad s -> Bad s
- Ok (AllocSolution { asFailures = errs, asSolution = sols3 }) ->
- let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in
- case sols3 of
- Nothing -> newsol
- Just (xnl, xi, _, _) ->
- if limit == Just 0
- then newsol
- else iterateAlloc xnl (Container.add newidx xi il)
- newlimit newinst allocnodes (xi:ixes)
- (totalResources xnl:cstats)
-
--- | The core of the tiered allocation mode.
-tieredAlloc :: Node.List
- -> Instance.List
- -> Maybe Int
- -> Instance.Instance
- -> AllocNodes
- -> [Instance.Instance]
- -> [CStats]
- -> Result AllocResult
+ let depth = length ixes
+ newname = printf "new-%d" depth::String
+ newidx = Container.size il + depth
+ newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
+ newlimit = fmap (flip (-) 1) limit
+ in case tryAlloc nl il newi2 allocnodes of
+ Bad s -> Bad s
+ Ok (AllocSolution { asFailures = errs, asSolution = sols3 }) ->
+ let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in
+ case sols3 of
+ Nothing -> newsol
+ Just (xnl, xi, _, _) ->
+ if limit == Just 0
+ then newsol
+ else iterateAlloc xnl (Container.add newidx xi il)
+ newlimit newinst allocnodes (xi:ixes)
+ (totalResources xnl:cstats)
+
+-- | Tiered allocation method.
+--
+-- This places instances on the cluster, and decreases the spec until
+-- we can allocate again. The result will be a list of decreasing
+-- instance specs.
+tieredAlloc :: AllocMethod
tieredAlloc nl il limit newinst allocnodes ixes cstats =
- case iterateAlloc nl il limit newinst allocnodes ixes cstats of
- Bad s -> Bad s
- Ok (errs, nl', il', ixes', cstats') ->
- let newsol = Ok (errs, nl', il', ixes', cstats')
- ixes_cnt = length ixes'
- (stop, newlimit) = case limit of
- Nothing -> (False, Nothing)
- Just n -> (n <= ixes_cnt,
- Just (n - ixes_cnt)) in
- if stop then newsol else
+ case iterateAlloc nl il limit newinst allocnodes ixes cstats of
+ Bad s -> Bad s
+ Ok (errs, nl', il', ixes', cstats') ->
+ let newsol = Ok (errs, nl', il', ixes', cstats')
+ ixes_cnt = length ixes'
+ (stop, newlimit) = case limit of
+ Nothing -> (False, Nothing)
+ Just n -> (n <= ixes_cnt,
+ Just (n - ixes_cnt)) in
+ if stop then newsol else
case Instance.shrinkByType newinst . fst . last $
sortBy (comparing snd) errs of
Bad _ -> newsol
-- secondary, while the command list holds gnt-instance
-- commands (without that prefix), e.g \"@failover instance1@\"
computeMoves i inam mv c d =
- case mv of
- Failover -> ("f", [mig])
- FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
- ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
- ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
- ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
- where morf = if Instance.running i then "migrate" else "failover"
- mig = printf "%s -f %s" morf inam::String
- rep n = printf "replace-disks -n %s %s" n inam
+ case mv of
+ Failover -> ("f", [mig])
+ FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
+ ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
+ ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
+ ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
+ where morf = if Instance.instanceRunning i then "migrate" else "failover"
+ mig = printf "%s -f %s" morf inam::String
+ rep n = printf "replace-disks -n %s %s" n inam
-- | Converts a placement to string format.
printSolutionLine :: Node.List -- ^ The node list
-- the solution
-> (String, [String])
printSolutionLine nl il nmlen imlen plc pos =
- let
- pmlen = (2*nmlen + 1)
- (i, p, s, mv, c) = plc
- inst = Container.find i il
- inam = Instance.alias inst
- npri = Node.alias $ Container.find p nl
- nsec = Node.alias $ Container.find s nl
- opri = Node.alias $ Container.find (Instance.pNode inst) nl
- osec = Node.alias $ Container.find (Instance.sNode inst) nl
- (moves, cmds) = computeMoves inst inam mv npri nsec
- ostr = printf "%s:%s" opri osec::String
- nstr = printf "%s:%s" npri nsec::String
- in
- (printf " %3d. %-*s %-*s => %-*s %.8f a=%s"
- pos imlen inam pmlen ostr
- pmlen nstr c moves,
- cmds)
+ let pmlen = (2*nmlen + 1)
+ (i, p, s, mv, c) = plc
+ inst = Container.find i il
+ inam = Instance.alias inst
+ npri = Node.alias $ Container.find p nl
+ nsec = Node.alias $ Container.find s nl
+ opri = Node.alias $ Container.find (Instance.pNode inst) nl
+ osec = Node.alias $ Container.find (Instance.sNode inst) nl
+ (moves, cmds) = computeMoves inst inam mv npri nsec
+ ostr = printf "%s:%s" opri osec::String
+ nstr = printf "%s:%s" npri nsec::String
+ in (printf " %3d. %-*s %-*s => %-*s %.8f a=%s"
+ pos imlen inam pmlen ostr
+ pmlen nstr c moves,
+ cmds)
-- | Return the instance and involved nodes in an instance move.
--
-- instance index
-> [Ndx] -- ^ Resulting list of node indices
involvedNodes il plc =
- let (i, np, ns, _, _) = plc
- inst = Container.find i il
- in nub $ [np, ns] ++ Instance.allNodes inst
+ let (i, np, ns, _, _) = plc
+ inst = Container.find i il
+ in nub $ [np, ns] ++ Instance.allNodes inst
-- | Inner function for splitJobs, that either appends the next job to
-- the current jobset, or starts a new jobset.
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
- | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
- | otherwise = ([n]:cjs, ndx)
+ | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
+ | otherwise = ([n]:cjs, ndx)
-- | Break a list of moves into independent groups. Note that this
-- will reverse the order of jobs.
-- also beautify the display a little.
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
formatJob jsn jsl (sn, (_, _, _, cmds)) =
- let out =
- printf " echo job %d/%d" jsn sn:
- printf " check":
- map (" gnt-instance " ++) cmds
- in if sn == 1
+ let out =
+ printf " echo job %d/%d" jsn sn:
+ printf " check":
+ map (" gnt-instance " ++) cmds
+ in if sn == 1
then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
else out
-- also beautify the display a little.
formatCmds :: [JobSet] -> String
formatCmds =
- unlines .
- concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
- (zip [1..] js)) .
- zip [1..]
+ unlines .
+ concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
+ (zip [1..] js)) .
+ zip [1..]
-- | Print the node list.
printNodes :: Node.List -> [String] -> String
printNodes nl fs =
- let fields = case fs of
- [] -> Node.defaultFields
- "+":rest -> Node.defaultFields ++ rest
- _ -> fs
- snl = sortBy (comparing Node.idx) (Container.elems nl)
- (header, isnum) = unzip $ map Node.showHeader fields
- in unlines . map ((:) ' ' . intercalate " ") $
- formatTable (header:map (Node.list fields) snl) isnum
+ let fields = case fs of
+ [] -> Node.defaultFields
+ "+":rest -> Node.defaultFields ++ rest
+ _ -> fs
+ snl = sortBy (comparing Node.idx) (Container.elems nl)
+ (header, isnum) = unzip $ map Node.showHeader fields
+ in unlines . map ((:) ' ' . unwords) $
+ formatTable (header:map (Node.list fields) snl) isnum
-- | Print the instance list.
printInsts :: Node.List -> Instance.List -> String
printInsts nl il =
- let sil = sortBy (comparing Instance.idx) (Container.elems il)
- helper inst = [ if Instance.running inst then "R" else " "
- , Instance.name inst
- , Container.nameOf nl (Instance.pNode inst)
- , let sdx = Instance.sNode inst
- in if sdx == Node.noSecondary
+ let sil = sortBy (comparing Instance.idx) (Container.elems il)
+ helper inst = [ if Instance.instanceRunning inst then "R" else " "
+ , Instance.name inst
+ , Container.nameOf nl (Instance.pNode inst)
+ , let sdx = Instance.sNode inst
+ in if sdx == Node.noSecondary
then ""
else Container.nameOf nl sdx
- , if Instance.autoBalance inst then "Y" else "N"
- , printf "%3d" $ Instance.vcpus inst
- , printf "%5d" $ Instance.mem inst
- , printf "%5d" $ Instance.dsk inst `div` 1024
- , printf "%5.3f" lC
- , printf "%5.3f" lM
- , printf "%5.3f" lD
- , printf "%5.3f" lN
- ]
- where DynUtil lC lM lD lN = Instance.util inst
- header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
- , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
- isnum = False:False:False:False:False:repeat True
- in unlines . map ((:) ' ' . intercalate " ") $
- formatTable (header:map helper sil) isnum
+ , if Instance.autoBalance inst then "Y" else "N"
+ , printf "%3d" $ Instance.vcpus inst
+ , printf "%5d" $ Instance.mem inst
+ , printf "%5d" $ Instance.dsk inst `div` 1024
+ , printf "%5.3f" lC
+ , printf "%5.3f" lM
+ , printf "%5.3f" lD
+ , printf "%5.3f" lN
+ ]
+ where DynUtil lC lM lD lN = Instance.util inst
+ header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
+ , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
+ isnum = False:False:False:False:False:repeat True
+ in unlines . map ((:) ' ' . unwords) $
+ formatTable (header:map helper sil) isnum
-- | Shows statistics for a given node list.
printStats :: Node.List -> String
printStats nl =
- let dcvs = compDetailedCV $ Container.elems nl
- (weights, names) = unzip detailedCVInfo
- hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
- formatted = map (\(w, header, val) ->
- printf "%s=%.8f(x%.2f)" header val w::String) hd
- in intercalate ", " formatted
+ let dcvs = compDetailedCV $ Container.elems nl
+ (weights, names) = unzip detailedCVInfo
+ hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
+ formatted = map (\(w, header, val) ->
+ printf "%s=%.8f(x%.2f)" header val w::String) hd
+ in intercalate ", " formatted
-- | Convert a placement into a list of OpCodes (basically a job).
iMoveToJob :: Node.List -- ^ The node list; only used for node
-> [OpCodes.OpCode] -- ^ The list of opcodes equivalent to
-- the given move
iMoveToJob nl il idx move =
- let inst = Container.find idx il
- iname = Instance.name inst
- lookNode = Just . Container.nameOf nl
- opF = OpCodes.OpInstanceMigrate iname True False True Nothing
- opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
- OpCodes.ReplaceNewSecondary [] Nothing
- in case move of
- Failover -> [ opF ]
- ReplacePrimary np -> [ opF, opR np, opF ]
- ReplaceSecondary ns -> [ opR ns ]
- ReplaceAndFailover np -> [ opR np, opF ]
- FailoverAndReplace ns -> [ opF, opR ns ]
+ let inst = Container.find idx il
+ iname = Instance.name inst
+ lookNode = Just . Container.nameOf nl
+ opF = OpCodes.OpInstanceMigrate iname True False True Nothing
+ opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
+ OpCodes.ReplaceNewSecondary [] Nothing
+ in case move of
+ Failover -> [ opF ]
+ ReplacePrimary np -> [ opF, opR np, opF ]
+ ReplaceSecondary ns -> [ opR ns ]
+ ReplaceAndFailover np -> [ opR np, opF ]
+ FailoverAndReplace ns -> [ opF, opR ns ]
-- * Node group functions
pgroup = Node.group pnode
sgroup = Node.group snode
in if pgroup /= sgroup
- then fail ("Instance placed accross two node groups, primary " ++
- show pgroup ++ ", secondary " ++ show sgroup)
- else return pgroup
+ then fail ("Instance placed accross two node groups, primary " ++
+ show pgroup ++ ", secondary " ++ show sgroup)
+ else return pgroup
-- | Computes the group of an instance per the primary node.
instancePriGroup :: Node.List -> Instance.Instance -> Gdx
-> [Idx] -- ^ List of instance indices being evacuated
-> IntSet.IntSet -- ^ Set of node indices
nodesToEvacuate il mode =
- IntSet.delete Node.noSecondary .
- foldl' (\ns idx ->
- let i = Container.find idx il
- pdx = Instance.pNode i
- sdx = Instance.sNode i
- dt = Instance.diskTemplate i
- withSecondary = case dt of
- DTDrbd8 -> IntSet.insert sdx ns
- _ -> ns
- in case mode of
- ChangePrimary -> IntSet.insert pdx ns
- ChangeSecondary -> withSecondary
- ChangeAll -> IntSet.insert pdx withSecondary
- ) IntSet.empty
+ IntSet.delete Node.noSecondary .
+ foldl' (\ns idx ->
+ let i = Container.find idx il
+ pdx = Instance.pNode i
+ sdx = Instance.sNode i
+ dt = Instance.diskTemplate i
+ withSecondary = case dt of
+ DTDrbd8 -> IntSet.insert sdx ns
+ _ -> ns
+ in case mode of
+ ChangePrimary -> IntSet.insert pdx ns
+ ChangeSecondary -> withSecondary
+ ChangeAll -> IntSet.insert pdx withSecondary
+ ) IntSet.empty