X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/009763045bcdc82d2ddeb9c9cf0716424050ba9c..2922d2c576d85983eb5f6ac82982b641aacc954b:/htools/Ganeti/HTools/Cluster.hs diff --git a/htools/Ganeti/HTools/Cluster.hs b/htools/Ganeti/HTools/Cluster.hs index 4b2a588..37af920 100644 --- a/htools/Ganeti/HTools/Cluster.hs +++ b/htools/Ganeti/HTools/Cluster.hs @@ -7,7 +7,7 @@ goes into the /Main/ module for the individual binaries. {- -Copyright (C) 2009, 2010, 2011 Google Inc. +Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,51 +27,51 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -} module Ganeti.HTools.Cluster - ( - -- * Types - AllocSolution(..) - , EvacSolution(..) - , Table(..) - , CStats(..) - , AllocStats - -- * Generic functions - , totalResources - , computeAllocationDelta - -- * First phase functions - , computeBadItems - -- * Second phase functions - , printSolutionLine - , formatCmds - , involvedNodes - , splitJobs - -- * Display functions - , printNodes - , printInsts - -- * Balacing functions - , checkMove - , doNextBalance - , tryBalance - , compCV - , compCVNodes - , compDetailedCV - , printStats - , iMoveToJob - -- * IAllocator functions - , genAllocNodes - , tryAlloc - , tryMGAlloc - , tryReloc - , tryNodeEvac - , tryChangeGroup - , collapseFailures - -- * Allocation functions - , iterateAlloc - , tieredAlloc - -- * Node group functions - , instanceGroup - , findSplitInstances - , splitCluster - ) where + ( + -- * Types + AllocSolution(..) + , EvacSolution(..) + , Table(..) + , CStats(..) + , AllocResult + , AllocMethod + -- * Generic functions + , totalResources + , computeAllocationDelta + -- * First phase functions + , computeBadItems + -- * Second phase functions + , printSolutionLine + , formatCmds + , involvedNodes + , splitJobs + -- * Display functions + , printNodes + , printInsts + -- * Balacing functions + , checkMove + , doNextBalance + , tryBalance + , compCV + , compCVNodes + , compDetailedCV + , printStats + , iMoveToJob + -- * IAllocator functions + , genAllocNodes + , tryAlloc + , tryMGAlloc + , tryNodeEvac + , tryChangeGroup + , collapseFailures + -- * Allocation functions + , iterateAlloc + , tieredAlloc + -- * Node group functions + , instanceGroup + , findSplitInstances + , splitCluster + ) where import qualified Data.IntSet as IntSet import Data.List @@ -102,11 +102,11 @@ data AllocSolution = AllocSolution -- type consists of actual opcodes (a restricted subset) that are -- transmitted back to Ganeti. data EvacSolution = EvacSolution - { esMoved :: [(Idx, Gdx, [Ndx])] -- ^ Instances moved successfully - , esFailed :: [(Idx, String)] -- ^ Instances which were not - -- relocated - , esOpCodes :: [[OpCodes.OpCode]] -- ^ List of jobs - } + { esMoved :: [(Idx, Gdx, [Ndx])] -- ^ Instances moved successfully + , esFailed :: [(Idx, String)] -- ^ Instances which were not + -- relocated + , esOpCodes :: [[OpCodes.OpCode]] -- ^ List of jobs + } deriving (Show) -- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'. type AllocResult = (FailStats, Node.List, Instance.List, @@ -138,32 +138,39 @@ data Table = Table Node.List Instance.List Score [Placement] deriving (Show, Read) -- | Cluster statistics data type. 
-data CStats = CStats { csFmem :: Integer -- ^ Cluster free mem - , csFdsk :: Integer -- ^ Cluster free disk - , csAmem :: Integer -- ^ Cluster allocatable mem - , csAdsk :: Integer -- ^ Cluster allocatable disk - , csAcpu :: Integer -- ^ Cluster allocatable cpus - , csMmem :: Integer -- ^ Max node allocatable mem - , csMdsk :: Integer -- ^ Max node allocatable disk - , csMcpu :: Integer -- ^ Max node allocatable cpu - , csImem :: Integer -- ^ Instance used mem - , csIdsk :: Integer -- ^ Instance used disk - , csIcpu :: Integer -- ^ Instance used cpu - , csTmem :: Double -- ^ Cluster total mem - , csTdsk :: Double -- ^ Cluster total disk - , csTcpu :: Double -- ^ Cluster total cpus - , csVcpu :: Integer -- ^ Cluster virtual cpus (if - -- node pCpu has been set, - -- otherwise -1) - , csXmem :: Integer -- ^ Unnacounted for mem - , csNmem :: Integer -- ^ Node own memory - , csScore :: Score -- ^ The cluster score - , csNinst :: Int -- ^ The total number of instances - } - deriving (Show, Read) - --- | Currently used, possibly to allocate, unallocable. -type AllocStats = (RSpec, RSpec, RSpec) +data CStats = CStats + { csFmem :: Integer -- ^ Cluster free mem + , csFdsk :: Integer -- ^ Cluster free disk + , csAmem :: Integer -- ^ Cluster allocatable mem + , csAdsk :: Integer -- ^ Cluster allocatable disk + , csAcpu :: Integer -- ^ Cluster allocatable cpus + , csMmem :: Integer -- ^ Max node allocatable mem + , csMdsk :: Integer -- ^ Max node allocatable disk + , csMcpu :: Integer -- ^ Max node allocatable cpu + , csImem :: Integer -- ^ Instance used mem + , csIdsk :: Integer -- ^ Instance used disk + , csIcpu :: Integer -- ^ Instance used cpu + , csTmem :: Double -- ^ Cluster total mem + , csTdsk :: Double -- ^ Cluster total disk + , csTcpu :: Double -- ^ Cluster total cpus + , csVcpu :: Integer -- ^ Cluster total virtual cpus + , csNcpu :: Double -- ^ Equivalent to 'csIcpu' but in terms of + -- physical CPUs, i.e. normalised used phys CPUs + , csXmem :: Integer -- ^ Unnacounted for mem + , csNmem :: Integer -- ^ Node own memory + , csScore :: Score -- ^ The cluster score + , csNinst :: Int -- ^ The total number of instances + } deriving (Show, Read) + +-- | A simple type for allocation functions. +type AllocMethod = Node.List -- ^ Node list + -> Instance.List -- ^ Instance list + -> Maybe Int -- ^ Optional allocation limit + -> Instance.Instance -- ^ Instance spec for allocation + -> AllocNodes -- ^ Which nodes we should allocate on + -> [Instance.Instance] -- ^ Allocated instances + -> [CStats] -- ^ Running cluster stats + -> Result AllocResult -- ^ Allocation result -- * Utility functions @@ -188,57 +195,71 @@ computeBadItems nl il = in (bad_nodes, bad_instances) +-- | Extracts the node pairs for an instance. This can fail if the +-- instance is single-homed. FIXME: this needs to be improved, +-- together with the general enhancement for handling non-DRBD moves. +instanceNodes :: Node.List -> Instance.Instance -> + (Ndx, Ndx, Node.Node, Node.Node) +instanceNodes nl inst = + let old_pdx = Instance.pNode inst + old_sdx = Instance.sNode inst + old_p = Container.find old_pdx nl + old_s = Container.find old_sdx nl + in (old_pdx, old_sdx, old_p, old_s) + -- | Zero-initializer for the CStats type. emptyCStats :: CStats -emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -- | Update stats with data from a new node. 
updateCStats :: CStats -> Node.Node -> CStats updateCStats cs node = - let CStats { csFmem = x_fmem, csFdsk = x_fdsk, - csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk, - csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu, - csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu, - csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu, - csVcpu = x_vcpu, - csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst - } - = cs - inc_amem = Node.fMem node - Node.rMem node - inc_amem' = if inc_amem > 0 then inc_amem else 0 - inc_adsk = Node.availDisk node - inc_imem = truncate (Node.tMem node) - Node.nMem node - - Node.xMem node - Node.fMem node - inc_icpu = Node.uCpu node - inc_idsk = truncate (Node.tDsk node) - Node.fDsk node - inc_vcpu = Node.hiCpu node - inc_acpu = Node.availCpu node - - in cs { csFmem = x_fmem + fromIntegral (Node.fMem node) - , csFdsk = x_fdsk + fromIntegral (Node.fDsk node) - , csAmem = x_amem + fromIntegral inc_amem' - , csAdsk = x_adsk + fromIntegral inc_adsk - , csAcpu = x_acpu + fromIntegral inc_acpu - , csMmem = max x_mmem (fromIntegral inc_amem') - , csMdsk = max x_mdsk (fromIntegral inc_adsk) - , csMcpu = max x_mcpu (fromIntegral inc_acpu) - , csImem = x_imem + fromIntegral inc_imem - , csIdsk = x_idsk + fromIntegral inc_idsk - , csIcpu = x_icpu + fromIntegral inc_icpu - , csTmem = x_tmem + Node.tMem node - , csTdsk = x_tdsk + Node.tDsk node - , csTcpu = x_tcpu + Node.tCpu node - , csVcpu = x_vcpu + fromIntegral inc_vcpu - , csXmem = x_xmem + fromIntegral (Node.xMem node) - , csNmem = x_nmem + fromIntegral (Node.nMem node) - , csNinst = x_ninst + length (Node.pList node) - } + let CStats { csFmem = x_fmem, csFdsk = x_fdsk, + csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk, + csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu, + csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu, + csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu, + csVcpu = x_vcpu, csNcpu = x_ncpu, + csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst + } + = cs + inc_amem = Node.fMem node - Node.rMem node + inc_amem' = if inc_amem > 0 then inc_amem else 0 + inc_adsk = Node.availDisk node + inc_imem = truncate (Node.tMem node) - Node.nMem node + - Node.xMem node - Node.fMem node + inc_icpu = Node.uCpu node + inc_idsk = truncate (Node.tDsk node) - Node.fDsk node + inc_vcpu = Node.hiCpu node + inc_acpu = Node.availCpu node + inc_ncpu = fromIntegral (Node.uCpu node) / + iPolicyVcpuRatio (Node.iPolicy node) + in cs { csFmem = x_fmem + fromIntegral (Node.fMem node) + , csFdsk = x_fdsk + fromIntegral (Node.fDsk node) + , csAmem = x_amem + fromIntegral inc_amem' + , csAdsk = x_adsk + fromIntegral inc_adsk + , csAcpu = x_acpu + fromIntegral inc_acpu + , csMmem = max x_mmem (fromIntegral inc_amem') + , csMdsk = max x_mdsk (fromIntegral inc_adsk) + , csMcpu = max x_mcpu (fromIntegral inc_acpu) + , csImem = x_imem + fromIntegral inc_imem + , csIdsk = x_idsk + fromIntegral inc_idsk + , csIcpu = x_icpu + fromIntegral inc_icpu + , csTmem = x_tmem + Node.tMem node + , csTdsk = x_tdsk + Node.tDsk node + , csTcpu = x_tcpu + Node.tCpu node + , csVcpu = x_vcpu + fromIntegral inc_vcpu + , csNcpu = x_ncpu + inc_ncpu + , csXmem = x_xmem + fromIntegral (Node.xMem node) + , csNmem = x_nmem + fromIntegral (Node.nMem node) + , csNinst = x_ninst + length (Node.pList node) + } -- | Compute the total free disk and memory in the cluster. totalResources :: Node.List -> CStats totalResources nl = - let cs = foldl' updateCStats emptyCStats . Container.elems $ nl - in cs { csScore = compCV nl } + let cs = foldl' updateCStats emptyCStats . 
Container.elems $ nl + in cs { csScore = compCV nl } -- | Compute the delta between two cluster state. -- @@ -248,18 +269,27 @@ totalResources nl = -- was left unallocated. computeAllocationDelta :: CStats -> CStats -> AllocStats computeAllocationDelta cini cfin = - let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini - CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu, - csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin - rini = RSpec (fromIntegral i_icpu) (fromIntegral i_imem) - (fromIntegral i_idsk) - rfin = RSpec (fromIntegral (f_icpu - i_icpu)) - (fromIntegral (f_imem - i_imem)) - (fromIntegral (f_idsk - i_idsk)) - un_cpu = fromIntegral (v_cpu - f_icpu)::Int - runa = RSpec un_cpu (truncate t_mem - fromIntegral f_imem) - (truncate t_dsk - fromIntegral f_idsk) - in (rini, rfin, runa) + let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu, + csNcpu = i_ncpu } = cini + CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu, + csTmem = t_mem, csTdsk = t_dsk, csVcpu = f_vcpu, + csNcpu = f_ncpu, csTcpu = f_tcpu } = cfin + rini = AllocInfo { allocInfoVCpus = fromIntegral i_icpu + , allocInfoNCpus = i_ncpu + , allocInfoMem = fromIntegral i_imem + , allocInfoDisk = fromIntegral i_idsk + } + rfin = AllocInfo { allocInfoVCpus = fromIntegral (f_icpu - i_icpu) + , allocInfoNCpus = f_ncpu - i_ncpu + , allocInfoMem = fromIntegral (f_imem - i_imem) + , allocInfoDisk = fromIntegral (f_idsk - i_idsk) + } + runa = AllocInfo { allocInfoVCpus = fromIntegral (f_vcpu - f_icpu) + , allocInfoNCpus = f_tcpu - f_ncpu + , allocInfoMem = truncate t_mem - fromIntegral f_imem + , allocInfoDisk = truncate t_dsk - fromIntegral f_idsk + } + in (rini, rfin, runa) -- | The names and weights of the individual elements in the CV list. detailedCVInfo :: [(Double, String)] @@ -284,46 +314,44 @@ detailedCVWeights = map fst detailedCVInfo -- | Compute the mem and disk covariance. compDetailedCV :: [Node.Node] -> [Double] compDetailedCV all_nodes = - let - (offline, nodes) = partition Node.offline all_nodes - mem_l = map Node.pMem nodes - dsk_l = map Node.pDsk nodes - -- metric: memory covariance - mem_cv = stdDev mem_l - -- metric: disk covariance - dsk_cv = stdDev dsk_l - -- metric: count of instances living on N1 failing nodes - n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) + - length (Node.pList n)) . - filter Node.failN1 $ nodes :: Double - res_l = map Node.pRem nodes - -- metric: reserved memory covariance - res_cv = stdDev res_l - -- offline instances metrics - offline_ipri = sum . map (length . Node.pList) $ offline - offline_isec = sum . map (length . 
Node.sList) $ offline - -- metric: count of instances on offline nodes - off_score = fromIntegral (offline_ipri + offline_isec)::Double - -- metric: count of primary instances on offline nodes (this - -- helps with evacuation/failover of primary instances on - -- 2-node clusters with one node offline) - off_pri_score = fromIntegral offline_ipri::Double - cpu_l = map Node.pCpu nodes - -- metric: covariance of vcpu/pcpu ratio - cpu_cv = stdDev cpu_l - -- metrics: covariance of cpu, memory, disk and network load - (c_load, m_load, d_load, n_load) = unzip4 $ - map (\n -> - let DynUtil c1 m1 d1 n1 = Node.utilLoad n - DynUtil c2 m2 d2 n2 = Node.utilPool n - in (c1/c2, m1/m2, d1/d2, n1/n2) - ) nodes - -- metric: conflicting instance count - pri_tags_inst = sum $ map Node.conflictingPrimaries nodes - pri_tags_score = fromIntegral pri_tags_inst::Double - in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv - , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load - , pri_tags_score ] + let (offline, nodes) = partition Node.offline all_nodes + mem_l = map Node.pMem nodes + dsk_l = map Node.pDsk nodes + -- metric: memory covariance + mem_cv = stdDev mem_l + -- metric: disk covariance + dsk_cv = stdDev dsk_l + -- metric: count of instances living on N1 failing nodes + n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) + + length (Node.pList n)) . + filter Node.failN1 $ nodes :: Double + res_l = map Node.pRem nodes + -- metric: reserved memory covariance + res_cv = stdDev res_l + -- offline instances metrics + offline_ipri = sum . map (length . Node.pList) $ offline + offline_isec = sum . map (length . Node.sList) $ offline + -- metric: count of instances on offline nodes + off_score = fromIntegral (offline_ipri + offline_isec)::Double + -- metric: count of primary instances on offline nodes (this + -- helps with evacuation/failover of primary instances on + -- 2-node clusters with one node offline) + off_pri_score = fromIntegral offline_ipri::Double + cpu_l = map Node.pCpu nodes + -- metric: covariance of vcpu/pcpu ratio + cpu_cv = stdDev cpu_l + -- metrics: covariance of cpu, memory, disk and network load + (c_load, m_load, d_load, n_load) = + unzip4 $ map (\n -> + let DynUtil c1 m1 d1 n1 = Node.utilLoad n + DynUtil c2 m2 d2 n2 = Node.utilPool n + in (c1/c2, m1/m2, d1/d2, n1/n2)) nodes + -- metric: conflicting instance count + pri_tags_inst = sum $ map Node.conflictingPrimaries nodes + pri_tags_score = fromIntegral pri_tags_inst::Double + in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv + , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load + , pri_tags_score ] -- | Compute the /total/ variance. compCVNodes :: [Node.Node] -> Double @@ -342,127 +370,117 @@ getOnline = filter (not . Node.offline) . Container.elems -- | Compute best table. Note that the ordering of the arguments is important. compareTables :: Table -> Table -> Table compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) = - if a_cv > b_cv then b else a + if a_cv > b_cv then b else a -- | Applies an instance move to a given node list and instance. 
applyMove :: Node.List -> Instance.Instance -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx) -- Failover (f) applyMove nl inst Failover = - let old_pdx = Instance.pNode inst - old_sdx = Instance.sNode inst - old_p = Container.find old_pdx nl - old_s = Container.find old_sdx nl - int_p = Node.removePri old_p inst - int_s = Node.removeSec old_s inst - force_p = Node.offline old_p - new_nl = do -- Maybe monad - new_p <- Node.addPriEx force_p int_s inst - new_s <- Node.addSec int_p inst old_sdx - let new_inst = Instance.setBoth inst old_sdx old_pdx - return (Container.addTwo old_pdx new_s old_sdx new_p nl, - new_inst, old_sdx, old_pdx) - in new_nl + let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst + int_p = Node.removePri old_p inst + int_s = Node.removeSec old_s inst + new_nl = do -- Maybe monad + new_p <- Node.addPriEx (Node.offline old_p) int_s inst + new_s <- Node.addSec int_p inst old_sdx + let new_inst = Instance.setBoth inst old_sdx old_pdx + return (Container.addTwo old_pdx new_s old_sdx new_p nl, + new_inst, old_sdx, old_pdx) + in new_nl -- Replace the primary (f:, r:np, f) applyMove nl inst (ReplacePrimary new_pdx) = - let old_pdx = Instance.pNode inst - old_sdx = Instance.sNode inst - old_p = Container.find old_pdx nl - old_s = Container.find old_sdx nl - tgt_n = Container.find new_pdx nl - int_p = Node.removePri old_p inst - int_s = Node.removeSec old_s inst - force_p = Node.offline old_p - new_nl = do -- Maybe monad - -- check that the current secondary can host the instance - -- during the migration - tmp_s <- Node.addPriEx force_p int_s inst - let tmp_s' = Node.removePri tmp_s inst - new_p <- Node.addPriEx force_p tgt_n inst - new_s <- Node.addSecEx force_p tmp_s' inst new_pdx - let new_inst = Instance.setPri inst new_pdx - return (Container.add new_pdx new_p $ - Container.addTwo old_pdx int_p old_sdx new_s nl, - new_inst, new_pdx, old_sdx) - in new_nl + let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst + tgt_n = Container.find new_pdx nl + int_p = Node.removePri old_p inst + int_s = Node.removeSec old_s inst + force_p = Node.offline old_p + new_nl = do -- Maybe monad + -- check that the current secondary can host the instance + -- during the migration + tmp_s <- Node.addPriEx force_p int_s inst + let tmp_s' = Node.removePri tmp_s inst + new_p <- Node.addPriEx force_p tgt_n inst + new_s <- Node.addSecEx force_p tmp_s' inst new_pdx + let new_inst = Instance.setPri inst new_pdx + return (Container.add new_pdx new_p $ + Container.addTwo old_pdx int_p old_sdx new_s nl, + new_inst, new_pdx, old_sdx) + in new_nl -- Replace the secondary (r:ns) applyMove nl inst (ReplaceSecondary new_sdx) = - let old_pdx = Instance.pNode inst - old_sdx = Instance.sNode inst - old_s = Container.find old_sdx nl - tgt_n = Container.find new_sdx nl - int_s = Node.removeSec old_s inst - force_s = Node.offline old_s - new_inst = Instance.setSec inst new_sdx - new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>= - \new_s -> return (Container.addTwo new_sdx - new_s old_sdx int_s nl, - new_inst, old_pdx, new_sdx) - in new_nl + let old_pdx = Instance.pNode inst + old_sdx = Instance.sNode inst + old_s = Container.find old_sdx nl + tgt_n = Container.find new_sdx nl + int_s = Node.removeSec old_s inst + force_s = Node.offline old_s + new_inst = Instance.setSec inst new_sdx + new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>= + \new_s -> return (Container.addTwo new_sdx + new_s old_sdx int_s nl, + new_inst, old_pdx, new_sdx) + in new_nl -- Replace the secondary and failover 
(r:np, f) applyMove nl inst (ReplaceAndFailover new_pdx) = - let old_pdx = Instance.pNode inst - old_sdx = Instance.sNode inst - old_p = Container.find old_pdx nl - old_s = Container.find old_sdx nl - tgt_n = Container.find new_pdx nl - int_p = Node.removePri old_p inst - int_s = Node.removeSec old_s inst - force_s = Node.offline old_s - new_nl = do -- Maybe monad - new_p <- Node.addPri tgt_n inst - new_s <- Node.addSecEx force_s int_p inst new_pdx - let new_inst = Instance.setBoth inst new_pdx old_pdx - return (Container.add new_pdx new_p $ - Container.addTwo old_pdx new_s old_sdx int_s nl, - new_inst, new_pdx, old_pdx) - in new_nl + let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst + tgt_n = Container.find new_pdx nl + int_p = Node.removePri old_p inst + int_s = Node.removeSec old_s inst + force_s = Node.offline old_s + new_nl = do -- Maybe monad + new_p <- Node.addPri tgt_n inst + new_s <- Node.addSecEx force_s int_p inst new_pdx + let new_inst = Instance.setBoth inst new_pdx old_pdx + return (Container.add new_pdx new_p $ + Container.addTwo old_pdx new_s old_sdx int_s nl, + new_inst, new_pdx, old_pdx) + in new_nl -- Failver and replace the secondary (f, r:ns) applyMove nl inst (FailoverAndReplace new_sdx) = - let old_pdx = Instance.pNode inst - old_sdx = Instance.sNode inst - old_p = Container.find old_pdx nl - old_s = Container.find old_sdx nl - tgt_n = Container.find new_sdx nl - int_p = Node.removePri old_p inst - int_s = Node.removeSec old_s inst - force_p = Node.offline old_p - new_nl = do -- Maybe monad - new_p <- Node.addPriEx force_p int_s inst - new_s <- Node.addSecEx force_p tgt_n inst old_sdx - let new_inst = Instance.setBoth inst old_sdx new_sdx - return (Container.add new_sdx new_s $ - Container.addTwo old_sdx new_p old_pdx int_p nl, - new_inst, old_sdx, new_sdx) - in new_nl + let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst + tgt_n = Container.find new_sdx nl + int_p = Node.removePri old_p inst + int_s = Node.removeSec old_s inst + force_p = Node.offline old_p + new_nl = do -- Maybe monad + new_p <- Node.addPriEx force_p int_s inst + new_s <- Node.addSecEx force_p tgt_n inst old_sdx + let new_inst = Instance.setBoth inst old_sdx new_sdx + return (Container.add new_sdx new_s $ + Container.addTwo old_sdx new_p old_pdx int_p nl, + new_inst, old_sdx, new_sdx) + in new_nl -- | Tries to allocate an instance on one given node. allocateOnSingle :: Node.List -> Instance.Instance -> Ndx -> OpResult Node.AllocElement allocateOnSingle nl inst new_pdx = - let p = Container.find new_pdx nl - new_inst = Instance.setBoth inst new_pdx Node.noSecondary - in Node.addPri p inst >>= \new_p -> do - let new_nl = Container.add new_pdx new_p nl - new_score = compCV nl - return (new_nl, new_inst, [new_p], new_score) + let p = Container.find new_pdx nl + new_inst = Instance.setBoth inst new_pdx Node.noSecondary + in do + Instance.instMatchesPolicy inst (Node.iPolicy p) + new_p <- Node.addPri p inst + let new_nl = Container.add new_pdx new_p nl + new_score = compCV nl + return (new_nl, new_inst, [new_p], new_score) -- | Tries to allocate an instance on a given pair of nodes. 
allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx -> OpResult Node.AllocElement allocateOnPair nl inst new_pdx new_sdx = - let tgt_p = Container.find new_pdx nl - tgt_s = Container.find new_sdx nl - in do - new_p <- Node.addPri tgt_p inst - new_s <- Node.addSec tgt_s inst new_pdx - let new_inst = Instance.setBoth inst new_pdx new_sdx - new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl - return (new_nl, new_inst, [new_p, new_s], compCV new_nl) + let tgt_p = Container.find new_pdx nl + tgt_s = Container.find new_sdx nl + in do + Instance.instMatchesPolicy inst (Node.iPolicy tgt_p) + new_p <- Node.addPri tgt_p inst + new_s <- Node.addSec tgt_s inst new_pdx + let new_inst = Instance.setBoth inst new_pdx new_sdx + new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl + return (new_nl, new_inst, [new_p, new_s], compCV new_nl) -- | Tries to perform an instance move and returns the best table -- between the original one and the new one. @@ -472,20 +490,17 @@ checkSingleStep :: Table -- ^ The original table -> IMove -- ^ The move to apply -> Table -- ^ The final best table checkSingleStep ini_tbl target cur_tbl move = - let - Table ini_nl ini_il _ ini_plc = ini_tbl - tmp_resu = applyMove ini_nl target move - in - case tmp_resu of - OpFail _ -> cur_tbl - OpGood (upd_nl, new_inst, pri_idx, sec_idx) -> - let tgt_idx = Instance.idx target - upd_cvar = compCV upd_nl - upd_il = Container.add tgt_idx new_inst ini_il - upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc - upd_tbl = Table upd_nl upd_il upd_cvar upd_plc - in - compareTables cur_tbl upd_tbl + let Table ini_nl ini_il _ ini_plc = ini_tbl + tmp_resu = applyMove ini_nl target move + in case tmp_resu of + OpFail _ -> cur_tbl + OpGood (upd_nl, new_inst, pri_idx, sec_idx) -> + let tgt_idx = Instance.idx target + upd_cvar = compCV upd_nl + upd_il = Container.add tgt_idx new_inst ini_il + upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc + upd_tbl = Table upd_nl upd_il upd_cvar upd_plc + in compareTables cur_tbl upd_tbl -- | Given the status of the current secondary as a valid new node and -- the current candidate target node, generate the possible moves for @@ -496,17 +511,19 @@ possibleMoves :: Bool -- ^ Whether the secondary node is a valid new node -> [IMove] -- ^ List of valid result moves possibleMoves _ False tdx = - [ReplaceSecondary tdx] + [ReplaceSecondary tdx] possibleMoves True True tdx = - [ReplaceSecondary tdx, - ReplaceAndFailover tdx, - ReplacePrimary tdx, - FailoverAndReplace tdx] + [ ReplaceSecondary tdx + , ReplaceAndFailover tdx + , ReplacePrimary tdx + , FailoverAndReplace tdx + ] possibleMoves False True tdx = - [ReplaceSecondary tdx, - ReplaceAndFailover tdx] + [ ReplaceSecondary tdx + , ReplaceAndFailover tdx + ] -- | Compute the best move for a given instance. 
checkInstanceMove :: [Ndx] -- ^ Allowed target node indices @@ -516,17 +533,17 @@ checkInstanceMove :: [Ndx] -- ^ Allowed target node indices -> Instance.Instance -- ^ Instance to move -> Table -- ^ Best new table for this instance checkInstanceMove nodes_idx disk_moves inst_moves ini_tbl target = - let - opdx = Instance.pNode target - osdx = Instance.sNode target - nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx - use_secondary = elem osdx nodes_idx && inst_moves - aft_failover = if use_secondary -- if allowed to failover + let opdx = Instance.pNode target + osdx = Instance.sNode target + bad_nodes = [opdx, osdx] + nodes = filter (`notElem` bad_nodes) nodes_idx + use_secondary = elem osdx nodes_idx && inst_moves + aft_failover = if use_secondary -- if allowed to failover then checkSingleStep ini_tbl target ini_tbl Failover else ini_tbl - all_moves = if disk_moves + all_moves = if disk_moves then concatMap - (possibleMoves use_secondary inst_moves) nodes + (possibleMoves use_secondary inst_moves) nodes else [] in -- iterate over the possible nodes for this instance @@ -540,19 +557,19 @@ checkMove :: [Ndx] -- ^ Allowed target node indices -> [Instance.Instance] -- ^ List of instances still to move -> Table -- ^ The new solution checkMove nodes_idx disk_moves inst_moves ini_tbl victims = - let Table _ _ _ ini_plc = ini_tbl - -- we're using rwhnf from the Control.Parallel.Strategies - -- package; we don't need to use rnf as that would force too - -- much evaluation in single-threaded cases, and in - -- multi-threaded case the weak head normal form is enough to - -- spark the evaluation - tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves - inst_moves ini_tbl) - victims - -- iterate over all instances, computing the best move - best_tbl = foldl' compareTables ini_tbl tables - Table _ _ _ best_plc = best_tbl - in if length best_plc == length ini_plc + let Table _ _ _ ini_plc = ini_tbl + -- we're using rwhnf from the Control.Parallel.Strategies + -- package; we don't need to use rnf as that would force too + -- much evaluation in single-threaded cases, and in + -- multi-threaded case the weak head normal form is enough to + -- spark the evaluation + tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves + inst_moves ini_tbl) + victims + -- iterate over all instances, computing the best move + best_tbl = foldl' compareTables ini_tbl tables + Table _ _ _ best_plc = best_tbl + in if length best_plc == length ini_plc then ini_tbl -- no advancement else best_tbl @@ -562,9 +579,9 @@ doNextBalance :: Table -- ^ The starting table -> Score -- ^ Score at which to stop -> Bool -- ^ The resulting table and commands doNextBalance ini_tbl max_rounds min_score = - let Table _ _ ini_cv ini_plc = ini_tbl - ini_plc_len = length ini_plc - in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score + let Table _ _ ini_cv ini_plc = ini_tbl + ini_plc_len = length ini_plc + in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score -- | Run a balance move. tryBalance :: Table -- ^ The starting table @@ -608,7 +625,7 @@ bestAllocElement :: Maybe Node.AllocElement bestAllocElement a Nothing = a bestAllocElement Nothing b = b bestAllocElement a@(Just (_, _, _, ascore)) b@(Just (_, _, _, bscore)) = - if ascore < bscore then a else b + if ascore < bscore then a else b -- | Update current Allocation solution and failure stats with new -- elements. 
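-- A minimal, generic sketch of the balancing driver loop that 'doNextBalance'
-- and 'tryBalance' are meant to be plugged into (the names 'balanceLoop',
-- 'step' and 'continue' are hypothetical, introduced only for illustration;
-- in hbal-like code 'step' would be 'tryBalance' with its option flags fixed
-- and 'continue' would be 'doNextBalance' with the round and score limits
-- fixed):
balanceLoop :: (tbl -> Maybe tbl) -- ^ One balancing step
            -> (tbl -> Bool)      -- ^ Whether another round is still allowed
            -> tbl                -- ^ Starting table
            -> tbl                -- ^ Final table
balanceLoop step continue tbl
  | continue tbl = maybe tbl (balanceLoop step continue) (step tbl)
  | otherwise    = tbl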
@@ -616,30 +633,30 @@ concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as } concatAllocs as (OpGood ns) = - let -- Choose the old or new solution, based on the cluster score - cntok = asAllocs as - osols = asSolution as - nsols = bestAllocElement osols (Just ns) - nsuc = cntok + 1 + let -- Choose the old or new solution, based on the cluster score + cntok = asAllocs as + osols = asSolution as + nsols = bestAllocElement osols (Just ns) + nsuc = cntok + 1 -- Note: we force evaluation of nsols here in order to keep the -- memory profile low - we know that we will need nsols for sure -- in the next cycle, so we force evaluation of nsols, since the -- foldl' in the caller will only evaluate the tuple, but not the -- elements of the tuple - in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolution = nsols } + in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolution = nsols } -- | Sums two 'AllocSolution' structures. sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution sumAllocs (AllocSolution aFails aAllocs aSols aLog) (AllocSolution bFails bAllocs bSols bLog) = - -- note: we add b first, since usually it will be smaller; when - -- fold'ing, a will grow and grow whereas b is the per-group - -- result, hence smaller - let nFails = bFails ++ aFails - nAllocs = aAllocs + bAllocs - nSols = bestAllocElement aSols bSols - nLog = bLog ++ aLog - in AllocSolution nFails nAllocs nSols nLog + -- note: we add b first, since usually it will be smaller; when + -- fold'ing, a will grow and grow whereas b is the per-group + -- result, hence smaller + let nFails = bFails ++ aFails + nAllocs = aAllocs + bAllocs + nSols = bestAllocElement aSols bSols + nLog = bLog ++ aLog + in AllocSolution nFails nAllocs nSols nLog -- | Given a solution, generates a reasonable description for it. describeSolution :: AllocSolution -> String @@ -667,7 +684,7 @@ annotateSolution as = as { asLog = describeSolution as : asLog as } -- for proper jobset execution, we should reverse all lists. reverseEvacSolution :: EvacSolution -> EvacSolution reverseEvacSolution (EvacSolution f m o) = - EvacSolution (reverse f) (reverse m) (reverse o) + EvacSolution (reverse f) (reverse m) (reverse o) -- | Generate the valid node allocation singles or pairs for a new instance. genAllocNodes :: Group.List -- ^ Group list @@ -677,20 +694,20 @@ genAllocNodes :: Group.List -- ^ Group list -- unallocable nodes -> Result AllocNodes -- ^ The (monadic) result genAllocNodes gl nl count drop_unalloc = - let filter_fn = if drop_unalloc + let filter_fn = if drop_unalloc then filter (Group.isAllocable . flip Container.find gl . Node.group) else id - all_nodes = filter_fn $ getOnline nl - all_pairs = [(Node.idx p, - [Node.idx s | s <- all_nodes, - Node.idx p /= Node.idx s, - Node.group p == Node.group s]) | - p <- all_nodes] - in case count of - 1 -> Ok (Left (map Node.idx all_nodes)) - 2 -> Ok (Right (filter (not . null . snd) all_pairs)) - _ -> Bad "Unsupported number of nodes, only one or two supported" + all_nodes = filter_fn $ getOnline nl + all_pairs = [(Node.idx p, + [Node.idx s | s <- all_nodes, + Node.idx p /= Node.idx s, + Node.group p == Node.group s]) | + p <- all_nodes] + in case count of + 1 -> Ok (Left (map Node.idx all_nodes)) + 2 -> Ok (Right (filter (not . null . snd) all_pairs)) + _ -> Bad "Unsupported number of nodes, only one or two supported" -- | Try to allocate an instance on the cluster. 
tryAlloc :: (Monad m) => @@ -701,20 +718,20 @@ tryAlloc :: (Monad m) => -> m AllocSolution -- ^ Possible solution list tryAlloc _ _ _ (Right []) = fail "Not enough online nodes" tryAlloc nl _ inst (Right ok_pairs) = - let psols = parMap rwhnf (\(p, ss) -> - foldl' (\cstate -> - concatAllocs cstate . - allocateOnPair nl inst p) - emptyAllocSolution ss) ok_pairs - sols = foldl' sumAllocs emptyAllocSolution psols - in return $ annotateSolution sols + let psols = parMap rwhnf (\(p, ss) -> + foldl' (\cstate -> + concatAllocs cstate . + allocateOnPair nl inst p) + emptyAllocSolution ss) ok_pairs + sols = foldl' sumAllocs emptyAllocSolution psols + in return $ annotateSolution sols tryAlloc _ _ _ (Left []) = fail "No online nodes" tryAlloc nl _ inst (Left all_nodes) = - let sols = foldl' (\cstate -> - concatAllocs cstate . allocateOnSingle nl inst - ) emptyAllocSolution all_nodes - in return $ annotateSolution sols + let sols = foldl' (\cstate -> + concatAllocs cstate . allocateOnSingle nl inst + ) emptyAllocSolution all_nodes + in return $ annotateSolution sols -- | Given a group/result, describe it as a nice (list of) messages. solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String] @@ -724,7 +741,7 @@ solutionDescription gl (groupId, result) = Bad message -> [printf "Group %s: error %s" gname message] where grp = Container.find groupId gl gname = Group.name grp - pol = allocPolicyToString (Group.allocPolicy grp) + pol = allocPolicyToRaw (Group.allocPolicy grp) -- | From a list of possibly bad and possibly empty solutions, filter -- only the groups with a valid result. Note that the result will be @@ -733,23 +750,23 @@ filterMGResults :: Group.List -> [(Gdx, Result AllocSolution)] -> [(Gdx, AllocSolution)] filterMGResults gl = foldl' fn [] - where unallocable = not . Group.isAllocable . flip Container.find gl - fn accu (gdx, rasol) = - case rasol of - Bad _ -> accu - Ok sol | isNothing (asSolution sol) -> accu - | unallocable gdx -> accu - | otherwise -> (gdx, sol):accu + where unallocable = not . Group.isAllocable . flip Container.find gl + fn accu (gdx, rasol) = + case rasol of + Bad _ -> accu + Ok sol | isNothing (asSolution sol) -> accu + | unallocable gdx -> accu + | otherwise -> (gdx, sol):accu -- | Sort multigroup results based on policy and score. sortMGResults :: Group.List -> [(Gdx, AllocSolution)] -> [(Gdx, AllocSolution)] sortMGResults gl sols = - let extractScore (_, _, _, x) = x - solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl), - (extractScore . fromJust . asSolution) sol) - in sortBy (comparing solScore) sols + let extractScore (_, _, _, x) = x + solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl), + (extractScore . fromJust . asSolution) sol) + in sortBy (comparing solScore) sols -- | Finds the best group for an instance on a multi-group cluster. -- @@ -776,9 +793,13 @@ findBestAllocGroup mggl mgnl mgil allowed_gdxs inst cnt = goodSols = filterMGResults mggl sols sortedSols = sortMGResults mggl goodSols in if null sortedSols - then Bad $ intercalate ", " all_msgs - else let (final_group, final_sol) = head sortedSols - in return (final_group, final_sol, all_msgs) + then if null groups' + then Bad $ "no groups for evacuation: allowed groups was" ++ + show allowed_gdxs ++ ", all groups: " ++ + show (map fst groups) + else Bad $ intercalate ", " all_msgs + else let (final_group, final_sol) = head sortedSols + in return (final_group, final_sol, all_msgs) -- | Try to allocate an instance on a multi-group cluster. 
tryMGAlloc :: Group.List -- ^ The group list @@ -794,34 +815,6 @@ tryMGAlloc mggl mgnl mgil inst cnt = do selmsg = "Selected group: " ++ group_name return $ solution { asLog = selmsg:all_msgs } --- | Try to relocate an instance on the cluster. -tryReloc :: (Monad m) => - Node.List -- ^ The node list - -> Instance.List -- ^ The instance list - -> Idx -- ^ The index of the instance to move - -> Int -- ^ The number of nodes required - -> [Ndx] -- ^ Nodes which should not be used - -> m AllocSolution -- ^ Solution list -tryReloc nl il xid 1 ex_idx = - let all_nodes = getOnline nl - inst = Container.find xid il - ex_idx' = Instance.pNode inst:ex_idx - valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes - valid_idxes = map Node.idx valid_nodes - sols1 = foldl' (\cstate x -> - let em = do - (mnl, i, _, _) <- - applyMove nl inst (ReplaceSecondary x) - return (mnl, i, [Container.find x mnl], - compCV mnl) - in concatAllocs cstate em - ) emptyAllocSolution valid_idxes - in return sols1 - -tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \ - \destinations required (" ++ show reqn ++ - "), only one supported" - -- | Function which fails if the requested mode is change secondary. -- -- This is useful since except DRBD, no other disk template can @@ -830,7 +823,7 @@ tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \ -- this function, whatever mode we have is just a primary change. failOnSecondaryChange :: (Monad m) => EvacMode -> DiskTemplate -> m () failOnSecondaryChange ChangeSecondary dt = - fail $ "Instances with disk template '" ++ diskTemplateToString dt ++ + fail $ "Instances with disk template '" ++ diskTemplateToRaw dt ++ "' can't execute change secondary" failOnSecondaryChange _ _ = return () @@ -871,6 +864,11 @@ nodeEvacInstance _ _ mode (Instance.Instance failOnSecondaryChange mode dt >> fail "Block device relocations not implemented yet" +nodeEvacInstance _ _ mode (Instance.Instance + {Instance.diskTemplate = dt@DTRbd}) _ _ = + failOnSecondaryChange mode dt >> + fail "Rbd relocations not implemented yet" + nodeEvacInstance nl il ChangePrimary inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8}) _ _ = @@ -953,26 +951,26 @@ evacDrbdSecondaryInner :: Node.List -- ^ Cluster node list , Score , Ndx) -- ^ New best solution evacDrbdSecondaryInner nl inst gdx accu ndx = - case applyMove nl inst (ReplaceSecondary ndx) of - OpFail fm -> - case accu of - Right _ -> accu - Left _ -> Left $ "Node " ++ Container.nameOf nl ndx ++ - " failed: " ++ show fm - OpGood (nl', inst', _, _) -> - let nodes = Container.elems nl' - -- The fromJust below is ugly (it can fail nastily), but - -- at this point we should have any internal mismatches, - -- and adding a monad here would be quite involved - grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes) - new_cv = compCVNodes grpnodes - new_accu = Right (nl', inst', new_cv, ndx) - in case accu of - Left _ -> new_accu - Right (_, _, old_cv, _) -> - if old_cv < new_cv - then accu - else new_accu + case applyMove nl inst (ReplaceSecondary ndx) of + OpFail fm -> + case accu of + Right _ -> accu + Left _ -> Left $ "Node " ++ Container.nameOf nl ndx ++ + " failed: " ++ show fm + OpGood (nl', inst', _, _) -> + let nodes = Container.elems nl' + -- The fromJust below is ugly (it can fail nastily), but + -- at this point we should have any internal mismatches, + -- and adding a monad here would be quite involved + grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes) + new_cv = compCVNodes grpnodes + new_accu = 
Right (nl', inst', new_cv, ndx) + in case accu of + Left _ -> new_accu + Right (_, _, old_cv, _) -> + if old_cv < new_cv + then accu + else new_accu -- | Compute result of changing all nodes of a DRBD instance. -- @@ -991,48 +989,47 @@ evacDrbdAllInner :: Node.List -- ^ Cluster node list -> (Ndx, Ndx) -- ^ Tuple of new -- primary\/secondary nodes -> Result (Node.List, Instance.List, [OpCodes.OpCode], Score) -evacDrbdAllInner nl il inst gdx (t_pdx, t_sdx) = - do - let primary = Container.find (Instance.pNode inst) nl - idx = Instance.idx inst - -- if the primary is offline, then we first failover - (nl1, inst1, ops1) <- - if Node.offline primary - then do - (nl', inst', _, _) <- - annotateResult "Failing over to the secondary" $ - opToResult $ applyMove nl inst Failover - return (nl', inst', [Failover]) - else return (nl, inst, []) - let (o1, o2, o3) = (ReplaceSecondary t_pdx, - Failover, - ReplaceSecondary t_sdx) - -- we now need to execute a replace secondary to the future - -- primary node - (nl2, inst2, _, _) <- - annotateResult "Changing secondary to new primary" $ - opToResult $ - applyMove nl1 inst1 o1 - let ops2 = o1:ops1 - -- we now execute another failover, the primary stays fixed now - (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $ - opToResult $ applyMove nl2 inst2 o2 - let ops3 = o2:ops2 - -- and finally another replace secondary, to the final secondary - (nl4, inst4, _, _) <- - annotateResult "Changing secondary to final secondary" $ - opToResult $ - applyMove nl3 inst3 o3 - let ops4 = o3:ops3 - il' = Container.add idx inst4 il - ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4 - let nodes = Container.elems nl4 - -- The fromJust below is ugly (it can fail nastily), but - -- at this point we should have any internal mismatches, - -- and adding a monad here would be quite involved - grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes) - new_cv = compCVNodes grpnodes - return (nl4, il', ops, new_cv) +evacDrbdAllInner nl il inst gdx (t_pdx, t_sdx) = do + let primary = Container.find (Instance.pNode inst) nl + idx = Instance.idx inst + -- if the primary is offline, then we first failover + (nl1, inst1, ops1) <- + if Node.offline primary + then do + (nl', inst', _, _) <- + annotateResult "Failing over to the secondary" $ + opToResult $ applyMove nl inst Failover + return (nl', inst', [Failover]) + else return (nl, inst, []) + let (o1, o2, o3) = (ReplaceSecondary t_pdx, + Failover, + ReplaceSecondary t_sdx) + -- we now need to execute a replace secondary to the future + -- primary node + (nl2, inst2, _, _) <- + annotateResult "Changing secondary to new primary" $ + opToResult $ + applyMove nl1 inst1 o1 + let ops2 = o1:ops1 + -- we now execute another failover, the primary stays fixed now + (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $ + opToResult $ applyMove nl2 inst2 o2 + let ops3 = o2:ops2 + -- and finally another replace secondary, to the final secondary + (nl4, inst4, _, _) <- + annotateResult "Changing secondary to final secondary" $ + opToResult $ + applyMove nl3 inst3 o3 + let ops4 = o3:ops3 + il' = Container.add idx inst4 il + ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4 + let nodes = Container.elems nl4 + -- The fromJust below is ugly (it can fail nastily), but + -- at this point we should have any internal mismatches, + -- and adding a monad here would be quite involved + grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes) + new_cv = compCVNodes grpnodes + return (nl4, il', ops, new_cv) -- | 
Computes the nodes in a given group which are available for -- allocation. @@ -1054,14 +1051,14 @@ updateEvacSolution :: (Node.List, Instance.List, EvacSolution) -> Result (Node.List, Instance.List, [OpCodes.OpCode]) -> (Node.List, Instance.List, EvacSolution) updateEvacSolution (nl, il, es) idx (Bad msg) = - (nl, il, es { esFailed = (idx, msg):esFailed es}) + (nl, il, es { esFailed = (idx, msg):esFailed es}) updateEvacSolution (_, _, es) idx (Ok (nl, il, opcodes)) = - (nl, il, es { esMoved = new_elem:esMoved es - , esOpCodes = opcodes:esOpCodes es }) - where inst = Container.find idx il - new_elem = (idx, - instancePriGroup nl inst, - Instance.allNodes inst) + (nl, il, es { esMoved = new_elem:esMoved es + , esOpCodes = opcodes:esOpCodes es }) + where inst = Container.find idx il + new_elem = (idx, + instancePriGroup nl inst, + Instance.allNodes inst) -- | Node-evacuation IAllocator mode main function. tryNodeEvac :: Group.List -- ^ The cluster groups @@ -1071,24 +1068,24 @@ tryNodeEvac :: Group.List -- ^ The cluster groups -> [Idx] -- ^ List of instance (indices) to be evacuated -> Result (Node.List, Instance.List, EvacSolution) tryNodeEvac _ ini_nl ini_il mode idxs = - let evac_ndx = nodesToEvacuate ini_il mode idxs - offline = map Node.idx . filter Node.offline $ Container.elems ini_nl - excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline - group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx - (Container.elems nl))) $ - splitCluster ini_nl ini_il - (fin_nl, fin_il, esol) = - foldl' (\state@(nl, il, _) inst -> - let gdx = instancePriGroup nl inst - pdx = Instance.pNode inst in - updateEvacSolution state (Instance.idx inst) $ - availableGroupNodes group_ndx - (IntSet.insert pdx excl_ndx) gdx >>= - nodeEvacInstance nl il mode inst gdx - ) - (ini_nl, ini_il, emptyEvacSolution) - (map (`Container.find` ini_il) idxs) - in return (fin_nl, fin_il, reverseEvacSolution esol) + let evac_ndx = nodesToEvacuate ini_il mode idxs + offline = map Node.idx . filter Node.offline $ Container.elems ini_nl + excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline + group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx + (Container.elems nl))) $ + splitCluster ini_nl ini_il + (fin_nl, fin_il, esol) = + foldl' (\state@(nl, il, _) inst -> + let gdx = instancePriGroup nl inst + pdx = Instance.pNode inst in + updateEvacSolution state (Instance.idx inst) $ + availableGroupNodes group_ndx + (IntSet.insert pdx excl_ndx) gdx >>= + nodeEvacInstance nl il mode inst gdx + ) + (ini_nl, ini_il, emptyEvacSolution) + (map (`Container.find` ini_il) idxs) + in return (fin_nl, fin_il, reverseEvacSolution esol) -- | Change-group IAllocator mode main function. -- @@ -1117,82 +1114,74 @@ tryChangeGroup :: Group.List -- ^ The cluster groups -> [Idx] -- ^ List of instance (indices) to be evacuated -> Result (Node.List, Instance.List, EvacSolution) tryChangeGroup gl ini_nl ini_il gdxs idxs = - let evac_gdxs = nub $ map (instancePriGroup ini_nl . - flip Container.find ini_il) idxs - target_gdxs = (if null gdxs + let evac_gdxs = nub $ map (instancePriGroup ini_nl . + flip Container.find ini_il) idxs + target_gdxs = (if null gdxs then Container.keys gl else gdxs) \\ evac_gdxs - offline = map Node.idx . 
filter Node.offline $ Container.elems ini_nl - excl_ndx = foldl' (flip IntSet.insert) IntSet.empty offline - group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx - (Container.elems nl))) $ - splitCluster ini_nl ini_il - (fin_nl, fin_il, esol) = - foldl' (\state@(nl, il, _) inst -> - let solution = do - let ncnt = Instance.requiredNodes $ - Instance.diskTemplate inst - (gdx, _, _) <- findBestAllocGroup gl nl il - (Just target_gdxs) inst ncnt - av_nodes <- availableGroupNodes group_ndx - excl_ndx gdx - nodeEvacInstance nl il ChangeAll inst - gdx av_nodes - in updateEvacSolution state - (Instance.idx inst) solution - ) - (ini_nl, ini_il, emptyEvacSolution) - (map (`Container.find` ini_il) idxs) - in return (fin_nl, fin_il, reverseEvacSolution esol) - --- | Recursively place instances on the cluster until we're out of space. -iterateAlloc :: Node.List - -> Instance.List - -> Maybe Int - -> Instance.Instance - -> AllocNodes - -> [Instance.Instance] - -> [CStats] - -> Result AllocResult + offline = map Node.idx . filter Node.offline $ Container.elems ini_nl + excl_ndx = foldl' (flip IntSet.insert) IntSet.empty offline + group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx + (Container.elems nl))) $ + splitCluster ini_nl ini_il + (fin_nl, fin_il, esol) = + foldl' (\state@(nl, il, _) inst -> + let solution = do + let ncnt = Instance.requiredNodes $ + Instance.diskTemplate inst + (gdx, _, _) <- findBestAllocGroup gl nl il + (Just target_gdxs) inst ncnt + av_nodes <- availableGroupNodes group_ndx + excl_ndx gdx + nodeEvacInstance nl il ChangeAll inst gdx av_nodes + in updateEvacSolution state (Instance.idx inst) solution + ) + (ini_nl, ini_il, emptyEvacSolution) + (map (`Container.find` ini_il) idxs) + in return (fin_nl, fin_il, reverseEvacSolution esol) + +-- | Standard-sized allocation method. +-- +-- This places instances of the same size on the cluster until we're +-- out of space. The result will be a list of identically-sized +-- instances. +iterateAlloc :: AllocMethod iterateAlloc nl il limit newinst allocnodes ixes cstats = - let depth = length ixes - newname = printf "new-%d" depth::String - newidx = length (Container.elems il) + depth - newi2 = Instance.setIdx (Instance.setName newinst newname) newidx - newlimit = fmap (flip (-) 1) limit - in case tryAlloc nl il newi2 allocnodes of - Bad s -> Bad s - Ok (AllocSolution { asFailures = errs, asSolution = sols3 }) -> - let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in - case sols3 of - Nothing -> newsol - Just (xnl, xi, _, _) -> - if limit == Just 0 - then newsol - else iterateAlloc xnl (Container.add newidx xi il) - newlimit newinst allocnodes (xi:ixes) - (totalResources xnl:cstats) - --- | The core of the tiered allocation mode. 
-tieredAlloc :: Node.List - -> Instance.List - -> Maybe Int - -> Instance.Instance - -> AllocNodes - -> [Instance.Instance] - -> [CStats] - -> Result AllocResult + let depth = length ixes + newname = printf "new-%d" depth::String + newidx = Container.size il + newi2 = Instance.setIdx (Instance.setName newinst newname) newidx + newlimit = fmap (flip (-) 1) limit + in case tryAlloc nl il newi2 allocnodes of + Bad s -> Bad s + Ok (AllocSolution { asFailures = errs, asSolution = sols3 }) -> + let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in + case sols3 of + Nothing -> newsol + Just (xnl, xi, _, _) -> + if limit == Just 0 + then newsol + else iterateAlloc xnl (Container.add newidx xi il) + newlimit newinst allocnodes (xi:ixes) + (totalResources xnl:cstats) + +-- | Tiered allocation method. +-- +-- This places instances on the cluster, and decreases the spec until +-- we can allocate again. The result will be a list of decreasing +-- instance specs. +tieredAlloc :: AllocMethod tieredAlloc nl il limit newinst allocnodes ixes cstats = - case iterateAlloc nl il limit newinst allocnodes ixes cstats of - Bad s -> Bad s - Ok (errs, nl', il', ixes', cstats') -> - let newsol = Ok (errs, nl', il', ixes', cstats') - ixes_cnt = length ixes' - (stop, newlimit) = case limit of - Nothing -> (False, Nothing) - Just n -> (n <= ixes_cnt, - Just (n - ixes_cnt)) in - if stop then newsol else + case iterateAlloc nl il limit newinst allocnodes ixes cstats of + Bad s -> Bad s + Ok (errs, nl', il', ixes', cstats') -> + let newsol = Ok (errs, nl', il', ixes', cstats') + ixes_cnt = length ixes' + (stop, newlimit) = case limit of + Nothing -> (False, Nothing) + Just n -> (n <= ixes_cnt, + Just (n - ixes_cnt)) in + if stop then newsol else case Instance.shrinkByType newinst . fst . last $ sortBy (comparing snd) errs of Bad _ -> newsol @@ -1213,15 +1202,15 @@ computeMoves :: Instance.Instance -- ^ The instance to be moved -- secondary, while the command list holds gnt-instance -- commands (without that prefix), e.g \"@failover instance1@\" computeMoves i inam mv c d = - case mv of - Failover -> ("f", [mig]) - FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d]) - ReplaceSecondary _ -> (printf "r:%s" d, [rep d]) - ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig]) - ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig]) - where morf = if Instance.running i then "migrate" else "failover" - mig = printf "%s -f %s" morf inam::String - rep n = printf "replace-disks -n %s %s" n inam + case mv of + Failover -> ("f", [mig]) + FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d]) + ReplaceSecondary _ -> (printf "r:%s" d, [rep d]) + ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig]) + ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig]) + where morf = if Instance.instanceRunning i then "migrate" else "failover" + mig = printf "%s -f %s" morf inam::String + rep n = printf "replace-disks -n %s %s" n inam -- | Converts a placement to string format. 
printSolutionLine :: Node.List -- ^ The node list @@ -1233,23 +1222,21 @@ printSolutionLine :: Node.List -- ^ The node list -- the solution -> (String, [String]) printSolutionLine nl il nmlen imlen plc pos = - let - pmlen = (2*nmlen + 1) - (i, p, s, mv, c) = plc - inst = Container.find i il - inam = Instance.alias inst - npri = Node.alias $ Container.find p nl - nsec = Node.alias $ Container.find s nl - opri = Node.alias $ Container.find (Instance.pNode inst) nl - osec = Node.alias $ Container.find (Instance.sNode inst) nl - (moves, cmds) = computeMoves inst inam mv npri nsec - ostr = printf "%s:%s" opri osec::String - nstr = printf "%s:%s" npri nsec::String - in - (printf " %3d. %-*s %-*s => %-*s %.8f a=%s" - pos imlen inam pmlen ostr - pmlen nstr c moves, - cmds) + let pmlen = (2*nmlen + 1) + (i, p, s, mv, c) = plc + inst = Container.find i il + inam = Instance.alias inst + npri = Node.alias $ Container.find p nl + nsec = Node.alias $ Container.find s nl + opri = Node.alias $ Container.find (Instance.pNode inst) nl + osec = Node.alias $ Container.find (Instance.sNode inst) nl + (moves, cmds) = computeMoves inst inam mv npri nsec + ostr = printf "%s:%s" opri osec::String + nstr = printf "%s:%s" npri nsec::String + in (printf " %3d. %-*s %-*s => %-*s %.8f a=%s" + pos imlen inam pmlen ostr + pmlen nstr c moves, + cmds) -- | Return the instance and involved nodes in an instance move. -- @@ -1265,17 +1252,17 @@ involvedNodes :: Instance.List -- ^ Instance list, used for retrieving -- instance index -> [Ndx] -- ^ Resulting list of node indices involvedNodes il plc = - let (i, np, ns, _, _) = plc - inst = Container.find i il - in nub $ [np, ns] ++ Instance.allNodes inst + let (i, np, ns, _, _) = plc + inst = Container.find i il + in nub $ [np, ns] ++ Instance.allNodes inst -- | Inner function for splitJobs, that either appends the next job to -- the current jobset, or starts a new jobset. mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx]) mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx) mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _) - | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf) - | otherwise = ([n]:cjs, ndx) + | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf) + | otherwise = ([n]:cjs, ndx) -- | Break a list of moves into independent groups. Note that this -- will reverse the order of jobs. @@ -1286,11 +1273,11 @@ splitJobs = fst . foldl mergeJobs ([], []) -- also beautify the display a little. formatJob :: Int -> Int -> (Int, MoveJob) -> [String] formatJob jsn jsl (sn, (_, _, _, cmds)) = - let out = - printf " echo job %d/%d" jsn sn: - printf " check": - map (" gnt-instance " ++) cmds - in if sn == 1 + let out = + printf " echo job %d/%d" jsn sn: + printf " check": + map (" gnt-instance " ++) cmds + in if sn == 1 then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out else out @@ -1298,59 +1285,64 @@ formatJob jsn jsl (sn, (_, _, _, cmds)) = -- also beautify the display a little. formatCmds :: [JobSet] -> String formatCmds = - unlines . - concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js)) - (zip [1..] js)) . - zip [1..] + unlines . + concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js)) + (zip [1..] js)) . + zip [1..] -- | Print the node list. printNodes :: Node.List -> [String] -> String printNodes nl fs = - let fields = case fs of - [] -> Node.defaultFields - "+":rest -> Node.defaultFields ++ rest - _ -> fs - snl = sortBy (comparing Node.idx) (Container.elems nl) - (header, isnum) = unzip $ map Node.showHeader fields - in unlines . 
map ((:) ' ' . intercalate " ") $ - formatTable (header:map (Node.list fields) snl) isnum + let fields = case fs of + [] -> Node.defaultFields + "+":rest -> Node.defaultFields ++ rest + _ -> fs + snl = sortBy (comparing Node.idx) (Container.elems nl) + (header, isnum) = unzip $ map Node.showHeader fields + in unlines . map ((:) ' ' . unwords) $ + formatTable (header:map (Node.list fields) snl) isnum -- | Print the instance list. printInsts :: Node.List -> Instance.List -> String printInsts nl il = - let sil = sortBy (comparing Instance.idx) (Container.elems il) - helper inst = [ if Instance.running inst then "R" else " " - , Instance.name inst - , Container.nameOf nl (Instance.pNode inst) - , let sdx = Instance.sNode inst - in if sdx == Node.noSecondary + let sil = sortBy (comparing Instance.idx) (Container.elems il) + helper inst = [ if Instance.instanceRunning inst then "R" else " " + , Instance.name inst + , Container.nameOf nl (Instance.pNode inst) + , let sdx = Instance.sNode inst + in if sdx == Node.noSecondary then "" else Container.nameOf nl sdx - , if Instance.autoBalance inst then "Y" else "N" - , printf "%3d" $ Instance.vcpus inst - , printf "%5d" $ Instance.mem inst - , printf "%5d" $ Instance.dsk inst `div` 1024 - , printf "%5.3f" lC - , printf "%5.3f" lM - , printf "%5.3f" lD - , printf "%5.3f" lN - ] - where DynUtil lC lM lD lN = Instance.util inst - header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal" - , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ] - isnum = False:False:False:False:False:repeat True - in unlines . map ((:) ' ' . intercalate " ") $ - formatTable (header:map helper sil) isnum + , if Instance.autoBalance inst then "Y" else "N" + , printf "%3d" $ Instance.vcpus inst + , printf "%5d" $ Instance.mem inst + , printf "%5d" $ Instance.dsk inst `div` 1024 + , printf "%5.3f" lC + , printf "%5.3f" lM + , printf "%5.3f" lD + , printf "%5.3f" lN + ] + where DynUtil lC lM lD lN = Instance.util inst + header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal" + , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ] + isnum = False:False:False:False:False:repeat True + in unlines . map ((:) ' ' . unwords) $ + formatTable (header:map helper sil) isnum -- | Shows statistics for a given node list. -printStats :: Node.List -> String -printStats nl = - let dcvs = compDetailedCV $ Container.elems nl - (weights, names) = unzip detailedCVInfo - hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs - formatted = map (\(w, header, val) -> - printf "%s=%.8f(x%.2f)" header val w::String) hd - in intercalate ", " formatted +printStats :: String -> Node.List -> String +printStats lp nl = + let dcvs = compDetailedCV $ Container.elems nl + (weights, names) = unzip detailedCVInfo + hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs + header = [ "Field", "Value", "Weight" ] + formatted = map (\(w, h, val) -> + [ h + , printf "%.8f" val + , printf "x%.2f" w + ]) hd + in unlines . map ((++) lp) . map ((:) ' ' . unwords) $ + formatTable (header:formatted) $ False:repeat True -- | Convert a placement into a list of OpCodes (basically a job). iMoveToJob :: Node.List -- ^ The node list; only used for node @@ -1364,18 +1356,18 @@ iMoveToJob :: Node.List -- ^ The node list; only used for node -> [OpCodes.OpCode] -- ^ The list of opcodes equivalent to -- the given move iMoveToJob nl il idx move = - let inst = Container.find idx il - iname = Instance.name inst - lookNode = Just . 
Container.nameOf nl - opF = OpCodes.OpInstanceMigrate iname True False True Nothing - opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n) - OpCodes.ReplaceNewSecondary [] Nothing - in case move of - Failover -> [ opF ] - ReplacePrimary np -> [ opF, opR np, opF ] - ReplaceSecondary ns -> [ opR ns ] - ReplaceAndFailover np -> [ opR np, opF ] - FailoverAndReplace ns -> [ opF, opR ns ] + let inst = Container.find idx il + iname = Instance.name inst + lookNode = Just . Container.nameOf nl + opF = OpCodes.OpInstanceMigrate iname True False True Nothing + opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n) + OpCodes.ReplaceNewSecondary [] Nothing + in case move of + Failover -> [ opF ] + ReplacePrimary np -> [ opF, opR np, opF ] + ReplaceSecondary ns -> [ opR ns ] + ReplaceAndFailover np -> [ opR np, opF ] + FailoverAndReplace ns -> [ opF, opR ns ] -- * Node group functions @@ -1390,9 +1382,9 @@ instanceGroup nl i = pgroup = Node.group pnode sgroup = Node.group snode in if pgroup /= sgroup - then fail ("Instance placed accross two node groups, primary " ++ - show pgroup ++ ", secondary " ++ show sgroup) - else return pgroup + then fail ("Instance placed accross two node groups, primary " ++ + show pgroup ++ ", secondary " ++ show sgroup) + else return pgroup -- | Computes the group of an instance per the primary node. instancePriGroup :: Node.List -> Instance.Instance -> Gdx @@ -1424,17 +1416,17 @@ nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list -> [Idx] -- ^ List of instance indices being evacuated -> IntSet.IntSet -- ^ Set of node indices nodesToEvacuate il mode = - IntSet.delete Node.noSecondary . - foldl' (\ns idx -> - let i = Container.find idx il - pdx = Instance.pNode i - sdx = Instance.sNode i - dt = Instance.diskTemplate i - withSecondary = case dt of - DTDrbd8 -> IntSet.insert sdx ns - _ -> ns - in case mode of - ChangePrimary -> IntSet.insert pdx ns - ChangeSecondary -> withSecondary - ChangeAll -> IntSet.insert pdx withSecondary - ) IntSet.empty + IntSet.delete Node.noSecondary . + foldl' (\ns idx -> + let i = Container.find idx il + pdx = Instance.pNode i + sdx = Instance.sNode i + dt = Instance.diskTemplate i + withSecondary = case dt of + DTDrbd8 -> IntSet.insert sdx ns + _ -> ns + in case mode of + ChangePrimary -> IntSet.insert pdx ns + ChangeSecondary -> withSecondary + ChangeAll -> IntSet.insert pdx withSecondary + ) IntSet.empty
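-- A minimal usage sketch of the allocation API changed above ('runTiered' is
-- a hypothetical helper, not part of this module; it assumes the module's own
-- qualified imports and already-loaded cluster data): build the allocation
-- nodes with 'genAllocNodes', then run the 'tieredAlloc' 'AllocMethod' with
-- no allocation limit.
runTiered :: Group.List        -- ^ The cluster groups
          -> Node.List         -- ^ The node list
          -> Instance.List     -- ^ The instance list
          -> Instance.Instance -- ^ The instance spec to place repeatedly
          -> Result AllocResult
runTiered gl nl il spec = do
  let req_nodes = Instance.requiredNodes (Instance.diskTemplate spec)
  allocnodes <- genAllocNodes gl nl req_nodes True
  tieredAlloc nl il Nothing spec allocnodes [] []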