{-
-Copyright (C) 2009 Google Inc.
+Copyright (C) 2009, 2010 Google Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-- * First phase functions
, computeBadItems
-- * Second phase functions
- , printSolution
, printSolutionLine
, formatCmds
, involvedNodes
-- * Allocation functions
, iterateAlloc
, tieredAlloc
+ , instanceGroup
+ , findSplitInstances
+ , splitCluster
) where
import Data.List
-- * Types
-- | Allocation\/relocation solution.
-type AllocSolution = ([FailMode], Int, [(Score, Node.AllocElement)])
+type AllocSolution = ([FailMode], Int, [Node.AllocElement])
-- | The complete state for the balancing solution
data Table = Table Node.List Instance.List Score [Placement]
runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
in (rini, rfin, runa)
--- | The names of the individual elements in the CV list
-detailedCVNames :: [String]
-detailedCVNames = [ "free_mem_cv"
- , "free_disk_cv"
- , "n1_cnt"
- , "reserved_mem_cv"
- , "offline_all_cnt"
- , "offline_pri_cnt"
- , "vcpu_ratio_cv"
- , "cpu_load_cv"
- , "mem_load_cv"
- , "disk_load_cv"
- , "net_load_cv"
- , "pri_tags_score"
- ]
+-- | The names and weights of the individual elements in the CV list
+detailedCVInfo :: [(Double, String)]
+detailedCVInfo = [ (1, "free_mem_cv")
+ , (1, "free_disk_cv")
+ , (1, "n1_cnt")
+ , (1, "reserved_mem_cv")
+ , (4, "offline_all_cnt")
+ , (16, "offline_pri_cnt")
+ , (1, "vcpu_ratio_cv")
+ , (1, "cpu_load_cv")
+ , (1, "mem_load_cv")
+ , (1, "disk_load_cv")
+ , (1, "net_load_cv")
+ , (2, "pri_tags_score")
+ ]
+
+detailedCVWeights :: [Double]
+detailedCVWeights = map fst detailedCVInfo
-- | Compute the mem and disk covariance.
compDetailedCV :: Node.List -> [Double]
mem_cv = varianceCoeff mem_l
-- metric: disk covariance
dsk_cv = varianceCoeff dsk_l
- n1_l = length $ filter Node.failN1 nodes
- -- metric: count of failN1 nodes
- n1_score = fromIntegral n1_l::Double
+ -- metric: count of instances living on N1 failing nodes
+ n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
+ length (Node.pList n)) .
+ filter Node.failN1 $ nodes :: Double
res_l = map Node.pRem nodes
-- metric: reserved memory covariance
res_cv = varianceCoeff res_l
-- | Compute the /total/ variance.
compCV :: Node.List -> Double
-compCV = sum . compDetailedCV
+compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
-- | Compute online nodes from a Node.List
getOnline :: Node.List -> [Node.Node]
old_s = Container.find old_sdx nl
int_p = Node.removePri old_p inst
int_s = Node.removeSec old_s inst
+ force_p = Node.offline old_p
new_nl = do -- Maybe monad
- new_p <- Node.addPri int_s inst
+ new_p <- Node.addPriEx force_p int_s inst
new_s <- Node.addSec int_p inst old_sdx
let new_inst = Instance.setBoth inst old_sdx old_pdx
return (Container.addTwo old_pdx new_s old_sdx new_p nl,
tgt_n = Container.find new_pdx nl
int_p = Node.removePri old_p inst
int_s = Node.removeSec old_s inst
+ force_p = Node.offline old_p
new_nl = do -- Maybe monad
-- check that the current secondary can host the instance
-- during the migration
- tmp_s <- Node.addPri int_s inst
+ tmp_s <- Node.addPriEx force_p int_s inst
let tmp_s' = Node.removePri tmp_s inst
- new_p <- Node.addPri tgt_n inst
- new_s <- Node.addSec tmp_s' inst new_pdx
+ new_p <- Node.addPriEx force_p tgt_n inst
+ new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
let new_inst = Instance.setPri inst new_pdx
return (Container.add new_pdx new_p $
Container.addTwo old_pdx int_p old_sdx new_s nl,
old_s = Container.find old_sdx nl
tgt_n = Container.find new_sdx nl
int_s = Node.removeSec old_s inst
+ force_s = Node.offline old_s
new_inst = Instance.setSec inst new_sdx
- new_nl = Node.addSec tgt_n inst old_pdx >>=
+ new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
\new_s -> return (Container.addTwo new_sdx
new_s old_sdx int_s nl,
new_inst, old_pdx, new_sdx)
tgt_n = Container.find new_pdx nl
int_p = Node.removePri old_p inst
int_s = Node.removeSec old_s inst
+ force_s = Node.offline old_s
new_nl = do -- Maybe monad
new_p <- Node.addPri tgt_n inst
- new_s <- Node.addSec int_p inst new_pdx
+ new_s <- Node.addSecEx force_s int_p inst new_pdx
let new_inst = Instance.setBoth inst new_pdx old_pdx
return (Container.add new_pdx new_p $
Container.addTwo old_pdx new_s old_sdx int_s nl,
tgt_n = Container.find new_sdx nl
int_p = Node.removePri old_p inst
int_s = Node.removeSec old_s inst
+ force_p = Node.offline old_p
new_nl = do -- Maybe monad
- new_p <- Node.addPri int_s inst
- new_s <- Node.addSec tgt_n inst old_sdx
+ new_p <- Node.addPriEx force_p int_s inst
+ new_s <- Node.addSecEx force_p tgt_n inst old_sdx
let new_inst = Instance.setBoth inst old_sdx new_sdx
return (Container.add new_sdx new_s $
Container.addTwo old_sdx new_p old_pdx int_p nl,
allocateOnSingle nl inst p =
let new_pdx = Node.idx p
new_inst = Instance.setBoth inst new_pdx Node.noSecondary
- new_nl = Node.addPri p inst >>= \new_p ->
- return (Container.add new_pdx new_p nl, new_inst, [new_p])
- in new_nl
+ in Node.addPri p inst >>= \new_p -> do
+ let new_nl = Container.add new_pdx new_p nl
+ new_score = compCV nl
+ return (new_nl, new_inst, [new_p], new_score)
-- | Tries to allocate an instance on a given pair of nodes.
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
allocateOnPair nl inst tgt_p tgt_s =
let new_pdx = Node.idx tgt_p
new_sdx = Node.idx tgt_s
- new_nl = do -- Maybe monad
- new_p <- Node.addPri tgt_p inst
- new_s <- Node.addSec tgt_s inst new_pdx
- let new_inst = Instance.setBoth inst new_pdx new_sdx
- return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst,
- [new_p, new_s])
- in new_nl
+ in do
+ new_p <- Node.addPri tgt_p inst
+ new_s <- Node.addSec tgt_s inst new_pdx
+ let new_inst = Instance.setBoth inst new_pdx new_sdx
+ new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
+ return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
-- | Tries to perform an instance move and returns the best table
-- between the original one and the new one.
else best_tbl
-- | Check if we are allowed to go deeper in the balancing
-
-doNextBalance :: Table -- ^ The starting table
- -> Int -- ^ Remaining length
- -> Score -- ^ Score at which to stop
- -> Bool -- ^ The resulting table and commands
+doNextBalance :: Table -- ^ The starting table
+ -> Int -- ^ Remaining length
+ -> Score -- ^ Score at which to stop
+ -> Bool -- ^ The resulting table and commands
doNextBalance ini_tbl max_rounds min_score =
let Table _ _ ini_cv ini_plc = ini_tbl
ini_plc_len = length ini_plc
in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
-- | Run a balance move
-
tryBalance :: Table -- ^ The starting table
-> Bool -- ^ Allow disk moves
-> Bool -- ^ Only evacuate moves
+ -> Score -- ^ Min gain threshold
+ -> Score -- ^ Min gain
-> Maybe Table -- ^ The resulting table and commands
-tryBalance ini_tbl disk_moves evac_mode =
+tryBalance ini_tbl disk_moves evac_mode mg_limit min_gain =
let Table ini_nl ini_il ini_cv _ = ini_tbl
all_inst = Container.elems ini_il
all_inst' = if evac_mode
fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
(Table _ _ fin_cv _) = fin_tbl
in
- if fin_cv < ini_cv
+ if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
then Just fin_tbl -- this round made success, return the new table
else Nothing
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols)
-concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) =
- let nscore = compCV nl
- -- Choose the old or new solution, based on the cluster score
+concatAllocs (flst, cntok, osols) (OpGood ns@(_, _, _, nscore)) =
+ let -- Choose the old or new solution, based on the cluster score
nsols = case osols of
- [] -> [(nscore, ns)]
- (oscore, _):[] ->
+ [] -> [ns]
+ (_, _, _, oscore):[] ->
if oscore < nscore
then osols
- else [(nscore, ns)]
+ else [ns]
-- FIXME: here we simply concat to lists with more
-- than one element; we should instead abort, since
-- this is not a valid usage of this function
- xs -> (nscore, ns):xs
+ xs -> ns:xs
nsuc = cntok + 1
-- Note: we force evaluation of nsols here in order to keep the
-- memory profile low - we know that we will need nsols for sure
let em = do
(mnl, i, _, _) <-
applyMove nl inst (ReplaceSecondary x)
- return (mnl, i, [Container.find x mnl])
+ return (mnl, i, [Container.find x mnl],
+ compCV mnl)
in concatAllocs cstate em
) ([], 0, []) valid_idxes
in return sols1
\destinations required (" ++ show reqn ++
"), only one supported"
--- | Try to allocate an instance on the cluster.
+-- | Try to evacuate a list of nodes.
tryEvac :: (Monad m) =>
Node.List -- ^ The node list
-> Instance.List -- ^ The instance list
-- FIXME: hardcoded one node here
(fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx
case aes of
- csol@(_, (nl'', _, _)):_ ->
+ csol@(nl'', _, _, _):_ ->
return (nl'', (fm, cs, csol:rsols))
_ -> fail $ "Can't evacuate instance " ++
- show idx
+ Instance.name (Container.find idx il)
) (nl, ([], 0, [])) all_insts
return sol
-> Instance.Instance
-> Int
-> [Instance.Instance]
- -> Result (FailStats, Node.List, [Instance.Instance])
+ -> Result (FailStats, Node.List, Instance.List,
+ [Instance.Instance])
iterateAlloc nl il newinst nreq ixes =
let depth = length ixes
newname = printf "new-%d" depth::String
Bad s -> Bad s
Ok (errs, _, sols3) ->
case sols3 of
- [] -> Ok (collapseFailures errs, nl, ixes)
- (_, (xnl, xi, _)):[] ->
- iterateAlloc xnl il newinst nreq $! (xi:ixes)
+ [] -> Ok (collapseFailures errs, nl, il, ixes)
+ (xnl, xi, _, _):[] ->
+ iterateAlloc xnl (Container.add newidx xi il)
+ newinst nreq $! (xi:ixes)
_ -> Bad "Internal error: multiple solutions for single\
\ allocation"
-> Instance.Instance
-> Int
-> [Instance.Instance]
- -> Result (FailStats, Node.List, [Instance.Instance])
+ -> Result (FailStats, Node.List, Instance.List,
+ [Instance.Instance])
tieredAlloc nl il newinst nreq ixes =
case iterateAlloc nl il newinst nreq ixes of
Bad s -> Bad s
- Ok (errs, nl', ixes') ->
+ Ok (errs, nl', il', ixes') ->
case Instance.shrinkByType newinst . fst . last $
sortBy (comparing snd) errs of
- Bad _ -> Ok (errs, nl', ixes')
+ Bad _ -> Ok (errs, nl', il', ixes')
Ok newinst' ->
- tieredAlloc nl' il newinst' nreq ixes'
+ tieredAlloc nl' il' newinst' nreq ixes'
-- * Formatting functions
pmlen = (2*nmlen + 1)
(i, p, s, mv, c) = plc
inst = Container.find i il
- inam = Instance.name inst
- npri = Container.nameOf nl p
- nsec = Container.nameOf nl s
- opri = Container.nameOf nl $ Instance.pNode inst
- osec = Container.nameOf nl $ Instance.sNode inst
+ inam = Instance.alias inst
+ npri = Node.alias $ Container.find p nl
+ nsec = Node.alias $ Container.find s nl
+ opri = Node.alias $ Container.find (Instance.pNode inst) nl
+ osec = Node.alias $ Container.find (Instance.sNode inst) nl
(moves, cmds) = computeMoves inst inam mv npri nsec
ostr = printf "%s:%s" opri osec::String
nstr = printf "%s:%s" npri nsec::String
(zip [1..] js)) .
zip [1..]
--- | Converts a solution to string format.
-printSolution :: Node.List
- -> Instance.List
- -> [Placement]
- -> ([String], [[String]])
-printSolution nl il sol =
- let
- nmlen = Container.maxNameLen nl
- imlen = Container.maxNameLen il
- in
- unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..]
-
-- | Print the node list.
printNodes :: Node.List -> [String] -> String
printNodes nl fs =
- let fields = if null fs
- then Node.defaultFields
- else fs
+ let fields = case fs of
+ [] -> Node.defaultFields
+ "+":rest -> Node.defaultFields ++ rest
+ _ -> fs
snl = sortBy (comparing Node.idx) (Container.elems nl)
(header, isnum) = unzip $ map Node.showHeader fields
in unlines . map ((:) ' ' . intercalate " ") $
printStats :: Node.List -> String
printStats nl =
let dcvs = compDetailedCV nl
- hd = zip (detailedCVNames ++ repeat "unknown") dcvs
- formatted = map (\(header, val) ->
- printf "%s=%.8f" header val::String) hd
+ (weights, names) = unzip detailedCVInfo
+ hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
+ formatted = map (\(w, header, val) ->
+ printf "%s=%.8f(x%.2f)" header val w::String) hd
in intercalate ", " formatted
-- | Convert a placement into a list of OpCodes (basically a job).
ReplaceSecondary ns -> [ opR ns ]
ReplaceAndFailover np -> [ opR np, opF ]
FailoverAndReplace ns -> [ opF, opR ns ]
+
+-- | Computes the group of an instance
+instanceGroup :: Node.List -> Instance.Instance -> Result GroupID
+instanceGroup nl i =
+ let sidx = Instance.sNode i
+ pnode = Container.find (Instance.pNode i) nl
+ snode = if sidx == Node.noSecondary
+ then pnode
+ else Container.find sidx nl
+ puuid = Node.group pnode
+ suuid = Node.group snode
+ in if puuid /= suuid
+ then fail ("Instance placed accross two node groups, primary " ++ puuid ++
+ ", secondary " ++ suuid)
+ else return puuid
+
+-- | Compute the list of badly allocated instances (split across node
+-- groups)
+findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
+findSplitInstances nl il =
+ filter (not . isOk . instanceGroup nl) (Container.elems il)
+
+-- | Splits a cluster into the component node groups
+splitCluster :: Node.List -> Instance.List ->
+ [(GroupID, (Node.List, Instance.List))]
+splitCluster nl il =
+ let ngroups = Node.computeGroups (Container.elems nl)
+ in map (\(guuid, nodes) ->
+ let nidxs = map Node.idx nodes
+ nodes' = zip nidxs nodes
+ instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
+ in (guuid, (Container.fromAssocList nodes', instances))) ngroups