, printInsts
-- * Balacing functions
, checkMove
+ , doNextBalance
, tryBalance
, compCV
, printStats
-- * IAllocator functions
, tryAlloc
, tryReloc
+ , tryEvac
, collapseFailures
) where
-- * Types
-- | Allocation\/relocation solution.
-type AllocSolution = ([FailMode], Int, Maybe (Score, AllocElement))
-
--- | Allocation\/relocation element.
-type AllocElement = (Node.List, Instance.Instance, [Node.Node])
-
+type AllocSolution = ([FailMode], Int, [(Score, Node.AllocElement)])
-- | The complete state for the balancing solution
data Table = Table Node.List Instance.List Score [Placement]
detailedCVNames :: [String]
detailedCVNames = [ "free_mem_cv"
, "free_disk_cv"
- , "n1_score"
+ , "n1_cnt"
, "reserved_mem_cv"
- , "offline_score"
+ , "offline_all_cnt"
+ , "offline_pri_cnt"
, "vcpu_ratio_cv"
, "cpu_load_cv"
, "mem_load_cv"
, "disk_load_cv"
, "net_load_cv"
+ , "pri_tags_score"
]
-- | Compute the mem and disk covariance.
-- metric: disk covariance
dsk_cv = varianceCoeff dsk_l
n1_l = length $ filter Node.failN1 nodes
- -- metric: ratio of failN1 nodes
- n1_score = fromIntegral n1_l /
- fromIntegral (length nodes)::Double
+ -- metric: count of failN1 nodes
+ n1_score = fromIntegral n1_l::Double
res_l = map Node.pRem nodes
-- metric: reserved memory covariance
res_cv = varianceCoeff res_l
- offline_inst = sum . map (\n -> (length . Node.pList $ n) +
- (length . Node.sList $ n)) $ offline
- online_inst = sum . map (\n -> (length . Node.pList $ n) +
- (length . Node.sList $ n)) $ nodes
- -- metric: ratio of instances on offline nodes
- off_score = if offline_inst == 0
- then 0::Double
- else fromIntegral offline_inst /
- fromIntegral (offline_inst + online_inst)::Double
+ -- offline instances metrics
+ offline_ipri = sum . map (length . Node.pList) $ offline
+ offline_isec = sum . map (length . Node.sList) $ offline
+ -- metric: count of instances on offline nodes
+ off_score = fromIntegral (offline_ipri + offline_isec)::Double
+ -- metric: count of primary instances on offline nodes (this
+ -- helps with evacuation/failover of primary instances on
+ -- 2-node clusters with one node offline)
+ off_pri_score = fromIntegral offline_ipri::Double
cpu_l = map Node.pCpu nodes
-- metric: covariance of vcpu/pcpu ratio
cpu_cv = varianceCoeff cpu_l
DynUtil c2 m2 d2 n2 = Node.utilPool n
in (c1/c2, m1/m2, d1/d2, n1/n2)
) nodes
- in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv
+ -- metric: conflicting instance count
+ pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
+ pri_tags_score = fromIntegral pri_tags_inst::Double
+ in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
, varianceCoeff c_load, varianceCoeff m_load
- , varianceCoeff d_load, varianceCoeff n_load]
+ , varianceCoeff d_load, varianceCoeff n_load
+ , pri_tags_score ]
-- | Compute the /total/ variance.
compCV :: Node.List -> Double
-- | Tries to allocate an instance on one given node.
allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
- -> OpResult AllocElement
+ -> OpResult Node.AllocElement
allocateOnSingle nl inst p =
let new_pdx = Node.idx p
new_inst = Instance.setBoth inst new_pdx Node.noSecondary
-- | Tries to allocate an instance on a given pair of nodes.
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
- -> OpResult AllocElement
+ -> OpResult Node.AllocElement
allocateOnPair nl inst tgt_p tgt_s =
let new_pdx = Node.idx tgt_p
new_sdx = Node.idx tgt_s
best_tbl =
foldl'
(\ step_tbl em ->
- if Instance.sNode em == Node.noSecondary then step_tbl
- else compareTables step_tbl $
- checkInstanceMove nodes_idx disk_moves ini_tbl em)
+ compareTables step_tbl $
+ checkInstanceMove nodes_idx disk_moves ini_tbl em)
ini_tbl victims
Table _ _ _ best_plc = best_tbl
- in
- if length best_plc == length ini_plc then -- no advancement
- ini_tbl
- else
- best_tbl
+ in if length best_plc == length ini_plc
+ then ini_tbl -- no advancement
+ else best_tbl
+
+-- | Check if we are allowed to go deeper in the balancing
+
+doNextBalance :: Table -- ^ The starting table
+ -> Int -- ^ Remaining length
+ -> Score -- ^ Score at which to stop
+ -> Bool -- ^ The resulting table and commands
+doNextBalance ini_tbl max_rounds min_score =
+ let Table _ _ ini_cv ini_plc = ini_tbl
+ ini_plc_len = length ini_plc
+ in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
-- | Run a balance move
tryBalance :: Table -- ^ The starting table
- -> Int -- ^ Remaining length
-> Bool -- ^ Allow disk moves
- -> Score -- ^ Score at which to stop
+ -> Bool -- ^ Only evacuate moves
-> Maybe Table -- ^ The resulting table and commands
-tryBalance ini_tbl max_rounds disk_moves min_score =
- let Table ini_nl ini_il ini_cv ini_plc = ini_tbl
- ini_plc_len = length ini_plc
- allowed_next = (max_rounds < 0 || ini_plc_len < max_rounds) &&
- ini_cv > min_score
+tryBalance ini_tbl disk_moves evac_mode =
+ let Table ini_nl ini_il ini_cv _ = ini_tbl
+ all_inst = Container.elems ini_il
+ all_inst' = if evac_mode
+ then let bad_nodes = map Node.idx . filter Node.offline $
+ Container.elems ini_nl
+ in filter (\e -> Instance.sNode e `elem` bad_nodes ||
+ Instance.pNode e `elem` bad_nodes)
+ all_inst
+ else all_inst
+ reloc_inst = filter (\e -> Instance.sNode e /= Node.noSecondary)
+ all_inst'
+ node_idx = map Node.idx . filter (not . Node.offline) $
+ Container.elems ini_nl
+ fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
+ (Table _ _ fin_cv _) = fin_tbl
in
- if allowed_next
- then let all_inst = Container.elems ini_il
- node_idx = map Node.idx . filter (not . Node.offline) $
- Container.elems ini_nl
- fin_tbl = checkMove node_idx disk_moves ini_tbl all_inst
- (Table _ _ fin_cv _) = fin_tbl
- in
- if fin_cv < ini_cv
- then Just fin_tbl -- this round made success, try deeper
- else Nothing
+ if fin_cv < ini_cv
+ then Just fin_tbl -- this round made success, return the new table
else Nothing
-- * Allocation functions
-- | Update current Allocation solution and failure stats with new
-- elements
-concatAllocs :: AllocSolution -> OpResult AllocElement -> AllocSolution
+concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols)
concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) =
let nscore = compCV nl
-- Choose the old or new solution, based on the cluster score
nsols = case osols of
- Nothing -> Just (nscore, ns)
- Just (oscore, _) ->
+ [] -> [(nscore, ns)]
+ (oscore, _):[] ->
if oscore < nscore
then osols
- else Just (nscore, ns)
+ else [(nscore, ns)]
+ -- FIXME: here we simply concat to lists with more
+ -- than one element; we should instead abort, since
+ -- this is not a valid usage of this function
+ xs -> (nscore, ns):xs
nsuc = cntok + 1
-- Note: we force evaluation of nsols here in order to keep the
-- memory profile low - we know that we will need nsols for sure
ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
sols = foldl' (\cstate (p, s) ->
concatAllocs cstate $ allocateOnPair nl inst p s
- ) ([], 0, Nothing) ok_pairs
+ ) ([], 0, []) ok_pairs
in return sols
tryAlloc nl _ inst 1 =
let all_nodes = getOnline nl
sols = foldl' (\cstate ->
concatAllocs cstate . allocateOnSingle nl inst
- ) ([], 0, Nothing) all_nodes
+ ) ([], 0, []) all_nodes
in return sols
tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
applyMove nl inst (ReplaceSecondary x)
return (mnl, i, [Container.find x mnl])
in concatAllocs cstate em
- ) ([], 0, Nothing) valid_idxes
+ ) ([], 0, []) valid_idxes
in return sols1
tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \
\destinations required (" ++ show reqn ++
"), only one supported"
+-- | Try to allocate an instance on the cluster.
+tryEvac :: (Monad m) =>
+ Node.List -- ^ The node list
+ -> Instance.List -- ^ The instance list
+ -> [Ndx] -- ^ Nodes to be evacuated
+ -> m AllocSolution -- ^ Solution list
+tryEvac nl il ex_ndx =
+ let ex_nodes = map (flip Container.find nl) ex_ndx
+ all_insts = nub . concat . map Node.sList $ ex_nodes
+ in do
+ (_, sol) <- foldM (\(nl', (_, _, rsols)) idx -> do
+ -- FIXME: hardcoded one node here
+ (fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx
+ case aes of
+ csol@(_, (nl'', _, _)):_ ->
+ return (nl'', (fm, cs, csol:rsols))
+ _ -> fail $ "Can't evacuate instance " ++
+ show idx
+ ) (nl, ([], 0, [])) all_insts
+ return sol
+
-- * Formatting functions
-- | Given the original and final nodes, computes the relocation description.
unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..]
-- | Print the node list.
-printNodes :: Node.List -> String
-printNodes nl =
- let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
- (header, isnum) = unzip $ map Node.showHeader Node.defaultFields
+printNodes :: Node.List -> [String] -> String
+printNodes nl fs =
+ let fields = if null fs
+ then Node.defaultFields
+ else fs
+ snl = sortBy (compare `on` Node.idx) (Container.elems nl)
+ (header, isnum) = unzip $ map Node.showHeader fields
in unlines . map ((:) ' ' . intercalate " ") $
- formatTable (header:map (Node.list Node.defaultFields) snl) isnum
+ formatTable (header:map (Node.list fields) snl) isnum
-- | Print the instance list.
printInsts :: Node.List -> Instance.List -> String