, CStats(..)
, AllocResult
, AllocMethod
+ , AllocSolutionList
-- * Generic functions
, totalResources
, computeAllocationDelta
, tryNodeEvac
, tryChangeGroup
, collapseFailures
+ , allocList
-- * Allocation functions
, iterateAlloc
, tieredAlloc
import Data.Ord (comparing)
import Text.Printf (printf)
+import Ganeti.BasicTypes
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Instance as Instance
import qualified Ganeti.HTools.Node as Node
import qualified Ganeti.HTools.Group as Group
import Ganeti.HTools.Types
-import Ganeti.HTools.Utils
-import Ganeti.HTools.Compat
+import Ganeti.Compat
import qualified Ganeti.OpCodes as OpCodes
+import Ganeti.Utils
-- * Types
type AllocResult = (FailStats, Node.List, Instance.List,
[Instance.Instance], [CStats])
+-- | Type alias for easier handling.
+type AllocSolutionList = [(Instance.Instance, AllocSolution)]
+
-- | A type denoting the valid allocation mode/pairs.
--
-- For a one-node allocation, this will be a @Left ['Ndx']@, whereas
-> [CStats] -- ^ Running cluster stats
-> Result AllocResult -- ^ Allocation result
+-- | A simple type for the running solution of evacuations.
+type EvacInnerState =
+ Either String (Node.List, Instance.Instance, Score, Ndx)
+
-- * Utility functions
-- | Verifies the N+1 status and return the affected nodes.
, (1, "disk_load_cv")
, (1, "net_load_cv")
, (2, "pri_tags_score")
+ , (1, "spindles_cv")
]
-- | Holds the weights used by 'compCVNodes' for each metric.
-- metric: conflicting instance count
pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
pri_tags_score = fromIntegral pri_tags_inst::Double
+ -- metric: spindles %
+ spindles_cv = map (\n -> Node.instSpindles n / Node.hiSpindles n) nodes
in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
, stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
- , pri_tags_score ]
+ , pri_tags_score, stdDev spindles_cv ]
-- | Compute the /total/ variance.
compCVNodes :: [Node.Node] -> Double
new_inst, old_sdx, old_pdx)
in new_nl
+-- Failover to any (fa)
+applyMove nl inst (FailoverToAny new_pdx) = do
+ let (old_pdx, old_sdx, old_pnode, _) = instanceNodes nl inst
+ new_pnode = Container.find new_pdx nl
+ force_failover = Node.offline old_pnode
+ new_pnode' <- Node.addPriEx force_failover new_pnode inst
+ let old_pnode' = Node.removePri old_pnode inst
+ inst' = Instance.setPri inst new_pdx
+ nl' = Container.addTwo old_pdx old_pnode' new_pdx new_pnode' nl
+ return (nl', inst', new_pdx, old_sdx)
+
-- Replace the primary (f:, r:np, f)
applyMove nl inst (ReplacePrimary new_pdx) =
let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
Instance.instMatchesPolicy inst (Node.iPolicy p)
new_p <- Node.addPri p inst
let new_nl = Container.add new_pdx new_p nl
- new_score = compCV nl
+ new_score = compCV new_nl
return (new_nl, new_inst, [new_p], new_score)
-- | Tries to allocate an instance on a given pair of nodes.
let Table ini_nl ini_il _ ini_plc = ini_tbl
tmp_resu = applyMove ini_nl target move
in case tmp_resu of
- OpFail _ -> cur_tbl
- OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
+ Bad _ -> cur_tbl
+ Ok (upd_nl, new_inst, pri_idx, sec_idx) ->
let tgt_idx = Instance.idx target
upd_cvar = compCV upd_nl
upd_il = Container.add tgt_idx new_inst ini_il
-- | Given the status of the current secondary as a valid new node and
-- the current candidate target node, generate the possible moves for
-- a instance.
-possibleMoves :: Bool -- ^ Whether the secondary node is a valid new node
- -> Bool -- ^ Whether we can change the primary node
- -> Ndx -- ^ Target node candidate
- -> [IMove] -- ^ List of valid result moves
+possibleMoves :: MirrorType -- ^ The mirroring type of the instance
+ -> Bool -- ^ Whether the secondary node is a valid new node
+ -> Bool -- ^ Whether we can change the primary node
+ -> Ndx -- ^ Target node candidate
+ -> [IMove] -- ^ List of valid result moves
+
+possibleMoves MirrorNone _ _ _ = []
+
+possibleMoves MirrorExternal _ False _ = []
-possibleMoves _ False tdx =
- [ReplaceSecondary tdx]
+possibleMoves MirrorExternal _ True tdx =
+ [ FailoverToAny tdx ]
-possibleMoves True True tdx =
+possibleMoves MirrorInternal _ False tdx =
+ [ ReplaceSecondary tdx ]
+
+possibleMoves MirrorInternal True True tdx =
[ ReplaceSecondary tdx
, ReplaceAndFailover tdx
, ReplacePrimary tdx
, FailoverAndReplace tdx
]
-possibleMoves False True tdx =
+possibleMoves MirrorInternal False True tdx =
[ ReplaceSecondary tdx
, ReplaceAndFailover tdx
]
osdx = Instance.sNode target
bad_nodes = [opdx, osdx]
nodes = filter (`notElem` bad_nodes) nodes_idx
+ mir_type = Instance.mirrorType target
use_secondary = elem osdx nodes_idx && inst_moves
- aft_failover = if use_secondary -- if allowed to failover
+ aft_failover = if mir_type == MirrorInternal && use_secondary
+ -- if drbd and allowed to failover
then checkSingleStep ini_tbl target ini_tbl Failover
else ini_tbl
- all_moves = if disk_moves
- then concatMap
- (possibleMoves use_secondary inst_moves) nodes
- else []
+ all_moves =
+ if disk_moves
+ then concatMap (possibleMoves mir_type use_secondary inst_moves)
+ nodes
+ else []
in
-- iterate over the possible nodes for this instance
foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
tryBalance ini_tbl disk_moves inst_moves evac_mode mg_limit min_gain =
let Table ini_nl ini_il ini_cv _ = ini_tbl
all_inst = Container.elems ini_il
+ all_nodes = Container.elems ini_nl
+ (offline_nodes, online_nodes) = partition Node.offline all_nodes
all_inst' = if evac_mode
- then let bad_nodes = map Node.idx . filter Node.offline $
- Container.elems ini_nl
- in filter (any (`elem` bad_nodes) . Instance.allNodes)
- all_inst
- else all_inst
+ then let bad_nodes = map Node.idx offline_nodes
+ in filter (any (`elem` bad_nodes) .
+ Instance.allNodes) all_inst
+ else all_inst
reloc_inst = filter Instance.movable all_inst'
- node_idx = map Node.idx . filter (not . Node.offline) $
- Container.elems ini_nl
+ node_idx = map Node.idx online_nodes
fin_tbl = checkMove node_idx disk_moves inst_moves ini_tbl reloc_inst
(Table _ _ fin_cv _) = fin_tbl
in
-- | Update current Allocation solution and failure stats with new
-- elements.
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
-concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
+concatAllocs as (Bad reason) = as { asFailures = reason : asFailures as }
-concatAllocs as (OpGood ns) =
+concatAllocs as (Ok ns) =
let -- Choose the old or new solution, based on the cluster score
cntok = asAllocs as
osols = asSolution as
selmsg = "Selected group: " ++ group_name
return $ solution { asLog = selmsg:all_msgs }
+-- | Calculate the new instance list after allocation solution.
+updateIl :: Instance.List -- ^ The original instance list
+ -> Maybe Node.AllocElement -- ^ The result of the allocation attempt
+ -> Instance.List -- ^ The updated instance list
+updateIl il Nothing = il
+updateIl il (Just (_, xi, _, _)) = Container.add (Container.size il) xi il
+
+-- | Extract the the new node list from the allocation solution.
+extractNl :: Node.List -- ^ The original node list
+ -> Maybe Node.AllocElement -- ^ The result of the allocation attempt
+ -> Node.List -- ^ The new node list
+extractNl nl Nothing = nl
+extractNl _ (Just (xnl, _, _, _)) = xnl
+
+-- | Try to allocate a list of instances on a multi-group cluster.
+allocList :: Group.List -- ^ The group list
+ -> Node.List -- ^ The node list
+ -> Instance.List -- ^ The instance list
+ -> [(Instance.Instance, Int)] -- ^ The instance to allocate
+ -> AllocSolutionList -- ^ Possible solution list
+ -> Result (Node.List, Instance.List,
+ AllocSolutionList) -- ^ The final solution list
+allocList _ nl il [] result = Ok (nl, il, result)
+allocList gl nl il ((xi, xicnt):xies) result = do
+ ares <- tryMGAlloc gl nl il xi xicnt
+ let sol = asSolution ares
+ nl' = extractNl nl sol
+ il' = updateIl il sol
+ allocList gl nl' il' xies ((xi, ares):result)
+
-- | Function which fails if the requested mode is change secondary.
--
-- This is useful since except DRBD, no other disk template can
-> [Ndx] -- ^ The list of available nodes
-- for allocation
-> Result (Node.List, Instance.List, [OpCodes.OpCode])
-nodeEvacInstance _ _ mode (Instance.Instance
- {Instance.diskTemplate = dt@DTDiskless}) _ _ =
- failOnSecondaryChange mode dt >>
- fail "Diskless relocations not implemented yet"
+nodeEvacInstance nl il mode inst@(Instance.Instance
+ {Instance.diskTemplate = dt@DTDiskless})
+ gdx avail_nodes =
+ failOnSecondaryChange mode dt >>
+ evacOneNodeOnly nl il inst gdx avail_nodes
nodeEvacInstance _ _ _ (Instance.Instance
{Instance.diskTemplate = DTPlain}) _ _ =
{Instance.diskTemplate = DTFile}) _ _ =
fail "Instances of type file cannot be relocated"
-nodeEvacInstance _ _ mode (Instance.Instance
- {Instance.diskTemplate = dt@DTSharedFile}) _ _ =
- failOnSecondaryChange mode dt >>
- fail "Shared file relocations not implemented yet"
+nodeEvacInstance nl il mode inst@(Instance.Instance
+ {Instance.diskTemplate = dt@DTSharedFile})
+ gdx avail_nodes =
+ failOnSecondaryChange mode dt >>
+ evacOneNodeOnly nl il inst gdx avail_nodes
-nodeEvacInstance _ _ mode (Instance.Instance
- {Instance.diskTemplate = dt@DTBlock}) _ _ =
- failOnSecondaryChange mode dt >>
- fail "Block device relocations not implemented yet"
+nodeEvacInstance nl il mode inst@(Instance.Instance
+ {Instance.diskTemplate = dt@DTBlock})
+ gdx avail_nodes =
+ failOnSecondaryChange mode dt >>
+ evacOneNodeOnly nl il inst gdx avail_nodes
+
+nodeEvacInstance nl il mode inst@(Instance.Instance
+ {Instance.diskTemplate = dt@DTRbd})
+ gdx avail_nodes =
+ failOnSecondaryChange mode dt >>
+ evacOneNodeOnly nl il inst gdx avail_nodes
nodeEvacInstance nl il ChangePrimary
inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
nodeEvacInstance nl il ChangeSecondary
inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
gdx avail_nodes =
- do
- (nl', inst', _, ndx) <- annotateResult "Can't find any good node" $
- eitherToResult $
- foldl' (evacDrbdSecondaryInner nl inst gdx)
- (Left "no nodes available") avail_nodes
- let idx = Instance.idx inst
- il' = Container.add idx inst' il
- ops = iMoveToJob nl' il' idx (ReplaceSecondary ndx)
- return (nl', il', ops)
+ evacOneNodeOnly nl il inst gdx avail_nodes
-- The algorithm for ChangeAll is as follows:
--
let no_nodes = Left "no nodes available"
node_pairs = [(p,s) | p <- avail_nodes, s <- avail_nodes, p /= s]
(nl', il', ops, _) <-
- annotateResult "Can't find any good nodes for relocation" $
+ annotateResult "Can't find any good nodes for relocation" .
eitherToResult $
foldl'
(\accu nodes -> case evacDrbdAllInner nl il inst gdx nodes of
return (nl', il', ops)
--- | Inner fold function for changing secondary of a DRBD instance.
+-- | Generic function for changing one node of an instance.
+--
+-- This is similar to 'nodeEvacInstance' but will be used in a few of
+-- its sub-patterns. It folds the inner function 'evacOneNodeInner'
+-- over the list of available nodes, which results in the best choice
+-- for relocation.
+evacOneNodeOnly :: Node.List -- ^ The node list (cluster-wide)
+ -> Instance.List -- ^ Instance list (cluster-wide)
+ -> Instance.Instance -- ^ The instance to be evacuated
+ -> Gdx -- ^ The group we're targetting
+ -> [Ndx] -- ^ The list of available nodes
+ -- for allocation
+ -> Result (Node.List, Instance.List, [OpCodes.OpCode])
+evacOneNodeOnly nl il inst gdx avail_nodes = do
+ op_fn <- case Instance.mirrorType inst of
+ MirrorNone -> Bad "Can't relocate/evacuate non-mirrored instances"
+ MirrorInternal -> Ok ReplaceSecondary
+ MirrorExternal -> Ok FailoverToAny
+ (nl', inst', _, ndx) <- annotateResult "Can't find any good node" .
+ eitherToResult $
+ foldl' (evacOneNodeInner nl inst gdx op_fn)
+ (Left "no nodes available") avail_nodes
+ let idx = Instance.idx inst
+ il' = Container.add idx inst' il
+ ops = iMoveToJob nl' il' idx (op_fn ndx)
+ return (nl', il', ops)
+
+-- | Inner fold function for changing one node of an instance.
+--
+-- Depending on the instance disk template, this will either change
+-- the secondary (for DRBD) or the primary node (for shared
+-- storage). However, the operation is generic otherwise.
--
-- The running solution is either a @Left String@, which means we
-- don't have yet a working solution, or a @Right (...)@, which
-- represents a valid solution; it holds the modified node list, the
-- modified instance (after evacuation), the score of that solution,
-- and the new secondary node index.
-evacDrbdSecondaryInner :: Node.List -- ^ Cluster node list
- -> Instance.Instance -- ^ Instance being evacuated
- -> Gdx -- ^ The group index of the instance
- -> Either String ( Node.List
- , Instance.Instance
- , Score
- , Ndx) -- ^ Current best solution
- -> Ndx -- ^ Node we're evaluating as new secondary
- -> Either String ( Node.List
- , Instance.Instance
- , Score
- , Ndx) -- ^ New best solution
-evacDrbdSecondaryInner nl inst gdx accu ndx =
- case applyMove nl inst (ReplaceSecondary ndx) of
- OpFail fm ->
- case accu of
- Right _ -> accu
- Left _ -> Left $ "Node " ++ Container.nameOf nl ndx ++
- " failed: " ++ show fm
- OpGood (nl', inst', _, _) ->
+evacOneNodeInner :: Node.List -- ^ Cluster node list
+ -> Instance.Instance -- ^ Instance being evacuated
+ -> Gdx -- ^ The group index of the instance
+ -> (Ndx -> IMove) -- ^ Operation constructor
+ -> EvacInnerState -- ^ Current best solution
+ -> Ndx -- ^ Node we're evaluating as target
+ -> EvacInnerState -- ^ New best solution
+evacOneNodeInner nl inst gdx op_fn accu ndx =
+ case applyMove nl inst (op_fn ndx) of
+ Bad fm -> let fail_msg = "Node " ++ Container.nameOf nl ndx ++
+ " failed: " ++ show fm
+ in either (const $ Left fail_msg) (const accu) accu
+ Ok (nl', inst', _, _) ->
let nodes = Container.elems nl'
-- The fromJust below is ugly (it can fail nastily), but
-- at this point we should have any internal mismatches,
if Node.offline primary
then do
(nl', inst', _, _) <-
- annotateResult "Failing over to the secondary" $
+ annotateResult "Failing over to the secondary" .
opToResult $ applyMove nl inst Failover
return (nl', inst', [Failover])
else return (nl, inst, [])
-- we now need to execute a replace secondary to the future
-- primary node
(nl2, inst2, _, _) <-
- annotateResult "Changing secondary to new primary" $
+ annotateResult "Changing secondary to new primary" .
opToResult $
applyMove nl1 inst1 o1
let ops2 = o1:ops1
-- we now execute another failover, the primary stays fixed now
- (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
+ (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" .
opToResult $ applyMove nl2 inst2 o2
let ops3 = o2:ops2
-- and finally another replace secondary, to the final secondary
(nl4, inst4, _, _) <-
- annotateResult "Changing secondary to final secondary" $
+ annotateResult "Changing secondary to final secondary" .
opToResult $
applyMove nl3 inst3 o3
let ops4 = o3:ops3
computeMoves i inam mv c d =
case mv of
Failover -> ("f", [mig])
+ FailoverToAny _ -> (printf "fa:%s" c, [mig_any])
FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
- where morf = if Instance.instanceRunning i then "migrate" else "failover"
+ where morf = if Instance.isRunning i then "migrate" else "failover"
mig = printf "%s -f %s" morf inam::String
- rep n = printf "replace-disks -n %s %s" n inam
+ mig_any = printf "%s -f -n %s %s" morf c inam::String
+ rep n = printf "replace-disks -n %s %s" n inam::String
-- | Converts a placement to string format.
printSolutionLine :: Node.List -- ^ The node list
printSolutionLine nl il nmlen imlen plc pos =
let pmlen = (2*nmlen + 1)
(i, p, s, mv, c) = plc
+ old_sec = Instance.sNode inst
inst = Container.find i il
inam = Instance.alias inst
npri = Node.alias $ Container.find p nl
nsec = Node.alias $ Container.find s nl
opri = Node.alias $ Container.find (Instance.pNode inst) nl
- osec = Node.alias $ Container.find (Instance.sNode inst) nl
+ osec = Node.alias $ Container.find old_sec nl
(moves, cmds) = computeMoves inst inam mv npri nsec
- ostr = printf "%s:%s" opri osec::String
- nstr = printf "%s:%s" npri nsec::String
- in (printf " %3d. %-*s %-*s => %-*s %.8f a=%s"
- pos imlen inam pmlen ostr
- pmlen nstr c moves,
+ -- FIXME: this should check instead/also the disk template
+ ostr = if old_sec == Node.noSecondary
+ then printf "%s" opri::String
+ else printf "%s:%s" opri osec::String
+ nstr = if s == Node.noSecondary
+ then printf "%s" npri::String
+ else printf "%s:%s" npri nsec::String
+ in (printf " %3d. %-*s %-*s => %-*s %12.8f a=%s"
+ pos imlen inam pmlen ostr pmlen nstr c moves,
cmds)
-- | Return the instance and involved nodes in an instance move.
_ -> fs
snl = sortBy (comparing Node.idx) (Container.elems nl)
(header, isnum) = unzip $ map Node.showHeader fields
- in unlines . map ((:) ' ' . unwords) $
- formatTable (header:map (Node.list fields) snl) isnum
+ in printTable "" header (map (Node.list fields) snl) isnum
-- | Print the instance list.
printInsts :: Node.List -> Instance.List -> String
printInsts nl il =
let sil = sortBy (comparing Instance.idx) (Container.elems il)
- helper inst = [ if Instance.instanceRunning inst then "R" else " "
+ helper inst = [ if Instance.isRunning inst then "R" else " "
, Instance.name inst
, Container.nameOf nl (Instance.pNode inst)
, let sdx = Instance.sNode inst
header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
, "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
isnum = False:False:False:False:False:repeat True
- in unlines . map ((:) ' ' . unwords) $
- formatTable (header:map helper sil) isnum
+ in printTable "" header (map helper sil) isnum
-- | Shows statistics for a given node list.
-printStats :: Node.List -> String
-printStats nl =
+printStats :: String -> Node.List -> String
+printStats lp nl =
let dcvs = compDetailedCV $ Container.elems nl
(weights, names) = unzip detailedCVInfo
hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
- formatted = map (\(w, header, val) ->
- printf "%s=%.8f(x%.2f)" header val w::String) hd
- in intercalate ", " formatted
+ header = [ "Field", "Value", "Weight" ]
+ formatted = map (\(w, h, val) ->
+ [ h
+ , printf "%.8f" val
+ , printf "x%.2f" w
+ ]) hd
+ in printTable lp header formatted $ False:repeat True
-- | Convert a placement into a list of OpCodes (basically a job).
iMoveToJob :: Node.List -- ^ The node list; only used for node
iname = Instance.name inst
lookNode = Just . Container.nameOf nl
opF = OpCodes.OpInstanceMigrate iname True False True Nothing
+ opFA n = OpCodes.OpInstanceMigrate iname True False True (lookNode n)
opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
OpCodes.ReplaceNewSecondary [] Nothing
in case move of
Failover -> [ opF ]
+ FailoverToAny np -> [ opFA np ]
ReplacePrimary np -> [ opF, opR np, opF ]
ReplaceSecondary ns -> [ opR ns ]
ReplaceAndFailover np -> [ opR np, opF ]