1 {-| Cluster rolling maintenance helper.
7 Copyright (C) 2012 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Ganeti.HTools.Program.Hroller
32 import Control.Applicative
40 import qualified Data.IntMap as IntMap
42 import qualified Ganeti.HTools.Container as Container
43 import qualified Ganeti.HTools.Node as Node
44 import qualified Ganeti.HTools.Instance as Instance
45 import qualified Ganeti.HTools.Group as Group
47 import Ganeti.BasicTypes
49 import Ganeti.HTools.CLI
50 import Ganeti.HTools.ExtLoader
51 import Ganeti.HTools.Graph
52 import Ganeti.HTools.Loader
53 import Ganeti.HTools.Types
56 -- | Options list and functions.
57 options :: IO [OptType]
80 -- | The list of arguments supported by the program.
81 arguments :: [ArgCompletion]
84 -- | Compute the result of moving an instance to a different node.
85 move :: Idx -> Ndx -> (Node.List, Instance.List)
86 -> OpResult (Node.List, Instance.List)
87 move idx new_ndx (nl, il) = do
88 let new_node = Container.find new_ndx nl
89 inst = Container.find idx il
90 old_ndx = Instance.pNode inst
91 old_node = Container.find old_ndx nl
92 new_node' <- Node.addPriEx True new_node inst
93 let old_node' = Node.removePri old_node inst
94 inst' = Instance.setPri inst new_ndx
95 nl' = Container.addTwo old_ndx old_node' new_ndx new_node' nl
96 il' = Container.add idx inst' il
99 -- | Move a non-redundant instance to one of the candidate nodes mentioned.
100 locateInstance :: Idx -> [Ndx] -> (Node.List, Instance.List)
101 -> Result (Node.List, Instance.List)
102 locateInstance idx ndxs conf =
103 msum $ map (opToResult . flip (move idx) conf) ndxs
105 -- | Move a list of non-redundant instances to some of the nodes mentioned.
106 locateInstances :: [Idx] -> [Ndx] -> (Node.List, Instance.List)
107 -> Result (Node.List, Instance.List)
108 locateInstances idxs ndxs conf =
109 foldM (\ cf idx -> locateInstance idx ndxs cf) conf idxs
111 -- | Greedily move the non-redundant instances away from a list of nodes.
112 -- The arguments are the list of nodes to be cleared, a list of nodes the
113 -- instances can be moved to, and an initial configuration. Returned is a
114 -- list of nodes that can be cleared simultaneously and the configuration
115 -- after these nodes are cleared.
116 clearNodes :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
117 -> Result ([Ndx], (Node.List, Instance.List))
118 clearNodes [] _ conf = return ([], conf)
119 clearNodes (ndx:ndxs) targets conf@(nl, _) =
120 withFirst `mplus` withoutFirst where
122 let othernodes = delete ndx targets
123 grp = Node.group $ Container.find ndx nl
124 othernodesSameGroup =
125 filter ((==) grp . Node.group . flip Container.find nl) othernodes
126 conf' <- locateInstances (nonRedundant conf ndx) othernodesSameGroup conf
127 (ndxs', conf'') <- clearNodes ndxs othernodes conf'
128 return (ndx:ndxs', conf'')
129 withoutFirst = clearNodes ndxs targets conf
131 -- | Parition a list of nodes into chunks according cluster capacity.
132 partitionNonRedundant :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
133 -> Result [([Ndx], (Node.List, Instance.List))]
134 partitionNonRedundant [] _ _ = return []
135 partitionNonRedundant ndxs targets conf = do
136 (grp, conf') <- clearNodes ndxs targets conf
137 guard . not . null $ grp
138 let remaining = ndxs \\ grp
139 part <- partitionNonRedundant remaining targets conf
140 return $ (grp, conf') : part
142 -- | Gather statistics for the coloring algorithms.
143 -- Returns a string with a summary on how each algorithm has performed,
144 -- in order of non-decreasing effectiveness, and whether it tied or lost
145 -- with the previous one.
146 getStats :: [(String, ColorVertMap)] -> String
147 getStats colorings = snd . foldr helper (0,"") $ algBySize colorings
148 where algostat (algo, cmap) = algo ++ ": " ++ size cmap ++ grpsizes cmap
149 size cmap = show (IntMap.size cmap) ++ " "
151 "(" ++ commaJoin (map (show.length) (IntMap.elems cmap)) ++ ")"
152 algBySize = sortBy (flip (comparing (IntMap.size.snd)))
153 helper :: (String, ColorVertMap) -> (Int, String) -> (Int, String)
154 helper el (0, _) = ((IntMap.size.snd) el, algostat el)
156 | old == elsize = (elsize, str ++ " TIE " ++ algostat el)
157 | otherwise = (elsize, str ++ " LOOSE " ++ algostat el)
158 where elsize = (IntMap.size.snd) el
160 -- | Predicate of belonging to a given group restriction.
161 hasGroup :: Maybe Group.Group -> Node.Node -> Bool
162 hasGroup Nothing _ = True
163 hasGroup (Just grp) node = Node.group node == Group.idx grp
165 -- | Predicate of having at least one tag in a given set.
166 hasTag :: Maybe [String] -> Node.Node -> Bool
167 hasTag Nothing _ = True
168 hasTag (Just tags) node = not . null $ Node.nTags node `intersect` tags
170 -- | From a cluster configuration, get the list of non-redundant instances
172 nonRedundant :: (Node.List, Instance.List) -> Ndx -> [Idx]
173 nonRedundant (nl, il) ndx =
174 filter (not . Instance.hasSecondary . flip Container.find il) $
175 Node.pList (Container.find ndx nl)
177 -- | Within a cluster configuration, decide if the node hosts non-redundant
179 noNonRedundant :: (Node.List, Instance.List) -> Node.Node -> Bool
180 noNonRedundant conf = null . nonRedundant conf . Node.idx
182 -- | Put the master node last.
183 -- Reorder a list groups of nodes (with additional information) such that the
184 -- master node (if present) is the last node of the last group.
185 masterLast :: [([Node.Node], a)] -> [([Node.Node], a)]
186 masterLast rebootgroups =
187 map (first $ uncurry (++)) . uncurry (++) . partition (null . snd . fst) $
188 map (first $ partition (not . Node.isMaster)) rebootgroups
190 -- | From two configurations compute the list of moved instances.
191 getMoves :: (Node.List, Instance.List) -> (Node.List, Instance.List)
192 -> [(Instance.Instance, Node.Node)]
193 getMoves (_, il) (nl', il') = do
194 ix <- Container.keys il
195 let inst = Container.find ix il
196 inst' = Container.find ix il'
197 guard $ Instance.pNode inst /= Instance.pNode inst'
198 return (inst', Container.find (Instance.pNode inst') nl')
201 main :: Options -> [String] -> IO ()
203 unless (null args) $ exitErr "This program doesn't take any arguments."
205 let verbose = optVerbose opts
206 maybeExit = if optForce opts then warn else exitErr
208 -- Load cluster data. The last two arguments, cluster tags and ipolicy, are
209 -- currently not used by this tool.
210 ini_cdata@(ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
212 let master_names = map Node.name . filter Node.isMaster . IntMap.elems $
215 [] -> maybeExit "No master node found (maybe not supported by backend)."
217 _ -> exitErr $ "Found more than one master node: " ++ show master_names
219 nlf <- setNodeStatus opts fixed_nl
221 maybeSaveData (optSaveCluster opts) "original" "before hroller run" ini_cdata
223 -- Find the wanted node group, if any.
224 wantedGroup <- case optGroup opts of
225 Nothing -> return Nothing
226 Just name -> case Container.findByName gl name of
227 Nothing -> exitErr "Cannot find target group."
228 Just grp -> return (Just grp)
230 let nodes = IntMap.filter (foldl (liftA2 (&&)) (const True)
232 , if optSkipNonRedundant opts
233 then noNonRedundant (nlf, ilf)
235 , hasTag $ optNodeTags opts
236 , hasGroup wantedGroup ])
238 mkGraph = if optOfflineMaintenance opts
239 then Node.mkNodeGraph
240 else Node.mkRebootNodeGraph nlf
242 nodeGraph <- case mkGraph nodes ilf of
243 Nothing -> exitErr "Cannot create node graph"
246 when (verbose > 2) . putStrLn $ "Node Graph: " ++ show nodeGraph
248 let colorAlgorithms = [ ("LF", colorLF)
249 , ("Dsatur", colorDsatur)
250 , ("Dcolor", colorDcolor)
252 colorings = map (\(v,a) -> (v,(colorVertMap.a) nodeGraph)) colorAlgorithms
253 smallestColoring = IntMap.elems $
254 (snd . minimumBy (comparing (IntMap.size . snd))) colorings
255 allNdx = map Node.idx . filter (not . Node.offline) . Container.elems
257 splitted = mapM (\ grp -> partitionNonRedundant grp allNdx (nlf,ilf))
259 rebootGroups <- if optIgnoreNonRedundant opts
260 then return $ zip smallestColoring (repeat (nlf, ilf))
261 else case splitted of
262 Ok splitgroups -> return $ concat splitgroups
263 Bad _ -> exitErr "Not enough capacity to move\
264 \ non-redundant instances"
265 let idToNode = (`Container.find` nodes)
267 map (first $ map idToNode . filter (`IntMap.member` nodes)) rebootGroups
268 outputRebootGroups = masterLast .
269 sortBy (flip compare `on` length . fst) $
271 confToMoveNames = map (Instance.name *** Node.name) . getMoves (nlf, ilf)
272 namesAndMoves = map (map Node.name *** confToMoveNames) outputRebootGroups
274 when (verbose > 1) . putStrLn $ getStats colorings
276 let showGroup = if optOneStepOnly opts
278 else putStrLn . commaJoin
279 showMoves :: [(String, String)] -> IO ()
280 showMoves = if optPrintMoves opts
281 then mapM_ $ putStrLn . uncurry (printf " %s %s")
282 else const $ return ()
283 showBoth = liftM2 (>>) (showGroup . fst) (showMoves . snd)
286 if optOneStepOnly opts
288 unless (optNoHeaders opts) $
289 putStrLn "'First Reboot Group'"
290 case namesAndMoves of
294 unless (optNoHeaders opts) $
295 putStrLn "'Node Reboot Groups'"
296 mapM_ showBoth namesAndMoves