7 Copyright (C) 2012 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU Gene52al Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Ganeti.HTools.Program.Hcheck
33 import Data.List (transpose)
35 import Text.Printf (printf)
37 import qualified Ganeti.HTools.Container as Container
38 import qualified Ganeti.HTools.Cluster as Cluster
39 import qualified Ganeti.HTools.Group as Group
40 import qualified Ganeti.HTools.Node as Node
41 import qualified Ganeti.HTools.Instance as Instance
43 import qualified Ganeti.HTools.Program.Hbal as Hbal
46 import Ganeti.HTools.CLI
47 import Ganeti.HTools.ExtLoader
48 import Ganeti.HTools.Loader
49 import Ganeti.HTools.Types
52 -- | Options list and functions.
53 options :: IO [OptType]
81 -- | The list of arguments supported by the program.
82 arguments :: [ArgCompletion]
85 -- | Check phase - are we before (initial) or after rebalance.
89 -- | Level of presented statistics.
93 -- | A type alias for a group index and node\/instance lists.
94 type GroupInfo = (Gdx, (Node.List, Instance.List))
96 -- | A type alias for group stats.
97 type GroupStats = ((Group.Group, Double), [Int])
99 -- | Prefix for machine readable names.
103 -- | Data showed both per group and per cluster.
104 commonData :: [(String, String)]
105 commonData =[ ("N1_FAIL", "Nodes not N+1 happy")
106 , ("CONFLICT_TAGS", "Nodes with conflicting instances")
107 , ("OFFLINE_PRI", "Instances having the primary node offline")
108 , ("OFFLINE_SEC", "Instances having a secondary node offline")
111 -- | Data showed per group.
112 groupData :: [(String, String)]
113 groupData = commonData ++ [("SCORE", "Group score")]
115 -- | Data showed per cluster.
116 clusterData :: [(String, String)]
117 clusterData = commonData ++
118 [ ("NEED_REBALANCE", "Cluster is not healthy") ]
120 -- | Phase-specific prefix for machine readable version.
121 phasePrefix :: Phase -> String
122 phasePrefix Initial = "INIT"
123 phasePrefix Rebalanced = "FINAL"
125 -- | Level-specific prefix for machine readable version.
126 levelPrefix :: Level -> String
127 levelPrefix GroupLvl = "GROUP"
128 levelPrefix ClusterLvl = "CLUSTER"
130 -- | Machine-readable keys to show depending on given level.
131 keysData :: Level -> [String]
132 keysData GroupLvl = map fst groupData
133 keysData ClusterLvl = map fst clusterData
135 -- | Description of phases for human readable version.
136 phaseDescr :: Phase -> String
137 phaseDescr Initial = "initially"
138 phaseDescr Rebalanced = "after rebalancing"
140 -- | Description to show depending on given level.
141 descrData :: Level -> [String]
142 descrData GroupLvl = map snd groupData
143 descrData ClusterLvl = map snd clusterData
145 -- | Human readable prefix for statistics.
146 phaseLevelDescr :: Phase -> Level -> Maybe String -> String
147 phaseLevelDescr phase GroupLvl (Just name) =
148 printf "Statistics for group %s %s\n" name $ phaseDescr phase
149 phaseLevelDescr phase GroupLvl Nothing =
150 printf "Statistics for group %s\n" $ phaseDescr phase
151 phaseLevelDescr phase ClusterLvl _ =
152 printf "Cluster statistics %s\n" $ phaseDescr phase
154 -- | Format a list of key, value as a shell fragment.
155 printKeysHTC :: [(String, String)] -> IO ()
156 printKeysHTC = printKeys htcPrefix
158 -- | Prepare string from boolean value.
159 printBool :: Bool -- ^ Whether the result should be machine readable
160 -> Bool -- ^ Value to be converted to string
162 printBool True True = "1"
163 printBool True False = "0"
164 printBool False b = show b
166 -- | Print mapping from group idx to group uuid (only in machine
168 printGroupsMappings :: Group.List -> IO ()
169 printGroupsMappings gl = do
170 let extract_vals g = (printf "GROUP_UUID_%d" $ Group.idx g :: String,
172 printpairs = map extract_vals (Container.elems gl)
173 printKeysHTC printpairs
175 -- | Prepare a single key given a certain level and phase of simulation.
176 prepareKey :: Level -> Phase -> Maybe String -> String -> String
177 prepareKey level phase Nothing suffix =
178 printf "%s_%s_%s" (phasePrefix phase) (levelPrefix level) suffix
179 prepareKey level phase (Just idx) suffix =
180 printf "%s_%s_%s_%s" (phasePrefix phase) (levelPrefix level) idx suffix
182 -- | Print all the statistics for given level and phase.
183 printStats :: Int -- ^ Verbosity level
184 -> Bool -- ^ If the output should be machine readable
185 -> Level -- ^ Level on which we are printing
186 -> Phase -- ^ Current phase of simulation
187 -> [String] -- ^ Values to print
188 -> Maybe String -- ^ Additional data for groups
190 printStats _ True level phase values gidx = do
191 let keys = map (prepareKey level phase gidx) (keysData level)
192 printKeysHTC $ zip keys values
194 printStats verbose False level phase values name = do
195 let prefix = phaseLevelDescr phase level name
196 descr = descrData level
197 unless (verbose == 0) $ do
200 mapM_ (uncurry (printf " %s: %s\n")) (zip descr values)
202 -- | Extract name or idx from group.
203 extractGroupData :: Bool -> Group.Group -> String
204 extractGroupData True grp = show $ Group.idx grp
205 extractGroupData False grp = Group.name grp
207 -- | Prepare values for group.
208 prepareGroupValues :: [Int] -> Double -> [String]
209 prepareGroupValues stats score =
210 map show stats ++ [printf "%.8f" score]
212 -- | Prepare values for cluster.
213 prepareClusterValues :: Bool -> [Int] -> [Bool] -> [String]
214 prepareClusterValues machineread stats bstats =
215 map show stats ++ map (printBool machineread) bstats
217 -- | Print all the statistics on a group level.
218 printGroupStats :: Int -> Bool -> Phase -> GroupStats -> IO ()
219 printGroupStats verbose machineread phase ((grp, score), stats) = do
220 let values = prepareGroupValues stats score
221 extradata = extractGroupData machineread grp
222 printStats verbose machineread GroupLvl phase values (Just extradata)
224 -- | Print all the statistics on a cluster (global) level.
225 printClusterStats :: Int -> Bool -> Phase -> [Int] -> Bool -> IO ()
226 printClusterStats verbose machineread phase stats needhbal = do
227 let values = prepareClusterValues machineread stats [needhbal]
228 printStats verbose machineread ClusterLvl phase values Nothing
230 -- | Check if any of cluster metrics is non-zero.
231 clusterNeedsRebalance :: [Int] -> Bool
232 clusterNeedsRebalance stats = sum stats > 0
234 {- | Check group for N+1 hapiness, conflicts of primaries on nodes and
235 instances residing on offline nodes.
238 perGroupChecks :: Group.List -> GroupInfo -> GroupStats
239 perGroupChecks gl (gidx, (nl, il)) =
240 let grp = Container.find gidx gl
241 offnl = filter Node.offline (Container.elems nl)
242 n1violated = length . fst $ Cluster.computeBadItems nl il
243 conflicttags = length $ filter (>0)
244 (map Node.conflictingPrimaries (Container.elems nl))
245 offline_pri = sum . map length $ map Node.pList offnl
246 offline_sec = length $ map Node.sList offnl
247 score = Cluster.compCV nl
248 groupstats = [ n1violated
253 in ((grp, score), groupstats)
255 -- | Use Hbal's iterateDepth to simulate group rebalance.
256 executeSimulation :: Options -> Cluster.Table -> Double
257 -> Gdx -> Node.List -> Instance.List
259 executeSimulation opts ini_tbl min_cv gidx nl il = do
260 let imlen = maximum . map (length . Instance.alias) $ Container.elems il
261 nmlen = maximum . map (length . Node.alias) $ Container.elems nl
263 (fin_tbl, _) <- Hbal.iterateDepth False ini_tbl
267 nmlen imlen [] min_cv
268 (optMinGainLim opts) (optMinGain opts)
271 let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
272 return (gidx, (fin_nl, fin_il))
274 -- | Simulate group rebalance if group's score is not good
275 maybeSimulateGroupRebalance :: Options -> GroupInfo -> IO GroupInfo
276 maybeSimulateGroupRebalance opts (gidx, (nl, il)) = do
277 let ini_cv = Cluster.compCV nl
278 ini_tbl = Cluster.Table nl il ini_cv []
279 min_cv = optMinScore opts
281 then return (gidx, (nl, il))
282 else executeSimulation opts ini_tbl min_cv gidx nl il
284 -- | Decide whether to simulate rebalance.
285 maybeSimulateRebalance :: Bool -- ^ Whether to simulate rebalance
286 -> Options -- ^ Command line options
287 -> [GroupInfo] -- ^ Group data
289 maybeSimulateRebalance True opts cluster =
290 mapM (maybeSimulateGroupRebalance opts) cluster
291 maybeSimulateRebalance False _ cluster = return cluster
293 -- | Prints the final @OK@ marker in machine readable output.
294 printFinalHTC :: Bool -> IO ()
295 printFinalHTC = printFinal htcPrefix
298 main :: Options -> [String] -> IO ()
300 unless (null args) $ exitErr "This program doesn't take any arguments."
302 let verbose = optVerbose opts
303 machineread = optMachineReadable opts
304 nosimulation = optNoSimulation opts
306 (ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
307 nlf <- setNodeStatus opts fixed_nl
309 let splitcluster = Cluster.splitCluster nlf ilf
311 when machineread $ printGroupsMappings gl
313 let groupsstats = map (perGroupChecks gl) splitcluster
314 clusterstats = map sum . transpose . map snd $ groupsstats
315 needrebalance = clusterNeedsRebalance clusterstats
317 unless (verbose == 0 || machineread) .
318 putStrLn $ if nosimulation
319 then "Running in no-simulation mode."
320 else if needrebalance
321 then "Cluster needs rebalancing."
322 else "No need to rebalance cluster, no problems found."
324 mapM_ (printGroupStats verbose machineread Initial) groupsstats
326 printClusterStats verbose machineread Initial clusterstats needrebalance
328 let exitOK = nosimulation || not needrebalance
329 simulate = not nosimulation && needrebalance
331 rebalancedcluster <- maybeSimulateRebalance simulate opts splitcluster
333 when (simulate || machineread) $ do
334 let newgroupstats = map (perGroupChecks gl) rebalancedcluster
335 newclusterstats = map sum . transpose . map snd $ newgroupstats
336 newneedrebalance = clusterNeedsRebalance clusterstats
338 mapM_ (printGroupStats verbose machineread Rebalanced) newgroupstats
340 printClusterStats verbose machineread Rebalanced newclusterstats
343 printFinalHTC machineread
345 unless exitOK . exitWith $ ExitFailure 1