Revision 1213f9d6
b/htools/Ganeti/HTools/Program/Hcheck.hs | ||
---|---|---|
26 | 26 |
module Ganeti.HTools.Program.Hcheck (main, options) where |
27 | 27 |
|
28 | 28 |
import Control.Monad |
29 |
import List (transpose) |
|
29 | 30 |
import System.Exit |
30 | 31 |
import System.IO |
32 |
import Text.Printf (printf) |
|
33 |
|
|
34 |
import qualified Ganeti.HTools.Container as Container |
|
35 |
import qualified Ganeti.HTools.Cluster as Cluster |
|
36 |
import qualified Ganeti.HTools.Node as Node |
|
37 |
import qualified Ganeti.HTools.Instance as Instance |
|
38 |
|
|
39 |
import qualified Ganeti.HTools.Program.Hbal as Hbal |
|
31 | 40 |
|
32 | 41 |
import Ganeti.HTools.CLI |
42 |
import Ganeti.HTools.ExtLoader |
|
43 |
import Ganeti.HTools.Loader |
|
44 |
import Ganeti.HTools.Types |
|
33 | 45 |
|
34 | 46 |
-- | Options list and functions. |
35 | 47 |
options :: [OptType] |
... | ... | |
60 | 72 |
, oVerbose |
61 | 73 |
] |
62 | 74 |
|
75 |
-- | Stage at which the checks are run: on the cluster as loaded
-- ('Initial') or on the result of the simulated rebalance ('Rebalanced').
data Phase
  = Initial
  | Rebalanced
|
78 |
|
|
79 |
-- | Prefix used for the machine-readable names; it is handed to
-- @printFinal@ by 'printFinalHTC'.
htcPrefix :: String
htcPrefix = "HCHECK"
|
82 |
|
|
83 |
-- | Wording for a 'Phase' as it appears in the human-readable
-- statistics headers.
phaseDescription :: Phase -> String
phaseDescription phase =
  case phase of
    Initial    -> "initially"
    Rebalanced -> "after rebalancing"
|
87 |
|
|
88 |
-- | Data shown both per group and per cluster.
--
-- Each entry pairs a short upper-case identifier with the label printed in
-- the human-readable statistics. The order matters: the printers zip this
-- list with the list of computed counters.
commonData :: [(String, String)]
commonData =
  [ ("N1_FAIL", "Nodes not N+1 happy")
  , ("CONFLICT_TAGS", "Nodes with conflicting instances")
  , ("OFFLINE_PRI", "Instances with primary on an offline node")
  , ("OFFLINE_SEC", "Instances with secondary on an offline node")
  ]
|
95 |
|
|
96 |
-- | Data shown per group: the common entries followed by the group score.
groupData :: [(String, String)]
groupData = commonData ++ [scoreEntry]
  where
    scoreEntry = ("SCORE", "Group score")
|
99 |
|
|
100 |
-- | Data shown per cluster: the common entries followed by the overall
-- rebalance verdict.
clusterData :: [(String, String)]
clusterData = commonData ++ [rebalanceEntry]
  where
    rebalanceEntry = ("NEED_REBALANCE", "Cluster is not healthy")
|
103 |
|
|
104 |
-- | Print the statistics of a single group in human-readable form.
--
-- Nothing is printed when machine-readable output is requested (second
-- argument) or when the verbosity (first argument) is zero. The counters
-- are paired positionally with the labels of 'groupData', the score being
-- the last value.
printGroupStats :: Int -> Bool -> Phase -> Gdx -> [Int] -> Double -> IO ()
printGroupStats verbose machineread phase gidx stats score
  | machineread || verbose == 0 = return ()
  | otherwise = do
      printf "\nStatistics for group %d %s\n"
        gidx (phaseDescription phase) :: IO ()
      forM_ (zip groupData rendered) $ \((_, label), value) ->
        printf " %s: %s\n" label value :: IO ()
  where
    -- Integer counters first, then the score with eight decimals.
    rendered :: [String]
    rendered = [printf "%d" s | s <- stats] ++ [printf "%.8f" score]
|
115 |
|
|
116 |
-- | Print the cluster-wide statistics and report whether the cluster needs
-- a rebalance.
--
-- The result is 'True' when the sum of all counters is positive. The
-- human-readable listing is only emitted when machine-readable output is
-- off (second argument) and the verbosity (first argument) is non-zero.
printClusterStats :: Int -> Bool -> Phase -> [Int] -> IO (Bool)
printClusterStats verbose machineread phase stats = do
  unless (machineread || verbose == 0) $ do
    printf "\nCluster statistics %s\n" (phaseDescription phase) :: IO ()
    forM_ (zip clusterData values) $ \((_, label), value) ->
      printf " %s: %s\n" label value :: IO ()
  return unhealthy
  where
    unhealthy :: Bool
    unhealthy = sum stats > 0
    -- Counters in 'clusterData' order, then the verdict itself.
    values :: [String]
    values = [printf "%d" s | s <- stats] ++ [show unhealthy]
|
130 |
|
|
131 |
-- | Check a group for N+1 happiness, conflicts of primaries on nodes and
-- instances residing on offline nodes.
--
-- The statistics are printed through 'printGroupStats' and the counters
-- are returned in the order of 'commonData': N+1 failures, nodes with
-- conflicting primaries, instances with their primary on an offline node,
-- and instances with their secondary on an offline node.
perGroupChecks :: Int -> Bool -> Phase -> (Gdx, (Node.List, Instance.List))
               -> IO [Int]
perGroupChecks verbose machineread phase (gidx, (nl, il)) = do
  let nodes = Container.elems nl
      offnl = filter Node.offline nodes
      n1violated = length . fst $ Cluster.computeBadItems nl il
      conflicttags = length $ filter (> 0)
                     (map Node.conflictingPrimaries nodes)
      offline_pri = sum $ map (length . Node.pList) offnl
      -- Count the secondary instances themselves; taking the length of the
      -- mapped list would merely count the offline nodes.
      offline_sec = sum $ map (length . Node.sList) offnl
      score = Cluster.compCV nl
      groupstats = [ n1violated
                   , conflicttags
                   , offline_pri
                   , offline_sec
                   ]
  printGroupStats verbose machineread phase gidx groupstats score
  return groupstats
|
152 |
|
|
153 |
-- | Use Hbal's iterateDepth to simulate a group rebalance.
--
-- When the initial score of the group is already below the configured
-- minimum score ('optMinScore'), the group is returned unchanged.
-- Otherwise the node and instance lists of the final table produced by
-- 'Hbal.iterateDepth' are returned under the same group index.
simulateRebalance :: Options
                  -> (Gdx, (Node.List, Instance.List))
                  -> IO (Gdx, (Node.List, Instance.List))
simulateRebalance opts (gidx, (nl, il)) = do
  let ini_cv = Cluster.compCV nl
      ini_tbl = Cluster.Table nl il ini_cv []
      min_cv = optMinScore opts

  if ini_cv < min_cv
    then return (gidx, (nl, il))
    else do
      -- Longest alias lengths; seeded with 0 so that an empty container
      -- cannot make 'maximum' fail (lengths are never negative, so the
      -- result for non-empty containers is unchanged).
      let imlen = maximum . (0 :) . map (length . Instance.alias) $
                  Container.elems il
          nmlen = maximum . (0 :) . map (length . Node.alias) $
                  Container.elems nl

      -- NOTE(review): the leading False presumably disables printing of
      -- the individual moves; confirm against Hbal.iterateDepth.
      (fin_tbl, _) <- Hbal.iterateDepth False ini_tbl
                      (optMaxLength opts)
                      (optDiskMoves opts)
                      (optInstMoves opts)
                      nmlen imlen [] min_cv
                      (optMinGainLim opts) (optMinGain opts)
                      (optEvacMode opts)

      let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
      return (gidx, (fin_nl, fin_il))
|
179 |
|
|
180 |
-- | Prints the final @OK@ marker in machine readable output, by calling
-- @printFinal@ with the 'htcPrefix' prefix.
printFinalHTC :: Bool -> IO ()
printFinalHTC machineread = printFinal htcPrefix machineread
|
183 |
|
|
63 | 184 |
-- | Main function.
--
-- Runs the per-group and cluster-wide checks on the loaded cluster, then,
-- unless told otherwise, simulates a rebalance of every group and runs the
-- checks again on the result.
--
-- Exits with failure when command-line arguments are given or when split
-- instances are found. Exits successfully, without simulating, in
-- no-simulation mode or when the initial checks report no problems.
main :: Options -> [String] -> IO ()
main opts args = do
  unless (null args) $ do
    hPutStrLn stderr "Error: this program doesn't take any arguments."
    exitWith $ ExitFailure 1

  let verbose = optVerbose opts
      machineread = optMachineReadable opts
      nosimulation = optNoSimulation opts

  (ClusterData _ fixed_nl ilf _ _) <- loadExternalData opts
  nlf <- setNodeStatus opts fixed_nl

  let splitinstances = Cluster.findSplitInstances nlf ilf
      splitcluster = Cluster.splitCluster nlf ilf

  -- Initial state: per-group counters, summed column-wise for the cluster.
  groupsstats <- mapM (perGroupChecks verbose machineread Initial) splitcluster
  let clusterstats = map sum (transpose groupsstats) :: [Int]
  needrebalance <- printClusterStats verbose machineread Initial clusterstats

  when nosimulation $ do
    unless (verbose == 0 || machineread) $
      printf "Running in no-simulation mode. Exiting.\n"
    printFinalHTC machineread
    exitWith ExitSuccess

  unless (null splitinstances) $ do
    unless (verbose == 0 || machineread) $
      printf "Split instances found, simulation of re-balancing not possible\n"
    exitWith $ ExitFailure 1

  unless needrebalance $ do
    unless (verbose == 0 || machineread) $
      printf "No need to rebalance cluster, no problems found. Exiting.\n"
    printFinalHTC machineread
    exitWith ExitSuccess

  -- Simulated state: rebalance every group, then repeat the checks.
  rebalancedcluster <- mapM (simulateRebalance opts) splitcluster
  newgroupstats <- mapM (perGroupChecks verbose machineread Rebalanced)
                   rebalancedcluster
  let newclusterstats = map sum (transpose newgroupstats) :: [Int]
  _ <- printClusterStats verbose machineread Rebalanced newclusterstats

  printFinalHTC machineread
Also available in: Unified diff