Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hcheck.hs @ 4b77c2a2

History | View | Annotate | Download (11.6 kB)

1
{-| Cluster checker.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU Gene52al Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hcheck (main, options) where
27

    
28
import Control.Monad
29
import Data.List (transpose)
30
import System.Exit
31
import System.IO
32
import Text.Printf (printf)
33

    
34
import qualified Ganeti.HTools.Container as Container
35
import qualified Ganeti.HTools.Cluster as Cluster
36
import qualified Ganeti.HTools.Group as Group
37
import qualified Ganeti.HTools.Node as Node
38
import qualified Ganeti.HTools.Instance as Instance
39

    
40
import qualified Ganeti.HTools.Program.Hbal as Hbal
41

    
42
import Ganeti.HTools.CLI
43
import Ganeti.HTools.ExtLoader
44
import Ganeti.HTools.Loader
45
import Ganeti.HTools.Types
46

    
47
-- | Options list and functions.
48
options :: [OptType]
49
options =
50
  [ oDataFile
51
  , oDiskMoves
52
  , oDynuFile
53
  , oEvacMode
54
  , oExInst
55
  , oExTags
56
  , oIAllocSrc
57
  , oInstMoves
58
  , oLuxiSocket
59
  , oMachineReadable
60
  , oMaxCpu
61
  , oMaxSolLength
62
  , oMinDisk
63
  , oMinGain
64
  , oMinGainLim
65
  , oMinScore
66
  , oNoSimulation
67
  , oOfflineNode
68
  , oQuiet
69
  , oRapiMaster
70
  , oSelInst
71
  , oShowHelp
72
  , oShowVer
73
  , oVerbose
74
  ]
75

    
76
-- | Check phase - are we before (initial) or after rebalance.
77
data Phase = Initial
78
           | Rebalanced
79

    
80
-- | Level of presented statistics.
81
data Level = GroupLvl
82
           | ClusterLvl
83

    
84
-- | Prefix for machine readable names.
85
htcPrefix :: String
86
htcPrefix = "HCHECK"
87

    
88
-- | Data showed both per group and per cluster.
89
commonData :: [(String, String)]
90
commonData =[ ("N1_FAIL", "Nodes not N+1 happy")
91
            , ("CONFLICT_TAGS", "Nodes with conflicting instances")
92
            , ("OFFLINE_PRI", "Instances having the primary node offline")
93
            , ("OFFLINE_SEC", "Instances having a secondary node offline")
94
            ]
95

    
96
-- | Data showed per group.
97
groupData :: [(String, String)]
98
groupData = commonData ++ [("SCORE", "Group score")]
99

    
100
-- | Data showed per cluster.
101
clusterData :: [(String, String)]
102
clusterData = commonData ++
103
              [ ("NEED_REBALANCE", "Cluster is not healthy") ]
104

    
105
-- | Phase-specific prefix for machine readable version.
106
phasePrefix :: Phase -> String
107
phasePrefix Initial = "INIT"
108
phasePrefix Rebalanced = "FINAL"
109

    
110
-- | Level-specific prefix for machine readable version.
111
levelPrefix :: Level -> String
112
levelPrefix GroupLvl = "GROUP"
113
levelPrefix ClusterLvl = "CLUSTER"
114

    
115
-- | Machine-readable keys to show depending on given level.
116
keysData :: Level -> [String]
117
keysData GroupLvl = map fst groupData
118
keysData ClusterLvl = map fst clusterData
119

    
120
-- | Description of phases for human readable version.
121
phaseDescr :: Phase -> String
122
phaseDescr Initial = "initially"
123
phaseDescr Rebalanced = "after rebalancing"
124

    
125
-- | Description to show depending on given level.
126
descrData :: Level -> [String]
127
descrData GroupLvl = map snd groupData
128
descrData ClusterLvl = map snd clusterData
129

    
130
-- | Human readable prefix for statistics.
131
phaseLevelDescr :: Phase -> Level -> Maybe String -> String
132
phaseLevelDescr phase GroupLvl (Just name) =
133
    printf "Statistics for group %s %s\n" name $ phaseDescr phase
134
phaseLevelDescr phase GroupLvl Nothing =
135
    printf "Statistics for group %s\n" $ phaseDescr phase
136
phaseLevelDescr phase ClusterLvl _ =
137
    printf "Cluster statistics %s\n" $ phaseDescr phase
138

    
139
-- | Format a list of key, value as a shell fragment.
140
printKeysHTC :: [(String, String)] -> IO ()
141
printKeysHTC = printKeys htcPrefix
142

    
143
-- | Prepare string from boolean value.
144
printBool :: Bool    -- ^ Whether the result should be machine readable
145
          -> Bool    -- ^ Value to be converted to string
146
          -> String
147
printBool True True = "1"
148
printBool True False = "0"
149
printBool False b = show b
150

    
151
-- | Print mapping from group idx to group uuid (only in machine
152
-- readable mode).
153
printGroupsMappings :: Group.List -> IO ()
154
printGroupsMappings gl = do
155
    let extract_vals = \g -> (printf "GROUP_UUID_%d" $ Group.idx g :: String,
156
                              Group.uuid g)
157
        printpairs = map extract_vals (Container.elems gl)
158
    printKeysHTC printpairs
159

    
160
-- | Prepare a single key given a certain level and phase of simulation.
161
prepareKey :: Level -> Phase -> Maybe String -> String -> String
162
prepareKey level phase Nothing suffix =
163
  printf "%s_%s_%s" (phasePrefix phase) (levelPrefix level) suffix
164
prepareKey level phase (Just idx) suffix =
165
  printf "%s_%s_%s_%s" (phasePrefix phase) (levelPrefix level) idx suffix
166

    
167
-- | Print all the statistics for given level and phase.
168
printStats :: Int            -- ^ Verbosity level
169
           -> Bool           -- ^ If the output should be machine readable
170
           -> Level          -- ^ Level on which we are printing
171
           -> Phase          -- ^ Current phase of simulation
172
           -> [String]       -- ^ Values to print
173
           -> Maybe String   -- ^ Additional data for groups
174
           -> IO ()
175
printStats _ True level phase values gidx = do
176
  let keys = map (prepareKey level phase gidx) (keysData level)
177
  printKeysHTC $ zip keys values
178

    
179
printStats verbose False level phase values name = do
180
  let prefix = phaseLevelDescr phase level name
181
      descr = descrData level
182
  unless (verbose == 0) $ do
183
    putStrLn ""
184
    putStr prefix
185
    mapM_ (\(a,b) -> printf "    %s: %s\n" a b) (zip descr values)
186

    
187
-- | Extract name or idx from group.
188
extractGroupData :: Bool -> Group.Group -> String
189
extractGroupData True grp = show $ Group.idx grp
190
extractGroupData False grp = Group.name grp
191

    
192
-- | Prepare values for group.
193
prepareGroupValues :: [Int] -> Double -> [String]
194
prepareGroupValues stats score =
195
  map show stats ++ [printf "%.8f" score]
196

    
197
-- | Prepare values for cluster.
198
prepareClusterValues :: Bool -> [Int] -> [Bool] -> [String]
199
prepareClusterValues machineread stats bstats =
200
  map show stats ++ map (printBool machineread) bstats
201

    
202
-- | Print all the statistics on a group level.
203
printGroupStats :: Int -> Bool -> Phase -> ((Group.Group, Double), [Int])
204
                -> IO ()
205
printGroupStats verbose machineread phase ((grp, score), stats) = do
206
  let values = prepareGroupValues stats score
207
      extradata = extractGroupData machineread grp
208
  printStats verbose machineread GroupLvl phase values (Just extradata)
209

    
210
-- | Print all the statistics on a cluster (global) level.
211
printClusterStats :: Int -> Bool -> Phase -> [Int] -> Bool -> IO ()
212
printClusterStats verbose machineread phase stats needhbal = do
213
  let values = prepareClusterValues machineread stats [needhbal]
214
  printStats verbose machineread ClusterLvl phase values Nothing
215

    
216
-- | Check if any of cluster metrics is non-zero.
217
clusterNeedsRebalance :: [Int] -> Bool
218
clusterNeedsRebalance stats = sum stats > 0
219

    
220
{- | Check group for N+1 hapiness, conflicts of primaries on nodes and
221
instances residing on offline nodes.
222

    
223
-}
224
perGroupChecks :: Group.List
225
               -> (Gdx, (Node.List, Instance.List))
226
               -> ((Group.Group, Double), [Int])
227
perGroupChecks gl (gidx, (nl, il)) =
228
  let grp = Container.find gidx gl
229
      offnl = filter Node.offline (Container.elems nl)
230
      n1violated = length $ fst $ Cluster.computeBadItems nl il
231
      conflicttags = length $ filter (>0)
232
                     (map Node.conflictingPrimaries (Container.elems nl))
233
      offline_pri = sum . map length $ map Node.pList offnl
234
      offline_sec = length $ map Node.sList offnl
235
      score = Cluster.compCV nl
236
      groupstats = [ n1violated
237
                   , conflicttags
238
                   , offline_pri
239
                   , offline_sec
240
                   ]
241
  in ((grp, score), groupstats)
242

    
243
-- | Use Hbal's iterateDepth to simulate group rebalance.
244
executeSimulation :: Options -> Cluster.Table -> Double
245
                  -> Gdx -> Node.List -> Instance.List
246
                  -> IO (Gdx, (Node.List, Instance.List))
247
executeSimulation opts ini_tbl min_cv gidx nl il = do
248
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
249
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
250

    
251
  (fin_tbl, _) <- Hbal.iterateDepth False ini_tbl
252
                                    (optMaxLength opts)
253
                                    (optDiskMoves opts)
254
                                    (optInstMoves opts)
255
                                    nmlen imlen [] min_cv
256
                                    (optMinGainLim opts) (optMinGain opts)
257
                                    (optEvacMode opts)
258

    
259
  let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
260
  return (gidx, (fin_nl, fin_il))
261

    
262
-- | Simulate group rebalance if group's score is not good
263
maybeSimulateGroupRebalance :: Options -> (Gdx, (Node.List, Instance.List))
264
                            -> IO (Gdx, (Node.List, Instance.List))
265
maybeSimulateGroupRebalance opts (gidx, (nl, il)) = do
266
  let ini_cv = Cluster.compCV nl
267
      ini_tbl = Cluster.Table nl il ini_cv []
268
      min_cv = optMinScore opts
269
  if ini_cv < min_cv
270
    then return (gidx, (nl, il))
271
    else executeSimulation opts ini_tbl min_cv gidx nl il
272

    
273
-- | Decide whether to simulate rebalance.
274
maybeSimulateRebalance :: Bool             -- ^ Whether to simulate rebalance
275
                       -> Options          -- ^ Command line options
276
                       -> [(Gdx, (Node.List, Instance.List))] -- ^ Group data
277
                       -> IO [(Gdx, (Node.List, Instance.List))]
278
maybeSimulateRebalance True opts cluster =
279
    mapM (maybeSimulateGroupRebalance opts) cluster
280
maybeSimulateRebalance False _ cluster = return cluster
281

    
282
-- | Prints the final @OK@ marker in machine readable output.
283
printFinalHTC :: Bool -> IO ()
284
printFinalHTC = printFinal htcPrefix
285

    
286
-- | Main function.
287
main :: Options -> [String] -> IO ()
288
main opts args = do
289
  unless (null args) $ do
290
         hPutStrLn stderr "Error: this program doesn't take any arguments."
291
         exitWith $ ExitFailure 1
292

    
293
  let verbose = optVerbose opts
294
      machineread = optMachineReadable opts
295
      nosimulation = optNoSimulation opts
296

    
297
  (ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
298
  nlf <- setNodeStatus opts fixed_nl
299

    
300
  let splitcluster = Cluster.splitCluster nlf ilf
301

    
302
  when machineread $ printGroupsMappings gl
303

    
304
  let groupsstats = map (perGroupChecks gl) splitcluster
305
      clusterstats = map sum . transpose . map snd $ groupsstats
306
      needrebalance = clusterNeedsRebalance clusterstats
307

    
308
  unless (verbose == 0 || machineread) $
309
    if nosimulation
310
      then putStrLn "Running in no-simulation mode."
311
      else if needrebalance
312
             then putStrLn "Cluster needs rebalancing."
313
             else putStrLn "No need to rebalance cluster, no problems found."
314

    
315
  mapM_ (printGroupStats verbose machineread Initial) groupsstats
316

    
317
  printClusterStats verbose machineread Initial clusterstats needrebalance
318

    
319
  let exitOK = nosimulation || not needrebalance
320
      simulate = not nosimulation && needrebalance
321

    
322
  rebalancedcluster <- maybeSimulateRebalance simulate opts splitcluster
323

    
324
  when (simulate || machineread) $ do
325
    let newgroupstats = map (perGroupChecks gl) rebalancedcluster
326
        newclusterstats = map sum . transpose . map snd $ newgroupstats
327
        newneedrebalance = clusterNeedsRebalance clusterstats
328

    
329
    mapM_ (printGroupStats verbose machineread Rebalanced) newgroupstats
330

    
331
    printClusterStats verbose machineread Rebalanced newclusterstats
332
                           newneedrebalance
333

    
334
  printFinalHTC machineread
335

    
336
  unless exitOK $ exitWith $ ExitFailure 1