Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hcheck.hs @ 42834645

History | View | Annotate | Download (11.6 kB)

1
{-| Cluster checker.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU Gene52al Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hcheck (main, options) where
27

    
28
import Control.Monad
29
import Data.List (transpose)
30
import System.Exit
31
import System.IO
32
import Text.Printf (printf)
33

    
34
import qualified Ganeti.HTools.Container as Container
35
import qualified Ganeti.HTools.Cluster as Cluster
36
import qualified Ganeti.HTools.Group as Group
37
import qualified Ganeti.HTools.Node as Node
38
import qualified Ganeti.HTools.Instance as Instance
39

    
40
import qualified Ganeti.HTools.Program.Hbal as Hbal
41

    
42
import Ganeti.HTools.CLI
43
import Ganeti.HTools.ExtLoader
44
import Ganeti.HTools.Loader
45
import Ganeti.HTools.Types
46

    
47
-- | Options list and functions.
48
options :: [OptType]
49
options =
50
  [ oDataFile
51
  , oDiskMoves
52
  , oDynuFile
53
  , oEvacMode
54
  , oExInst
55
  , oExTags
56
  , oIAllocSrc
57
  , oInstMoves
58
  , oLuxiSocket
59
  , oMachineReadable
60
  , oMaxCpu
61
  , oMaxSolLength
62
  , oMinDisk
63
  , oMinGain
64
  , oMinGainLim
65
  , oMinScore
66
  , oNoSimulation
67
  , oOfflineNode
68
  , oQuiet
69
  , oRapiMaster
70
  , oSelInst
71
  , oVerbose
72
  ]
73

    
74
-- | Check phase - are we before (initial) or after rebalance.
75
data Phase = Initial
76
           | Rebalanced
77

    
78
-- | Level of presented statistics.
79
data Level = GroupLvl
80
           | ClusterLvl
81

    
82
-- | A type alias for a group index and node\/instance lists.
83
type GroupInfo = (Gdx, (Node.List, Instance.List))
84

    
85
-- | A type alias for group stats.
86
type GroupStats = ((Group.Group, Double), [Int])
87

    
88
-- | Prefix for machine readable names.
89
htcPrefix :: String
90
htcPrefix = "HCHECK"
91

    
92
-- | Data showed both per group and per cluster.
93
commonData :: [(String, String)]
94
commonData =[ ("N1_FAIL", "Nodes not N+1 happy")
95
            , ("CONFLICT_TAGS", "Nodes with conflicting instances")
96
            , ("OFFLINE_PRI", "Instances having the primary node offline")
97
            , ("OFFLINE_SEC", "Instances having a secondary node offline")
98
            ]
99

    
100
-- | Data showed per group.
101
groupData :: [(String, String)]
102
groupData = commonData ++ [("SCORE", "Group score")]
103

    
104
-- | Data showed per cluster.
105
clusterData :: [(String, String)]
106
clusterData = commonData ++
107
              [ ("NEED_REBALANCE", "Cluster is not healthy") ]
108

    
109
-- | Phase-specific prefix for machine readable version.
110
phasePrefix :: Phase -> String
111
phasePrefix Initial = "INIT"
112
phasePrefix Rebalanced = "FINAL"
113

    
114
-- | Level-specific prefix for machine readable version.
115
levelPrefix :: Level -> String
116
levelPrefix GroupLvl = "GROUP"
117
levelPrefix ClusterLvl = "CLUSTER"
118

    
119
-- | Machine-readable keys to show depending on given level.
120
keysData :: Level -> [String]
121
keysData GroupLvl = map fst groupData
122
keysData ClusterLvl = map fst clusterData
123

    
124
-- | Description of phases for human readable version.
125
phaseDescr :: Phase -> String
126
phaseDescr Initial = "initially"
127
phaseDescr Rebalanced = "after rebalancing"
128

    
129
-- | Description to show depending on given level.
130
descrData :: Level -> [String]
131
descrData GroupLvl = map snd groupData
132
descrData ClusterLvl = map snd clusterData
133

    
134
-- | Human readable prefix for statistics.
135
phaseLevelDescr :: Phase -> Level -> Maybe String -> String
136
phaseLevelDescr phase GroupLvl (Just name) =
137
    printf "Statistics for group %s %s\n" name $ phaseDescr phase
138
phaseLevelDescr phase GroupLvl Nothing =
139
    printf "Statistics for group %s\n" $ phaseDescr phase
140
phaseLevelDescr phase ClusterLvl _ =
141
    printf "Cluster statistics %s\n" $ phaseDescr phase
142

    
143
-- | Format a list of key, value as a shell fragment.
144
printKeysHTC :: [(String, String)] -> IO ()
145
printKeysHTC = printKeys htcPrefix
146

    
147
-- | Prepare string from boolean value.
148
printBool :: Bool    -- ^ Whether the result should be machine readable
149
          -> Bool    -- ^ Value to be converted to string
150
          -> String
151
printBool True True = "1"
152
printBool True False = "0"
153
printBool False b = show b
154

    
155
-- | Print mapping from group idx to group uuid (only in machine
156
-- readable mode).
157
printGroupsMappings :: Group.List -> IO ()
158
printGroupsMappings gl = do
159
    let extract_vals g = (printf "GROUP_UUID_%d" $ Group.idx g :: String,
160
                          Group.uuid g)
161
        printpairs = map extract_vals (Container.elems gl)
162
    printKeysHTC printpairs
163

    
164
-- | Prepare a single key given a certain level and phase of simulation.
165
prepareKey :: Level -> Phase -> Maybe String -> String -> String
166
prepareKey level phase Nothing suffix =
167
  printf "%s_%s_%s" (phasePrefix phase) (levelPrefix level) suffix
168
prepareKey level phase (Just idx) suffix =
169
  printf "%s_%s_%s_%s" (phasePrefix phase) (levelPrefix level) idx suffix
170

    
171
-- | Print all the statistics for given level and phase.
172
printStats :: Int            -- ^ Verbosity level
173
           -> Bool           -- ^ If the output should be machine readable
174
           -> Level          -- ^ Level on which we are printing
175
           -> Phase          -- ^ Current phase of simulation
176
           -> [String]       -- ^ Values to print
177
           -> Maybe String   -- ^ Additional data for groups
178
           -> IO ()
179
printStats _ True level phase values gidx = do
180
  let keys = map (prepareKey level phase gidx) (keysData level)
181
  printKeysHTC $ zip keys values
182

    
183
printStats verbose False level phase values name = do
184
  let prefix = phaseLevelDescr phase level name
185
      descr = descrData level
186
  unless (verbose == 0) $ do
187
    putStrLn ""
188
    putStr prefix
189
    mapM_ (uncurry (printf "    %s: %s\n")) (zip descr values)
190

    
191
-- | Extract name or idx from group.
192
extractGroupData :: Bool -> Group.Group -> String
193
extractGroupData True grp = show $ Group.idx grp
194
extractGroupData False grp = Group.name grp
195

    
196
-- | Prepare values for group.
197
prepareGroupValues :: [Int] -> Double -> [String]
198
prepareGroupValues stats score =
199
  map show stats ++ [printf "%.8f" score]
200

    
201
-- | Prepare values for cluster.
202
prepareClusterValues :: Bool -> [Int] -> [Bool] -> [String]
203
prepareClusterValues machineread stats bstats =
204
  map show stats ++ map (printBool machineread) bstats
205

    
206
-- | Print all the statistics on a group level.
207
printGroupStats :: Int -> Bool -> Phase -> GroupStats -> IO ()
208
printGroupStats verbose machineread phase ((grp, score), stats) = do
209
  let values = prepareGroupValues stats score
210
      extradata = extractGroupData machineread grp
211
  printStats verbose machineread GroupLvl phase values (Just extradata)
212

    
213
-- | Print all the statistics on a cluster (global) level.
214
printClusterStats :: Int -> Bool -> Phase -> [Int] -> Bool -> IO ()
215
printClusterStats verbose machineread phase stats needhbal = do
216
  let values = prepareClusterValues machineread stats [needhbal]
217
  printStats verbose machineread ClusterLvl phase values Nothing
218

    
219
-- | Check if any of cluster metrics is non-zero.
220
clusterNeedsRebalance :: [Int] -> Bool
221
clusterNeedsRebalance stats = sum stats > 0
222

    
223
{- | Check group for N+1 hapiness, conflicts of primaries on nodes and
224
instances residing on offline nodes.
225

    
226
-}
227
perGroupChecks :: Group.List -> GroupInfo -> GroupStats
228
perGroupChecks gl (gidx, (nl, il)) =
229
  let grp = Container.find gidx gl
230
      offnl = filter Node.offline (Container.elems nl)
231
      n1violated = length . fst $ Cluster.computeBadItems nl il
232
      conflicttags = length $ filter (>0)
233
                     (map Node.conflictingPrimaries (Container.elems nl))
234
      offline_pri = sum . map length $ map Node.pList offnl
235
      offline_sec = length $ map Node.sList offnl
236
      score = Cluster.compCV nl
237
      groupstats = [ n1violated
238
                   , conflicttags
239
                   , offline_pri
240
                   , offline_sec
241
                   ]
242
  in ((grp, score), groupstats)
243

    
244
-- | Use Hbal's iterateDepth to simulate group rebalance.
245
executeSimulation :: Options -> Cluster.Table -> Double
246
                  -> Gdx -> Node.List -> Instance.List
247
                  -> IO GroupInfo
248
executeSimulation opts ini_tbl min_cv gidx nl il = do
249
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
250
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
251

    
252
  (fin_tbl, _) <- Hbal.iterateDepth False ini_tbl
253
                                    (optMaxLength opts)
254
                                    (optDiskMoves opts)
255
                                    (optInstMoves opts)
256
                                    nmlen imlen [] min_cv
257
                                    (optMinGainLim opts) (optMinGain opts)
258
                                    (optEvacMode opts)
259

    
260
  let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
261
  return (gidx, (fin_nl, fin_il))
262

    
263
-- | Simulate group rebalance if group's score is not good
264
maybeSimulateGroupRebalance :: Options -> GroupInfo -> IO GroupInfo
265
maybeSimulateGroupRebalance opts (gidx, (nl, il)) = do
266
  let ini_cv = Cluster.compCV nl
267
      ini_tbl = Cluster.Table nl il ini_cv []
268
      min_cv = optMinScore opts
269
  if ini_cv < min_cv
270
    then return (gidx, (nl, il))
271
    else executeSimulation opts ini_tbl min_cv gidx nl il
272

    
273
-- | Decide whether to simulate rebalance.
274
maybeSimulateRebalance :: Bool             -- ^ Whether to simulate rebalance
275
                       -> Options          -- ^ Command line options
276
                       -> [GroupInfo]      -- ^ Group data
277
                       -> IO [GroupInfo]
278
maybeSimulateRebalance True opts cluster =
279
    mapM (maybeSimulateGroupRebalance opts) cluster
280
maybeSimulateRebalance False _ cluster = return cluster
281

    
282
-- | Prints the final @OK@ marker in machine readable output.
283
printFinalHTC :: Bool -> IO ()
284
printFinalHTC = printFinal htcPrefix
285

    
286
-- | Main function.
287
main :: Options -> [String] -> IO ()
288
main opts args = do
289
  unless (null args) $ do
290
         hPutStrLn stderr "Error: this program doesn't take any arguments."
291
         exitWith $ ExitFailure 1
292

    
293
  let verbose = optVerbose opts
294
      machineread = optMachineReadable opts
295
      nosimulation = optNoSimulation opts
296

    
297
  (ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
298
  nlf <- setNodeStatus opts fixed_nl
299

    
300
  let splitcluster = Cluster.splitCluster nlf ilf
301

    
302
  when machineread $ printGroupsMappings gl
303

    
304
  let groupsstats = map (perGroupChecks gl) splitcluster
305
      clusterstats = map sum . transpose . map snd $ groupsstats
306
      needrebalance = clusterNeedsRebalance clusterstats
307

    
308
  unless (verbose == 0 || machineread) $
309
    if nosimulation
310
      then putStrLn "Running in no-simulation mode."
311
      else if needrebalance
312
             then putStrLn "Cluster needs rebalancing."
313
             else putStrLn "No need to rebalance cluster, no problems found."
314

    
315
  mapM_ (printGroupStats verbose machineread Initial) groupsstats
316

    
317
  printClusterStats verbose machineread Initial clusterstats needrebalance
318

    
319
  let exitOK = nosimulation || not needrebalance
320
      simulate = not nosimulation && needrebalance
321

    
322
  rebalancedcluster <- maybeSimulateRebalance simulate opts splitcluster
323

    
324
  when (simulate || machineread) $ do
325
    let newgroupstats = map (perGroupChecks gl) rebalancedcluster
326
        newclusterstats = map sum . transpose . map snd $ newgroupstats
327
        newneedrebalance = clusterNeedsRebalance clusterstats
328

    
329
    mapM_ (printGroupStats verbose machineread Rebalanced) newgroupstats
330

    
331
    printClusterStats verbose machineread Rebalanced newclusterstats
332
                           newneedrebalance
333

    
334
  printFinalHTC machineread
335

    
336
  unless exitOK . exitWith $ ExitFailure 1