Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hcheck.hs @ 66ad857a

History | View | Annotate | Download (11.7 kB)

1
{-| Cluster checker.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU Gene52al Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hcheck
27
  ( main
28
  , options
29
  , arguments
30
  ) where
31

    
32
import Control.Monad
33
import Data.List (transpose)
34
import System.Exit
35
import Text.Printf (printf)
36

    
37
import qualified Ganeti.HTools.Container as Container
38
import qualified Ganeti.HTools.Cluster as Cluster
39
import qualified Ganeti.HTools.Group as Group
40
import qualified Ganeti.HTools.Node as Node
41
import qualified Ganeti.HTools.Instance as Instance
42

    
43
import qualified Ganeti.HTools.Program.Hbal as Hbal
44

    
45
import Ganeti.Common
46
import Ganeti.HTools.CLI
47
import Ganeti.HTools.ExtLoader
48
import Ganeti.HTools.Loader
49
import Ganeti.HTools.Types
50
import Ganeti.Utils
51

    
52
-- | Options list and functions.
53
options :: [OptType]
54
options =
55
  [ oDataFile
56
  , oDiskMoves
57
  , oDynuFile
58
  , oEvacMode
59
  , oExInst
60
  , oExTags
61
  , oIAllocSrc
62
  , oInstMoves
63
  , oLuxiSocket
64
  , oMachineReadable
65
  , oMaxCpu
66
  , oMaxSolLength
67
  , oMinDisk
68
  , oMinGain
69
  , oMinGainLim
70
  , oMinScore
71
  , oNoSimulation
72
  , oOfflineNode
73
  , oQuiet
74
  , oRapiMaster
75
  , oSelInst
76
  , oVerbose
77
  ]
78

    
79
-- | The list of arguments supported by the program.
80
arguments :: [ArgCompletion]
81
arguments = []
82

    
83
-- | Check phase - are we before (initial) or after rebalance.
84
data Phase = Initial
85
           | Rebalanced
86

    
87
-- | Level of presented statistics.
88
data Level = GroupLvl
89
           | ClusterLvl
90

    
91
-- | A type alias for a group index and node\/instance lists.
92
type GroupInfo = (Gdx, (Node.List, Instance.List))
93

    
94
-- | A type alias for group stats.
95
type GroupStats = ((Group.Group, Double), [Int])
96

    
97
-- | Prefix for machine readable names.
98
htcPrefix :: String
99
htcPrefix = "HCHECK"
100

    
101
-- | Data showed both per group and per cluster.
102
commonData :: [(String, String)]
103
commonData =[ ("N1_FAIL", "Nodes not N+1 happy")
104
            , ("CONFLICT_TAGS", "Nodes with conflicting instances")
105
            , ("OFFLINE_PRI", "Instances having the primary node offline")
106
            , ("OFFLINE_SEC", "Instances having a secondary node offline")
107
            ]
108

    
109
-- | Data showed per group.
110
groupData :: [(String, String)]
111
groupData = commonData ++ [("SCORE", "Group score")]
112

    
113
-- | Data showed per cluster.
114
clusterData :: [(String, String)]
115
clusterData = commonData ++
116
              [ ("NEED_REBALANCE", "Cluster is not healthy") ]
117

    
118
-- | Phase-specific prefix for machine readable version.
119
phasePrefix :: Phase -> String
120
phasePrefix Initial = "INIT"
121
phasePrefix Rebalanced = "FINAL"
122

    
123
-- | Level-specific prefix for machine readable version.
124
levelPrefix :: Level -> String
125
levelPrefix GroupLvl = "GROUP"
126
levelPrefix ClusterLvl = "CLUSTER"
127

    
128
-- | Machine-readable keys to show depending on given level.
129
keysData :: Level -> [String]
130
keysData GroupLvl = map fst groupData
131
keysData ClusterLvl = map fst clusterData
132

    
133
-- | Description of phases for human readable version.
134
phaseDescr :: Phase -> String
135
phaseDescr Initial = "initially"
136
phaseDescr Rebalanced = "after rebalancing"
137

    
138
-- | Description to show depending on given level.
139
descrData :: Level -> [String]
140
descrData GroupLvl = map snd groupData
141
descrData ClusterLvl = map snd clusterData
142

    
143
-- | Human readable prefix for statistics.
144
phaseLevelDescr :: Phase -> Level -> Maybe String -> String
145
phaseLevelDescr phase GroupLvl (Just name) =
146
    printf "Statistics for group %s %s\n" name $ phaseDescr phase
147
phaseLevelDescr phase GroupLvl Nothing =
148
    printf "Statistics for group %s\n" $ phaseDescr phase
149
phaseLevelDescr phase ClusterLvl _ =
150
    printf "Cluster statistics %s\n" $ phaseDescr phase
151

    
152
-- | Format a list of key, value as a shell fragment.
153
printKeysHTC :: [(String, String)] -> IO ()
154
printKeysHTC = printKeys htcPrefix
155

    
156
-- | Prepare string from boolean value.
157
printBool :: Bool    -- ^ Whether the result should be machine readable
158
          -> Bool    -- ^ Value to be converted to string
159
          -> String
160
printBool True True = "1"
161
printBool True False = "0"
162
printBool False b = show b
163

    
164
-- | Print mapping from group idx to group uuid (only in machine
165
-- readable mode).
166
printGroupsMappings :: Group.List -> IO ()
167
printGroupsMappings gl = do
168
    let extract_vals g = (printf "GROUP_UUID_%d" $ Group.idx g :: String,
169
                          Group.uuid g)
170
        printpairs = map extract_vals (Container.elems gl)
171
    printKeysHTC printpairs
172

    
173
-- | Prepare a single key given a certain level and phase of simulation.
174
prepareKey :: Level -> Phase -> Maybe String -> String -> String
175
prepareKey level phase Nothing suffix =
176
  printf "%s_%s_%s" (phasePrefix phase) (levelPrefix level) suffix
177
prepareKey level phase (Just idx) suffix =
178
  printf "%s_%s_%s_%s" (phasePrefix phase) (levelPrefix level) idx suffix
179

    
180
-- | Print all the statistics for given level and phase.
181
printStats :: Int            -- ^ Verbosity level
182
           -> Bool           -- ^ If the output should be machine readable
183
           -> Level          -- ^ Level on which we are printing
184
           -> Phase          -- ^ Current phase of simulation
185
           -> [String]       -- ^ Values to print
186
           -> Maybe String   -- ^ Additional data for groups
187
           -> IO ()
188
printStats _ True level phase values gidx = do
189
  let keys = map (prepareKey level phase gidx) (keysData level)
190
  printKeysHTC $ zip keys values
191

    
192
printStats verbose False level phase values name = do
193
  let prefix = phaseLevelDescr phase level name
194
      descr = descrData level
195
  unless (verbose == 0) $ do
196
    putStrLn ""
197
    putStr prefix
198
    mapM_ (uncurry (printf "    %s: %s\n")) (zip descr values)
199

    
200
-- | Extract name or idx from group.
201
extractGroupData :: Bool -> Group.Group -> String
202
extractGroupData True grp = show $ Group.idx grp
203
extractGroupData False grp = Group.name grp
204

    
205
-- | Prepare values for group.
206
prepareGroupValues :: [Int] -> Double -> [String]
207
prepareGroupValues stats score =
208
  map show stats ++ [printf "%.8f" score]
209

    
210
-- | Prepare values for cluster.
211
prepareClusterValues :: Bool -> [Int] -> [Bool] -> [String]
212
prepareClusterValues machineread stats bstats =
213
  map show stats ++ map (printBool machineread) bstats
214

    
215
-- | Print all the statistics on a group level.
216
printGroupStats :: Int -> Bool -> Phase -> GroupStats -> IO ()
217
printGroupStats verbose machineread phase ((grp, score), stats) = do
218
  let values = prepareGroupValues stats score
219
      extradata = extractGroupData machineread grp
220
  printStats verbose machineread GroupLvl phase values (Just extradata)
221

    
222
-- | Print all the statistics on a cluster (global) level.
223
printClusterStats :: Int -> Bool -> Phase -> [Int] -> Bool -> IO ()
224
printClusterStats verbose machineread phase stats needhbal = do
225
  let values = prepareClusterValues machineread stats [needhbal]
226
  printStats verbose machineread ClusterLvl phase values Nothing
227

    
228
-- | Check if any of cluster metrics is non-zero.
229
clusterNeedsRebalance :: [Int] -> Bool
230
clusterNeedsRebalance stats = sum stats > 0
231

    
232
{- | Check group for N+1 hapiness, conflicts of primaries on nodes and
233
instances residing on offline nodes.
234

    
235
-}
236
perGroupChecks :: Group.List -> GroupInfo -> GroupStats
237
perGroupChecks gl (gidx, (nl, il)) =
238
  let grp = Container.find gidx gl
239
      offnl = filter Node.offline (Container.elems nl)
240
      n1violated = length . fst $ Cluster.computeBadItems nl il
241
      conflicttags = length $ filter (>0)
242
                     (map Node.conflictingPrimaries (Container.elems nl))
243
      offline_pri = sum . map length $ map Node.pList offnl
244
      offline_sec = length $ map Node.sList offnl
245
      score = Cluster.compCV nl
246
      groupstats = [ n1violated
247
                   , conflicttags
248
                   , offline_pri
249
                   , offline_sec
250
                   ]
251
  in ((grp, score), groupstats)
252

    
253
-- | Use Hbal's iterateDepth to simulate group rebalance.
254
executeSimulation :: Options -> Cluster.Table -> Double
255
                  -> Gdx -> Node.List -> Instance.List
256
                  -> IO GroupInfo
257
executeSimulation opts ini_tbl min_cv gidx nl il = do
258
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
259
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
260

    
261
  (fin_tbl, _) <- Hbal.iterateDepth False ini_tbl
262
                                    (optMaxLength opts)
263
                                    (optDiskMoves opts)
264
                                    (optInstMoves opts)
265
                                    nmlen imlen [] min_cv
266
                                    (optMinGainLim opts) (optMinGain opts)
267
                                    (optEvacMode opts)
268

    
269
  let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
270
  return (gidx, (fin_nl, fin_il))
271

    
272
-- | Simulate group rebalance if group's score is not good
273
maybeSimulateGroupRebalance :: Options -> GroupInfo -> IO GroupInfo
274
maybeSimulateGroupRebalance opts (gidx, (nl, il)) = do
275
  let ini_cv = Cluster.compCV nl
276
      ini_tbl = Cluster.Table nl il ini_cv []
277
      min_cv = optMinScore opts
278
  if ini_cv < min_cv
279
    then return (gidx, (nl, il))
280
    else executeSimulation opts ini_tbl min_cv gidx nl il
281

    
282
-- | Decide whether to simulate rebalance.
283
maybeSimulateRebalance :: Bool             -- ^ Whether to simulate rebalance
284
                       -> Options          -- ^ Command line options
285
                       -> [GroupInfo]      -- ^ Group data
286
                       -> IO [GroupInfo]
287
maybeSimulateRebalance True opts cluster =
288
    mapM (maybeSimulateGroupRebalance opts) cluster
289
maybeSimulateRebalance False _ cluster = return cluster
290

    
291
-- | Prints the final @OK@ marker in machine readable output.
292
printFinalHTC :: Bool -> IO ()
293
printFinalHTC = printFinal htcPrefix
294

    
295
-- | Main function.
296
main :: Options -> [String] -> IO ()
297
main opts args = do
298
  unless (null args) $ exitErr "This program doesn't take any arguments."
299

    
300
  let verbose = optVerbose opts
301
      machineread = optMachineReadable opts
302
      nosimulation = optNoSimulation opts
303

    
304
  (ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
305
  nlf <- setNodeStatus opts fixed_nl
306

    
307
  let splitcluster = Cluster.splitCluster nlf ilf
308

    
309
  when machineread $ printGroupsMappings gl
310

    
311
  let groupsstats = map (perGroupChecks gl) splitcluster
312
      clusterstats = map sum . transpose . map snd $ groupsstats
313
      needrebalance = clusterNeedsRebalance clusterstats
314

    
315
  unless (verbose == 0 || machineread) .
316
    putStrLn $ if nosimulation
317
                 then "Running in no-simulation mode."
318
                 else if needrebalance
319
                        then "Cluster needs rebalancing."
320
                        else "No need to rebalance cluster, no problems found."
321

    
322
  mapM_ (printGroupStats verbose machineread Initial) groupsstats
323

    
324
  printClusterStats verbose machineread Initial clusterstats needrebalance
325

    
326
  let exitOK = nosimulation || not needrebalance
327
      simulate = not nosimulation && needrebalance
328

    
329
  rebalancedcluster <- maybeSimulateRebalance simulate opts splitcluster
330

    
331
  when (simulate || machineread) $ do
332
    let newgroupstats = map (perGroupChecks gl) rebalancedcluster
333
        newclusterstats = map sum . transpose . map snd $ newgroupstats
334
        newneedrebalance = clusterNeedsRebalance clusterstats
335

    
336
    mapM_ (printGroupStats verbose machineread Rebalanced) newgroupstats
337

    
338
    printClusterStats verbose machineread Rebalanced newclusterstats
339
                           newneedrebalance
340

    
341
  printFinalHTC machineread
342

    
343
  unless exitOK . exitWith $ ExitFailure 1