Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Hcheck.hs @ bfa99f7a

History | View | Annotate | Download (11.6 kB)

1
{-| Cluster checker.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012, 2013 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU Gene52al Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hcheck
27
  ( main
28
  , options
29
  , arguments
30
  ) where
31

    
32
import Control.Monad
33
import Data.List (transpose)
34
import System.Exit
35
import Text.Printf (printf)
36

    
37
import qualified Ganeti.HTools.Container as Container
38
import qualified Ganeti.HTools.Cluster as Cluster
39
import qualified Ganeti.HTools.Group as Group
40
import qualified Ganeti.HTools.Node as Node
41
import qualified Ganeti.HTools.Instance as Instance
42

    
43
import qualified Ganeti.HTools.Program.Hbal as Hbal
44

    
45
import Ganeti.Common
46
import Ganeti.HTools.CLI
47
import Ganeti.HTools.ExtLoader
48
import Ganeti.HTools.Loader
49
import Ganeti.HTools.Types
50
import Ganeti.Utils
51

    
52
-- | Options list and functions.
53
options :: IO [OptType]
54
options = do
55
  luxi <- oLuxiSocket
56
  return
57
    [ oDataFile
58
    , oDiskMoves
59
    , oDynuFile
60
    , oEvacMode
61
    , oExInst
62
    , oExTags
63
    , oIAllocSrc
64
    , oInstMoves
65
    , luxi
66
    , oMachineReadable
67
    , oMaxCpu
68
    , oMaxSolLength
69
    , oMinDisk
70
    , oMinGain
71
    , oMinGainLim
72
    , oMinScore
73
    , oNoSimulation
74
    , oOfflineNode
75
    , oQuiet
76
    , oRapiMaster
77
    , oSelInst
78
    , oVerbose
79
    ]
80

    
81
-- | The list of arguments supported by the program.
82
arguments :: [ArgCompletion]
83
arguments = []
84

    
85
-- | Check phase - are we before (initial) or after rebalance.
86
data Phase = Initial
87
           | Rebalanced
88

    
89
-- | Level of presented statistics.
90
data Level = GroupLvl String -- ^ Group level, with name
91
           | ClusterLvl      -- ^ Cluster level
92

    
93
-- | A type alias for a group index and node\/instance lists.
94
type GroupInfo = (Gdx, (Node.List, Instance.List))
95

    
96
-- | A type alias for group stats.
97
type GroupStats = ((Group.Group, Double), [Int])
98

    
99
-- | Prefix for machine readable names.
100
htcPrefix :: String
101
htcPrefix = "HCHECK"
102

    
103
-- | Data showed both per group and per cluster.
104
commonData :: [(String, String)]
105
commonData =[ ("N1_FAIL", "Nodes not N+1 happy")
106
            , ("CONFLICT_TAGS", "Nodes with conflicting instances")
107
            , ("OFFLINE_PRI", "Instances having the primary node offline")
108
            , ("OFFLINE_SEC", "Instances having a secondary node offline")
109
            ]
110

    
111
-- | Data showed per group.
112
groupData :: [(String, String)]
113
groupData = commonData ++ [("SCORE", "Group score")]
114

    
115
-- | Data showed per cluster.
116
clusterData :: [(String, String)]
117
clusterData = commonData ++
118
              [ ("NEED_REBALANCE", "Cluster is not healthy") ]
119

    
120
-- | Phase-specific prefix for machine readable version.
121
phasePrefix :: Phase -> String
122
phasePrefix Initial = "INIT"
123
phasePrefix Rebalanced = "FINAL"
124

    
125
-- | Level-specific prefix for machine readable version.
126
levelPrefix :: Level -> String
127
levelPrefix GroupLvl {} = "GROUP"
128
levelPrefix ClusterLvl  = "CLUSTER"
129

    
130
-- | Machine-readable keys to show depending on given level.
131
keysData :: Level -> [String]
132
keysData GroupLvl {} = map fst groupData
133
keysData ClusterLvl  = map fst clusterData
134

    
135
-- | Description of phases for human readable version.
136
phaseDescr :: Phase -> String
137
phaseDescr Initial = "initially"
138
phaseDescr Rebalanced = "after rebalancing"
139

    
140
-- | Description to show depending on given level.
141
descrData :: Level -> [String]
142
descrData GroupLvl {} = map snd groupData
143
descrData ClusterLvl  = map snd clusterData
144

    
145
-- | Human readable prefix for statistics.
146
phaseLevelDescr :: Phase -> Level -> String
147
phaseLevelDescr phase (GroupLvl name) =
148
    printf "Statistics for group %s %s\n" name $ phaseDescr phase
149
phaseLevelDescr phase ClusterLvl =
150
    printf "Cluster statistics %s\n" $ phaseDescr phase
151

    
152
-- | Format a list of key, value as a shell fragment.
153
printKeysHTC :: [(String, String)] -> IO ()
154
printKeysHTC = printKeys htcPrefix
155

    
156
-- | Prepare string from boolean value.
157
printBool :: Bool    -- ^ Whether the result should be machine readable
158
          -> Bool    -- ^ Value to be converted to string
159
          -> String
160
printBool True True = "1"
161
printBool True False = "0"
162
printBool False b = show b
163

    
164
-- | Print mapping from group idx to group uuid (only in machine
165
-- readable mode).
166
printGroupsMappings :: Group.List -> IO ()
167
printGroupsMappings gl = do
168
    let extract_vals g = (printf "GROUP_UUID_%d" $ Group.idx g :: String,
169
                          Group.uuid g)
170
        printpairs = map extract_vals (Container.elems gl)
171
    printKeysHTC printpairs
172

    
173
-- | Prepare a single key given a certain level and phase of simulation.
174
prepareKey :: Level -> Phase -> String -> String
175
prepareKey level@ClusterLvl phase suffix =
176
  printf "%s_%s_%s" (phasePrefix phase) (levelPrefix level) suffix
177
prepareKey level@(GroupLvl idx) phase suffix =
178
  printf "%s_%s_%s_%s" (phasePrefix phase) (levelPrefix level) idx suffix
179

    
180
-- | Print all the statistics for given level and phase.
181
printStats :: Int            -- ^ Verbosity level
182
           -> Bool           -- ^ If the output should be machine readable
183
           -> Level          -- ^ Level on which we are printing
184
           -> Phase          -- ^ Current phase of simulation
185
           -> [String]       -- ^ Values to print
186
           -> IO ()
187
printStats _ True level phase values = do
188
  let keys = map (prepareKey level phase) (keysData level)
189
  printKeysHTC $ zip keys values
190

    
191
printStats verbose False level phase values = do
192
  let prefix = phaseLevelDescr phase level
193
      descr = descrData level
194
  unless (verbose < 1) $ do
195
    putStrLn ""
196
    putStr prefix
197
    mapM_ (uncurry (printf "    %s: %s\n")) (zip descr values)
198

    
199
-- | Extract name or idx from group.
200
extractGroupData :: Bool -> Group.Group -> String
201
extractGroupData True grp = show $ Group.idx grp
202
extractGroupData False grp = Group.name grp
203

    
204
-- | Prepare values for group.
205
prepareGroupValues :: [Int] -> Double -> [String]
206
prepareGroupValues stats score =
207
  map show stats ++ [printf "%.8f" score]
208

    
209
-- | Prepare values for cluster.
210
prepareClusterValues :: Bool -> [Int] -> [Bool] -> [String]
211
prepareClusterValues machineread stats bstats =
212
  map show stats ++ map (printBool machineread) bstats
213

    
214
-- | Print all the statistics on a group level.
215
printGroupStats :: Int -> Bool -> Phase -> GroupStats -> IO ()
216
printGroupStats verbose machineread phase ((grp, score), stats) = do
217
  let values = prepareGroupValues stats score
218
      extradata = extractGroupData machineread grp
219
  printStats verbose machineread (GroupLvl extradata) phase values
220

    
221
-- | Print all the statistics on a cluster (global) level.
222
printClusterStats :: Int -> Bool -> Phase -> [Int] -> Bool -> IO ()
223
printClusterStats verbose machineread phase stats needhbal = do
224
  let values = prepareClusterValues machineread stats [needhbal]
225
  printStats verbose machineread ClusterLvl phase values
226

    
227
-- | Check if any of cluster metrics is non-zero.
228
clusterNeedsRebalance :: [Int] -> Bool
229
clusterNeedsRebalance stats = sum stats > 0
230

    
231
{- | Check group for N+1 hapiness, conflicts of primaries on nodes and
232
instances residing on offline nodes.
233

    
234
-}
235
perGroupChecks :: Group.List -> GroupInfo -> GroupStats
236
perGroupChecks gl (gidx, (nl, il)) =
237
  let grp = Container.find gidx gl
238
      offnl = filter Node.offline (Container.elems nl)
239
      n1violated = length . fst $ Cluster.computeBadItems nl il
240
      conflicttags = length $ filter (>0)
241
                     (map Node.conflictingPrimaries (Container.elems nl))
242
      offline_pri = sum . map length $ map Node.pList offnl
243
      offline_sec = length $ map Node.sList offnl
244
      score = Cluster.compCV nl
245
      groupstats = [ n1violated
246
                   , conflicttags
247
                   , offline_pri
248
                   , offline_sec
249
                   ]
250
  in ((grp, score), groupstats)
251

    
252
-- | Use Hbal's iterateDepth to simulate group rebalance.
253
executeSimulation :: Options -> Cluster.Table -> Double
254
                  -> Gdx -> Node.List -> Instance.List
255
                  -> IO GroupInfo
256
executeSimulation opts ini_tbl min_cv gidx nl il = do
257
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
258
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
259

    
260
  (fin_tbl, _) <- Hbal.iterateDepth False ini_tbl
261
                                    (optMaxLength opts)
262
                                    (optDiskMoves opts)
263
                                    (optInstMoves opts)
264
                                    nmlen imlen [] min_cv
265
                                    (optMinGainLim opts) (optMinGain opts)
266
                                    (optEvacMode opts)
267

    
268
  let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
269
  return (gidx, (fin_nl, fin_il))
270

    
271
-- | Simulate group rebalance if group's score is not good
272
maybeSimulateGroupRebalance :: Options -> GroupInfo -> IO GroupInfo
273
maybeSimulateGroupRebalance opts (gidx, (nl, il)) = do
274
  let ini_cv = Cluster.compCV nl
275
      ini_tbl = Cluster.Table nl il ini_cv []
276
      min_cv = optMinScore opts
277
  if ini_cv < min_cv
278
    then return (gidx, (nl, il))
279
    else executeSimulation opts ini_tbl min_cv gidx nl il
280

    
281
-- | Decide whether to simulate rebalance.
282
maybeSimulateRebalance :: Bool             -- ^ Whether to simulate rebalance
283
                       -> Options          -- ^ Command line options
284
                       -> [GroupInfo]      -- ^ Group data
285
                       -> IO [GroupInfo]
286
maybeSimulateRebalance True opts cluster =
287
    mapM (maybeSimulateGroupRebalance opts) cluster
288
maybeSimulateRebalance False _ cluster = return cluster
289

    
290
-- | Prints the final @OK@ marker in machine readable output.
291
printFinalHTC :: Bool -> IO ()
292
printFinalHTC = printFinal htcPrefix
293

    
294
-- | Main function.
295
main :: Options -> [String] -> IO ()
296
main opts args = do
297
  unless (null args) $ exitErr "This program doesn't take any arguments."
298

    
299
  let verbose = optVerbose opts
300
      machineread = optMachineReadable opts
301
      nosimulation = optNoSimulation opts
302

    
303
  (ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
304
  nlf <- setNodeStatus opts fixed_nl
305

    
306
  let splitcluster = Cluster.splitCluster nlf ilf
307

    
308
  when machineread $ printGroupsMappings gl
309

    
310
  let groupsstats = map (perGroupChecks gl) splitcluster
311
      clusterstats = map sum . transpose . map snd $ groupsstats
312
      needrebalance = clusterNeedsRebalance clusterstats
313

    
314
  unless (verbose < 1 || machineread) .
315
    putStrLn $ if nosimulation
316
                 then "Running in no-simulation mode."
317
                 else if needrebalance
318
                        then "Cluster needs rebalancing."
319
                        else "No need to rebalance cluster, no problems found."
320

    
321
  mapM_ (printGroupStats verbose machineread Initial) groupsstats
322

    
323
  printClusterStats verbose machineread Initial clusterstats needrebalance
324

    
325
  let exitOK = nosimulation || not needrebalance
326
      simulate = not nosimulation && needrebalance
327

    
328
  rebalancedcluster <- maybeSimulateRebalance simulate opts splitcluster
329

    
330
  when (simulate || machineread) $ do
331
    let newgroupstats = map (perGroupChecks gl) rebalancedcluster
332
        newclusterstats = map sum . transpose . map snd $ newgroupstats
333
        newneedrebalance = clusterNeedsRebalance clusterstats
334

    
335
    mapM_ (printGroupStats verbose machineread Rebalanced) newgroupstats
336

    
337
    printClusterStats verbose machineread Rebalanced newclusterstats
338
                           newneedrebalance
339

    
340
  printFinalHTC machineread
341

    
342
  unless exitOK . exitWith $ ExitFailure 1