Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hcheck.hs @ 2cdaf225

History | View | Annotate | Download (11.6 kB)

1
{-| Cluster checker.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU Gene52al Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hcheck (main, options) where
27

    
28
import Control.Monad
29
import Data.List (transpose)
30
import System.Exit
31
import System.IO
32
import Text.Printf (printf)
33

    
34
import qualified Ganeti.HTools.Container as Container
35
import qualified Ganeti.HTools.Cluster as Cluster
36
import qualified Ganeti.HTools.Group as Group
37
import qualified Ganeti.HTools.Node as Node
38
import qualified Ganeti.HTools.Instance as Instance
39

    
40
import qualified Ganeti.HTools.Program.Hbal as Hbal
41

    
42
import Ganeti.HTools.CLI
43
import Ganeti.HTools.ExtLoader
44
import Ganeti.HTools.Loader
45
import Ganeti.HTools.Types
46

    
47
-- | Options list and functions.
48
options :: [OptType]
49
options =
50
  [ oDataFile
51
  , oDiskMoves
52
  , oDynuFile
53
  , oEvacMode
54
  , oExInst
55
  , oExTags
56
  , oIAllocSrc
57
  , oInstMoves
58
  , oLuxiSocket
59
  , oMachineReadable
60
  , oMaxCpu
61
  , oMaxSolLength
62
  , oMinDisk
63
  , oMinGain
64
  , oMinGainLim
65
  , oMinScore
66
  , oNoSimulation
67
  , oOfflineNode
68
  , oQuiet
69
  , oRapiMaster
70
  , oSelInst
71
  , oShowHelp
72
  , oShowVer
73
  , oVerbose
74
  ]
75

    
76
-- | Check phase - are we before (initial) or after rebalance.
77
data Phase = Initial
78
           | Rebalanced
79

    
80
-- | Level of presented statistics.
81
data Level = GroupLvl
82
           | ClusterLvl
83

    
84
-- | A type alias for a group index and node\/instance lists.
85
type GroupInfo = (Gdx, (Node.List, Instance.List))
86

    
87
-- | A type alias for group stats.
88
type GroupStats = ((Group.Group, Double), [Int])
89

    
90
-- | Prefix for machine readable names.
91
htcPrefix :: String
92
htcPrefix = "HCHECK"
93

    
94
-- | Data showed both per group and per cluster.
95
commonData :: [(String, String)]
96
commonData =[ ("N1_FAIL", "Nodes not N+1 happy")
97
            , ("CONFLICT_TAGS", "Nodes with conflicting instances")
98
            , ("OFFLINE_PRI", "Instances having the primary node offline")
99
            , ("OFFLINE_SEC", "Instances having a secondary node offline")
100
            ]
101

    
102
-- | Data showed per group.
103
groupData :: [(String, String)]
104
groupData = commonData ++ [("SCORE", "Group score")]
105

    
106
-- | Data showed per cluster.
107
clusterData :: [(String, String)]
108
clusterData = commonData ++
109
              [ ("NEED_REBALANCE", "Cluster is not healthy") ]
110

    
111
-- | Phase-specific prefix for machine readable version.
112
phasePrefix :: Phase -> String
113
phasePrefix Initial = "INIT"
114
phasePrefix Rebalanced = "FINAL"
115

    
116
-- | Level-specific prefix for machine readable version.
117
levelPrefix :: Level -> String
118
levelPrefix GroupLvl = "GROUP"
119
levelPrefix ClusterLvl = "CLUSTER"
120

    
121
-- | Machine-readable keys to show depending on given level.
122
keysData :: Level -> [String]
123
keysData GroupLvl = map fst groupData
124
keysData ClusterLvl = map fst clusterData
125

    
126
-- | Description of phases for human readable version.
127
phaseDescr :: Phase -> String
128
phaseDescr Initial = "initially"
129
phaseDescr Rebalanced = "after rebalancing"
130

    
131
-- | Description to show depending on given level.
132
descrData :: Level -> [String]
133
descrData GroupLvl = map snd groupData
134
descrData ClusterLvl = map snd clusterData
135

    
136
-- | Human readable prefix for statistics.
137
phaseLevelDescr :: Phase -> Level -> Maybe String -> String
138
phaseLevelDescr phase GroupLvl (Just name) =
139
    printf "Statistics for group %s %s\n" name $ phaseDescr phase
140
phaseLevelDescr phase GroupLvl Nothing =
141
    printf "Statistics for group %s\n" $ phaseDescr phase
142
phaseLevelDescr phase ClusterLvl _ =
143
    printf "Cluster statistics %s\n" $ phaseDescr phase
144

    
145
-- | Format a list of key, value as a shell fragment.
146
printKeysHTC :: [(String, String)] -> IO ()
147
printKeysHTC = printKeys htcPrefix
148

    
149
-- | Prepare string from boolean value.
150
printBool :: Bool    -- ^ Whether the result should be machine readable
151
          -> Bool    -- ^ Value to be converted to string
152
          -> String
153
printBool True True = "1"
154
printBool True False = "0"
155
printBool False b = show b
156

    
157
-- | Print mapping from group idx to group uuid (only in machine
158
-- readable mode).
159
printGroupsMappings :: Group.List -> IO ()
160
printGroupsMappings gl = do
161
    let extract_vals = \g -> (printf "GROUP_UUID_%d" $ Group.idx g :: String,
162
                              Group.uuid g)
163
        printpairs = map extract_vals (Container.elems gl)
164
    printKeysHTC printpairs
165

    
166
-- | Prepare a single key given a certain level and phase of simulation.
167
prepareKey :: Level -> Phase -> Maybe String -> String -> String
168
prepareKey level phase Nothing suffix =
169
  printf "%s_%s_%s" (phasePrefix phase) (levelPrefix level) suffix
170
prepareKey level phase (Just idx) suffix =
171
  printf "%s_%s_%s_%s" (phasePrefix phase) (levelPrefix level) idx suffix
172

    
173
-- | Print all the statistics for given level and phase.
174
printStats :: Int            -- ^ Verbosity level
175
           -> Bool           -- ^ If the output should be machine readable
176
           -> Level          -- ^ Level on which we are printing
177
           -> Phase          -- ^ Current phase of simulation
178
           -> [String]       -- ^ Values to print
179
           -> Maybe String   -- ^ Additional data for groups
180
           -> IO ()
181
printStats _ True level phase values gidx = do
182
  let keys = map (prepareKey level phase gidx) (keysData level)
183
  printKeysHTC $ zip keys values
184

    
185
printStats verbose False level phase values name = do
186
  let prefix = phaseLevelDescr phase level name
187
      descr = descrData level
188
  unless (verbose == 0) $ do
189
    putStrLn ""
190
    putStr prefix
191
    mapM_ (uncurry (printf "    %s: %s\n")) (zip descr values)
192

    
193
-- | Extract name or idx from group.
194
extractGroupData :: Bool -> Group.Group -> String
195
extractGroupData True grp = show $ Group.idx grp
196
extractGroupData False grp = Group.name grp
197

    
198
-- | Prepare values for group.
199
prepareGroupValues :: [Int] -> Double -> [String]
200
prepareGroupValues stats score =
201
  map show stats ++ [printf "%.8f" score]
202

    
203
-- | Prepare values for cluster.
204
prepareClusterValues :: Bool -> [Int] -> [Bool] -> [String]
205
prepareClusterValues machineread stats bstats =
206
  map show stats ++ map (printBool machineread) bstats
207

    
208
-- | Print all the statistics on a group level.
209
printGroupStats :: Int -> Bool -> Phase -> GroupStats -> IO ()
210
printGroupStats verbose machineread phase ((grp, score), stats) = do
211
  let values = prepareGroupValues stats score
212
      extradata = extractGroupData machineread grp
213
  printStats verbose machineread GroupLvl phase values (Just extradata)
214

    
215
-- | Print all the statistics on a cluster (global) level.
216
printClusterStats :: Int -> Bool -> Phase -> [Int] -> Bool -> IO ()
217
printClusterStats verbose machineread phase stats needhbal = do
218
  let values = prepareClusterValues machineread stats [needhbal]
219
  printStats verbose machineread ClusterLvl phase values Nothing
220

    
221
-- | Check if any of cluster metrics is non-zero.
222
clusterNeedsRebalance :: [Int] -> Bool
223
clusterNeedsRebalance stats = sum stats > 0
224

    
225
{- | Check group for N+1 hapiness, conflicts of primaries on nodes and
226
instances residing on offline nodes.
227

    
228
-}
229
perGroupChecks :: Group.List -> GroupInfo -> GroupStats
230
perGroupChecks gl (gidx, (nl, il)) =
231
  let grp = Container.find gidx gl
232
      offnl = filter Node.offline (Container.elems nl)
233
      n1violated = length . fst $ Cluster.computeBadItems nl il
234
      conflicttags = length $ filter (>0)
235
                     (map Node.conflictingPrimaries (Container.elems nl))
236
      offline_pri = sum . map length $ map Node.pList offnl
237
      offline_sec = length $ map Node.sList offnl
238
      score = Cluster.compCV nl
239
      groupstats = [ n1violated
240
                   , conflicttags
241
                   , offline_pri
242
                   , offline_sec
243
                   ]
244
  in ((grp, score), groupstats)
245

    
246
-- | Use Hbal's iterateDepth to simulate group rebalance.
247
executeSimulation :: Options -> Cluster.Table -> Double
248
                  -> Gdx -> Node.List -> Instance.List
249
                  -> IO GroupInfo
250
executeSimulation opts ini_tbl min_cv gidx nl il = do
251
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
252
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
253

    
254
  (fin_tbl, _) <- Hbal.iterateDepth False ini_tbl
255
                                    (optMaxLength opts)
256
                                    (optDiskMoves opts)
257
                                    (optInstMoves opts)
258
                                    nmlen imlen [] min_cv
259
                                    (optMinGainLim opts) (optMinGain opts)
260
                                    (optEvacMode opts)
261

    
262
  let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
263
  return (gidx, (fin_nl, fin_il))
264

    
265
-- | Simulate group rebalance if group's score is not good
266
maybeSimulateGroupRebalance :: Options -> GroupInfo -> IO GroupInfo
267
maybeSimulateGroupRebalance opts (gidx, (nl, il)) = do
268
  let ini_cv = Cluster.compCV nl
269
      ini_tbl = Cluster.Table nl il ini_cv []
270
      min_cv = optMinScore opts
271
  if ini_cv < min_cv
272
    then return (gidx, (nl, il))
273
    else executeSimulation opts ini_tbl min_cv gidx nl il
274

    
275
-- | Decide whether to simulate rebalance.
276
maybeSimulateRebalance :: Bool             -- ^ Whether to simulate rebalance
277
                       -> Options          -- ^ Command line options
278
                       -> [GroupInfo]      -- ^ Group data
279
                       -> IO [GroupInfo]
280
maybeSimulateRebalance True opts cluster =
281
    mapM (maybeSimulateGroupRebalance opts) cluster
282
maybeSimulateRebalance False _ cluster = return cluster
283

    
284
-- | Prints the final @OK@ marker in machine readable output.
285
printFinalHTC :: Bool -> IO ()
286
printFinalHTC = printFinal htcPrefix
287

    
288
-- | Main function.
289
main :: Options -> [String] -> IO ()
290
main opts args = do
291
  unless (null args) $ do
292
         hPutStrLn stderr "Error: this program doesn't take any arguments."
293
         exitWith $ ExitFailure 1
294

    
295
  let verbose = optVerbose opts
296
      machineread = optMachineReadable opts
297
      nosimulation = optNoSimulation opts
298

    
299
  (ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
300
  nlf <- setNodeStatus opts fixed_nl
301

    
302
  let splitcluster = Cluster.splitCluster nlf ilf
303

    
304
  when machineread $ printGroupsMappings gl
305

    
306
  let groupsstats = map (perGroupChecks gl) splitcluster
307
      clusterstats = map sum . transpose . map snd $ groupsstats
308
      needrebalance = clusterNeedsRebalance clusterstats
309

    
310
  unless (verbose == 0 || machineread) $
311
    if nosimulation
312
      then putStrLn "Running in no-simulation mode."
313
      else if needrebalance
314
             then putStrLn "Cluster needs rebalancing."
315
             else putStrLn "No need to rebalance cluster, no problems found."
316

    
317
  mapM_ (printGroupStats verbose machineread Initial) groupsstats
318

    
319
  printClusterStats verbose machineread Initial clusterstats needrebalance
320

    
321
  let exitOK = nosimulation || not needrebalance
322
      simulate = not nosimulation && needrebalance
323

    
324
  rebalancedcluster <- maybeSimulateRebalance simulate opts splitcluster
325

    
326
  when (simulate || machineread) $ do
327
    let newgroupstats = map (perGroupChecks gl) rebalancedcluster
328
        newclusterstats = map sum . transpose . map snd $ newgroupstats
329
        newneedrebalance = clusterNeedsRebalance clusterstats
330

    
331
    mapM_ (printGroupStats verbose machineread Rebalanced) newgroupstats
332

    
333
    printClusterStats verbose machineread Rebalanced newclusterstats
334
                           newneedrebalance
335

    
336
  printFinalHTC machineread
337

    
338
  unless exitOK . exitWith $ ExitFailure 1