Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hcheck.hs @ d66aa238

History | View | Annotate | Download (11.7 kB)

1
{-| Cluster checker.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU Gene52al Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hcheck
27
  ( main
28
  , options
29
  , arguments
30
  ) where
31

    
32
import Control.Monad
33
import Data.List (transpose)
34
import System.Exit
35
import Text.Printf (printf)
36

    
37
import qualified Ganeti.HTools.Container as Container
38
import qualified Ganeti.HTools.Cluster as Cluster
39
import qualified Ganeti.HTools.Group as Group
40
import qualified Ganeti.HTools.Node as Node
41
import qualified Ganeti.HTools.Instance as Instance
42

    
43
import qualified Ganeti.HTools.Program.Hbal as Hbal
44

    
45
import Ganeti.Common
46
import Ganeti.HTools.CLI
47
import Ganeti.HTools.ExtLoader
48
import Ganeti.HTools.Loader
49
import Ganeti.HTools.Types
50
import Ganeti.Utils
51

    
52
-- | Options list and functions.
53
options :: IO [OptType]
54
options =
55
  return
56
    [ oDataFile
57
    , oDiskMoves
58
    , oDynuFile
59
    , oEvacMode
60
    , oExInst
61
    , oExTags
62
    , oIAllocSrc
63
    , oInstMoves
64
    , oLuxiSocket
65
    , oMachineReadable
66
    , oMaxCpu
67
    , oMaxSolLength
68
    , oMinDisk
69
    , oMinGain
70
    , oMinGainLim
71
    , oMinScore
72
    , oNoSimulation
73
    , oOfflineNode
74
    , oQuiet
75
    , oRapiMaster
76
    , oSelInst
77
    , oVerbose
78
    ]
79

    
80
-- | The list of arguments supported by the program.
81
arguments :: [ArgCompletion]
82
arguments = []
83

    
84
-- | Check phase - are we before (initial) or after rebalance.
85
data Phase = Initial
86
           | Rebalanced
87

    
88
-- | Level of presented statistics.
89
data Level = GroupLvl
90
           | ClusterLvl
91

    
92
-- | A type alias for a group index and node\/instance lists.
93
type GroupInfo = (Gdx, (Node.List, Instance.List))
94

    
95
-- | A type alias for group stats.
96
type GroupStats = ((Group.Group, Double), [Int])
97

    
98
-- | Prefix for machine readable names.
99
htcPrefix :: String
100
htcPrefix = "HCHECK"
101

    
102
-- | Data showed both per group and per cluster.
103
commonData :: [(String, String)]
104
commonData =[ ("N1_FAIL", "Nodes not N+1 happy")
105
            , ("CONFLICT_TAGS", "Nodes with conflicting instances")
106
            , ("OFFLINE_PRI", "Instances having the primary node offline")
107
            , ("OFFLINE_SEC", "Instances having a secondary node offline")
108
            ]
109

    
110
-- | Data showed per group.
111
groupData :: [(String, String)]
112
groupData = commonData ++ [("SCORE", "Group score")]
113

    
114
-- | Data showed per cluster.
115
clusterData :: [(String, String)]
116
clusterData = commonData ++
117
              [ ("NEED_REBALANCE", "Cluster is not healthy") ]
118

    
119
-- | Phase-specific prefix for machine readable version.
120
phasePrefix :: Phase -> String
121
phasePrefix Initial = "INIT"
122
phasePrefix Rebalanced = "FINAL"
123

    
124
-- | Level-specific prefix for machine readable version.
125
levelPrefix :: Level -> String
126
levelPrefix GroupLvl = "GROUP"
127
levelPrefix ClusterLvl = "CLUSTER"
128

    
129
-- | Machine-readable keys to show depending on given level.
130
keysData :: Level -> [String]
131
keysData GroupLvl = map fst groupData
132
keysData ClusterLvl = map fst clusterData
133

    
134
-- | Description of phases for human readable version.
135
phaseDescr :: Phase -> String
136
phaseDescr Initial = "initially"
137
phaseDescr Rebalanced = "after rebalancing"
138

    
139
-- | Description to show depending on given level.
140
descrData :: Level -> [String]
141
descrData GroupLvl = map snd groupData
142
descrData ClusterLvl = map snd clusterData
143

    
144
-- | Human readable prefix for statistics.
145
phaseLevelDescr :: Phase -> Level -> Maybe String -> String
146
phaseLevelDescr phase GroupLvl (Just name) =
147
    printf "Statistics for group %s %s\n" name $ phaseDescr phase
148
phaseLevelDescr phase GroupLvl Nothing =
149
    printf "Statistics for group %s\n" $ phaseDescr phase
150
phaseLevelDescr phase ClusterLvl _ =
151
    printf "Cluster statistics %s\n" $ phaseDescr phase
152

    
153
-- | Format a list of key, value as a shell fragment.
154
printKeysHTC :: [(String, String)] -> IO ()
155
printKeysHTC = printKeys htcPrefix
156

    
157
-- | Prepare string from boolean value.
158
printBool :: Bool    -- ^ Whether the result should be machine readable
159
          -> Bool    -- ^ Value to be converted to string
160
          -> String
161
printBool True True = "1"
162
printBool True False = "0"
163
printBool False b = show b
164

    
165
-- | Print mapping from group idx to group uuid (only in machine
166
-- readable mode).
167
printGroupsMappings :: Group.List -> IO ()
168
printGroupsMappings gl = do
169
    let extract_vals g = (printf "GROUP_UUID_%d" $ Group.idx g :: String,
170
                          Group.uuid g)
171
        printpairs = map extract_vals (Container.elems gl)
172
    printKeysHTC printpairs
173

    
174
-- | Prepare a single key given a certain level and phase of simulation.
175
prepareKey :: Level -> Phase -> Maybe String -> String -> String
176
prepareKey level phase Nothing suffix =
177
  printf "%s_%s_%s" (phasePrefix phase) (levelPrefix level) suffix
178
prepareKey level phase (Just idx) suffix =
179
  printf "%s_%s_%s_%s" (phasePrefix phase) (levelPrefix level) idx suffix
180

    
181
-- | Print all the statistics for given level and phase.
182
printStats :: Int            -- ^ Verbosity level
183
           -> Bool           -- ^ If the output should be machine readable
184
           -> Level          -- ^ Level on which we are printing
185
           -> Phase          -- ^ Current phase of simulation
186
           -> [String]       -- ^ Values to print
187
           -> Maybe String   -- ^ Additional data for groups
188
           -> IO ()
189
printStats _ True level phase values gidx = do
190
  let keys = map (prepareKey level phase gidx) (keysData level)
191
  printKeysHTC $ zip keys values
192

    
193
printStats verbose False level phase values name = do
194
  let prefix = phaseLevelDescr phase level name
195
      descr = descrData level
196
  unless (verbose == 0) $ do
197
    putStrLn ""
198
    putStr prefix
199
    mapM_ (uncurry (printf "    %s: %s\n")) (zip descr values)
200

    
201
-- | Extract name or idx from group.
202
extractGroupData :: Bool -> Group.Group -> String
203
extractGroupData True grp = show $ Group.idx grp
204
extractGroupData False grp = Group.name grp
205

    
206
-- | Prepare values for group.
207
prepareGroupValues :: [Int] -> Double -> [String]
208
prepareGroupValues stats score =
209
  map show stats ++ [printf "%.8f" score]
210

    
211
-- | Prepare values for cluster.
212
prepareClusterValues :: Bool -> [Int] -> [Bool] -> [String]
213
prepareClusterValues machineread stats bstats =
214
  map show stats ++ map (printBool machineread) bstats
215

    
216
-- | Print all the statistics on a group level.
217
printGroupStats :: Int -> Bool -> Phase -> GroupStats -> IO ()
218
printGroupStats verbose machineread phase ((grp, score), stats) = do
219
  let values = prepareGroupValues stats score
220
      extradata = extractGroupData machineread grp
221
  printStats verbose machineread GroupLvl phase values (Just extradata)
222

    
223
-- | Print all the statistics on a cluster (global) level.
224
printClusterStats :: Int -> Bool -> Phase -> [Int] -> Bool -> IO ()
225
printClusterStats verbose machineread phase stats needhbal = do
226
  let values = prepareClusterValues machineread stats [needhbal]
227
  printStats verbose machineread ClusterLvl phase values Nothing
228

    
229
-- | Check if any of cluster metrics is non-zero.
230
clusterNeedsRebalance :: [Int] -> Bool
231
clusterNeedsRebalance stats = sum stats > 0
232

    
233
{- | Check group for N+1 hapiness, conflicts of primaries on nodes and
234
instances residing on offline nodes.
235

    
236
-}
237
perGroupChecks :: Group.List -> GroupInfo -> GroupStats
238
perGroupChecks gl (gidx, (nl, il)) =
239
  let grp = Container.find gidx gl
240
      offnl = filter Node.offline (Container.elems nl)
241
      n1violated = length . fst $ Cluster.computeBadItems nl il
242
      conflicttags = length $ filter (>0)
243
                     (map Node.conflictingPrimaries (Container.elems nl))
244
      offline_pri = sum . map length $ map Node.pList offnl
245
      offline_sec = length $ map Node.sList offnl
246
      score = Cluster.compCV nl
247
      groupstats = [ n1violated
248
                   , conflicttags
249
                   , offline_pri
250
                   , offline_sec
251
                   ]
252
  in ((grp, score), groupstats)
253

    
254
-- | Use Hbal's iterateDepth to simulate group rebalance.
255
executeSimulation :: Options -> Cluster.Table -> Double
256
                  -> Gdx -> Node.List -> Instance.List
257
                  -> IO GroupInfo
258
executeSimulation opts ini_tbl min_cv gidx nl il = do
259
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
260
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
261

    
262
  (fin_tbl, _) <- Hbal.iterateDepth False ini_tbl
263
                                    (optMaxLength opts)
264
                                    (optDiskMoves opts)
265
                                    (optInstMoves opts)
266
                                    nmlen imlen [] min_cv
267
                                    (optMinGainLim opts) (optMinGain opts)
268
                                    (optEvacMode opts)
269

    
270
  let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
271
  return (gidx, (fin_nl, fin_il))
272

    
273
-- | Simulate group rebalance if group's score is not good
274
maybeSimulateGroupRebalance :: Options -> GroupInfo -> IO GroupInfo
275
maybeSimulateGroupRebalance opts (gidx, (nl, il)) = do
276
  let ini_cv = Cluster.compCV nl
277
      ini_tbl = Cluster.Table nl il ini_cv []
278
      min_cv = optMinScore opts
279
  if ini_cv < min_cv
280
    then return (gidx, (nl, il))
281
    else executeSimulation opts ini_tbl min_cv gidx nl il
282

    
283
-- | Decide whether to simulate rebalance.
284
maybeSimulateRebalance :: Bool             -- ^ Whether to simulate rebalance
285
                       -> Options          -- ^ Command line options
286
                       -> [GroupInfo]      -- ^ Group data
287
                       -> IO [GroupInfo]
288
maybeSimulateRebalance True opts cluster =
289
    mapM (maybeSimulateGroupRebalance opts) cluster
290
maybeSimulateRebalance False _ cluster = return cluster
291

    
292
-- | Prints the final @OK@ marker in machine readable output.
293
printFinalHTC :: Bool -> IO ()
294
printFinalHTC = printFinal htcPrefix
295

    
296
-- | Main function.
297
main :: Options -> [String] -> IO ()
298
main opts args = do
299
  unless (null args) $ exitErr "This program doesn't take any arguments."
300

    
301
  let verbose = optVerbose opts
302
      machineread = optMachineReadable opts
303
      nosimulation = optNoSimulation opts
304

    
305
  (ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
306
  nlf <- setNodeStatus opts fixed_nl
307

    
308
  let splitcluster = Cluster.splitCluster nlf ilf
309

    
310
  when machineread $ printGroupsMappings gl
311

    
312
  let groupsstats = map (perGroupChecks gl) splitcluster
313
      clusterstats = map sum . transpose . map snd $ groupsstats
314
      needrebalance = clusterNeedsRebalance clusterstats
315

    
316
  unless (verbose == 0 || machineread) .
317
    putStrLn $ if nosimulation
318
                 then "Running in no-simulation mode."
319
                 else if needrebalance
320
                        then "Cluster needs rebalancing."
321
                        else "No need to rebalance cluster, no problems found."
322

    
323
  mapM_ (printGroupStats verbose machineread Initial) groupsstats
324

    
325
  printClusterStats verbose machineread Initial clusterstats needrebalance
326

    
327
  let exitOK = nosimulation || not needrebalance
328
      simulate = not nosimulation && needrebalance
329

    
330
  rebalancedcluster <- maybeSimulateRebalance simulate opts splitcluster
331

    
332
  when (simulate || machineread) $ do
333
    let newgroupstats = map (perGroupChecks gl) rebalancedcluster
334
        newclusterstats = map sum . transpose . map snd $ newgroupstats
335
        newneedrebalance = clusterNeedsRebalance clusterstats
336

    
337
    mapM_ (printGroupStats verbose machineread Rebalanced) newgroupstats
338

    
339
    printClusterStats verbose machineread Rebalanced newclusterstats
340
                           newneedrebalance
341

    
342
  printFinalHTC machineread
343

    
344
  unless exitOK . exitWith $ ExitFailure 1