Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hcheck.hs @ 85890a9d

History | View | Annotate | Download (11.7 kB)

1
{-| Cluster checker.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU Gene52al Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hcheck (main, options) where
27

    
28
import Control.Monad
29
import Data.List (transpose)
30
import System.Exit
31
import System.IO
32
import Text.Printf (printf)
33

    
34
import qualified Ganeti.HTools.Container as Container
35
import qualified Ganeti.HTools.Cluster as Cluster
36
import qualified Ganeti.HTools.Group as Group
37
import qualified Ganeti.HTools.Node as Node
38
import qualified Ganeti.HTools.Instance as Instance
39

    
40
import qualified Ganeti.HTools.Program.Hbal as Hbal
41

    
42
import Ganeti.HTools.CLI
43
import Ganeti.HTools.ExtLoader
44
import Ganeti.HTools.Loader
45
import Ganeti.HTools.Types
46

    
47
-- | Options list and functions.
48
options :: [OptType]
49
options =
50
  [ oDataFile
51
  , oDiskMoves
52
  , oDynuFile
53
  , oEvacMode
54
  , oExInst
55
  , oExTags
56
  , oIAllocSrc
57
  , oInstMoves
58
  , oLuxiSocket
59
  , oMachineReadable
60
  , oMaxCpu
61
  , oMaxSolLength
62
  , oMinDisk
63
  , oMinGain
64
  , oMinGainLim
65
  , oMinScore
66
  , oNoSimulation
67
  , oOfflineNode
68
  , oQuiet
69
  , oRapiMaster
70
  , oSelInst
71
  , oShowHelp
72
  , oShowVer
73
  , oVerbose
74
  ]
75

    
76
-- | Check phase - are we before (initial) or after rebalance.
77
data Phase = Initial
78
           | Rebalanced
79

    
80
-- | Level of presented statistics.
81
data Level = GroupLvl
82
           | ClusterLvl
83

    
84
-- | Prefix for machine readable names.
85
htcPrefix :: String
86
htcPrefix = "HCHECK"
87

    
88
-- | Data showed both per group and per cluster.
89
commonData :: [(String, String)]
90
commonData =[ ("N1_FAIL", "Nodes not N+1 happy")
91
            , ("CONFLICT_TAGS", "Nodes with conflicting instances")
92
            , ("OFFLINE_PRI", "Instances having the primary node offline")
93
            , ("OFFLINE_SEC", "Instances having a secondary node offline")
94
            ]
95

    
96
-- | Data showed per group.
97
groupData :: [(String, String)]
98
groupData = commonData ++ [("SCORE", "Group score")]
99

    
100
-- | Data showed per cluster.
101
clusterData :: [(String, String)]
102
clusterData = commonData ++
103
              [ ("NEED_REBALANCE", "Cluster is not healthy") ]
104

    
105
-- | Phase-specific prefix for machine readable version.
106
phasePrefix :: Phase -> String
107
phasePrefix Initial = "INIT"
108
phasePrefix Rebalanced = "FINAL"
109

    
110
-- | Level-specific prefix for machine readable version.
111
levelPrefix :: Level -> String
112
levelPrefix GroupLvl = "GROUP"
113
levelPrefix ClusterLvl = "CLUSTER"
114

    
115
-- | Machine-readable keys to show depending on given level.
116
keysData :: Level -> [String]
117
keysData GroupLvl = map fst groupData
118
keysData ClusterLvl = map fst clusterData
119

    
120
-- | Description of phases for human readable version.
121
phaseDescr :: Phase -> String
122
phaseDescr Initial = "initially"
123
phaseDescr Rebalanced = "after rebalancing"
124

    
125
-- | Description to show depending on given level.
126
descrData :: Level -> [String]
127
descrData GroupLvl = map snd groupData
128
descrData ClusterLvl = map snd clusterData
129

    
130
-- | Human readable prefix for statistics.
131
phaseLevelDescr :: Phase -> Level -> Maybe String -> String
132
phaseLevelDescr phase GroupLvl (Just name) =
133
    printf "Statistics for group %s %s\n" name $ phaseDescr phase
134
phaseLevelDescr phase GroupLvl Nothing =
135
    printf "Statistics for group %s\n" $ phaseDescr phase
136
phaseLevelDescr phase ClusterLvl _ =
137
    printf "Cluster statistics %s\n" $ phaseDescr phase
138

    
139
-- | Format a list of key, value as a shell fragment.
140
printKeysHTC :: [(String, String)] -> IO ()
141
printKeysHTC = printKeys htcPrefix
142

    
143
-- | Prepare string from boolean value.
144
printBool :: Bool    -- ^ Whether the result should be machine readable
145
          -> Bool    -- ^ Value to be converted to string
146
          -> String
147
printBool True True = "1"
148
printBool True False = "0"
149
printBool False b = show b
150

    
151
-- | Print mapping from group idx to group uuid (only in machine
152
-- readable mode).
153
printGroupsMappings :: Group.List -> IO ()
154
printGroupsMappings gl = do
155
    let extract_vals = \g -> (printf "GROUP_UUID_%d" $ Group.idx g :: String,
156
                              Group.uuid g)
157
        printpairs = map extract_vals (Container.elems gl)
158
    printKeysHTC printpairs
159

    
160
-- | Prepare a single key given a certain level and phase of simulation.
161
prepareKey :: Level -> Phase -> Maybe String -> String -> String
162
prepareKey level phase Nothing suffix =
163
  printf "%s_%s_%s" (phasePrefix phase) (levelPrefix level) suffix
164
prepareKey level phase (Just idx) suffix =
165
  printf "%s_%s_%s_%s" (phasePrefix phase) (levelPrefix level) idx suffix
166

    
167
-- | Print all the statistics for given level and phase.
168
printStats :: Int            -- ^ Verbosity level
169
           -> Bool           -- ^ If the output should be machine readable
170
           -> Level          -- ^ Level on which we are printing
171
           -> Phase          -- ^ Current phase of simulation
172
           -> [String]       -- ^ Values to print
173
           -> Maybe String   -- ^ Additional data for groups
174
           -> IO ()
175
printStats _ True level phase values gidx = do
176
  let keys = map (prepareKey level phase gidx) (keysData level)
177
  printKeysHTC $ zip keys values
178

    
179
printStats verbose False level phase values name = do
180
  let prefix = phaseLevelDescr phase level name
181
      descr = descrData level
182
  unless (verbose == 0) $ do
183
    printf "\n%s" prefix :: IO ()
184
    mapM_ (\(a,b) -> printf "    %s: %s\n" a b) (zip descr values)
185

    
186
-- | Extract name or idx from group.
187
extractGroupData :: Bool -> Group.Group -> String
188
extractGroupData True grp = show $ Group.idx grp
189
extractGroupData False grp = Group.name grp
190

    
191
-- | Prepare values for group.
192
prepareGroupValues :: [Int] -> Double -> [String]
193
prepareGroupValues stats score =
194
  map show stats ++ [printf "%.8f" score]
195

    
196
-- | Prepare values for cluster.
197
prepareClusterValues :: Bool -> [Int] -> [Bool] -> [String]
198
prepareClusterValues machineread stats bstats =
199
  map show stats ++ map (printBool machineread) bstats
200

    
201
-- | Print all the statistics on a group level.
202
printGroupStats :: Int -> Bool -> Phase -> Group.Group -> [Int] -> Double
203
                -> IO ()
204
printGroupStats verbose machineread phase grp stats score = do
205
  let values = prepareGroupValues stats score
206
      extradata = extractGroupData machineread grp
207
  printStats verbose machineread GroupLvl phase values (Just extradata)
208

    
209
-- | Print all the statistics on a cluster (global) level.
210
printClusterStats :: Int -> Bool -> Phase -> [Int] -> Bool -> IO ()
211
printClusterStats verbose machineread phase stats needhbal = do
212
  let values = prepareClusterValues machineread stats [needhbal]
213
  printStats verbose machineread ClusterLvl phase values Nothing
214

    
215
-- | Check if any of cluster metrics is non-zero.
216
clusterNeedsRebalance :: [Int] -> Bool
217
clusterNeedsRebalance stats = sum stats > 0
218

    
219
{- | Check group for N+1 hapiness, conflicts of primaries on nodes and
220
instances residing on offline nodes.
221

    
222
-}
223
perGroupChecks :: Int -> Bool -> Phase -> Group.List
224
               -> (Gdx, (Node.List, Instance.List)) -> IO ([Int])
225
perGroupChecks verbose machineread phase gl (gidx, (nl, il)) = do
226
  let grp = Container.find gidx gl
227
      offnl = filter Node.offline (Container.elems nl)
228
      n1violated = length $ fst $ Cluster.computeBadItems nl il
229
      conflicttags = length $ filter (>0)
230
                     (map Node.conflictingPrimaries (Container.elems nl))
231
      offline_pri = sum . map length $ map Node.pList offnl
232
      offline_sec = length $ map Node.sList offnl
233
      score = Cluster.compCV nl
234
      groupstats = [ n1violated
235
                   , conflicttags
236
                   , offline_pri
237
                   , offline_sec
238
                   ]
239
  printGroupStats verbose machineread phase grp groupstats score
240
  return groupstats
241

    
242
-- | Use Hbal's iterateDepth to simulate group rebalance.
243
executeSimulation :: Options -> Cluster.Table -> Double
244
                  -> Gdx -> Node.List -> Instance.List
245
                  -> IO (Gdx, (Node.List, Instance.List))
246
executeSimulation opts ini_tbl min_cv gidx nl il = do
247
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
248
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
249

    
250
  (fin_tbl, _) <- Hbal.iterateDepth False ini_tbl
251
                                    (optMaxLength opts)
252
                                    (optDiskMoves opts)
253
                                    (optInstMoves opts)
254
                                    nmlen imlen [] min_cv
255
                                    (optMinGainLim opts) (optMinGain opts)
256
                                    (optEvacMode opts)
257

    
258
  let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
259
  return (gidx, (fin_nl, fin_il))
260

    
261
-- | Simulate group rebalance if group's score is not good
262
maybeSimulateGroupRebalance :: Options -> (Gdx, (Node.List, Instance.List))
263
                            -> IO (Gdx, (Node.List, Instance.List))
264
maybeSimulateGroupRebalance opts (gidx, (nl, il)) = do
265
  let ini_cv = Cluster.compCV nl
266
      ini_tbl = Cluster.Table nl il ini_cv []
267
      min_cv = optMinScore opts
268
  if ini_cv < min_cv
269
    then return (gidx, (nl, il))
270
    else executeSimulation opts ini_tbl min_cv gidx nl il
271

    
272
-- | Decide whether to simulate rebalance.
273
maybeSimulateRebalance :: Bool             -- ^ Whether to simulate rebalance
274
                       -> Options          -- ^ Command line options
275
                       -> [(Gdx, (Node.List, Instance.List))] -- ^ Group data
276
                       -> IO [(Gdx, (Node.List, Instance.List))]
277
maybeSimulateRebalance True opts cluster =
278
    mapM (maybeSimulateGroupRebalance opts) cluster
279
maybeSimulateRebalance False _ cluster = return cluster
280

    
281
-- | Prints the final @OK@ marker in machine readable output.
282
printFinalHTC :: Bool -> IO ()
283
printFinalHTC = printFinal htcPrefix
284

    
285
-- | Main function.
286
main :: Options -> [String] -> IO ()
287
main opts args = do
288
  unless (null args) $ do
289
         hPutStrLn stderr "Error: this program doesn't take any arguments."
290
         exitWith $ ExitFailure 1
291

    
292
  let verbose = optVerbose opts
293
      machineread = optMachineReadable opts
294
      nosimulation = optNoSimulation opts
295

    
296
  (ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
297
  nlf <- setNodeStatus opts fixed_nl
298

    
299
  let splitcluster = Cluster.splitCluster nlf ilf
300

    
301
  when machineread $ printGroupsMappings gl
302

    
303
  groupsstats <- mapM (perGroupChecks verbose machineread Initial gl)
304
                 splitcluster
305
  let clusterstats = map sum (transpose groupsstats) :: [Int]
306
      needrebalance = clusterNeedsRebalance clusterstats
307
  printClusterStats verbose machineread Initial clusterstats needrebalance
308

    
309
  when nosimulation $ do
310
    unless (verbose == 0 || machineread) $
311
      putStrLn "Running in no-simulation mode. Exiting."
312

    
313
  unless needrebalance $ do
314
    unless (verbose == 0 || machineread) $
315
      putStrLn "No need to rebalance cluster, no problems found. Exiting."
316

    
317
  let exitOK = nosimulation || not needrebalance
318
      simulate = not nosimulation && needrebalance
319

    
320
  rebalancedcluster <- maybeSimulateRebalance simulate opts splitcluster
321

    
322
  when (simulate || machineread) $ do
323
    newgroupstats <- mapM (perGroupChecks verbose machineread Rebalanced gl)
324
                     rebalancedcluster
325
    let newclusterstats = map sum (transpose newgroupstats) :: [Int]
326
        newneedrebalance = clusterNeedsRebalance clusterstats
327

    
328
    printClusterStats verbose machineread Rebalanced newclusterstats
329
                           newneedrebalance
330

    
331
  printFinalHTC machineread
332

    
333
  unless exitOK $ exitWith $ ExitFailure 1