Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Hcheck.hs @ 3add7574

History | View | Annotate | Download (11.7 kB)

1
{-| Cluster checker.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU Gene52al Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hcheck
27
  ( main
28
  , options
29
  , arguments
30
  ) where
31

    
32
import Control.Monad
33
import Data.List (transpose)
34
import System.Exit
35
import Text.Printf (printf)
36

    
37
import qualified Ganeti.HTools.Container as Container
38
import qualified Ganeti.HTools.Cluster as Cluster
39
import qualified Ganeti.HTools.Group as Group
40
import qualified Ganeti.HTools.Node as Node
41
import qualified Ganeti.HTools.Instance as Instance
42

    
43
import qualified Ganeti.HTools.Program.Hbal as Hbal
44

    
45
import Ganeti.Common
46
import Ganeti.HTools.CLI
47
import Ganeti.HTools.ExtLoader
48
import Ganeti.HTools.Loader
49
import Ganeti.HTools.Types
50
import Ganeti.Utils
51

    
52
-- | Options list and functions.
53
options :: IO [OptType]
54
options = do
55
  luxi <- oLuxiSocket
56
  return
57
    [ oDataFile
58
    , oDiskMoves
59
    , oDynuFile
60
    , oEvacMode
61
    , oExInst
62
    , oExTags
63
    , oIAllocSrc
64
    , oInstMoves
65
    , luxi
66
    , oMachineReadable
67
    , oMaxCpu
68
    , oMaxSolLength
69
    , oMinDisk
70
    , oMinGain
71
    , oMinGainLim
72
    , oMinScore
73
    , oNoSimulation
74
    , oOfflineNode
75
    , oQuiet
76
    , oRapiMaster
77
    , oSelInst
78
    , oVerbose
79
    ]
80

    
81
-- | The list of arguments supported by the program.
82
arguments :: [ArgCompletion]
83
arguments = []
84

    
85
-- | Check phase - are we before (initial) or after rebalance.
86
data Phase = Initial
87
           | Rebalanced
88

    
89
-- | Level of presented statistics.
90
data Level = GroupLvl
91
           | ClusterLvl
92

    
93
-- | A type alias for a group index and node\/instance lists.
94
type GroupInfo = (Gdx, (Node.List, Instance.List))
95

    
96
-- | A type alias for group stats.
97
type GroupStats = ((Group.Group, Double), [Int])
98

    
99
-- | Prefix for machine readable names.
100
htcPrefix :: String
101
htcPrefix = "HCHECK"
102

    
103
-- | Data showed both per group and per cluster.
104
commonData :: [(String, String)]
105
commonData =[ ("N1_FAIL", "Nodes not N+1 happy")
106
            , ("CONFLICT_TAGS", "Nodes with conflicting instances")
107
            , ("OFFLINE_PRI", "Instances having the primary node offline")
108
            , ("OFFLINE_SEC", "Instances having a secondary node offline")
109
            ]
110

    
111
-- | Data showed per group.
112
groupData :: [(String, String)]
113
groupData = commonData ++ [("SCORE", "Group score")]
114

    
115
-- | Data showed per cluster.
116
clusterData :: [(String, String)]
117
clusterData = commonData ++
118
              [ ("NEED_REBALANCE", "Cluster is not healthy") ]
119

    
120
-- | Phase-specific prefix for machine readable version.
121
phasePrefix :: Phase -> String
122
phasePrefix Initial = "INIT"
123
phasePrefix Rebalanced = "FINAL"
124

    
125
-- | Level-specific prefix for machine readable version.
126
levelPrefix :: Level -> String
127
levelPrefix GroupLvl = "GROUP"
128
levelPrefix ClusterLvl = "CLUSTER"
129

    
130
-- | Machine-readable keys to show depending on given level.
131
keysData :: Level -> [String]
132
keysData GroupLvl = map fst groupData
133
keysData ClusterLvl = map fst clusterData
134

    
135
-- | Description of phases for human readable version.
136
phaseDescr :: Phase -> String
137
phaseDescr Initial = "initially"
138
phaseDescr Rebalanced = "after rebalancing"
139

    
140
-- | Description to show depending on given level.
141
descrData :: Level -> [String]
142
descrData GroupLvl = map snd groupData
143
descrData ClusterLvl = map snd clusterData
144

    
145
-- | Human readable prefix for statistics.
146
phaseLevelDescr :: Phase -> Level -> Maybe String -> String
147
phaseLevelDescr phase GroupLvl (Just name) =
148
    printf "Statistics for group %s %s\n" name $ phaseDescr phase
149
phaseLevelDescr phase GroupLvl Nothing =
150
    printf "Statistics for group %s\n" $ phaseDescr phase
151
phaseLevelDescr phase ClusterLvl _ =
152
    printf "Cluster statistics %s\n" $ phaseDescr phase
153

    
154
-- | Format a list of key, value as a shell fragment.
155
printKeysHTC :: [(String, String)] -> IO ()
156
printKeysHTC = printKeys htcPrefix
157

    
158
-- | Prepare string from boolean value.
159
printBool :: Bool    -- ^ Whether the result should be machine readable
160
          -> Bool    -- ^ Value to be converted to string
161
          -> String
162
printBool True True = "1"
163
printBool True False = "0"
164
printBool False b = show b
165

    
166
-- | Print mapping from group idx to group uuid (only in machine
167
-- readable mode).
168
printGroupsMappings :: Group.List -> IO ()
169
printGroupsMappings gl = do
170
    let extract_vals g = (printf "GROUP_UUID_%d" $ Group.idx g :: String,
171
                          Group.uuid g)
172
        printpairs = map extract_vals (Container.elems gl)
173
    printKeysHTC printpairs
174

    
175
-- | Prepare a single key given a certain level and phase of simulation.
176
prepareKey :: Level -> Phase -> Maybe String -> String -> String
177
prepareKey level phase Nothing suffix =
178
  printf "%s_%s_%s" (phasePrefix phase) (levelPrefix level) suffix
179
prepareKey level phase (Just idx) suffix =
180
  printf "%s_%s_%s_%s" (phasePrefix phase) (levelPrefix level) idx suffix
181

    
182
-- | Print all the statistics for given level and phase.
183
printStats :: Int            -- ^ Verbosity level
184
           -> Bool           -- ^ If the output should be machine readable
185
           -> Level          -- ^ Level on which we are printing
186
           -> Phase          -- ^ Current phase of simulation
187
           -> [String]       -- ^ Values to print
188
           -> Maybe String   -- ^ Additional data for groups
189
           -> IO ()
190
printStats _ True level phase values gidx = do
191
  let keys = map (prepareKey level phase gidx) (keysData level)
192
  printKeysHTC $ zip keys values
193

    
194
printStats verbose False level phase values name = do
195
  let prefix = phaseLevelDescr phase level name
196
      descr = descrData level
197
  unless (verbose == 0) $ do
198
    putStrLn ""
199
    putStr prefix
200
    mapM_ (uncurry (printf "    %s: %s\n")) (zip descr values)
201

    
202
-- | Extract name or idx from group.
203
extractGroupData :: Bool -> Group.Group -> String
204
extractGroupData True grp = show $ Group.idx grp
205
extractGroupData False grp = Group.name grp
206

    
207
-- | Prepare values for group.
208
prepareGroupValues :: [Int] -> Double -> [String]
209
prepareGroupValues stats score =
210
  map show stats ++ [printf "%.8f" score]
211

    
212
-- | Prepare values for cluster.
213
prepareClusterValues :: Bool -> [Int] -> [Bool] -> [String]
214
prepareClusterValues machineread stats bstats =
215
  map show stats ++ map (printBool machineread) bstats
216

    
217
-- | Print all the statistics on a group level.
218
printGroupStats :: Int -> Bool -> Phase -> GroupStats -> IO ()
219
printGroupStats verbose machineread phase ((grp, score), stats) = do
220
  let values = prepareGroupValues stats score
221
      extradata = extractGroupData machineread grp
222
  printStats verbose machineread GroupLvl phase values (Just extradata)
223

    
224
-- | Print all the statistics on a cluster (global) level.
225
printClusterStats :: Int -> Bool -> Phase -> [Int] -> Bool -> IO ()
226
printClusterStats verbose machineread phase stats needhbal = do
227
  let values = prepareClusterValues machineread stats [needhbal]
228
  printStats verbose machineread ClusterLvl phase values Nothing
229

    
230
-- | Check if any of cluster metrics is non-zero.
231
clusterNeedsRebalance :: [Int] -> Bool
232
clusterNeedsRebalance stats = sum stats > 0
233

    
234
{- | Check group for N+1 hapiness, conflicts of primaries on nodes and
235
instances residing on offline nodes.
236

    
237
-}
238
perGroupChecks :: Group.List -> GroupInfo -> GroupStats
239
perGroupChecks gl (gidx, (nl, il)) =
240
  let grp = Container.find gidx gl
241
      offnl = filter Node.offline (Container.elems nl)
242
      n1violated = length . fst $ Cluster.computeBadItems nl il
243
      conflicttags = length $ filter (>0)
244
                     (map Node.conflictingPrimaries (Container.elems nl))
245
      offline_pri = sum . map length $ map Node.pList offnl
246
      offline_sec = length $ map Node.sList offnl
247
      score = Cluster.compCV nl
248
      groupstats = [ n1violated
249
                   , conflicttags
250
                   , offline_pri
251
                   , offline_sec
252
                   ]
253
  in ((grp, score), groupstats)
254

    
255
-- | Use Hbal's iterateDepth to simulate group rebalance.
256
executeSimulation :: Options -> Cluster.Table -> Double
257
                  -> Gdx -> Node.List -> Instance.List
258
                  -> IO GroupInfo
259
executeSimulation opts ini_tbl min_cv gidx nl il = do
260
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
261
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
262

    
263
  (fin_tbl, _) <- Hbal.iterateDepth False ini_tbl
264
                                    (optMaxLength opts)
265
                                    (optDiskMoves opts)
266
                                    (optInstMoves opts)
267
                                    nmlen imlen [] min_cv
268
                                    (optMinGainLim opts) (optMinGain opts)
269
                                    (optEvacMode opts)
270

    
271
  let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
272
  return (gidx, (fin_nl, fin_il))
273

    
274
-- | Simulate group rebalance if group's score is not good
275
maybeSimulateGroupRebalance :: Options -> GroupInfo -> IO GroupInfo
276
maybeSimulateGroupRebalance opts (gidx, (nl, il)) = do
277
  let ini_cv = Cluster.compCV nl
278
      ini_tbl = Cluster.Table nl il ini_cv []
279
      min_cv = optMinScore opts
280
  if ini_cv < min_cv
281
    then return (gidx, (nl, il))
282
    else executeSimulation opts ini_tbl min_cv gidx nl il
283

    
284
-- | Decide whether to simulate rebalance.
285
maybeSimulateRebalance :: Bool             -- ^ Whether to simulate rebalance
286
                       -> Options          -- ^ Command line options
287
                       -> [GroupInfo]      -- ^ Group data
288
                       -> IO [GroupInfo]
289
maybeSimulateRebalance True opts cluster =
290
    mapM (maybeSimulateGroupRebalance opts) cluster
291
maybeSimulateRebalance False _ cluster = return cluster
292

    
293
-- | Prints the final @OK@ marker in machine readable output.
294
printFinalHTC :: Bool -> IO ()
295
printFinalHTC = printFinal htcPrefix
296

    
297
-- | Main function.
298
main :: Options -> [String] -> IO ()
299
main opts args = do
300
  unless (null args) $ exitErr "This program doesn't take any arguments."
301

    
302
  let verbose = optVerbose opts
303
      machineread = optMachineReadable opts
304
      nosimulation = optNoSimulation opts
305

    
306
  (ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
307
  nlf <- setNodeStatus opts fixed_nl
308

    
309
  let splitcluster = Cluster.splitCluster nlf ilf
310

    
311
  when machineread $ printGroupsMappings gl
312

    
313
  let groupsstats = map (perGroupChecks gl) splitcluster
314
      clusterstats = map sum . transpose . map snd $ groupsstats
315
      needrebalance = clusterNeedsRebalance clusterstats
316

    
317
  unless (verbose == 0 || machineread) .
318
    putStrLn $ if nosimulation
319
                 then "Running in no-simulation mode."
320
                 else if needrebalance
321
                        then "Cluster needs rebalancing."
322
                        else "No need to rebalance cluster, no problems found."
323

    
324
  mapM_ (printGroupStats verbose machineread Initial) groupsstats
325

    
326
  printClusterStats verbose machineread Initial clusterstats needrebalance
327

    
328
  let exitOK = nosimulation || not needrebalance
329
      simulate = not nosimulation && needrebalance
330

    
331
  rebalancedcluster <- maybeSimulateRebalance simulate opts splitcluster
332

    
333
  when (simulate || machineread) $ do
334
    let newgroupstats = map (perGroupChecks gl) rebalancedcluster
335
        newclusterstats = map sum . transpose . map snd $ newgroupstats
336
        newneedrebalance = clusterNeedsRebalance clusterstats
337

    
338
    mapM_ (printGroupStats verbose machineread Rebalanced) newgroupstats
339

    
340
    printClusterStats verbose machineread Rebalanced newclusterstats
341
                           newneedrebalance
342

    
343
  printFinalHTC machineread
344

    
345
  unless exitOK . exitWith $ ExitFailure 1