Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hcheck.hs @ 92eacdd8

History | View | Annotate | Download (9.2 kB)

1
{-| Cluster checker.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hcheck (main, options) where
27

    
28
import Control.Monad
29
import List (transpose)
30
import System.Exit
31
import System.IO
32
import Text.Printf (printf)
33

    
34
import qualified Ganeti.HTools.Container as Container
35
import qualified Ganeti.HTools.Cluster as Cluster
36
import qualified Ganeti.HTools.Node as Node
37
import qualified Ganeti.HTools.Instance as Instance
38

    
39
import qualified Ganeti.HTools.Program.Hbal as Hbal
40

    
41
import Ganeti.HTools.CLI
42
import Ganeti.HTools.ExtLoader
43
import Ganeti.HTools.Loader
44
import Ganeti.HTools.Types
45

    
46
-- | Command-line option descriptors accepted by this program, exported
-- alongside 'main'. The order of the entries is kept as originally listed.
options :: [OptType]
options =
  [ oDataFile, oDiskMoves, oDynuFile, oEvacMode
  , oExInst, oExTags, oIAllocSrc, oInstMoves
  , oLuxiSocket, oMachineReadable, oMaxCpu, oMaxSolLength
  , oMinDisk, oMinGain, oMinGainLim, oMinScore
  , oNoSimulation, oOfflineNode, oQuiet, oRapiMaster
  , oSelInst, oShowHelp, oShowVer, oVerbose
  ]
74

    
75
-- | Point in the run at which a set of checks is performed.
data Phase
  = Initial     -- ^ Before any rebalancing has been simulated
  | Rebalanced  -- ^ After the simulated rebalance
78

    
79
-- | Scope covered by a set of presented statistics.
data Level
  = GroupLvl    -- ^ Statistics for a single group
  | ClusterLvl  -- ^ Statistics for the cluster as a whole
82

    
83
-- | Prefix handed to the shared key/final-marker printers for every
-- machine readable name emitted by this program.
htcPrefix :: String
htcPrefix =
  "HCHECK"
86

    
87
-- | Key component identifying the phase in machine readable output.
phasePrefix :: Phase -> String
phasePrefix phase =
  case phase of
    Initial    -> "INIT"
    Rebalanced -> "FINAL"
91

    
92
-- | Wording used for the phase in human readable output headers.
phaseDescription :: Phase -> String
phaseDescription phase =
  case phase of
    Initial    -> "initially"
    Rebalanced -> "after rebalancing"
96

    
97
-- | Key component identifying the statistics level in machine readable
-- output.
levelPrefix :: Level -> String
levelPrefix level =
  case level of
    GroupLvl   -> "GROUP"
    ClusterLvl -> "CLUSTER"
101

    
102
-- | Data shown both per group and per cluster.
--
-- Each entry pairs the key suffix used in machine readable output with the
-- description used in human readable output. The entries are zipped
-- positionally with the computed statistics, so their order must match the
-- order in which those statistics are produced.
commonData :: [(String, String)]
commonData =
  [ ("N1_FAIL", "Nodes not N+1 happy")
  , ("CONFLICT_TAGS", "Nodes with conflicting instances")
  , ("OFFLINE_PRI", "Instances with primary on an offline node")
  , ("OFFLINE_SEC", "Instances with secondary on an offline node")
  ]
109

    
110
-- | Entries presented for each group: the common ones followed by the
-- group score.
groupData :: [(String, String)]
groupData = commonData ++ [scoreEntry]
  where
    scoreEntry = ("SCORE", "Group score")
113

    
114
-- | Entries presented for the whole cluster: the common ones followed by
-- the two rebalance flags.
clusterData :: [(String, String)]
clusterData = commonData ++ rebalanceEntries
  where
    rebalanceEntries =
      [ ("NEED_REBALANCE", "Cluster is not healthy")
      , ("CAN_REBALANCE", "Possible to run rebalance")
      ]
120

    
121

    
122
-- | Emit a list of (key, value) pairs as a shell fragment by delegating to
-- 'printKeys' with the 'htcPrefix' prefix.
printKeysHTC :: [(String, String)] -> IO ()
printKeysHTC pairs = printKeys htcPrefix pairs
125

    
126
-- | Render a boolean as text: @1@ / @0@ when machine readable output is
-- requested, otherwise whatever 'show' produces.
printBool :: Bool    -- ^ Whether the result should be machine readable
          -> Bool    -- ^ Value to be converted to string
          -> String
printBool machineread value
  | not machineread = show value
  | value = "1"
  | otherwise = "0"
133

    
134
-- | Print the statistics of one group.
--
-- In machine readable mode the values are always emitted through
-- 'printKeysHTC', with keys built from the phase prefix, the group level
-- prefix, the group index and the key of each 'groupData' entry. Otherwise
-- a human readable listing is printed, unless the verbosity is zero.
printGroupStats :: Int     -- ^ Verbosity level
                -> Bool    -- ^ Whether output should be machine readable
                -> Phase   -- ^ Phase the statistics belong to
                -> Gdx     -- ^ Group index
                -> [Int]   -- ^ Integer statistics, in 'groupData' order
                -> Double  -- ^ Group score
                -> IO ()
printGroupStats verbose machineread phase gidx stats score
  | machineread = printKeysHTC (zip keys values)
  | verbose == 0 = return ()
  | otherwise = do
      printf "\nStatistics for group %d %s\n"
             gidx (phaseDescription phase) :: IO ()
      forM_ (zip groupData values) $ \((_, desc), val) ->
        printf "    %s: %s\n" desc val :: IO ()
  where
    -- Rendered values: the integer statistics followed by the score.
    values :: [String]
    values = map (printf "%d") stats ++ [printf "%.8f" score]
    -- One key per 'groupData' entry.
    keys :: [String]
    keys = [ printf "%s_%s_%d_%s"
                    (phasePrefix phase) (levelPrefix GroupLvl) gidx key
           | (key, _) <- groupData ]
153

    
154
-- | Print the cluster-wide statistics and report whether a rebalance is
-- needed, i.e. whether the statistics sum to more than zero.
--
-- In machine readable mode the values are always emitted through
-- 'printKeysHTC'; otherwise a human readable listing is printed unless the
-- verbosity is zero. The two rebalance flags are appended to the integer
-- statistics, rendered with 'printBool'.
printClusterStats :: Int -> Bool -> Phase -> [Int] -> Bool -> IO (Bool)
printClusterStats verbose machineread phase stats canrebal = do
  if machineread
    then printKeysHTC (zip keys values)
    else unless (verbose == 0) $ do
      printf "\nCluster statistics %s\n" (phaseDescription phase) :: IO ()
      forM_ (zip clusterData values) $ \((_, desc), val) ->
        printf "    %s: %s\n" desc val :: IO ()
  return needrebal
  where
    needrebal = sum stats > 0
    values :: [String]
    values = map (printf "%d") stats ++
             map (printBool machineread) [needrebal, canrebal]
    -- One key per 'clusterData' entry.
    keys :: [String]
    keys = [ printf "%s_%s_%s"
                    (phasePrefix phase) (levelPrefix ClusterLvl) key
           | (key, _) <- clusterData ]
176

    
177
{- | Check a group for N+1 happiness, conflicts of primaries on nodes and
instances residing on offline nodes.

The statistics are printed through 'printGroupStats' (together with the
group score) and returned in the order: N+1 failures, nodes with
conflicting primaries, instances with their primary on an offline node,
instances with their secondary on an offline node.

-}
perGroupChecks :: Int -> Bool -> Phase -> (Gdx, (Node.List, Instance.List))
               -> IO ([Int])
perGroupChecks verbose machineread phase (gidx, (nl, il)) = do
  let nodes = Container.elems nl
      offnl = filter Node.offline nodes
      n1violated = length . fst $ Cluster.computeBadItems nl il
      conflicttags = length $ filter (> 0)
                     (map Node.conflictingPrimaries nodes)
      offline_pri = sum $ map (length . Node.pList) offnl
      -- Sum the per-node secondary lists, mirroring offline_pri; taking
      -- the length of the mapped list would merely count the offline
      -- nodes instead of the instances residing on them.
      offline_sec = sum $ map (length . Node.sList) offnl
      score = Cluster.compCV nl
      groupstats = [ n1violated
                   , conflicttags
                   , offline_pri
                   , offline_sec
                   ]
  printGroupStats verbose machineread phase gidx groupstats score
  return groupstats
198

    
199
-- | Simulate the rebalancing of one group with Hbal's 'Hbal.iterateDepth'.
--
-- A group whose score is already below the configured minimum score is
-- returned untouched; otherwise the node and instance lists of the table
-- produced by the iteration are returned under the same group index.
simulateRebalance :: Options ->
                     (Gdx, (Node.List, Instance.List)) ->
                     IO ( (Gdx, (Node.List, Instance.List)) )
simulateRebalance opts grp@(gidx, (nl, il))
  | startScore < threshold = return grp
  | otherwise = do
      (finalTable, _) <- Hbal.iterateDepth False startTable
                                           (optMaxLength opts)
                                           (optDiskMoves opts)
                                           (optInstMoves opts)
                                           nodeWidth instWidth [] threshold
                                           (optMinGainLim opts)
                                           (optMinGain opts)
                                           (optEvacMode opts)
      let Cluster.Table balancedNl balancedIl _ _ = finalTable
      return (gidx, (balancedNl, balancedIl))
  where
    startScore = Cluster.compCV nl
    startTable = Cluster.Table nl il startScore []
    threshold = optMinScore opts
    -- Longest node / instance alias lengths, handed to the iteration.
    nodeWidth = maximum . map (length . Node.alias) $ Container.elems nl
    instWidth = maximum . map (length . Instance.alias) $ Container.elems il
225

    
226
-- | Print the final @OK@ marker of the machine readable output by
-- delegating to 'printFinal' with the 'htcPrefix' prefix.
printFinalHTC :: Bool -> IO ()
printFinalHTC machineread = printFinal htcPrefix machineread
229

    
230
-- | Main function.
--
-- Loads the cluster data, prints the per-group and cluster-wide statistics
-- for the initial state and, unless simulation is disabled, impossible
-- (split instances) or unnecessary (no problems found), simulates a
-- rebalance of every group and prints the statistics again for the
-- rebalanced state.
--
-- Exits with failure when positional arguments are given or when split
-- instances prevent the simulation.
main :: Options -> [String] -> IO ()
main opts args = do
  unless (null args) $ do
         hPutStrLn stderr "Error: this program doesn't take any arguments."
         exitWith $ ExitFailure 1

  let verbose = optVerbose opts
      machineread = optMachineReadable opts
      nosimulation = optNoSimulation opts

  (ClusterData _ fixed_nl ilf _ _) <- loadExternalData opts
  nlf <- setNodeStatus opts fixed_nl

  let splitinstances = Cluster.findSplitInstances nlf ilf
      splitcluster = Cluster.splitCluster nlf ilf

  groupsstats <- mapM (perGroupChecks verbose machineread Initial) splitcluster
  -- Per-group statistics are summed position by position.
  let clusterstats = map sum (transpose groupsstats) :: [Int]
      -- Rebalancing can only be simulated when no instance is split.
      canrebalance = null splitinstances
  needrebalance <- printClusterStats verbose machineread Initial
                     clusterstats canrebalance

  when nosimulation $ do
    unless (verbose == 0 || machineread) $
      printf "Running in no-simulation mode. Exiting.\n"
    printFinalHTC machineread
    exitWith ExitSuccess

  unless canrebalance $ do
    unless (verbose == 0 || machineread) $
       printf "Split instances found, simulation of re-balancing not possible\n"
    exitWith $ ExitFailure 1

  unless needrebalance $ do
    unless (verbose == 0 || machineread) $
      printf "No need to rebalance cluster, no problems found. Exiting.\n"
    printFinalHTC machineread
    exitWith ExitSuccess

  rebalancedcluster <- mapM (simulateRebalance opts) splitcluster
  newgroupstats <- mapM (perGroupChecks verbose machineread Rebalanced)
                     rebalancedcluster
  let newclusterstats = map sum (transpose newgroupstats) :: [Int]
  _ <- printClusterStats verbose machineread Rebalanced
         newclusterstats canrebalance

  printFinalHTC machineread