Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hcheck.hs @ e60fa4af

History | View | Annotate | Download (8.7 kB)

1
{-| Cluster checker.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hcheck (main, options) where
27

    
28
import Control.Monad
29
import List (transpose)
30
import System.Exit
31
import System.IO
32
import Text.Printf (printf)
33

    
34
import qualified Ganeti.HTools.Container as Container
35
import qualified Ganeti.HTools.Cluster as Cluster
36
import qualified Ganeti.HTools.Node as Node
37
import qualified Ganeti.HTools.Instance as Instance
38

    
39
import qualified Ganeti.HTools.Program.Hbal as Hbal
40

    
41
import Ganeti.HTools.CLI
42
import Ganeti.HTools.ExtLoader
43
import Ganeti.HTools.Loader
44
import Ganeti.HTools.Types
45

    
46
-- | Option descriptors for this program, exported alongside 'main'.
-- The order of the entries is kept exactly as originally declared.
options :: [OptType]
options =
  [ oDataFile, oDiskMoves, oDynuFile, oEvacMode
  , oExInst, oExTags, oIAllocSrc, oInstMoves
  , oLuxiSocket, oMachineReadable, oMaxCpu, oMaxSolLength
  , oMinDisk, oMinGain, oMinGainLim, oMinScore
  , oNoSimulation, oOfflineNode, oQuiet, oRapiMaster
  , oSelInst, oShowHelp, oShowVer, oVerbose
  ]
74

    
75
-- | The point at which checks are made: before any rebalancing
-- ('Initial') or after it ('Rebalanced').
data Phase
  = Initial
  | Rebalanced
78

    
79
-- | Granularity of the presented statistics: a single group
-- ('GroupLvl') or the whole cluster ('ClusterLvl').
data Level
  = GroupLvl
  | ClusterLvl
82

    
83
-- | Prefix handed to the machine readable printing helpers
-- ('printKeysHTC' and 'printFinalHTC').
htcPrefix :: String
htcPrefix = "HCHECK"
86

    
87
-- | Key component identifying the phase in machine readable output.
phasePrefix :: Phase -> String
phasePrefix phase =
  case phase of
    Initial    -> "INIT"
    Rebalanced -> "FINAL"
91

    
92
-- | Wording used for the phase in human readable output.
phaseDescription :: Phase -> String
phaseDescription phase =
  case phase of
    Initial    -> "initially"
    Rebalanced -> "after rebalancing"
96

    
97
-- | Key component identifying the level in machine readable output.
levelPrefix :: Level -> String
levelPrefix level =
  case level of
    GroupLvl   -> "GROUP"
    ClusterLvl -> "CLUSTER"
101

    
102
-- | Data shown both per group and per cluster.
--
-- Each entry pairs the key used in machine readable output with the
-- description used in human readable output.
commonData :: [(String, String)]
commonData =
  [ ("N1_FAIL", "Nodes not N+1 happy")
  , ("CONFLICT_TAGS", "Nodes with conflicting instances")
  , ("OFFLINE_PRI", "Instances with primary on an offline node")
  , ("OFFLINE_SEC", "Instances with secondary on an offline node")
  ]
109

    
110
-- | Data shown per group: the common entries followed by the group
-- score.
groupData :: [(String, String)]
groupData = commonData ++ groupOnly
  where
    groupOnly = [("SCORE", "Group score")]
113

    
114
-- | Data shown per cluster: the common entries followed by the
-- rebalance indicator.
clusterData :: [(String, String)]
clusterData = commonData ++ clusterOnly
  where
    clusterOnly = [("NEED_REBALANCE", "Cluster is not healthy")]
117

    
118
-- | Print a list of (key, value) pairs through 'printKeys', passing
-- 'htcPrefix' as the prefix (per the original note, the output is a
-- shell fragment).
printKeysHTC :: [(String, String)] -> IO ()
printKeysHTC kvs = printKeys htcPrefix kvs
121

    
122
-- | Print all the statistics on a group level.
--
-- In machine readable mode the values are emitted as key/value pairs
-- through 'printKeysHTC', regardless of verbosity.  Otherwise a human
-- readable listing is printed, unless the verbosity is zero.
printGroupStats :: Int -> Bool -> Phase -> Gdx -> [Int] -> Double -> IO ()
printGroupStats verbose machineread phase gidx stats score
  | machineread = printKeysHTC (zip keys values)
  | verbose == 0 = return ()
  | otherwise = do
      printf "\nStatistics for group %d %s\n"
             gidx (phaseDescription phase) :: IO ()
      forM_ (zip groupData values) $ \((_, descr), value) ->
        printf "    %s: %s\n" descr value :: IO ()
  where
    -- Integer statistics first, then the score with eight decimals.
    values = map (printf "%d") stats ++ [printf "%.8f" score] :: [String]
    keyFor :: String -> String
    keyFor = printf "%s_%s_%d_%s"
                    (phasePrefix phase)
                    (levelPrefix GroupLvl)
                    gidx
    keys = map (keyFor . fst) groupData
141

    
142
-- | Print all the statistics on a cluster (global) level.
--
-- Returns whether the sum of all statistics is positive, which the
-- caller uses as the \"needs rebalance\" indicator.  In machine
-- readable mode that sum itself is printed as the last value; in human
-- readable mode the boolean is shown instead, and nothing is printed
-- when the verbosity is zero.
printClusterStats :: Int -> Bool -> Phase -> [Int] -> IO Bool
printClusterStats verbose machineread phase stats = do
  if machineread
    then printKeysHTC (zip keys (map fmt (stats ++ [total])))
    else unless (verbose == 0) $ do
      printf "\nCluster statistics %s\n" (phaseDescription phase) :: IO ()
      forM_ (zip clusterData (map fmt stats ++ [show unhealthy])) $
        \((_, descr), value) -> printf "    %s: %s\n" descr value :: IO ()
  return unhealthy
  where
    total = sum stats
    unhealthy = total > 0
    fmt :: Int -> String
    fmt = printf "%d"
    keyFor :: String -> String
    keyFor = printf "%s_%s_%s"
                    (phasePrefix phase)
                    (levelPrefix ClusterLvl)
    keys = map (keyFor . fst) clusterData
163

    
164
{- | Check a group for N+1 happiness, conflicts of primaries on nodes
and instances residing on offline nodes.

The statistics are printed via 'printGroupStats' (together with the
group score) and returned in the same order as the entries of
'commonData': N+1 failures, nodes with conflicting primaries, instances
with primary on an offline node, instances with secondary on an offline
node.

-}
perGroupChecks :: Int -> Bool -> Phase -> (Gdx, (Node.List, Instance.List))
               -> IO ([Int])
perGroupChecks verbose machineread phase (gidx, (nl, il)) = do
  let nodes = Container.elems nl
      offnl = filter Node.offline nodes
      n1violated = length . fst $ Cluster.computeBadItems nl il
      conflicttags = length $ filter ((> 0) . Node.conflictingPrimaries) nodes
      offline_pri = sum $ map (length . Node.pList) offnl
      -- Count the secondary instances themselves, mirroring offline_pri;
      -- taking the length of the mapped list would only count the
      -- offline nodes.
      offline_sec = sum $ map (length . Node.sList) offnl
      score = Cluster.compCV nl
      groupstats = [ n1violated
                   , conflicttags
                   , offline_pri
                   , offline_sec
                   ]
  printGroupStats verbose machineread phase gidx groupstats score
  return groupstats
185

    
186
-- | Use Hbal's iterateDepth to simulate group rebalance.
--
-- A group whose initial score is already below the minimum score
-- option is returned untouched.  Otherwise the node and instance lists
-- of the table produced by 'Hbal.iterateDepth' are returned under the
-- same group index.
simulateRebalance :: Options ->
                     (Gdx, (Node.List, Instance.List)) ->
                     IO ( (Gdx, (Node.List, Instance.List)) )
simulateRebalance opts (gidx, (nl, il)) = do
  let ini_cv = Cluster.compCV nl
      ini_tbl = Cluster.Table nl il ini_cv []
      min_cv = optMinScore opts

  if ini_cv < min_cv
    then return (gidx, (nl, il))
    else do
      -- Seed with 0 so that an empty instance (or node) list cannot hit
      -- the partial 'maximum'; non-empty lists give the same result as
      -- before since lengths are never negative.
      let longest xs = maximum (0 : xs)
          imlen = longest . map (length . Instance.alias) $ Container.elems il
          nmlen = longest . map (length . Node.alias) $ Container.elems nl

      (fin_tbl, _) <- Hbal.iterateDepth False ini_tbl
                                        (optMaxLength opts)
                                        (optDiskMoves opts)
                                        (optInstMoves opts)
                                        nmlen imlen [] min_cv
                                        (optMinGainLim opts) (optMinGain opts)
                                        (optEvacMode opts)

      let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
      return (gidx, (fin_nl, fin_il))
212

    
213
-- | Delegate to 'printFinal' with 'htcPrefix'; per the original note
-- this prints the final @OK@ marker in machine readable output.
printFinalHTC :: Bool -> IO ()
printFinalHTC machineread = printFinal htcPrefix machineread
216

    
217
-- | Main function.
--
-- Loads the cluster data, prints the per-group and cluster statistics
-- for the initial state and, unless one of the early exits below
-- applies, simulates a rebalance of every group and prints the
-- statistics again for the rebalanced state.
main :: Options -> [String] -> IO ()
main opts args = do
  unless (null args) $ do
         hPutStrLn stderr "Error: this program doesn't take any arguments."
         exitWith $ ExitFailure 1

  let verbose = optVerbose opts
      machineread = optMachineReadable opts
      nosimulation = optNoSimulation opts

  (ClusterData _ fixed_nl ilf _ _) <- loadExternalData opts
  nlf <- setNodeStatus opts fixed_nl

  let splitinstances = Cluster.findSplitInstances nlf ilf
      splitcluster = Cluster.splitCluster nlf ilf

  groupsstats <- mapM (perGroupChecks verbose machineread Initial) splitcluster
  -- Every per-group list has the same layout, so transposing and summing
  -- yields the cluster-wide total for each statistic.
  let clusterstats = map sum (transpose groupsstats) :: [Int]
  needrebalance <- printClusterStats verbose machineread Initial clusterstats

  -- Early exit: simulation explicitly disabled.
  when nosimulation $ do
    unless (verbose == 0 || machineread) $
      printf "Running in no-simulation mode. Exiting.\n"
    printFinalHTC machineread
    exitWith ExitSuccess

  -- Early exit (failure): split instances prevent the simulation.
  when (not (null splitinstances)) $ do
    unless (verbose == 0 || machineread) $
       printf "Split instances found, simulation of re-balancing not possible\n"
    exitWith $ ExitFailure 1

  -- Early exit: the initial statistics show no problems.
  unless needrebalance $ do
    unless (verbose == 0 || machineread) $
      printf "No need to rebalance cluster, no problems found. Exiting.\n"
    printFinalHTC machineread
    exitWith ExitSuccess

  rebalancedcluster <- mapM (simulateRebalance opts) splitcluster
  newgroupstats <- mapM (perGroupChecks verbose machineread Rebalanced)
                     rebalancedcluster
  let newclusterstats = map sum (transpose newgroupstats) :: [Int]
  _ <- printClusterStats verbose machineread Rebalanced newclusterstats

  printFinalHTC machineread