Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hcheck.hs @ 1213f9d6

History | View | Annotate | Download (7.5 kB)

1
{-| Cluster checker.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hcheck (main, options) where
27

    
28
import Control.Monad
29
import List (transpose)
30
import System.Exit
31
import System.IO
32
import Text.Printf (printf)
33

    
34
import qualified Ganeti.HTools.Container as Container
35
import qualified Ganeti.HTools.Cluster as Cluster
36
import qualified Ganeti.HTools.Node as Node
37
import qualified Ganeti.HTools.Instance as Instance
38

    
39
import qualified Ganeti.HTools.Program.Hbal as Hbal
40

    
41
import Ganeti.HTools.CLI
42
import Ganeti.HTools.ExtLoader
43
import Ganeti.HTools.Loader
44
import Ganeti.HTools.Types
45

    
46
-- | Command-line options accepted by this program.
--
-- The list is exported next to 'main'.  Entries are kept in alphabetical
-- order; keep that order when adding new ones.
options :: [OptType]
options =
  [ oDataFile, oDiskMoves, oDynuFile, oEvacMode
  , oExInst, oExTags, oIAllocSrc, oInstMoves
  , oLuxiSocket, oMachineReadable, oMaxCpu, oMaxSolLength
  , oMinDisk, oMinGain, oMinGainLim, oMinScore
  , oNoSimulation, oOfflineNode, oQuiet, oRapiMaster
  , oSelInst, oShowHelp, oShowVer, oVerbose
  ]
74

    
75
-- | Stage of the check a set of statistics belongs to: the cluster as
-- loaded ('Initial') or after the simulated rebalance ('Rebalanced').
data Phase
  = Initial
  | Rebalanced
78

    
79
-- | Prefix used for names in the machine-readable output; it is the value
-- handed to 'printFinal' by 'printFinalHTC'.
htcPrefix :: String
htcPrefix = "HCHECK"
82

    
83
-- | Human-readable wording for a 'Phase', used as the tail of the
-- statistics headings (e.g. @Cluster statistics initially@).
phaseDescription :: Phase -> String
phaseDescription phase =
  case phase of
    Initial    -> "initially"
    Rebalanced -> "after rebalancing"
87

    
88
-- | Statistics shown both per group and for the whole cluster.
--
-- Each entry is a @(key, description)@ pair.  The printers in this file
-- only display the description; the short key is not used by them.  The
-- order matters: entries are zipped positionally with the statistics list
-- built by 'perGroupChecks' (N+1 failures, conflicting tags, offline
-- primaries, offline secondaries).
commonData :: [(String, String)]
commonData =
  [ ("N1_FAIL", "Nodes not N+1 happy")
  , ("CONFLICT_TAGS", "Nodes with conflicting instances")
  , ("OFFLINE_PRI", "Instances with primary on an offline node")
    -- Description previously read "seondary"; corrected spelling.
  , ("OFFLINE_SEC", "Instances with secondary on an offline node")
  ]
95

    
96
-- | Statistics shown for each group: everything in 'commonData' followed
-- by the group score.
groupData :: [(String, String)]
groupData = commonData ++ groupOnly
  where
    groupOnly = [("SCORE", "Group score")]
99

    
100
-- | Statistics shown for the whole cluster: everything in 'commonData'
-- followed by the overall health verdict.
clusterData :: [(String, String)]
clusterData = commonData ++ clusterOnly
  where
    clusterOnly = [("NEED_REBALANCE", "Cluster is not healthy")]
103

    
104
-- | Print the statistics of a single group.
--
-- Arguments, in order: verbosity level, machine-readable flag, check
-- phase, group index, the integer statistics and the group score.
--
-- Nothing is printed when machine-readable output is requested or when the
-- verbosity is 0.  Otherwise a heading is written, followed by one line
-- per 'groupData' description paired positionally with the statistics
-- (the score comes last, formatted with eight decimals).
printGroupStats :: Int -> Bool -> Phase -> Gdx -> [Int] -> Double -> IO ()
printGroupStats verbose machineread phase gidx stats score =
  when (not machineread && verbose /= 0) $ do
    printf "\nStatistics for group %d %s\n" gidx (phaseDescription phase)
      :: IO ()
    zipWithM_ printLine (map snd groupData) rendered
  where
    -- Every value is pre-rendered as text so one format string fits all rows.
    rendered :: [String]
    rendered = map (printf "%d") stats ++ [printf "%.8f" score]

    printLine :: String -> String -> IO ()
    printLine desc value = printf "    %s: %s\n" desc value
115

    
116
-- | Print the cluster-wide statistics and report whether a rebalance is
-- needed.
--
-- Arguments, in order: verbosity level, machine-readable flag, check phase
-- and the per-cluster statistics.
--
-- The result is 'True' when the statistics sum to more than zero.  The
-- human-readable report is written only when machine-readable output is
-- off and the verbosity is not 0; the returned value is the same either
-- way.
printClusterStats :: Int -> Bool -> Phase -> [Int] -> IO (Bool)
printClusterStats verbose machineread phase stats = do
  unless (machineread || verbose == 0) $ do
    printf "\nCluster statistics %s\n" (phaseDescription phase) :: IO ()
    forM_ (zip clusterData columns) $ \((_, desc), value) ->
      printf "    %s: %s\n" desc value :: IO ()
  return unhealthy
  where
    unhealthy = sum stats > 0
    -- The verdict is appended so it lines up with the last 'clusterData' row.
    columns = map (printf "%d") stats ++ [show unhealthy] :: [String]
130

    
131
{- | Check a group for N+1 happiness, conflicts of primaries on nodes and
instances residing on offline nodes.

The statistics are printed through 'printGroupStats' (together with the
group score from 'Cluster.compCV') and then returned as a list in the
order expected by 'commonData': N+1 failures, nodes with conflicting
primaries, instances with their primary on an offline node, instances
with their secondary on an offline node.

-}
perGroupChecks :: Int -> Bool -> Phase -> (Gdx, (Node.List, Instance.List))
               -> IO ([Int])
perGroupChecks verbose machineread phase (gidx, (nl, il)) = do
  let nodes = Container.elems nl
      offnl = filter Node.offline nodes
      n1violated = length . fst $ Cluster.computeBadItems nl il
      conflicttags = length $ filter ((> 0) . Node.conflictingPrimaries) nodes
      offline_pri = sum $ map (length . Node.pList) offnl
      -- Count the instances, not the nodes: the previous
      -- @length $ map Node.sList offnl@ was just the number of offline
      -- nodes, which disagreed with both the label and 'offline_pri'.
      offline_sec = sum $ map (length . Node.sList) offnl
      score = Cluster.compCV nl
      groupstats = [ n1violated
                   , conflicttags
                   , offline_pri
                   , offline_sec
                   ]
  printGroupStats verbose machineread phase gidx groupstats score
  return groupstats
152

    
153
-- | Use Hbal's 'Hbal.iterateDepth' to simulate a rebalance of one group.
--
-- If the group's current score ('Cluster.compCV') is already below the
-- configured minimum score ('optMinScore') the group is returned
-- unchanged.  Otherwise the balancer is run with the limits taken from the
-- options, and the node and instance lists of the resulting table are
-- returned under the same group index.
simulateRebalance :: Options ->
                     (Gdx, (Node.List, Instance.List)) ->
                     IO ( (Gdx, (Node.List, Instance.List)) )
simulateRebalance opts grp@(gidx, (nl, il))
  | ini_cv < min_cv = return grp
  | otherwise = do
      -- NOTE(review): the leading False and the empty list presumably mean
      -- "do not print moves" and "no previous jobs" - confirm against Hbal.
      (fin_tbl, _) <- Hbal.iterateDepth False ini_tbl
                                        (optMaxLength opts)
                                        (optDiskMoves opts)
                                        (optInstMoves opts)
                                        nmlen imlen [] min_cv
                                        (optMinGainLim opts) (optMinGain opts)
                                        (optEvacMode opts)
      let (Cluster.Table fin_nl fin_il _ _) = fin_tbl
      return (gidx, (fin_nl, fin_il))
  where
    ini_cv = Cluster.compCV nl
    ini_tbl = Cluster.Table nl il ini_cv []
    min_cv = optMinScore opts

    -- 'maximum' is partial; seeding with 0 keeps the widths defined for a
    -- group without instances (or nodes) and cannot change the result for
    -- non-empty input, since lengths are never negative.
    widest :: [Int] -> Int
    widest = maximum . (0 :)

    imlen = widest . map (length . Instance.alias) $ Container.elems il
    nmlen = widest . map (length . Node.alias) $ Container.elems nl
179

    
180
-- | Emit the closing marker of the machine-readable output (the final
-- @OK@ line) by delegating to 'printFinal' with the 'htcPrefix' prefix.
-- The flag is passed straight through to 'printFinal'.
printFinalHTC :: Bool -> IO ()
printFinalHTC machineread = printFinal htcPrefix machineread
183

    
184
-- | Program entry point.
--
-- Steps, in order:
--
-- 1. Reject any positional arguments (exit status 1).
--
-- 2. Load the cluster data, apply the node status overrides and print the
--    initial per-group and cluster statistics.
--
-- 3. Stop successfully if simulation is disabled; fail (exit status 1) if
--    split instances exist; stop successfully if nothing needs
--    rebalancing.
--
-- 4. Otherwise simulate a rebalance of every group, print the statistics
--    again for the rebalanced state and emit the final marker.
main :: Options -> [String] -> IO ()
main opts args = do
  unless (null args) $ do
    hPutStrLn stderr "Error: this program doesn't take any arguments."
    exitFailure

  let verbose = optVerbose opts
      machineread = optMachineReadable opts
      nosimulation = optNoSimulation opts
      -- Informational messages are only for human-readable, non-quiet runs.
      chatty = verbose /= 0 && not machineread
      note msg = when chatty (printf msg) :: IO ()
      finishOk = printFinalHTC machineread >> exitSuccess
      clusterTotals rows = map sum (transpose rows) :: [Int]

  (ClusterData _ fixed_nl ilf _ _) <- loadExternalData opts
  nlf <- setNodeStatus opts fixed_nl

  let splitinstances = Cluster.findSplitInstances nlf ilf
      splitcluster = Cluster.splitCluster nlf ilf

  groupsstats <- mapM (perGroupChecks verbose machineread Initial) splitcluster
  needrebalance <- printClusterStats verbose machineread Initial
                     (clusterTotals groupsstats)

  when nosimulation $ do
    note "Running in no-simulation mode. Exiting.\n"
    finishOk

  unless (null splitinstances) $ do
    note "Split instances found, simulation of re-balancing not possible\n"
    exitFailure

  unless needrebalance $ do
    note "No need to rebalance cluster, no problems found. Exiting.\n"
    finishOk

  rebalancedcluster <- mapM (simulateRebalance opts) splitcluster
  newgroupstats <- mapM (perGroupChecks verbose machineread Rebalanced)
                     rebalancedcluster
  -- The verdict after rebalancing is printed but not acted upon.
  _ <- printClusterStats verbose machineread Rebalanced
         (clusterTotals newgroupstats)

  printFinalHTC machineread