Statistics
| Branch: | Tag: | Revision:

root / hbal.hs @ 78694255

History | View | Annotate | Download (11.8 kB)

1
{-| Solver for N+1 cluster errors
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Main (main) where
27

    
28
import Data.List
29
import Data.Function
30
import Data.Maybe (isJust, fromJust, fromMaybe)
31
import Monad
32
import System
33
import System.IO
34
import System.Console.GetOpt
35
import qualified System
36

    
37
import Text.Printf (printf)
38

    
39
import qualified Ganeti.HTools.Container as Container
40
import qualified Ganeti.HTools.Cluster as Cluster
41
import qualified Ganeti.HTools.Node as Node
42
import qualified Ganeti.HTools.CLI as CLI
43

    
44
import Ganeti.HTools.Utils
45

    
46
-- | Command line options structure.
47
data Options = Options
48
    { optShowNodes :: Bool           -- ^ Whether to show node status
49
    , optShowCmds  :: Maybe FilePath -- ^ Whether to show the command list
50
    , optOneline   :: Bool           -- ^ Switch output to a single line
51
    , optNodef     :: FilePath       -- ^ Path to the nodes file
52
    , optNodeSet   :: Bool           -- ^ The nodes have been set by options
53
    , optInstf     :: FilePath       -- ^ Path to the instances file
54
    , optInstSet   :: Bool           -- ^ The insts have been set by options
55
    , optMaxLength :: Int            -- ^ Stop after this many steps
56
    , optMaster    :: String         -- ^ Collect data from RAPI
57
    , optVerbose   :: Int            -- ^ Verbosity level
58
    , optOffline   :: [String]       -- ^ Names of offline nodes
59
    , optMinScore  :: Cluster.Score  -- ^ The minimum score we aim for
60
    , optShowVer   :: Bool           -- ^ Just show the program version
61
    , optShowHelp  :: Bool           -- ^ Just show the help
62
    } deriving Show
63

    
64
instance CLI.CLIOptions Options where
65
    showVersion = optShowVer
66
    showHelp    = optShowHelp
67

    
68
instance CLI.EToolOptions Options where
69
    nodeFile   = optNodef
70
    nodeSet    = optNodeSet
71
    instFile   = optInstf
72
    instSet    = optInstSet
73
    masterName = optMaster
74
    silent a   = (optVerbose a) == 0
75

    
76
-- | Default values for the command line options.
77
defaultOptions :: Options
78
defaultOptions  = Options
79
 { optShowNodes = False
80
 , optShowCmds  = Nothing
81
 , optOneline   = False
82
 , optNodef     = "nodes"
83
 , optNodeSet   = False
84
 , optInstf     = "instances"
85
 , optInstSet   = False
86
 , optMaxLength = -1
87
 , optMaster    = ""
88
 , optVerbose   = 1
89
 , optOffline   = []
90
 , optMinScore  = 1e-9
91
 , optShowVer   = False
92
 , optShowHelp  = False
93
 }
94

    
95
-- | Options list and functions
96
options :: [OptDescr (Options -> Options)]
97
options =
98
    [ Option ['p']     ["print-nodes"]
99
      (NoArg (\ opts -> opts { optShowNodes = True }))
100
      "print the final node list"
101
    , Option ['C']     ["print-commands"]
102
      (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-")
103
                  "FILE")
104
      "print the ganeti command list for reaching the solution,\
105
      \if an argument is passed then write the commands to a file named\
106
      \ as such"
107
    , Option ['o']     ["oneline"]
108
      (NoArg (\ opts -> opts { optOneline = True }))
109
      "print the ganeti command list for reaching the solution"
110
    , Option ['n']     ["nodes"]
111
      (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
112
      "the node list FILE"
113
    , Option ['i']     ["instances"]
114
      (ReqArg (\ f opts -> opts { optInstf =  f, optInstSet = True }) "FILE")
115
      "the instance list FILE"
116
    , Option ['m']     ["master"]
117
      (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
118
      "collect data via RAPI at the given ADDRESS"
119
    , Option ['l']     ["max-length"]
120
      (ReqArg (\ i opts -> opts { optMaxLength =  (read i)::Int }) "N")
121
      "cap the solution at this many moves (useful for very unbalanced \
122
      \clusters)"
123
    , Option ['v']     ["verbose"]
124
      (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
125
      "increase the verbosity level"
126
    , Option ['q']     ["quiet"]
127
      (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) - 1 }))
128
      "decrease the verbosity level"
129
    , Option ['O']     ["offline"]
130
      (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
131
      " set node as offline"
132
    , Option ['e']     ["min-score"]
133
      (ReqArg (\ e opts -> opts { optMinScore = read e }) "EPSILON")
134
      " mininum score to aim for"
135
    , Option ['V']     ["version"]
136
      (NoArg (\ opts -> opts { optShowVer = True}))
137
      "show the version of the program"
138
    , Option ['h']     ["help"]
139
      (NoArg (\ opts -> opts { optShowHelp = True}))
140
      "show help"
141
    ]
142

    
143
{- | Start computing the solution at the given depth and recurse until
144
we find a valid solution or we exceed the maximum depth.
145

    
146
-}
147
iterateDepth :: Cluster.Table    -- ^ The starting table
148
             -> Int              -- ^ Remaining length
149
             -> Int              -- ^ Max node name len
150
             -> Int              -- ^ Max instance name len
151
             -> [[String]]       -- ^ Current command list
152
             -> Bool             -- ^ Wheter to be silent
153
             -> Cluster.Score    -- ^ Score at which to stop
154
             -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
155
                                               -- commands
156
iterateDepth ini_tbl max_rounds nmlen imlen
157
             cmd_strs oneline min_score =
158
    let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
159
        all_inst = Container.elems ini_il
160
        node_idx = map Node.idx . filter (not . Node.offline) $
161
                   Container.elems ini_nl
162
        fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
163
        (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
164
        ini_plc_len = length ini_plc
165
        fin_plc_len = length fin_plc
166
        allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
167
    in
168
      do
169
        let
170
            (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
171
                               nmlen imlen (head fin_plc) fin_plc_len
172
            upd_cmd_strs = cmds:cmd_strs
173
        unless (oneline || fin_plc_len == ini_plc_len) $ do
174
          putStrLn sol_line
175
          hFlush stdout
176
        (if fin_cv < ini_cv then -- this round made success, try deeper
177
             if allowed_next && fin_cv > min_score
178
             then iterateDepth fin_tbl max_rounds
179
                  nmlen imlen upd_cmd_strs oneline min_score
180
             -- don't go deeper, but return the better solution
181
             else return (fin_tbl, upd_cmd_strs)
182
         else
183
             return (ini_tbl, cmd_strs))
184

    
185
-- | Formats the solution for the oneline display
186
formatOneline :: Double -> Int -> Double -> String
187
formatOneline ini_cv plc_len fin_cv =
188
    printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
189
               (if fin_cv == 0 then 1 else (ini_cv / fin_cv))
190

    
191
-- | Main function.
192
main :: IO ()
193
main = do
194
  cmd_args <- System.getArgs
195
  (opts, args) <- CLI.parseOpts cmd_args "hbal" options defaultOptions
196

    
197
  unless (null args) $ do
198
         hPutStrLn stderr "Error: this program doesn't take any arguments."
199
         exitWith $ ExitFailure 1
200

    
201
  let oneline = optOneline opts
202
      verbose = optVerbose opts
203

    
204
  (fixed_nl, il, csf) <- CLI.loadExternalData opts
205

    
206
  let offline_names = optOffline opts
207
      all_nodes = Container.elems fixed_nl
208
      all_names = map Node.name all_nodes
209
      offline_wrong = filter (\n -> not $ elem n all_names) offline_names
210
      offline_indices = map Node.idx $
211
                        filter (\n -> elem (Node.name n) offline_names)
212
                               all_nodes
213

    
214
  when (length offline_wrong > 0) $ do
215
         printf "Wrong node name(s) set as offline: %s\n"
216
                (commaJoin offline_wrong)
217
         exitWith $ ExitFailure 1
218

    
219
  let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
220
                                then Node.setOffline n True
221
                                else n) fixed_nl
222

    
223
  when (Container.size il == 0) $ do
224
         (if oneline then putStrLn $ formatOneline 0 0 0
225
          else printf "Cluster is empty, exiting.\n")
226
         exitWith ExitSuccess
227

    
228
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
229
             (Container.size nl)
230
             (Container.size il)
231

    
232
  when (length csf > 0 && not oneline && verbose > 1) $ do
233
         printf "Note: Stripping common suffix of '%s' from names\n" csf
234

    
235
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
236
  unless (oneline || verbose == 0) $ printf
237
             "Initial check done: %d bad nodes, %d bad instances.\n"
238
             (length bad_nodes) (length bad_instances)
239

    
240
  when (length bad_nodes > 0) $ do
241
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
242
                  \that the cluster will end N+1 happy."
243

    
244
  when (optShowNodes opts) $
245
       do
246
         putStrLn "Initial cluster status:"
247
         putStrLn $ Cluster.printNodes nl
248

    
249
  let ini_cv = Cluster.compCV nl
250
      ini_tbl = Cluster.Table nl il ini_cv []
251
      min_cv = optMinScore opts
252

    
253
  when (ini_cv < min_cv) $ do
254
         (if oneline then
255
              putStrLn $ formatOneline ini_cv 0 ini_cv
256
          else printf "Cluster is already well balanced (initial score %.6g,\n\
257
                      \minimum score %.6g).\nNothing to do, exiting\n"
258
                      ini_cv min_cv)
259
         exitWith ExitSuccess
260

    
261
  unless oneline (if verbose > 2 then
262
                      printf "Initial coefficients: overall %.8f, %s\n"
263
                      ini_cv (Cluster.printStats nl)
264
                  else
265
                      printf "Initial score: %.8f\n" ini_cv)
266

    
267
  unless oneline $ putStrLn "Trying to minimize the CV..."
268
  let imlen = Container.maxNameLen il
269
      nmlen = Container.maxNameLen nl
270

    
271
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
272
                         nmlen imlen [] oneline min_cv
273
  let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
274
      ord_plc = reverse fin_plc
275
      sol_msg = (if null fin_plc
276
                 then printf "No solution found\n"
277
                 else (if verbose > 2
278
                       then printf "Final coefficients:   overall %.8f, %s\n"
279
                            fin_cv (Cluster.printStats fin_nl)
280
                       else printf "Cluster score improved from %.8f to %.8f\n"
281
                            ini_cv fin_cv
282
                      ))::String
283

    
284
  unless oneline $ putStr sol_msg
285

    
286
  unless (oneline || verbose == 0) $
287
         printf "Solution length=%d\n" (length ord_plc)
288

    
289
  let cmd_data = Cluster.formatCmds . reverse $ cmd_strs
290

    
291
  when (isJust $ optShowCmds opts) $
292
       do
293
         let out_path = fromJust $ optShowCmds opts
294
         putStrLn ""
295
         (if out_path == "-" then
296
              printf "Commands to run to reach the above solution:\n%s"
297
                     (unlines . map ("  " ++) .
298
                      filter (/= "check") .
299
                      lines $ cmd_data)
300
          else do
301
            writeFile out_path (CLI.shTemplate ++ cmd_data)
302
            printf "The commands have been written to file '%s'\n" out_path)
303

    
304
  when (optShowNodes opts) $
305
       do
306
         let (orig_mem, orig_disk) = Cluster.totalResources nl
307
             (final_mem, final_disk) = Cluster.totalResources fin_nl
308
         putStrLn ""
309
         putStrLn "Final cluster status:"
310
         putStrLn $ Cluster.printNodes fin_nl
311
         when (verbose > 3) $
312
              do
313
                printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
314
                printf "Final:    mem=%d disk=%d\n" final_mem final_disk
315
  when oneline $
316
         putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv