Statistics
| Branch: | Tag: | Revision:

root / hbal.hs @ 877d0386

History | View | Annotate | Download (12.4 kB)

1
{-| Cluster rebalancer
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Main (main) where
27

    
28
import Data.List
29
import Data.Function
30
import Data.Maybe (isJust, fromJust, fromMaybe)
31
import Monad
32
import System
33
import System.IO
34
import System.Console.GetOpt
35
import qualified System
36

    
37
import Text.Printf (printf)
38

    
39
import qualified Ganeti.HTools.Container as Container
40
import qualified Ganeti.HTools.Cluster as Cluster
41
import qualified Ganeti.HTools.Node as Node
42
import qualified Ganeti.HTools.CLI as CLI
43

    
44
import Ganeti.HTools.Utils
45

    
46
-- | Command line options structure.
47
data Options = Options
48
    { optShowNodes :: Bool           -- ^ Whether to show node status
49
    , optShowCmds  :: Maybe FilePath -- ^ Whether to show the command list
50
    , optOneline   :: Bool           -- ^ Switch output to a single line
51
    , optNodef     :: FilePath       -- ^ Path to the nodes file
52
    , optNodeSet   :: Bool           -- ^ The nodes have been set by options
53
    , optInstf     :: FilePath       -- ^ Path to the instances file
54
    , optInstSet   :: Bool           -- ^ The insts have been set by options
55
    , optMaxLength :: Int            -- ^ Stop after this many steps
56
    , optMaster    :: String         -- ^ Collect data from RAPI
57
    , optVerbose   :: Int            -- ^ Verbosity level
58
    , optOffline   :: [String]       -- ^ Names of offline nodes
59
    , optMinScore  :: Cluster.Score  -- ^ The minimum score we aim for
60
    , optMcpu      :: Double         -- ^ Max cpu ratio for nodes
61
    , optMdsk      :: Double         -- ^ Max disk usage ratio for nodes
62
    , optShowVer   :: Bool           -- ^ Just show the program version
63
    , optShowHelp  :: Bool           -- ^ Just show the help
64
    } deriving Show
65

    
66
instance CLI.CLIOptions Options where
67
    showVersion = optShowVer
68
    showHelp    = optShowHelp
69

    
70
instance CLI.EToolOptions Options where
71
    nodeFile   = optNodef
72
    nodeSet    = optNodeSet
73
    instFile   = optInstf
74
    instSet    = optInstSet
75
    masterName = optMaster
76
    silent a   = (optVerbose a) == 0
77

    
78
-- | Default values for the command line options.
79
defaultOptions :: Options
80
defaultOptions  = Options
81
 { optShowNodes = False
82
 , optShowCmds  = Nothing
83
 , optOneline   = False
84
 , optNodef     = "nodes"
85
 , optNodeSet   = False
86
 , optInstf     = "instances"
87
 , optInstSet   = False
88
 , optMaxLength = -1
89
 , optMaster    = ""
90
 , optVerbose   = 1
91
 , optOffline   = []
92
 , optMinScore  = 1e-9
93
 , optMcpu      = -1
94
 , optMdsk      = -1
95
 , optShowVer   = False
96
 , optShowHelp  = False
97
 }
98

    
99
-- | Options list and functions
100
options :: [OptDescr (Options -> Options)]
101
options =
102
    [ Option ['p']     ["print-nodes"]
103
      (NoArg (\ opts -> opts { optShowNodes = True }))
104
      "print the final node list"
105
    , Option ['C']     ["print-commands"]
106
      (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-")
107
                  "FILE")
108
      "print the ganeti command list for reaching the solution,\
109
      \if an argument is passed then write the commands to a file named\
110
      \ as such"
111
    , Option ['o']     ["oneline"]
112
      (NoArg (\ opts -> opts { optOneline = True }))
113
      "print the ganeti command list for reaching the solution"
114
    , Option ['n']     ["nodes"]
115
      (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
116
      "the node list FILE"
117
    , Option ['i']     ["instances"]
118
      (ReqArg (\ f opts -> opts { optInstf =  f, optInstSet = True }) "FILE")
119
      "the instance list FILE"
120
    , Option ['m']     ["master"]
121
      (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
122
      "collect data via RAPI at the given ADDRESS"
123
    , Option ['l']     ["max-length"]
124
      (ReqArg (\ i opts -> opts { optMaxLength =  (read i)::Int }) "N")
125
      "cap the solution at this many moves (useful for very unbalanced \
126
      \clusters)"
127
    , Option ['v']     ["verbose"]
128
      (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
129
      "increase the verbosity level"
130
    , Option ['q']     ["quiet"]
131
      (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) - 1 }))
132
      "decrease the verbosity level"
133
    , Option ['O']     ["offline"]
134
      (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
135
      " set node as offline"
136
    , Option ['e']     ["min-score"]
137
      (ReqArg (\ e opts -> opts { optMinScore = read e }) "EPSILON")
138
      " mininum score to aim for"
139
    , Option []        ["max-cpu"]
140
      (ReqArg (\ n opts -> opts { optMcpu = read n }) "RATIO")
141
      "maximum virtual-to-physical cpu ratio for nodes"
142
    , Option []        ["min-disk"]
143
      (ReqArg (\ n opts -> opts { optMdsk = read n }) "RATIO")
144
      "minimum free disk space for nodes (between 0 and 1)"
145
    , Option ['V']     ["version"]
146
      (NoArg (\ opts -> opts { optShowVer = True}))
147
      "show the version of the program"
148
    , Option ['h']     ["help"]
149
      (NoArg (\ opts -> opts { optShowHelp = True}))
150
      "show help"
151
    ]
152

    
153
{- | Start computing the solution at the given depth and recurse until
154
we find a valid solution or we exceed the maximum depth.
155

    
156
-}
157
iterateDepth :: Cluster.Table    -- ^ The starting table
158
             -> Int              -- ^ Remaining length
159
             -> Int              -- ^ Max node name len
160
             -> Int              -- ^ Max instance name len
161
             -> [[String]]       -- ^ Current command list
162
             -> Bool             -- ^ Wheter to be silent
163
             -> Cluster.Score    -- ^ Score at which to stop
164
             -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
165
                                               -- commands
166
iterateDepth ini_tbl max_rounds nmlen imlen
167
             cmd_strs oneline min_score =
168
    let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
169
        all_inst = Container.elems ini_il
170
        node_idx = map Node.idx . filter (not . Node.offline) $
171
                   Container.elems ini_nl
172
        fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
173
        (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
174
        ini_plc_len = length ini_plc
175
        fin_plc_len = length fin_plc
176
        allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
177
    in
178
      do
179
        let
180
            (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
181
                               nmlen imlen (head fin_plc) fin_plc_len
182
            upd_cmd_strs = cmds:cmd_strs
183
        unless (oneline || fin_plc_len == ini_plc_len) $ do
184
          putStrLn sol_line
185
          hFlush stdout
186
        (if fin_cv < ini_cv then -- this round made success, try deeper
187
             if allowed_next && fin_cv > min_score
188
             then iterateDepth fin_tbl max_rounds
189
                  nmlen imlen upd_cmd_strs oneline min_score
190
             -- don't go deeper, but return the better solution
191
             else return (fin_tbl, upd_cmd_strs)
192
         else
193
             return (ini_tbl, cmd_strs))
194

    
195
-- | Formats the solution for the oneline display
196
formatOneline :: Double -> Int -> Double -> String
197
formatOneline ini_cv plc_len fin_cv =
198
    printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
199
               (if fin_cv == 0 then 1 else (ini_cv / fin_cv))
200

    
201
-- | Main function.
202
main :: IO ()
203
main = do
204
  cmd_args <- System.getArgs
205
  (opts, args) <- CLI.parseOpts cmd_args "hbal" options defaultOptions
206

    
207
  unless (null args) $ do
208
         hPutStrLn stderr "Error: this program doesn't take any arguments."
209
         exitWith $ ExitFailure 1
210

    
211
  let oneline = optOneline opts
212
      verbose = optVerbose opts
213

    
214
  (fixed_nl, il, csf) <- CLI.loadExternalData opts
215

    
216
  let offline_names = optOffline opts
217
      all_nodes = Container.elems fixed_nl
218
      all_names = map Node.name all_nodes
219
      offline_wrong = filter (\n -> not $ elem n all_names) offline_names
220
      offline_indices = map Node.idx $
221
                        filter (\n -> elem (Node.name n) offline_names)
222
                               all_nodes
223
      m_cpu = optMcpu opts
224
      m_dsk = optMdsk opts
225

    
226
  when (length offline_wrong > 0) $ do
227
         printf "Wrong node name(s) set as offline: %s\n"
228
                (commaJoin offline_wrong)
229
         exitWith $ ExitFailure 1
230

    
231
  let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
232
                                then Node.setOffline n True
233
                                else n) fixed_nl
234
      nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
235
           nm
236

    
237
  when (Container.size il == 0) $ do
238
         (if oneline then putStrLn $ formatOneline 0 0 0
239
          else printf "Cluster is empty, exiting.\n")
240
         exitWith ExitSuccess
241

    
242
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
243
             (Container.size nl)
244
             (Container.size il)
245

    
246
  when (length csf > 0 && not oneline && verbose > 1) $ do
247
         printf "Note: Stripping common suffix of '%s' from names\n" csf
248

    
249
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
250
  unless (oneline || verbose == 0) $ printf
251
             "Initial check done: %d bad nodes, %d bad instances.\n"
252
             (length bad_nodes) (length bad_instances)
253

    
254
  when (length bad_nodes > 0) $ do
255
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
256
                  \that the cluster will end N+1 happy."
257

    
258
  when (optShowNodes opts) $
259
       do
260
         putStrLn "Initial cluster status:"
261
         putStrLn $ Cluster.printNodes nl
262

    
263
  let ini_cv = Cluster.compCV nl
264
      ini_tbl = Cluster.Table nl il ini_cv []
265
      min_cv = optMinScore opts
266

    
267
  when (ini_cv < min_cv) $ do
268
         (if oneline then
269
              putStrLn $ formatOneline ini_cv 0 ini_cv
270
          else printf "Cluster is already well balanced (initial score %.6g,\n\
271
                      \minimum score %.6g).\nNothing to do, exiting\n"
272
                      ini_cv min_cv)
273
         exitWith ExitSuccess
274

    
275
  unless oneline (if verbose > 2 then
276
                      printf "Initial coefficients: overall %.8f, %s\n"
277
                      ini_cv (Cluster.printStats nl)
278
                  else
279
                      printf "Initial score: %.8f\n" ini_cv)
280

    
281
  unless oneline $ putStrLn "Trying to minimize the CV..."
282
  let imlen = Container.maxNameLen il
283
      nmlen = Container.maxNameLen nl
284

    
285
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
286
                         nmlen imlen [] oneline min_cv
287
  let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
288
      ord_plc = reverse fin_plc
289
      sol_msg = (if null fin_plc
290
                 then printf "No solution found\n"
291
                 else (if verbose > 2
292
                       then printf "Final coefficients:   overall %.8f, %s\n"
293
                            fin_cv (Cluster.printStats fin_nl)
294
                       else printf "Cluster score improved from %.8f to %.8f\n"
295
                            ini_cv fin_cv
296
                      ))::String
297

    
298
  unless oneline $ putStr sol_msg
299

    
300
  unless (oneline || verbose == 0) $
301
         printf "Solution length=%d\n" (length ord_plc)
302

    
303
  let cmd_data = Cluster.formatCmds . reverse $ cmd_strs
304

    
305
  when (isJust $ optShowCmds opts) $
306
       do
307
         let out_path = fromJust $ optShowCmds opts
308
         putStrLn ""
309
         (if out_path == "-" then
310
              printf "Commands to run to reach the above solution:\n%s"
311
                     (unlines . map ("  " ++) .
312
                      filter (/= "check") .
313
                      lines $ cmd_data)
314
          else do
315
            writeFile out_path (CLI.shTemplate ++ cmd_data)
316
            printf "The commands have been written to file '%s'\n" out_path)
317

    
318
  when (optShowNodes opts) $
319
       do
320
         let (orig_mem, orig_disk) = Cluster.totalResources nl
321
             (final_mem, final_disk) = Cluster.totalResources fin_nl
322
         putStrLn ""
323
         putStrLn "Final cluster status:"
324
         putStrLn $ Cluster.printNodes fin_nl
325
         when (verbose > 3) $
326
              do
327
                printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
328
                printf "Final:    mem=%d disk=%d\n" final_mem final_disk
329
  when oneline $
330
         putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv