1 {-| Solver for N+1 cluster errors
7 Copyright (C) 2009 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Main (main) where
30 import Data.Maybe (isJust, fromJust, fromMaybe)
34 import System.Console.GetOpt
35 import qualified System
37 import Text.Printf (printf)
39 import qualified Ganeti.HTools.Container as Container
40 import qualified Ganeti.HTools.Cluster as Cluster
41 import qualified Ganeti.HTools.Node as Node
42 import qualified Ganeti.HTools.CLI as CLI
44 import Ganeti.HTools.Utils
46 -- | Command line options structure.
-- A single record carries every setting of the program. The handlers
-- listed in 'options' each return a copy of the record with one or two
-- fields changed.
47 data Options = Options
48 { optShowNodes :: Bool -- ^ Whether to show node status
49 , optShowCmds :: Maybe FilePath -- ^ Whether to show the command list
-- ('Nothing' turns the command listing off; the @--print-commands@
-- handler stores @Just "-"@ when the flag has no argument and
-- @Just file@ otherwise)
50 , optOneline :: Bool -- ^ Switch output to a single line
51 , optNodef :: FilePath -- ^ Path to the nodes file
52 , optNodeSet :: Bool -- ^ The nodes have been set by options
53 , optInstf :: FilePath -- ^ Path to the instances file
54 , optInstSet :: Bool -- ^ The insts have been set by options
55 , optMaxLength :: Int -- ^ Stop after this many steps
56 , optMaster :: String -- ^ Collect data from RAPI
57 , optVerbose :: Int -- ^ Verbosity level
-- (raised by one for each @-v@ and lowered by one for each @-q@)
58 , optOffline :: [String] -- ^ Names of offline nodes
-- (each @-O@ prepends one name to this list)
59 , optMinScore :: Cluster.Score -- ^ The minimum score we aim for
60 , optShowVer :: Bool -- ^ Just show the program version
61 , optShowHelp :: Bool -- ^ Just show the help
-- Lets the shared CLI helpers query the version and help flags stored
-- in an 'Options' record.
instance CLI.CLIOptions Options where
    showVersion opts = optShowVer opts
    showHelp opts = optShowHelp opts
-- Exposes fields of 'Options' through the CLI.EToolOptions class:
-- 'masterName' is the address stored by the @--master@ option and
-- 'silent' holds only when the verbosity level is exactly zero.
-- NOTE(review): a verbosity below zero (reachable through repeated @-q@,
-- which subtracts one each time) does not count as silent here --
-- confirm whether @<= 0@ was intended.
68 instance CLI.EToolOptions Options where
73 masterName = optMaster
74 silent a = (optVerbose a) == 0
76 -- | Default values for the command line options.
-- This record is the starting point handed to CLI.parseOpts; the
-- handlers in 'options' then override individual fields.
77 defaultOptions :: Options
78 defaultOptions = Options
79 { optShowNodes = False
-- No command listing unless @--print-commands@ is given.
80 , optShowCmds = Nothing
84 , optInstf = "instances"
95 -- | Options list and functions
-- Every entry pairs the short and long flag names with a handler that
-- returns an updated 'Options' record, followed by the help text shown
-- to the user.
-- NOTE(review): the help text attached to @-o@ / @--oneline@ is the same
-- sentence that describes the command list, although its handler only
-- sets 'optOneline'; this looks like a copy/paste leftover, confirm and
-- reword.
-- NOTE(review): the @--max-length@ and @--min-score@ handlers parse their
-- argument with 'read', which raises an error on malformed input;
-- consider 'reads' together with an explicit error message.
-- NOTE(review): "mininum" in the @--min-score@ help text is a typo for
-- "minimum".
96 options :: [OptDescr (Options -> Options)]
98 [ Option ['p'] ["print-nodes"]
99 (NoArg (\ opts -> opts { optShowNodes = True }))
100 "print the final node list"
101 , Option ['C'] ["print-commands"]
102 (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-")
104 "print the ganeti command list for reaching the solution,\
105 \if an argument is passed then write the commands to a file named\
107 , Option ['o'] ["oneline"]
108 (NoArg (\ opts -> opts { optOneline = True }))
109 "print the ganeti command list for reaching the solution"
110 , Option ['n'] ["nodes"]
111 (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
113 , Option ['i'] ["instances"]
114 (ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE")
115 "the instance list FILE"
116 , Option ['m'] ["master"]
117 (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
118 "collect data via RAPI at the given ADDRESS"
119 , Option ['l'] ["max-length"]
120 (ReqArg (\ i opts -> opts { optMaxLength = (read i)::Int }) "N")
121 "cap the solution at this many moves (useful for very unbalanced \
123 , Option ['v'] ["verbose"]
124 (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
125 "increase the verbosity level"
126 , Option ['q'] ["quiet"]
127 (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) - 1 }))
128 "decrease the verbosity level"
129 , Option ['O'] ["offline"]
130 (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
131 " set node as offline"
132 , Option ['e'] ["min-score"]
133 (ReqArg (\ e opts -> opts { optMinScore = read e }) "EPSILON")
134 " mininum score to aim for"
135 , Option ['V'] ["version"]
136 (NoArg (\ opts -> opts { optShowVer = True}))
137 "show the version of the program"
138 , Option ['h'] ["help"]
139 (NoArg (\ opts -> opts { optShowHelp = True}))
143 {- | Start computing the solution at the given depth and recurse until
144 we find a valid solution or we exceed the maximum depth.
-- Each round asks Cluster.checkMove for a new table. The round is kept
-- only when its score is lower than the score of the table it started
-- from; the search then recurses while the length limit allows it and
-- the score is still above the requested minimum.
147 iterateDepth :: Cluster.Table -- ^ The starting table
148 -> Int -- ^ Remaining length
149 -> Int -- ^ Max node name len
150 -> Int -- ^ Max instance name len
151 -> [[String]] -- ^ Current command list
152 -> Bool -- ^ Whether to be silent
153 -> Cluster.Score -- ^ Score at which to stop
154 -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
156 iterateDepth ini_tbl max_rounds nmlen imlen
157 cmd_strs oneline min_score =
158 let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
159 all_inst = Container.elems ini_il
-- Indices of the nodes that are not marked offline.
160 node_idx = map Node.idx . filter (not . Node.offline) $
161 Container.elems ini_nl
162 fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
163 (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
164 ini_plc_len = length ini_plc
165 fin_plc_len = length fin_plc
-- A negative max_rounds disables the length limit.
166 allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
-- NOTE(review): 'head fin_plc' is partial; this relies on fin_plc being
-- non-empty whenever sol_line or cmds are actually forced -- confirm.
170 (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
171 nmlen imlen (head fin_plc) fin_plc_len
-- Commands are prepended, so the accumulated list is newest-first.
172 upd_cmd_strs = cmds:cmd_strs
-- The block guarded here is skipped in one-line mode and when the round
-- added no placement.
173 unless (oneline || fin_plc_len == ini_plc_len) $ do
176 (if fin_cv < ini_cv then -- this round made success, try deeper
177 if allowed_next && fin_cv > min_score
178 then iterateDepth fin_tbl max_rounds
179 nmlen imlen upd_cmd_strs oneline min_score
180 -- don't go deeper, but return the better solution
181 else return (fin_tbl, upd_cmd_strs)
-- No improvement in this round: hand back the table and commands we
-- started with.
183 return (ini_tbl, cmd_strs))
-- | Renders the one-line summary: the initial score, the placement
-- count, the final score and the improvement ratio (initial score
-- divided by final score, reported as 1 when the final score is zero).
formatOneline :: Double -> Int -> Double -> String
formatOneline ini_cv plc_len fin_cv =
    printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv ratio
  where
    -- Guard against dividing by a zero final score.
    ratio :: Double
    ratio
      | fin_cv == 0 = 1
      | otherwise   = ini_cv / fin_cv
-- Parse the command line; any positional argument is rejected with an
-- error on stderr and exit code 1.
194 cmd_args <- System.getArgs
195 (opts, args) <- CLI.parseOpts cmd_args "hbal" options defaultOptions
197 unless (null args) $ do
198 hPutStrLn stderr "Error: this program doesn't take any arguments."
199 exitWith $ ExitFailure 1
201 let oneline = optOneline opts
202 verbose = optVerbose opts
-- Load the node list and instance list; the third component (csf) is
-- reported further down as the common suffix stripped from names.
204 (fixed_nl, il, csf) <- CLI.loadExternalData opts
-- Check the names given with -O/--offline against the loaded nodes.
206 let offline_names = optOffline opts
207 all_nodes = Container.elems fixed_nl
208 all_names = map Node.name all_nodes
209 offline_wrong = filter (\n -> not $ elem n all_names) offline_names
210 offline_indices = map Node.idx $
211 filter (\n -> elem (Node.name n) offline_names)
-- Unknown offline node names abort the run with exit code 1.
-- NOTE(review): 'length offline_wrong > 0' could be written as
-- 'not (null offline_wrong)'.
214 when (length offline_wrong > 0) $ do
215 printf "Wrong node name(s) set as offline: %s\n"
216 (commaJoin offline_wrong)
217 exitWith $ ExitFailure 1
-- Flag every selected node as offline in the node container.
219 let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
220 then Node.setOffline n True
-- With no instances there is nothing to balance; report that in the
-- format matching the output mode.
223 when (Container.size il == 0) $ do
224 (if oneline then putStrLn $ formatOneline 0 0 0
225 else printf "Cluster is empty, exiting.\n")
228 unless oneline $ printf "Loaded %d nodes, %d instances\n"
232 when (length csf > 0 && not oneline && verbose > 1) $ do
233 printf "Note: Stripping common suffix of '%s' from names\n" csf
-- Initial check: a non-empty bad node list only produces a warning, the
-- run continues.
235 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
236 unless (oneline || verbose == 0) $ printf
237 "Initial check done: %d bad nodes, %d bad instances.\n"
238 (length bad_nodes) (length bad_instances)
240 when (length bad_nodes > 0) $ do
241 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
242 \that the cluster will end N+1 happy."
244 when (optShowNodes opts) $
246 putStrLn "Initial cluster status:"
247 putStrLn $ Cluster.printNodes nl
-- Starting point of the search: the initial score and a table with an
-- empty placement list.
249 let ini_cv = Cluster.compCV nl
250 ini_tbl = Cluster.Table nl il ini_cv []
251 min_cv = optMinScore opts
-- A score already below the requested minimum is reported as "already
-- well balanced".
253 when (ini_cv < min_cv) $ do
255 putStrLn $ formatOneline ini_cv 0 ini_cv
256 else printf "Cluster is already well balanced (initial score %.6g,\n\
257 \minimum score %.6g).\nNothing to do, exiting\n"
261 unless oneline (if verbose > 2 then
262 printf "Initial coefficients: overall %.8f, %s\n"
263 ini_cv (Cluster.printStats nl)
265 printf "Initial score: %.8f\n" ini_cv)
267 unless oneline $ putStrLn "Trying to minimize the CV..."
268 let imlen = Container.maxNameLen il
269 nmlen = Container.maxNameLen nl
-- Run the search, starting with an empty command list.
271 (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
272 nmlen imlen [] oneline min_cv
273 let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
274 ord_plc = reverse fin_plc
275 sol_msg = if null fin_plc
276 then printf "No solution found\n"
278 then printf "Final coefficients: overall %.8f, %s\n"
279 fin_cv (Cluster.printStats fin_nl)
280 else printf "Cluster score improved from %.8f to %.8f\n"
284 unless oneline $ putStr sol_msg
286 unless (oneline || verbose == 0) $
287 printf "Solution length=%d\n" (length ord_plc)
-- iterateDepth prepends each round's commands, so the list is reversed
-- before formatting.
289 let cmd_data = Cluster.formatCmds . reverse $ cmd_strs
-- NOTE(review): the isJust / fromJust pair below could be one pattern
-- match on 'optShowCmds opts', which would avoid the partial 'fromJust'.
291 when (isJust $ optShowCmds opts) $
293 let out_path = fromJust $ optShowCmds opts
-- "-" means print the commands to standard output; any other value is
-- used as the name of the file to write.
295 (if out_path == "-" then
296 printf "Commands to run to reach the above solution:\n%s"
297 (unlines . map (" " ++) .
298 filter (/= "check") .
301 writeFile out_path (CLI.shTemplate ++ cmd_data)
302 printf "The commands have been written to file '%s'\n" out_path)
304 when (optShowNodes opts) $
306 let (orig_mem, orig_disk) = Cluster.totalResources nl
307 (final_mem, final_disk) = Cluster.totalResources fin_nl
309 putStrLn "Final cluster status:"
310 putStrLn $ Cluster.printNodes fin_nl
313 printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
314 printf "Final: mem=%d disk=%d\n" final_mem final_disk
316 putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv