1 {-| Solver for N+1 cluster errors
5 module Main (main) where
9 import Data.Maybe (isJust, fromJust, fromMaybe)
13 import System.Console.GetOpt
14 import qualified System
16 import Text.Printf (printf)
18 import qualified Ganeti.HTools.Container as Container
19 import qualified Ganeti.HTools.Cluster as Cluster
20 import qualified Ganeti.HTools.Node as Node
21 import qualified Ganeti.HTools.CLI as CLI
23 import Ganeti.HTools.Utils
25 -- | Command line options structure.
26 data Options = Options
27 { optShowNodes :: Bool -- ^ Whether to show node status
28 , optShowCmds :: Maybe FilePath -- ^ Whether to show the command list
29 , optOneline :: Bool -- ^ Switch output to a single line
30 , optNodef :: FilePath -- ^ Path to the nodes file
31 , optNodeSet :: Bool -- ^ The nodes have been set by options
32 , optInstf :: FilePath -- ^ Path to the instances file
33 , optInstSet :: Bool -- ^ The insts have been set by options
34 , optMaxLength :: Int -- ^ Stop after this many steps
35 , optMaster :: String -- ^ Collect data from RAPI
36 , optVerbose :: Int -- ^ Verbosity level
37 , optOffline :: [String] -- ^ Names of offline nodes
38 , optMinScore :: Cluster.Score -- ^ The minimum score we aim for
39 , optShowVer :: Bool -- ^ Just show the program version
40 , optShowHelp :: Bool -- ^ Just show the help
43 instance CLI.CLIOptions Options where
44 showVersion = optShowVer
45 showHelp = optShowHelp
47 instance CLI.EToolOptions Options where
52 masterName = optMaster
53 silent a = (optVerbose a) == 0
55 -- | Default values for the command line options.
56 defaultOptions :: Options
57 defaultOptions = Options
58 { optShowNodes = False
59 , optShowCmds = Nothing
63 , optInstf = "instances"
74 -- | Options list and functions
75 options :: [OptDescr (Options -> Options)]
77 [ Option ['p'] ["print-nodes"]
78 (NoArg (\ opts -> opts { optShowNodes = True }))
79 "print the final node list"
80 , Option ['C'] ["print-commands"]
81 (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-")
83 "print the ganeti command list for reaching the solution,\
84 \if an argument is passed then write the commands to a file named\
86 , Option ['o'] ["oneline"]
87 (NoArg (\ opts -> opts { optOneline = True }))
88 "print the ganeti command list for reaching the solution"
89 , Option ['n'] ["nodes"]
90 (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
92 , Option ['i'] ["instances"]
93 (ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE")
94 "the instance list FILE"
95 , Option ['m'] ["master"]
96 (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
97 "collect data via RAPI at the given ADDRESS"
98 , Option ['l'] ["max-length"]
99 (ReqArg (\ i opts -> opts { optMaxLength = (read i)::Int }) "N")
100 "cap the solution at this many moves (useful for very unbalanced \
102 , Option ['v'] ["verbose"]
103 (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
104 "increase the verbosity level"
105 , Option ['q'] ["quiet"]
106 (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) - 1 }))
107 "decrease the verbosity level"
108 , Option ['O'] ["offline"]
109 (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
110 " set node as offline"
111 , Option ['e'] ["min-score"]
112 (ReqArg (\ e opts -> opts { optMinScore = read e }) "EPSILON")
113 " mininum score to aim for"
114 , Option ['V'] ["version"]
115 (NoArg (\ opts -> opts { optShowVer = True}))
116 "show the version of the program"
117 , Option ['h'] ["help"]
118 (NoArg (\ opts -> opts { optShowHelp = True}))
122 {- | Start computing the solution at the given depth and recurse until
123 we find a valid solution or we exceed the maximum depth.
126 iterateDepth :: Cluster.Table -- ^ The starting table
127 -> Int -- ^ Remaining length
128 -> Int -- ^ Max node name len
129 -> Int -- ^ Max instance name len
130 -> [[String]] -- ^ Current command list
131 -> Bool -- ^ Wheter to be silent
132 -> Cluster.Score -- ^ Score at which to stop
133 -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
135 iterateDepth ini_tbl max_rounds nmlen imlen
136 cmd_strs oneline min_score =
137 let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
138 all_inst = Container.elems ini_il
139 node_idx = map Node.idx . filter (not . Node.offline) $
140 Container.elems ini_nl
141 fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
142 (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
143 ini_plc_len = length ini_plc
144 fin_plc_len = length fin_plc
145 allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
149 (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
150 nmlen imlen (head fin_plc) fin_plc_len
151 upd_cmd_strs = cmds:cmd_strs
152 unless (oneline || fin_plc_len == ini_plc_len) $ do
155 (if fin_cv < ini_cv then -- this round made success, try deeper
156 if allowed_next && fin_cv > min_score
157 then iterateDepth fin_tbl max_rounds
158 nmlen imlen upd_cmd_strs oneline min_score
159 -- don't go deeper, but return the better solution
160 else return (fin_tbl, upd_cmd_strs)
162 return (ini_tbl, cmd_strs))
164 -- | Formats the solution for the oneline display
165 formatOneline :: Double -> Int -> Double -> String
166 formatOneline ini_cv plc_len fin_cv =
167 printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
168 (if fin_cv == 0 then 1 else (ini_cv / fin_cv))
173 cmd_args <- System.getArgs
174 (opts, args) <- CLI.parseOpts cmd_args "hbal" options defaultOptions
176 unless (null args) $ do
177 hPutStrLn stderr "Error: this program doesn't take any arguments."
178 exitWith $ ExitFailure 1
180 let oneline = optOneline opts
181 verbose = optVerbose opts
183 (fixed_nl, il, csf) <- CLI.loadExternalData opts
185 let offline_names = optOffline opts
186 all_nodes = Container.elems fixed_nl
187 all_names = map Node.name all_nodes
188 offline_wrong = filter (\n -> not $ elem n all_names) offline_names
189 offline_indices = map Node.idx $
190 filter (\n -> elem (Node.name n) offline_names)
193 when (length offline_wrong > 0) $ do
194 printf "Wrong node name(s) set as offline: %s\n"
195 (commaJoin offline_wrong)
196 exitWith $ ExitFailure 1
198 let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
199 then Node.setOffline n True
202 when (Container.size il == 0) $ do
203 (if oneline then putStrLn $ formatOneline 0 0 0
204 else printf "Cluster is empty, exiting.\n")
207 unless oneline $ printf "Loaded %d nodes, %d instances\n"
211 when (length csf > 0 && not oneline && verbose > 1) $ do
212 printf "Note: Stripping common suffix of '%s' from names\n" csf
214 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
215 unless (oneline || verbose == 0) $ printf
216 "Initial check done: %d bad nodes, %d bad instances.\n"
217 (length bad_nodes) (length bad_instances)
219 when (length bad_nodes > 0) $ do
220 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
221 \that the cluster will end N+1 happy."
223 when (optShowNodes opts) $
225 putStrLn "Initial cluster status:"
226 putStrLn $ Cluster.printNodes nl
228 let ini_cv = Cluster.compCV nl
229 ini_tbl = Cluster.Table nl il ini_cv []
230 min_cv = optMinScore opts
232 when (ini_cv < min_cv) $ do
234 putStrLn $ formatOneline ini_cv 0 ini_cv
235 else printf "Cluster is already well balanced (initial score %.6g,\n\
236 \minimum score %.6g).\nNothing to do, exiting\n"
240 unless oneline (if verbose > 2 then
241 printf "Initial coefficients: overall %.8f, %s\n"
242 ini_cv (Cluster.printStats nl)
244 printf "Initial score: %.8f\n" ini_cv)
246 unless oneline $ putStrLn "Trying to minimize the CV..."
247 let imlen = Container.maxNameLen il
248 nmlen = Container.maxNameLen nl
250 (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
251 nmlen imlen [] oneline min_cv
252 let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
253 ord_plc = reverse fin_plc
254 sol_msg = if null fin_plc
255 then printf "No solution found\n"
257 then printf "Final coefficients: overall %.8f, %s\n"
258 fin_cv (Cluster.printStats fin_nl)
259 else printf "Cluster score improved from %.8f to %.8f\n"
263 unless oneline $ putStr sol_msg
265 unless (oneline || verbose == 0) $
266 printf "Solution length=%d\n" (length ord_plc)
268 let cmd_data = Cluster.formatCmds . reverse $ cmd_strs
270 when (isJust $ optShowCmds opts) $
272 let out_path = fromJust $ optShowCmds opts
274 (if out_path == "-" then
275 printf "Commands to run to reach the above solution:\n%s"
276 (unlines . map (" " ++) .
277 filter (/= "check") .
280 writeFile out_path (CLI.shTemplate ++ cmd_data)
281 printf "The commands have been written to file '%s'\n" out_path)
283 when (optShowNodes opts) $
285 let (orig_mem, orig_disk) = Cluster.totalResources nl
286 (final_mem, final_disk) = Cluster.totalResources fin_nl
288 putStrLn "Final cluster status:"
289 putStrLn $ Cluster.printNodes fin_nl
292 printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
293 printf "Final: mem=%d disk=%d\n" final_mem final_disk
295 putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv