1 {-| Cluster balancer (hbal): searches for instance moves that lower the cluster score
5 module Main (main) where
9 import Data.Maybe (isJust, fromJust, fromMaybe)
13 import System.Console.GetOpt
14 import qualified System
16 import Text.Printf (printf)
18 import qualified Ganeti.HTools.Container as Container
19 import qualified Ganeti.HTools.Cluster as Cluster
20 import qualified Ganeti.HTools.Node as Node
21 import qualified Ganeti.HTools.CLI as CLI
22 import Ganeti.HTools.Rapi
23 import Ganeti.HTools.Utils
25 -- | Command line options structure, filled in by the entries of 'options'.
26 data Options = Options
27 { optShowNodes :: Bool -- ^ Whether to print the node status (initial and final)
28 , optShowCmds :: Maybe FilePath -- ^ Where to emit the command list: Nothing = don't, Just "-" = stdout, otherwise a file
29 , optOneline :: Bool -- ^ Switch output to a single line
30 , optNodef :: FilePath -- ^ Path to the nodes file
31 , optNodeSet :: Bool -- ^ Whether the nodes file was given via -n/--nodes
32 , optInstf :: FilePath -- ^ Path to the instances file
33 , optInstSet :: Bool -- ^ Whether the instances file was given via -i/--instances
34 , optMaxLength :: Int -- ^ Cap on the solution length; a negative value means no cap
35 , optMaster :: String -- ^ RAPI address to collect data from; empty means read the files
36 , optVerbose :: Int -- ^ Verbosity level (raised by -v, lowered by -q)
37 , optOffline :: [String] -- ^ Names of nodes to mark offline (one per -O)
38 , optMinScore :: Cluster.Score -- ^ The minimum score we aim for; search stops once the score is not above it
39 , optShowVer :: Bool -- ^ Just show the program version
40 , optShowHelp :: Bool -- ^ Just show the help
43 -- | Default values for the command line options; handed to 'CLI.parseOpts' together with 'options'.
44 defaultOptions :: Options
45 defaultOptions = Options
46 { optShowNodes = False
47 , optShowCmds = Nothing
51 , optInstf = "instances"
62 -- | Option descriptors; each entry maps a flag to an update of the 'Options' record.
63 options :: [OptDescr (Options -> Options)]
65 [ Option ['p'] ["print-nodes"]
66 (NoArg (\ opts -> opts { optShowNodes = True }))
67 "print the final node list"
68 , Option ['C'] ["print-commands"]
69 (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-")
71 "print the ganeti command list for reaching the solution,\
72 \if an argument is passed then write the commands to a file named\
74 , Option ['o'] ["oneline"]
75 (NoArg (\ opts -> opts { optOneline = True }))
76 "switch the output to a single summary line"
77 , Option ['n'] ["nodes"]
78 (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
80 , Option ['i'] ["instances"]
81 (ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE")
82 "the instance list FILE"
83 , Option ['m'] ["master"]
84 (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
85 "collect data via RAPI at the given ADDRESS"
86 , Option ['l'] ["max-length"]
-- NOTE(review): 'read' is partial; a non-numeric argument fails at runtime with a parse error.
87 (ReqArg (\ i opts -> opts { optMaxLength = (read i)::Int }) "N")
88 "cap the solution at this many moves (useful for very unbalanced \
90 , Option ['v'] ["verbose"]
91 (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
92 "increase the verbosity level"
93 , Option ['q'] ["quiet"]
94 (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) - 1 }))
95 "decrease the verbosity level"
96 , Option ['O'] ["offline"]
97 (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
98 " set node as offline"
99 , Option ['e'] ["min-score"]
-- NOTE(review): same partial 'read' as for --max-length.
100 (ReqArg (\ e opts -> opts { optMinScore = read e }) "EPSILON")
101 " minimum score to aim for"
102 , Option ['V'] ["version"]
103 (NoArg (\ opts -> opts { optShowVer = True}))
104 "show the version of the program"
105 , Option ['h'] ["help"]
106 (NoArg (\ opts -> opts { optShowHelp = True}))
110 {- | Start computing the solution at the given depth and recurse until
111 we find a valid solution or we exceed the maximum depth.
114 iterateDepth :: Cluster.Table -- ^ The starting table
115 -> Int -- ^ Maximum solution length (a negative value means no limit)
116 -> Cluster.NameList -- ^ Node idx to name list
117 -> Cluster.NameList -- ^ Inst idx to name list
118 -> Int -- ^ Max node name len
119 -> Int -- ^ Max instance name len
120 -> [[String]] -- ^ Current command list
121 -> Bool -- ^ Whether to be silent (oneline mode)
122 -> Cluster.Score -- ^ Score at which to stop
123 -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and the accumulated command list
125 iterateDepth ini_tbl max_rounds ktn kti nmlen imlen
126 cmd_strs oneline min_score =
127 let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
128 all_inst = Container.elems ini_il
-- indices of the nodes that are not offline, handed to 'Cluster.checkMove'
129 node_idx = map Node.idx . filter (not . Node.offline) $
130 Container.elems ini_nl
131 fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
132 (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
133 ini_plc_len = length ini_plc
134 fin_plc_len = length fin_plc
-- a negative max_rounds disables the length limit
135 allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
-- NOTE(review): 'head fin_plc' is partial; presumably only forced when this round added a placement - confirm
139 (sol_line, cmds) = Cluster.printSolutionLine ini_il ktn kti
140 nmlen imlen (head fin_plc) fin_plc_len
141 upd_cmd_strs = cmds:cmd_strs
142 unless (oneline || fin_plc_len == ini_plc_len) $ do
145 (if fin_cv < ini_cv then -- this round improved the score, try deeper
146 if allowed_next && fin_cv > min_score
147 then iterateDepth fin_tbl max_rounds ktn kti
148 nmlen imlen upd_cmd_strs oneline min_score
149 -- don't go deeper, but return the better solution
150 else return (fin_tbl, upd_cmd_strs)
152 return (ini_tbl, cmd_strs))
-- | Renders the single-line summary: the initial score, the solution
-- length, the final score and the ratio of initial to final score.
formatOneline :: Double -> Int -> Double -> String
formatOneline ini_cv plc_len fin_cv =
    printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv ratio
  where
    -- A final score of exactly zero is reported with a ratio of 1 rather
    -- than dividing by zero.
    ratio :: Double
    ratio
      | fin_cv == 0 = 1
      | otherwise   = ini_cv / fin_cv
-- Parse the command line; 'CLI.parseOpts' receives the option table and the defaults.
163 cmd_args <- System.getArgs
164 (opts, args) <- CLI.parseOpts cmd_args "hbal" options
165 defaultOptions optShowHelp
-- Positional arguments are rejected.
167 unless (null args) $ do
168 hPutStrLn stderr "Error: this program doesn't take any arguments."
169 exitWith $ ExitFailure 1
171 when (optShowVer opts) $ do
172 putStr $ CLI.showVersion "hbal"
175 (env_node, env_inst) <- CLI.parseEnv ()
176 let nodef = if optNodeSet opts then optNodef opts
178 instf = if optInstSet opts then optInstf opts
180 oneline = optOneline opts
181 verbose = optVerbose opts
-- An empty master address selects the file-based input, otherwise the data is fetched from the given host.
182 (node_data, inst_data) =
183 case optMaster opts of
184 "" -> (readFile nodef,
186 host -> (getNodes host >>= readData,
187 getInstances host >>= readData)
189 ldresult <- liftM2 Cluster.loadData node_data inst_data
190 (loaded_nl, il, csf, ktn, kti) <-
194 printf "Error: failed to load data. Details:\n%s\n" s
195 exitWith $ ExitFailure 1
197 let (fix_msgs, fixed_nl) = Cluster.checkData loaded_nl il ktn kti
199 unless (null fix_msgs || verbose == 0) $ do
200 putStrLn "Warning: cluster has inconsistent data:"
201 putStrLn . unlines . map (\s -> printf " - %s" s) $ fix_msgs
-- Validate the node names given with -O/--offline against the known node names.
203 let offline_names = optOffline opts
204 all_names = snd . unzip $ ktn
205 offline_wrong = filter (\n -> not $ elem n all_names) offline_names
206 offline_indices = fst . unzip .
207 filter (\(_, n) -> elem n offline_names) $ ktn
-- NOTE(review): 'null' would express this emptiness check without walking the whole list.
209 when (length offline_wrong > 0) $ do
210 printf "Wrong node name(s) set as offline: %s\n"
211 (commaJoin offline_wrong)
212 exitWith $ ExitFailure 1
214 let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
215 then Node.setOffline n True
-- Nothing to do when there are no instances.
218 when (Container.size il == 0) $ do
220 putStrLn $ formatOneline 0 0 0
222 printf "Cluster is empty, exiting.\n")
226 unless oneline $ printf "Loaded %d nodes, %d instances\n"
230 when (length csf > 0 && not oneline && verbose > 1) $ do
231 printf "Note: Stripping common suffix of '%s' from names\n" csf
233 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
234 unless (oneline || verbose == 0) $ printf
235 "Initial check done: %d bad nodes, %d bad instances.\n"
236 (length bad_nodes) (length bad_instances)
238 when (length bad_nodes > 0) $ do
239 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
240 \that the cluster will end N+1 happy."
242 when (optShowNodes opts) $
244 putStrLn "Initial cluster status:"
245 putStrLn $ Cluster.printNodes ktn nl
247 let ini_cv = Cluster.compCV nl
248 ini_tbl = Cluster.Table nl il ini_cv []
249 min_cv = optMinScore opts
-- Stop early when the initial score is already below the requested minimum.
251 when (ini_cv < min_cv) $ do
253 putStrLn $ formatOneline ini_cv 0 ini_cv
254 else printf "Cluster is already well balanced (initial score %.6g,\n\
255 \minimum score %.6g).\nNothing to do, exiting\n"
259 unless oneline (if verbose > 2 then
260 printf "Initial coefficients: overall %.8f, %s\n"
261 ini_cv (Cluster.printStats nl)
263 printf "Initial score: %.8f\n" ini_cv)
265 unless oneline $ putStrLn "Trying to minimize the CV..."
266 let mlen_fn = maximum . (map length) . snd . unzip
-- Run 'iterateDepth' from the initial table, starting with an empty command list.
270 (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
271 ktn kti nmlen imlen [] oneline min_cv
272 let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
273 ord_plc = reverse fin_plc
274 sol_msg = if null fin_plc
275 then printf "No solution found\n"
277 then printf "Final coefficients: overall %.8f, %s\n"
278 fin_cv (Cluster.printStats fin_nl)
279 else printf "Cluster score improved from %.8f to %.8f\n"
283 unless oneline $ putStr sol_msg
285 unless (oneline || verbose == 0) $
286 printf "Solution length=%d\n" (length ord_plc)
288 let cmd_data = Cluster.formatCmds . reverse $ cmd_strs
-- Emit the command list: "-" prints it to stdout, any other value is used as an output file path.
290 when (isJust $ optShowCmds opts) $
-- NOTE(review): 'fromJust' is guarded by the 'isJust' test above.
292 let out_path = fromJust $ optShowCmds opts
294 (if out_path == "-" then
295 printf "Commands to run to reach the above solution:\n%s"
296 (unlines . map (" " ++) .
297 filter (/= "check") .
300 writeFile out_path (CLI.shTemplate ++ cmd_data)
301 printf "The commands have been written to file '%s'\n" out_path)
303 when (optShowNodes opts) $
305 let (orig_mem, orig_disk) = Cluster.totalResources nl
306 (final_mem, final_disk) = Cluster.totalResources fin_nl
308 putStrLn "Final cluster status:"
309 putStrLn $ Cluster.printNodes ktn fin_nl
312 printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
313 printf "Final: mem=%d disk=%d\n" final_mem final_disk
315 putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv