1 {-| Solver for N+1 cluster errors
5 module Main (main) where
9 import Data.Maybe (isJust, fromJust, fromMaybe)
13 import System.Console.GetOpt
14 import qualified System
16 import Text.Printf (printf)
18 import qualified Ganeti.HTools.Container as Container
19 import qualified Ganeti.HTools.Cluster as Cluster
20 import qualified Ganeti.HTools.Node as Node
21 import qualified Ganeti.HTools.CLI as CLI
22 import Ganeti.HTools.Rapi
23 import Ganeti.HTools.Utils
25 -- | Command line options structure.
26 data Options = Options
27 { optShowNodes :: Bool -- ^ Whether to show node status
28 , optShowCmds :: Maybe FilePath -- ^ Whether to show the command list
29 , optOneline :: Bool -- ^ Switch output to a single line
30 , optNodef :: FilePath -- ^ Path to the nodes file
31 , optNodeSet :: Bool -- ^ The nodes have been set by options
32 , optInstf :: FilePath -- ^ Path to the instances file
33 , optInstSet :: Bool -- ^ The insts have been set by options
34 , optMaxLength :: Int -- ^ Stop after this many steps
35 , optMaster :: String -- ^ Collect data from RAPI
36 , optVerbose :: Int -- ^ Verbosity level
37 , optOffline :: [String] -- ^ Names of offline nodes
38 , optShowVer :: Bool -- ^ Just show the program version
39 , optShowHelp :: Bool -- ^ Just show the help
42 -- | Default values for the command line options.
43 defaultOptions :: Options
44 defaultOptions = Options
45 { optShowNodes = False
46 , optShowCmds = Nothing
50 , optInstf = "instances"
60 -- | Options list and functions
61 options :: [OptDescr (Options -> Options)]
63 [ Option ['p'] ["print-nodes"]
64 (NoArg (\ opts -> opts { optShowNodes = True }))
65 "print the final node list"
66 , Option ['C'] ["print-commands"]
67 (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-")
69 "print the ganeti command list for reaching the solution,\
70 \if an argument is passed then write the commands to a file named\
72 , Option ['o'] ["oneline"]
73 (NoArg (\ opts -> opts { optOneline = True }))
74 "print the ganeti command list for reaching the solution"
75 , Option ['n'] ["nodes"]
76 (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
78 , Option ['i'] ["instances"]
79 (ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE")
80 "the instance list FILE"
81 , Option ['m'] ["master"]
82 (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
83 "collect data via RAPI at the given ADDRESS"
84 , Option ['l'] ["max-length"]
85 (ReqArg (\ i opts -> opts { optMaxLength = (read i)::Int }) "N")
86 "cap the solution at this many moves (useful for very unbalanced \
88 , Option ['v'] ["verbose"]
89 (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
90 "increase the verbosity level"
91 , Option ['O'] ["offline"]
92 (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
93 " set node as offline"
94 , Option ['V'] ["version"]
95 (NoArg (\ opts -> opts { optShowVer = True}))
96 "show the version of the program"
97 , Option ['h'] ["help"]
98 (NoArg (\ opts -> opts { optShowHelp = True}))
102 {- | Start computing the solution at the given depth and recurse until
103 we find a valid solution or we exceed the maximum depth.
106 iterateDepth :: Cluster.Table -- ^ The starting table
107 -> Int -- ^ Remaining length
108 -> Cluster.NameList -- ^ Node idx to name list
109 -> Cluster.NameList -- ^ Inst idx to name list
110 -> Int -- ^ Max node name len
111 -> Int -- ^ Max instance name len
112 -> [[String]] -- ^ Current command list
113 -> Bool -- ^ Wheter to be silent
114 -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
116 iterateDepth ini_tbl max_rounds ktn kti nmlen imlen cmd_strs oneline =
117 let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
118 all_inst = Container.elems ini_il
119 node_idx = map Node.idx . filter (not . Node.offline) $
120 Container.elems ini_nl
121 fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
122 (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
123 ini_plc_len = length ini_plc
124 fin_plc_len = length fin_plc
125 allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
129 (sol_line, cmds) = Cluster.printSolutionLine ini_il ktn kti
130 nmlen imlen (head fin_plc) fin_plc_len
131 upd_cmd_strs = cmds:cmd_strs
132 unless (oneline || fin_plc_len == ini_plc_len) $ do
135 (if fin_cv < ini_cv then -- this round made success, try deeper
137 then iterateDepth fin_tbl max_rounds ktn kti
138 nmlen imlen upd_cmd_strs oneline
139 -- don't go deeper, but return the better solution
140 else return (fin_tbl, upd_cmd_strs)
142 return (ini_tbl, cmd_strs))
147 cmd_args <- System.getArgs
148 (opts, args) <- CLI.parseOpts cmd_args "hbal" options
149 defaultOptions optShowHelp
151 unless (null args) $ do
152 hPutStrLn stderr "Error: this program doesn't take any arguments."
153 exitWith $ ExitFailure 1
155 when (optShowVer opts) $ do
156 putStr $ CLI.showVersion "hbal"
159 (env_node, env_inst) <- CLI.parseEnv ()
160 let nodef = if optNodeSet opts then optNodef opts
162 instf = if optInstSet opts then optInstf opts
164 oneline = optOneline opts
165 verbose = optVerbose opts
166 (node_data, inst_data) =
167 case optMaster opts of
168 "" -> (readFile nodef,
170 host -> (readData getNodes host,
171 readData getInstances host)
173 (loaded_nl, il, csf, ktn, kti) <- liftM2 Cluster.loadData node_data inst_data
174 let (fix_msgs, fixed_nl) = Cluster.checkData loaded_nl il ktn kti
176 unless (null fix_msgs) $ do
177 putStrLn "Warning: cluster has inconsistent data:"
178 putStrLn . unlines . map (\s -> printf " - %s" s) $ fix_msgs
180 let offline_names = optOffline opts
181 all_names = snd . unzip $ ktn
182 offline_wrong = filter (\n -> not $ elem n all_names) offline_names
183 offline_indices = fst . unzip .
184 filter (\(_, n) -> elem n offline_names) $ ktn
186 when (length offline_wrong > 0) $ do
187 printf "Wrong node name(s) set as offline: %s\n"
188 (commaJoin offline_wrong)
189 exitWith $ ExitFailure 1
191 let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
192 then Node.setOffline n True
195 unless oneline $ printf "Loaded %d nodes, %d instances\n"
199 when (length csf > 0 && not oneline && verbose > 0) $ do
200 printf "Note: Stripping common suffix of '%s' from names\n" csf
202 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
203 unless (oneline || verbose == 0) $ printf
204 "Initial check done: %d bad nodes, %d bad instances.\n"
205 (length bad_nodes) (length bad_instances)
207 when (length bad_nodes > 0) $ do
208 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
209 \that the cluster will end N+1 happy."
211 when (optShowNodes opts) $
213 putStrLn "Initial cluster status:"
214 putStrLn $ Cluster.printNodes ktn nl
216 let ini_cv = Cluster.compCV nl
217 ini_tbl = Cluster.Table nl il ini_cv []
218 unless oneline (if verbose > 1 then
219 printf "Initial coefficients: overall %.8f, %s\n"
220 ini_cv (Cluster.printStats nl)
222 printf "Initial score: %.8f\n" ini_cv)
224 unless oneline $ putStrLn "Trying to minimize the CV..."
225 let mlen_fn = maximum . (map length) . snd . unzip
229 (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
230 ktn kti nmlen imlen [] oneline
231 let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
232 ord_plc = reverse fin_plc
233 sol_msg = if null fin_plc
234 then printf "No solution found\n"
236 then printf "Final coefficients: overall %.8f, %s\n"
237 fin_cv (Cluster.printStats fin_nl)
238 else printf "Cluster score improved from %.8f to %.8f\n"
242 unless oneline $ putStr sol_msg
244 unless (oneline || verbose == 0) $
245 printf "Solution length=%d\n" (length ord_plc)
247 let cmd_data = Cluster.formatCmds . reverse $ cmd_strs
249 when (isJust $ optShowCmds opts) $
251 let out_path = fromJust $ optShowCmds opts
253 (if out_path == "-" then
254 printf "Commands to run to reach the above solution:\n%s"
255 (unlines . map (" " ++) .
256 filter (/= "check") .
259 writeFile out_path (CLI.shTemplate ++ cmd_data)
260 printf "The commands have been written to file '%s'\n" out_path)
262 when (optShowNodes opts) $
264 let (orig_mem, orig_disk) = Cluster.totalResources nl
265 (final_mem, final_disk) = Cluster.totalResources fin_nl
267 putStrLn "Final cluster status:"
268 putStrLn $ Cluster.printNodes ktn fin_nl
271 printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
272 printf "Final: mem=%d disk=%d\n" final_mem final_disk
274 printf "%.8f %d %.8f %8.3f\n"
275 ini_cv (length ord_plc) fin_cv (ini_cv / fin_cv)