1 {-| Solver for N+1 cluster errors
5 module Main (main) where
9 import Data.Maybe (isJust, fromJust, fromMaybe)
13 import System.Console.GetOpt
14 import qualified System
16 import Text.Printf (printf)
18 import qualified Ganeti.HTools.Container as Container
19 import qualified Ganeti.HTools.Cluster as Cluster
20 import qualified Ganeti.HTools.Node as Node
21 import qualified Ganeti.HTools.CLI as CLI
22 import Ganeti.HTools.Rapi
23 import Ganeti.HTools.Utils
25 -- | Command line options structure.
26 data Options = Options
27 { optShowNodes :: Bool -- ^ Whether to show node status
28 , optShowCmds :: Maybe FilePath -- ^ Whether to show the command list
29 , optOneline :: Bool -- ^ Switch output to a single line
30 , optNodef :: FilePath -- ^ Path to the nodes file
31 , optInstf :: FilePath -- ^ Path to the instances file
32 , optMaxLength :: Int -- ^ Stop after this many steps
33 , optMaster :: String -- ^ Collect data from RAPI
34 , optVerbose :: Int -- ^ Verbosity level
35 , optOffline :: [String] -- ^ Names of offline nodes
36 , optShowVer :: Bool -- ^ Just show the program version
37 , optShowHelp :: Bool -- ^ Just show the help
40 -- | Default values for the command line options.
41 defaultOptions :: Options
42 defaultOptions = Options
43 { optShowNodes = False
44 , optShowCmds = Nothing
47 , optInstf = "instances"
56 -- | Options list and functions
57 options :: [OptDescr (Options -> Options)]
59 [ Option ['p'] ["print-nodes"]
60 (NoArg (\ opts -> opts { optShowNodes = True }))
61 "print the final node list"
62 , Option ['C'] ["print-commands"]
63 (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-")
65 "print the ganeti command list for reaching the solution,\
66 \if an argument is passed then write the commands to a file named\
68 , Option ['o'] ["oneline"]
69 (NoArg (\ opts -> opts { optOneline = True }))
70 "print the ganeti command list for reaching the solution"
71 , Option ['n'] ["nodes"]
72 (ReqArg (\ f opts -> opts { optNodef = f }) "FILE")
74 , Option ['i'] ["instances"]
75 (ReqArg (\ f opts -> opts { optInstf = f }) "FILE")
76 "the instance list FILE"
77 , Option ['m'] ["master"]
78 (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
79 "collect data via RAPI at the given ADDRESS"
80 , Option ['l'] ["max-length"]
81 (ReqArg (\ i opts -> opts { optMaxLength = (read i)::Int }) "N")
82 "cap the solution at this many moves (useful for very unbalanced \
84 , Option ['v'] ["verbose"]
85 (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
86 "increase the verbosity level"
87 , Option ['O'] ["offline"]
88 (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
89 " set node as offline"
90 , Option ['V'] ["version"]
91 (NoArg (\ opts -> opts { optShowVer = True}))
92 "show the version of the program"
93 , Option ['h'] ["help"]
94 (NoArg (\ opts -> opts { optShowHelp = True}))
98 {- | Start computing the solution at the given depth and recurse until
99 we find a valid solution or we exceed the maximum depth.
102 iterateDepth :: Cluster.Table -- ^ The starting table
103 -> Int -- ^ Remaining length
104 -> Cluster.NameList -- ^ Node idx to name list
105 -> Cluster.NameList -- ^ Inst idx to name list
106 -> Int -- ^ Max node name len
107 -> Int -- ^ Max instance name len
108 -> [[String]] -- ^ Current command list
109 -> Bool -- ^ Wheter to be silent
110 -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
112 iterateDepth ini_tbl max_rounds ktn kti nmlen imlen cmd_strs oneline =
113 let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
114 all_inst = Container.elems ini_il
115 node_idx = map Node.idx . filter (not . Node.offline) $
116 Container.elems ini_nl
117 fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
118 (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
119 ini_plc_len = length ini_plc
120 fin_plc_len = length fin_plc
121 allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
125 (sol_line, cmds) = Cluster.printSolutionLine ini_il ktn kti
126 nmlen imlen (head fin_plc) fin_plc_len
127 upd_cmd_strs = cmds:cmd_strs
128 unless (oneline || fin_plc_len == ini_plc_len) $ do
131 (if fin_cv < ini_cv then -- this round made success, try deeper
133 then iterateDepth fin_tbl max_rounds ktn kti
134 nmlen imlen upd_cmd_strs oneline
135 -- don't go deeper, but return the better solution
136 else return (fin_tbl, upd_cmd_strs)
138 return (ini_tbl, cmd_strs))
143 cmd_args <- System.getArgs
144 (opts, _) <- CLI.parseOpts cmd_args "hbal" options defaultOptions optShowHelp
146 when (optShowVer opts) $ do
147 putStr $ CLI.showVersion "hbal"
150 let oneline = optOneline opts
151 verbose = optVerbose opts
152 (node_data, inst_data) =
153 case optMaster opts of
154 "" -> (readFile $ optNodef opts,
155 readFile $ optInstf opts)
156 host -> (readData getNodes host,
157 readData getInstances host)
159 (loaded_nl, il, csf, ktn, kti) <- liftM2 Cluster.loadData node_data inst_data
160 let (fix_msgs, fixed_nl) = Cluster.checkData loaded_nl il ktn kti
162 unless (null fix_msgs) $ do
163 putStrLn "Warning: cluster has inconsistent data:"
164 putStrLn . unlines . map (\s -> printf " - %s" s) $ fix_msgs
166 let offline_names = optOffline opts
167 all_names = snd . unzip $ ktn
168 offline_wrong = filter (\n -> not $ elem n all_names) offline_names
169 offline_indices = fst . unzip .
170 filter (\(_, n) -> elem n offline_names) $ ktn
172 when (length offline_wrong > 0) $ do
173 printf "Wrong node name(s) set as offline: %s\n"
174 (commaJoin offline_wrong)
175 exitWith $ ExitFailure 1
177 let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
178 then Node.setOffline n True
181 unless oneline $ printf "Loaded %d nodes, %d instances\n"
185 when (length csf > 0 && not oneline && verbose > 0) $ do
186 printf "Note: Stripping common suffix of '%s' from names\n" csf
188 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
189 unless (oneline || verbose == 0) $ printf
190 "Initial check done: %d bad nodes, %d bad instances.\n"
191 (length bad_nodes) (length bad_instances)
193 when (length bad_nodes > 0) $ do
194 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
195 \that the cluster will end N+1 happy."
197 when (optShowNodes opts) $
199 putStrLn "Initial cluster status:"
200 putStrLn $ Cluster.printNodes ktn nl
202 let ini_cv = Cluster.compCV nl
203 ini_tbl = Cluster.Table nl il ini_cv []
204 unless oneline (if verbose > 1 then
205 printf "Initial coefficients: overall %.8f, %s\n"
206 ini_cv (Cluster.printStats nl)
208 printf "Initial score: %.8f\n" ini_cv)
210 unless oneline $ putStrLn "Trying to minimize the CV..."
211 let mlen_fn = maximum . (map length) . snd . unzip
215 (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
216 ktn kti nmlen imlen [] oneline
217 let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
218 ord_plc = reverse fin_plc
219 sol_msg = if null fin_plc
220 then printf "No solution found\n"
222 then printf "Final coefficients: overall %.8f, %s\n"
223 fin_cv (Cluster.printStats fin_nl)
224 else printf "Cluster score improved from %.8f to %.8f\n"
228 unless oneline $ putStr sol_msg
230 unless (oneline || verbose == 0) $
231 printf "Solution length=%d\n" (length ord_plc)
233 let cmd_data = Cluster.formatCmds . reverse $ cmd_strs
235 when (isJust $ optShowCmds opts) $
237 let out_path = fromJust $ optShowCmds opts
239 (if out_path == "-" then
240 printf "Commands to run to reach the above solution:\n%s"
241 (unlines . map (" " ++) .
242 filter (/= "check") .
245 writeFile out_path (CLI.shTemplate ++ cmd_data)
246 printf "The commands have been written to file '%s'\n" out_path)
248 when (optShowNodes opts) $
250 let (orig_mem, orig_disk) = Cluster.totalResources nl
251 (final_mem, final_disk) = Cluster.totalResources fin_nl
253 putStrLn "Final cluster status:"
254 putStrLn $ Cluster.printNodes ktn fin_nl
257 printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
258 printf "Final: mem=%d disk=%d\n" final_mem final_disk
260 printf "%.8f %d %.8f %8.3f\n"
261 ini_cv (length ord_plc) fin_cv (ini_cv / fin_cv)