1 {-| Solver for N+1 cluster errors
5 module Main (main) where
12 import System.Console.GetOpt
13 import qualified System
15 import Text.Printf (printf)
17 import qualified Ganeti.HTools.Container as Container
18 import qualified Ganeti.HTools.Cluster as Cluster
19 import qualified Ganeti.HTools.Node as Node
20 import qualified Ganeti.HTools.CLI as CLI
21 import Ganeti.HTools.Rapi
22 import Ganeti.HTools.Utils
24 -- | Command line options structure.
25 data Options = Options
26 { optShowNodes :: Bool -- ^ Whether to show node status
27 , optShowCmds :: Bool -- ^ Whether to show the command list
28 , optOneline :: Bool -- ^ Switch output to a single line
29 , optNodef :: FilePath -- ^ Path to the nodes file
30 , optInstf :: FilePath -- ^ Path to the instances file
31 , optMaxLength :: Int -- ^ Stop after this many steps
32 , optMaster :: String -- ^ Collect data from RAPI
33 , optVerbose :: Int -- ^ Verbosity level
34 , optOffline :: [String] -- ^ Names of offline nodes
35 , optShowVer :: Bool -- ^ Just show the program version
36 , optShowHelp :: Bool -- ^ Just show the help
39 -- | Default values for the command line options.
40 defaultOptions :: Options
41 defaultOptions = Options
42 { optShowNodes = False
46 , optInstf = "instances"
55 -- | Options list and functions
56 options :: [OptDescr (Options -> Options)]
58 [ Option ['p'] ["print-nodes"]
59 (NoArg (\ opts -> opts { optShowNodes = True }))
60 "print the final node list"
61 , Option ['C'] ["print-commands"]
62 (NoArg (\ opts -> opts { optShowCmds = True }))
63 "print the ganeti command list for reaching the solution"
64 , Option ['o'] ["oneline"]
65 (NoArg (\ opts -> opts { optOneline = True }))
66 "print the ganeti command list for reaching the solution"
67 , Option ['n'] ["nodes"]
68 (ReqArg (\ f opts -> opts { optNodef = f }) "FILE")
70 , Option ['i'] ["instances"]
71 (ReqArg (\ f opts -> opts { optInstf = f }) "FILE")
72 "the instance list FILE"
73 , Option ['m'] ["master"]
74 (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
75 "collect data via RAPI at the given ADDRESS"
76 , Option ['l'] ["max-length"]
77 (ReqArg (\ i opts -> opts { optMaxLength = (read i)::Int }) "N")
78 "cap the solution at this many moves (useful for very unbalanced \
80 , Option ['v'] ["verbose"]
81 (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
82 "increase the verbosity level"
83 , Option ['O'] ["offline"]
84 (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
85 " set node as offline"
86 , Option ['V'] ["version"]
87 (NoArg (\ opts -> opts { optShowVer = True}))
88 "show the version of the program"
89 , Option ['h'] ["help"]
90 (NoArg (\ opts -> opts { optShowHelp = True}))
94 {- | Start computing the solution at the given depth and recurse until
95 we find a valid solution or we exceed the maximum depth.
98 iterateDepth :: Cluster.Table -- ^ The starting table
99 -> Int -- ^ Remaining length
100 -> Cluster.NameList -- ^ Node idx to name list
101 -> Cluster.NameList -- ^ Inst idx to name list
102 -> Int -- ^ Max node name len
103 -> Int -- ^ Max instance name len
104 -> [[String]] -- ^ Current command list
105 -> Bool -- ^ Wheter to be silent
106 -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
108 iterateDepth ini_tbl max_rounds ktn kti nmlen imlen cmd_strs oneline =
109 let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
110 all_inst = Container.elems ini_il
111 node_idx = map Node.idx . filter (not . Node.offline) $
112 Container.elems ini_nl
113 fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
114 (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
115 ini_plc_len = length ini_plc
116 fin_plc_len = length fin_plc
117 allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
121 (sol_line, cmds) = Cluster.printSolutionLine ini_il ktn kti
122 nmlen imlen (head fin_plc) fin_plc_len
123 upd_cmd_strs = cmds:cmd_strs
124 unless (oneline || fin_plc_len == ini_plc_len) $ do
127 (if fin_cv < ini_cv then -- this round made success, try deeper
129 then iterateDepth fin_tbl max_rounds ktn kti
130 nmlen imlen upd_cmd_strs oneline
131 -- don't go deeper, but return the better solution
132 else return (fin_tbl, upd_cmd_strs)
134 return (ini_tbl, cmd_strs))
139 cmd_args <- System.getArgs
140 (opts, _) <- CLI.parseOpts cmd_args "hbal" options defaultOptions optShowHelp
142 when (optShowVer opts) $ do
143 putStr $ CLI.showVersion "hbal"
146 let oneline = optOneline opts
147 verbose = optVerbose opts
148 (node_data, inst_data) =
149 case optMaster opts of
150 "" -> (readFile $ optNodef opts,
151 readFile $ optInstf opts)
152 host -> (readData getNodes host,
153 readData getInstances host)
155 (loaded_nl, il, csf, ktn, kti) <- liftM2 Cluster.loadData node_data inst_data
156 let (fix_msgs, fixed_nl) = Cluster.checkData loaded_nl il ktn kti
158 unless (null fix_msgs) $ do
159 putStrLn "Warning: cluster has inconsistent data:"
160 putStrLn . unlines . map (\s -> printf " - %s" s) $ fix_msgs
162 let offline_names = optOffline opts
163 offline_indices = fst . unzip .
164 filter (\(_, n) -> elem n offline_names) $ ktn
166 let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
167 then Node.setOffline n True
170 unless oneline $ printf "Loaded %d nodes, %d instances\n"
174 when (length csf > 0 && not oneline && verbose > 0) $ do
175 printf "Note: Stripping common suffix of '%s' from names\n" csf
177 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
178 unless (oneline || verbose == 0) $ printf
179 "Initial check done: %d bad nodes, %d bad instances.\n"
180 (length bad_nodes) (length bad_instances)
182 when (length bad_nodes > 0) $ do
183 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
184 \that the cluster will end N+1 happy."
186 when (optShowNodes opts) $
188 putStrLn "Initial cluster status:"
189 putStrLn $ Cluster.printNodes ktn nl
191 let ini_cv = Cluster.compCV nl
192 ini_tbl = Cluster.Table nl il ini_cv []
193 unless oneline (if verbose > 1 then
194 printf "Initial coefficients: overall %.8f, %s\n"
195 ini_cv (Cluster.printStats nl)
197 printf "Initial score: %.8f\n" ini_cv)
199 unless oneline $ putStrLn "Trying to minimize the CV..."
200 let mlen_fn = maximum . (map length) . snd . unzip
204 (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
205 ktn kti nmlen imlen [] oneline
206 let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
207 ord_plc = reverse fin_plc
208 sol_msg = if null fin_plc
209 then printf "No solution found\n"
211 then printf "Final coefficients: overall %.8f, %s\n"
212 fin_cv (Cluster.printStats fin_nl)
213 else printf "Cluster score improved from %.8f to %.8f\n"
217 unless oneline $ putStr sol_msg
219 unless (oneline || verbose == 0) $
220 printf "Solution length=%d\n" (length ord_plc)
222 when (optShowCmds opts) $
225 putStrLn "Commands to run to reach the above solution:"
226 putStr . Cluster.formatCmds . reverse $ cmd_strs
227 when (optShowNodes opts) $
229 let (orig_mem, orig_disk) = Cluster.totalResources nl
230 (final_mem, final_disk) = Cluster.totalResources fin_nl
232 putStrLn "Final cluster status:"
233 putStrLn $ Cluster.printNodes ktn fin_nl
236 printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
237 printf "Final: mem=%d disk=%d\n" final_mem final_disk
239 printf "%.8f %d %.8f %8.3f\n"
240 ini_cv (length ord_plc) fin_cv (ini_cv / fin_cv)