1 {-| Solver for N+1 cluster errors
5 module Main (main) where
12 import System.Console.GetOpt
13 import qualified System
15 import Text.Printf (printf)
17 import qualified Ganeti.HTools.Container as Container
18 import qualified Ganeti.HTools.Cluster as Cluster
19 import qualified Ganeti.HTools.Version as Version
20 import qualified Ganeti.HTools.Node as Node
21 import Ganeti.HTools.Rapi
22 import Ganeti.HTools.Utils
24 -- | Command line options structure.
25 data Options = Options
26 { optShowNodes :: Bool -- ^ Whether to show node status
27 , optShowCmds :: Bool -- ^ Whether to show the command list
28 , optOneline :: Bool -- ^ Switch output to a single line
29 , optNodef :: FilePath -- ^ Path to the nodes file
30 , optInstf :: FilePath -- ^ Path to the instances file
31 , optMaxLength :: Int -- ^ Stop after this many steps
32 , optMaster :: String -- ^ Collect data from RAPI
33 , optVerbose :: Int -- ^ Verbosity level
34 , optShowVer :: Bool -- ^ Just show the program version
35 , optOffline :: [String] -- ^ Names of offline nodes
38 -- | Default values for the command line options.
39 defaultOptions :: Options
40 defaultOptions = Options
41 { optShowNodes = False
45 , optInstf = "instances"
53 -- | Options list and functions
54 options :: [OptDescr (Options -> Options)]
56 [ Option ['p'] ["print-nodes"]
57 (NoArg (\ opts -> opts { optShowNodes = True }))
58 "print the final node list"
59 , Option ['C'] ["print-commands"]
60 (NoArg (\ opts -> opts { optShowCmds = True }))
61 "print the ganeti command list for reaching the solution"
62 , Option ['o'] ["oneline"]
63 (NoArg (\ opts -> opts { optOneline = True }))
64 "print the ganeti command list for reaching the solution"
65 , Option ['n'] ["nodes"]
66 (ReqArg (\ f opts -> opts { optNodef = f }) "FILE")
68 , Option ['i'] ["instances"]
69 (ReqArg (\ f opts -> opts { optInstf = f }) "FILE")
70 "the instance list FILE"
71 , Option ['m'] ["master"]
72 (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
73 "collect data via RAPI at the given ADDRESS"
74 , Option ['l'] ["max-length"]
75 (ReqArg (\ i opts -> opts { optMaxLength = (read i)::Int }) "N")
76 "cap the solution at this many moves (useful for very unbalanced \
78 , Option ['v'] ["verbose"]
79 (NoArg (\ opts -> let nv = (optVerbose opts)
80 in opts { optVerbose = nv + 1 }))
81 "increase the verbosity level"
82 , Option ['V'] ["version"]
83 (NoArg (\ opts -> opts { optShowVer = True}))
84 "show the version of the program"
85 , Option ['O'] ["offline"]
86 (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
87 " set node as offline"
90 -- | Command line parser, using the 'options' structure.
91 parseOpts :: [String] -> IO (Options, [String])
93 case getOpt Permute options argv of
95 return (foldl (flip id) defaultOptions o, n)
97 ioError (userError (concat errs ++ usageInfo header options))
98 where header = printf "hbal %s\nUsage: hbal [OPTION...]"
101 {- | Start computing the solution at the given depth and recurse until
102 we find a valid solution or we exceed the maximum depth.
105 iterateDepth :: Cluster.Table -- ^ The starting table
106 -> Int -- ^ Remaining length
107 -> Cluster.NameList -- ^ Node idx to name list
108 -> Cluster.NameList -- ^ Inst idx to name list
109 -> Int -- ^ Max node name len
110 -> Int -- ^ Max instance name len
111 -> [[String]] -- ^ Current command list
112 -> Bool -- ^ Wheter to be silent
113 -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
115 iterateDepth ini_tbl max_rounds ktn kti nmlen imlen cmd_strs oneline =
116 let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
117 all_inst = Container.elems ini_il
118 node_idx = map Node.idx . filter (not . Node.offline) $
119 Container.elems ini_nl
120 fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
121 (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
122 ini_plc_len = length ini_plc
123 fin_plc_len = length fin_plc
124 allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
128 (sol_line, cmds) = Cluster.printSolutionLine ini_il ktn kti
129 nmlen imlen (head fin_plc) fin_plc_len
130 upd_cmd_strs = cmds:cmd_strs
131 unless (oneline || fin_plc_len == ini_plc_len) $ do
134 (if fin_cv < ini_cv then -- this round made success, try deeper
136 then iterateDepth fin_tbl max_rounds ktn kti
137 nmlen imlen upd_cmd_strs oneline
138 -- don't go deeper, but return the better solution
139 else return (fin_tbl, upd_cmd_strs)
141 return (ini_tbl, cmd_strs))
146 cmd_args <- System.getArgs
147 (opts, _) <- parseOpts cmd_args
149 when (optShowVer opts) $ do
150 putStr $ showVersion "hbal"
153 let oneline = optOneline opts
154 verbose = optVerbose opts
155 (node_data, inst_data) =
156 case optMaster opts of
157 "" -> (readFile $ optNodef opts,
158 readFile $ optInstf opts)
159 host -> (readData getNodes host,
160 readData getInstances host)
162 (loaded_nl, il, csf, ktn, kti) <- liftM2 Cluster.loadData node_data inst_data
164 let offline_names = optOffline opts
165 offline_indices = fst . unzip .
166 filter (\(_, n) -> elem n offline_names) $ ktn
168 let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
169 then Node.setOffline n True
172 unless oneline $ printf "Loaded %d nodes, %d instances\n"
176 when (length csf > 0 && not oneline && verbose > 0) $ do
177 printf "Note: Stripping common suffix of '%s' from names\n" csf
179 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
180 unless (oneline || verbose == 0) $ printf
181 "Initial check done: %d bad nodes, %d bad instances.\n"
182 (length bad_nodes) (length bad_instances)
184 when (length bad_nodes > 0) $ do
185 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
186 \that the cluster will end N+1 happy."
188 when (optShowNodes opts) $
190 putStrLn "Initial cluster status:"
191 putStrLn $ Cluster.printNodes ktn nl
193 let ini_cv = Cluster.compCV nl
194 ini_tbl = Cluster.Table nl il ini_cv []
195 unless oneline (if verbose > 1 then
196 printf "Initial coefficients: overall %.8f, %s\n"
197 ini_cv (Cluster.printStats nl)
199 printf "Initial score: %.8f\n" ini_cv)
201 unless oneline $ putStrLn "Trying to minimize the CV..."
202 let mlen_fn = maximum . (map length) . snd . unzip
206 (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
207 ktn kti nmlen imlen [] oneline
208 let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
209 ord_plc = reverse fin_plc
210 sol_msg = if null fin_plc
211 then printf "No solution found\n"
213 then printf "Final coefficients: overall %.8f, %s\n"
214 fin_cv (Cluster.printStats fin_nl)
215 else printf "Cluster score improved from %.8f to %.8f\n"
219 unless oneline $ putStr sol_msg
221 unless (oneline || verbose == 0) $
222 printf "Solution length=%d\n" (length ord_plc)
224 when (optShowCmds opts) $
227 putStrLn "Commands to run to reach the above solution:"
228 putStr . Cluster.formatCmds . reverse $ cmd_strs
229 when (optShowNodes opts) $
231 let (orig_mem, orig_disk) = Cluster.totalResources nl
232 (final_mem, final_disk) = Cluster.totalResources fin_nl
234 putStrLn "Final cluster status:"
235 putStrLn $ Cluster.printNodes ktn fin_nl
238 printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
239 printf "Final: mem=%d disk=%d\n" final_mem final_disk
241 printf "%.8f %d %.8f %8.3f\n"
242 ini_cv (length ord_plc) fin_cv (ini_cv / fin_cv)