7 Copyright (C) 2009 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Main (main) where
28 import Control.Concurrent (threadDelay)
29 import Control.Exception (bracket)
32 import Data.Maybe (isJust, fromJust)
36 import qualified System
38 import Text.Printf (printf, hPrintf)
39 import Text.JSON (showJSON)
41 import qualified Ganeti.HTools.Container as Container
42 import qualified Ganeti.HTools.Cluster as Cluster
43 import qualified Ganeti.HTools.Node as Node
44 import qualified Ganeti.HTools.Instance as Instance
46 import Ganeti.HTools.CLI
47 import Ganeti.HTools.ExtLoader
48 import Ganeti.HTools.Utils
49 import Ganeti.HTools.Types
51 import qualified Ganeti.Luxi as L
52 import qualified Ganeti.OpCodes as OpCodes
55 -- | Options list and functions
80 {- | Start computing the solution at the given depth and recurse until
81 we find a valid solution or we exceed the maximum depth.
84 iterateDepth :: Cluster.Table -- ^ The starting table
85 -> Int -- ^ Remaining length
86 -> Bool -- ^ Allow disk moves
87 -> Int -- ^ Max node name len
88 -> Int -- ^ Max instance name len
89 -> [MoveJob] -- ^ Current command list
90 -> Bool -- ^ Whether to be silent
91 -> Score -- ^ Score at which to stop
92 -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
94 iterateDepth ini_tbl max_rounds disk_moves nmlen imlen
95 cmd_strs oneline min_score =
96 let Cluster.Table ini_nl ini_il _ _ = ini_tbl
97 m_fin_tbl = Cluster.tryBalance ini_tbl max_rounds disk_moves min_score
103 (Cluster.Table _ _ _ fin_plc) = fin_tbl
104 fin_plc_len = length fin_plc
105 cur_plc@(idx, _, _, move, _) = head fin_plc
106 (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
107 nmlen imlen cur_plc fin_plc_len
108 afn = Cluster.involvedNodes ini_il cur_plc
109 upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
113 iterateDepth fin_tbl max_rounds disk_moves
114 nmlen imlen upd_cmd_strs oneline min_score
115 Nothing -> return (ini_tbl, cmd_strs)
117 -- | Formats the solution for the oneline display
118 formatOneline :: Double -> Int -> Double -> String
119 formatOneline ini_cv plc_len fin_cv =
120 printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
121 (if fin_cv == 0 then 1 else ini_cv / fin_cv)
123 -- | Submits a list of jobs and waits for all to finish execution
124 execJobs :: L.Client -> [[OpCodes.OpCode]] -> IO (Result [String])
125 execJobs client = L.submitManyJobs client . showJSON
127 -- | Polls a set of jobs at a fixed interval until all are finished
128 -- one way or another
129 waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
130 waitForJobs client jids = do
131 sts <- L.queryJobsStatus client jids
133 Bad x -> return $ Bad x
134 Ok s -> if any (<= JobRunning) s
136 -- TODO: replace hardcoded value with a better thing
137 threadDelay (1000000 * 15)
138 waitForJobs client jids
141 -- | Check that a set of job statuses is all success
142 checkJobsStatus :: [JobStatus] -> Bool
143 checkJobsStatus = all (== JobSuccess)
145 -- | Execute an entire jobset
146 execJobSet :: String -> String -> Node.List
147 -> Instance.List -> [JobSet] -> IO ()
148 execJobSet _ _ _ _ [] = return ()
149 execJobSet master csf nl il (js:jss) = do
150 -- map from jobset (htools list of positions) to [[opcodes]]
151 let jobs = map (\(_, idx, move, _) ->
152 Cluster.iMoveToJob csf nl il idx move) js
153 let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
154 putStrLn $ "Executing jobset for instances " ++ commaJoin descr
155 jrs <- bracket (L.getClient master) L.closeClient
157 jids <- execJobs client jobs
159 Bad x -> return $ Bad x
161 putStrLn $ "Got job IDs " ++ commaJoin x
166 hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
168 Ok x -> if checkJobsStatus x
169 then execJobSet master csf nl il jss
171 hPutStrLn stderr $ "Not all jobs completed successfully: " ++
173 hPutStrLn stderr "Aborting.")
178 cmd_args <- System.getArgs
179 (opts, args) <- parseOpts cmd_args "hbal" options
181 unless (null args) $ do
182 hPutStrLn stderr "Error: this program doesn't take any arguments."
183 exitWith $ ExitFailure 1
185 let oneline = optOneline opts
186 verbose = optVerbose opts
187 shownodes = optShowNodes opts
189 (fixed_nl, il, ctags, csf) <- loadExternalData opts
191 let offline_names = optOffline opts
192 all_nodes = Container.elems fixed_nl
193 all_names = map Node.name all_nodes
194 offline_wrong = filter (flip notElem all_names) offline_names
195 offline_indices = map Node.idx $
196 filter (\n -> elem (Node.name n) offline_names)
201 when (length offline_wrong > 0) $ do
202 hPrintf stderr "Wrong node name(s) set as offline: %s\n"
203 (commaJoin offline_wrong)
204 exitWith $ ExitFailure 1
206 let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
207 then Node.setOffline n True
209 nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
212 when (not oneline && verbose > 1) $
213 putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
215 when (Container.size il == 0) $ do
216 (if oneline then putStrLn $ formatOneline 0 0 0
217 else printf "Cluster is empty, exiting.\n")
220 unless oneline $ printf "Loaded %d nodes, %d instances\n"
224 when (length csf > 0 && not oneline && verbose > 1) $
225 printf "Note: Stripping common suffix of '%s' from names\n" csf
227 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
228 unless (oneline || verbose == 0) $ printf
229 "Initial check done: %d bad nodes, %d bad instances.\n"
230 (length bad_nodes) (length bad_instances)
232 when (length bad_nodes > 0) $
233 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
234 \that the cluster will end N+1 happy."
236 when (optShowInsts opts) $ do
238 putStrLn "Initial instance map:"
239 putStrLn $ Cluster.printInsts nl il
241 when (isJust shownodes) $
243 putStrLn "Initial cluster status:"
244 putStrLn $ Cluster.printNodes nl (fromJust shownodes)
246 let ini_cv = Cluster.compCV nl
247 ini_tbl = Cluster.Table nl il ini_cv []
248 min_cv = optMinScore opts
250 when (ini_cv < min_cv) $ do
252 putStrLn $ formatOneline ini_cv 0 ini_cv
253 else printf "Cluster is already well balanced (initial score %.6g,\n\
254 \minimum score %.6g).\nNothing to do, exiting\n"
258 unless oneline (if verbose > 2 then
259 printf "Initial coefficients: overall %.8f, %s\n"
260 ini_cv (Cluster.printStats nl)
262 printf "Initial score: %.8f\n" ini_cv)
264 unless oneline $ putStrLn "Trying to minimize the CV..."
265 let imlen = Container.maxNameLen il
266 nmlen = Container.maxNameLen nl
268 (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
270 nmlen imlen [] oneline min_cv
271 let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
272 ord_plc = reverse fin_plc
273 sol_msg = if null fin_plc
274 then printf "No solution found\n"
276 then printf "Final coefficients: overall %.8f, %s\n"
277 fin_cv (Cluster.printStats fin_nl)
278 else printf "Cluster score improved from %.8f to %.8f\n"
282 unless oneline $ putStr sol_msg
284 unless (oneline || verbose == 0) $
285 printf "Solution length=%d\n" (length ord_plc)
287 let cmd_jobs = Cluster.splitJobs cmd_strs
288 cmd_data = Cluster.formatCmds cmd_jobs
290 when (isJust $ optShowCmds opts) $
292 let out_path = fromJust $ optShowCmds opts
294 (if out_path == "-" then
295 printf "Commands to run to reach the above solution:\n%s"
296 (unlines . map (" " ++) .
297 filter (/= " check") .
300 writeFile out_path (shTemplate ++ cmd_data)
301 printf "The commands have been written to file '%s'\n" out_path)
303 when (optExecJobs opts && not (null ord_plc))
304 (case optLuxi opts of
306 hPutStrLn stderr "Execution of commands possible only on LUXI"
307 exitWith $ ExitFailure 1
308 Just master -> execJobSet master csf fin_nl il cmd_jobs)
310 when (optShowInsts opts) $ do
312 putStrLn "Final instance map:"
313 putStr $ Cluster.printInsts fin_nl fin_il
315 when (isJust shownodes) $
317 let ini_cs = Cluster.totalResources nl
318 fin_cs = Cluster.totalResources fin_nl
320 putStrLn "Final cluster status:"
321 putStrLn $ Cluster.printNodes fin_nl (fromJust shownodes)
324 printf "Original: mem=%d disk=%d\n"
325 (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs)
326 printf "Final: mem=%d disk=%d\n"
327 (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
329 putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv