7 Copyright (C) 2009 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Main (main) where
28 import Control.Concurrent (threadDelay)
29 import Control.Exception (bracket)
32 import Data.Maybe (isJust, fromJust)
36 import qualified System
38 import Text.Printf (printf, hPrintf)
39 import Text.JSON (showJSON)
41 import qualified Ganeti.HTools.Container as Container
42 import qualified Ganeti.HTools.Cluster as Cluster
43 import qualified Ganeti.HTools.Node as Node
44 import qualified Ganeti.HTools.Instance as Instance
46 import Ganeti.HTools.CLI
47 import Ganeti.HTools.ExtLoader
48 import Ganeti.HTools.Utils
49 import Ganeti.HTools.Types
51 import qualified Ganeti.Luxi as L
52 import qualified Ganeti.OpCodes as OpCodes
55 -- | Options list and functions
80 {- | Start computing the solution at the given depth and recurse until
81 we find a valid solution or we exceed the maximum depth.
84 iterateDepth :: Cluster.Table -- ^ The starting table
85 -> Int -- ^ Remaining length
86 -> Bool -- ^ Allow disk moves
87 -> Int -- ^ Max node name len
88 -> Int -- ^ Max instance name len
89 -> [MoveJob] -- ^ Current command list
90 -> Bool -- ^ Whether to be silent
91 -> Score -- ^ Score at which to stop
92 -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
94 iterateDepth ini_tbl max_rounds disk_moves nmlen imlen
95 cmd_strs oneline min_score =
96 let Cluster.Table ini_nl ini_il _ _ = ini_tbl
97 allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
98 m_fin_tbl = if allowed_next
99 then Cluster.tryBalance ini_tbl disk_moves
106 (Cluster.Table _ _ _ fin_plc) = fin_tbl
107 fin_plc_len = length fin_plc
108 cur_plc@(idx, _, _, move, _) = head fin_plc
109 (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
110 nmlen imlen cur_plc fin_plc_len
111 afn = Cluster.involvedNodes ini_il cur_plc
112 upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
116 iterateDepth fin_tbl max_rounds disk_moves
117 nmlen imlen upd_cmd_strs oneline min_score
118 Nothing -> return (ini_tbl, cmd_strs)
120 -- | Formats the solution for the oneline display
121 formatOneline :: Double -> Int -> Double -> String
122 formatOneline ini_cv plc_len fin_cv =
123 printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
124 (if fin_cv == 0 then 1 else ini_cv / fin_cv)
126 -- | Submits a list of jobs and waits for all to finish execution
127 execJobs :: L.Client -> [[OpCodes.OpCode]] -> IO (Result [String])
128 execJobs client = L.submitManyJobs client . showJSON
130 -- | Polls a set of jobs at a fixed interval until all are finished
131 -- one way or another
132 waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
133 waitForJobs client jids = do
134 sts <- L.queryJobsStatus client jids
136 Bad x -> return $ Bad x
137 Ok s -> if any (<= JobRunning) s
139 -- TODO: replace hardcoded value with a better thing
140 threadDelay (1000000 * 15)
141 waitForJobs client jids
144 -- | Check that a set of job statuses is all success
145 checkJobsStatus :: [JobStatus] -> Bool
146 checkJobsStatus = all (== JobSuccess)
148 -- | Execute an entire jobset
149 execJobSet :: String -> String -> Node.List
150 -> Instance.List -> [JobSet] -> IO ()
151 execJobSet _ _ _ _ [] = return ()
152 execJobSet master csf nl il (js:jss) = do
153 -- map from jobset (htools list of positions) to [[opcodes]]
154 let jobs = map (\(_, idx, move, _) ->
155 Cluster.iMoveToJob csf nl il idx move) js
156 let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
157 putStrLn $ "Executing jobset for instances " ++ commaJoin descr
158 jrs <- bracket (L.getClient master) L.closeClient
160 jids <- execJobs client jobs
162 Bad x -> return $ Bad x
164 putStrLn $ "Got job IDs " ++ commaJoin x
169 hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
171 Ok x -> if checkJobsStatus x
172 then execJobSet master csf nl il jss
174 hPutStrLn stderr $ "Not all jobs completed successfully: " ++
176 hPutStrLn stderr "Aborting.")
181 cmd_args <- System.getArgs
182 (opts, args) <- parseOpts cmd_args "hbal" options
184 unless (null args) $ do
185 hPutStrLn stderr "Error: this program doesn't take any arguments."
186 exitWith $ ExitFailure 1
188 let oneline = optOneline opts
189 verbose = optVerbose opts
190 shownodes = optShowNodes opts
192 (fixed_nl, il, ctags, csf) <- loadExternalData opts
194 let offline_names = optOffline opts
195 all_nodes = Container.elems fixed_nl
196 all_names = map Node.name all_nodes
197 offline_wrong = filter (flip notElem all_names) offline_names
198 offline_indices = map Node.idx $
199 filter (\n -> elem (Node.name n) offline_names)
204 when (length offline_wrong > 0) $ do
205 hPrintf stderr "Wrong node name(s) set as offline: %s\n"
206 (commaJoin offline_wrong)
207 exitWith $ ExitFailure 1
209 let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
210 then Node.setOffline n True
212 nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
215 when (not oneline && verbose > 1) $
216 putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
218 when (Container.size il == 0) $ do
219 (if oneline then putStrLn $ formatOneline 0 0 0
220 else printf "Cluster is empty, exiting.\n")
223 unless oneline $ printf "Loaded %d nodes, %d instances\n"
227 when (length csf > 0 && not oneline && verbose > 1) $
228 printf "Note: Stripping common suffix of '%s' from names\n" csf
230 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
231 unless (oneline || verbose == 0) $ printf
232 "Initial check done: %d bad nodes, %d bad instances.\n"
233 (length bad_nodes) (length bad_instances)
235 when (length bad_nodes > 0) $
236 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
237 \that the cluster will end N+1 happy."
239 when (optShowInsts opts) $ do
241 putStrLn "Initial instance map:"
242 putStrLn $ Cluster.printInsts nl il
244 when (isJust shownodes) $
246 putStrLn "Initial cluster status:"
247 putStrLn $ Cluster.printNodes nl (fromJust shownodes)
249 let ini_cv = Cluster.compCV nl
250 ini_tbl = Cluster.Table nl il ini_cv []
251 min_cv = optMinScore opts
253 when (ini_cv < min_cv) $ do
255 putStrLn $ formatOneline ini_cv 0 ini_cv
256 else printf "Cluster is already well balanced (initial score %.6g,\n\
257 \minimum score %.6g).\nNothing to do, exiting\n"
261 unless oneline (if verbose > 2 then
262 printf "Initial coefficients: overall %.8f, %s\n"
263 ini_cv (Cluster.printStats nl)
265 printf "Initial score: %.8f\n" ini_cv)
267 unless oneline $ putStrLn "Trying to minimize the CV..."
268 let imlen = Container.maxNameLen il
269 nmlen = Container.maxNameLen nl
271 (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
273 nmlen imlen [] oneline min_cv
274 let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
275 ord_plc = reverse fin_plc
276 sol_msg = if null fin_plc
277 then printf "No solution found\n"
279 then printf "Final coefficients: overall %.8f, %s\n"
280 fin_cv (Cluster.printStats fin_nl)
281 else printf "Cluster score improved from %.8f to %.8f\n"
285 unless oneline $ putStr sol_msg
287 unless (oneline || verbose == 0) $
288 printf "Solution length=%d\n" (length ord_plc)
290 let cmd_jobs = Cluster.splitJobs cmd_strs
291 cmd_data = Cluster.formatCmds cmd_jobs
293 when (isJust $ optShowCmds opts) $
295 let out_path = fromJust $ optShowCmds opts
297 (if out_path == "-" then
298 printf "Commands to run to reach the above solution:\n%s"
299 (unlines . map (" " ++) .
300 filter (/= " check") .
303 writeFile out_path (shTemplate ++ cmd_data)
304 printf "The commands have been written to file '%s'\n" out_path)
306 when (optExecJobs opts && not (null ord_plc))
307 (case optLuxi opts of
309 hPutStrLn stderr "Execution of commands possible only on LUXI"
310 exitWith $ ExitFailure 1
311 Just master -> execJobSet master csf fin_nl il cmd_jobs)
313 when (optShowInsts opts) $ do
315 putStrLn "Final instance map:"
316 putStr $ Cluster.printInsts fin_nl fin_il
318 when (isJust shownodes) $
320 let ini_cs = Cluster.totalResources nl
321 fin_cs = Cluster.totalResources fin_nl
323 putStrLn "Final cluster status:"
324 putStrLn $ Cluster.printNodes fin_nl (fromJust shownodes)
327 printf "Original: mem=%d disk=%d\n"
328 (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs)
329 printf "Final: mem=%d disk=%d\n"
330 (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
332 putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv