7 Copyright (C) 2009 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Main (main) where
28 import Control.Concurrent (threadDelay)
29 import Control.Exception (bracket)
32 import Data.Maybe (isJust, fromJust)
36 import qualified System
38 import Text.Printf (printf, hPrintf)
39 import Text.JSON (showJSON)
41 import qualified Ganeti.HTools.Container as Container
42 import qualified Ganeti.HTools.Cluster as Cluster
43 import qualified Ganeti.HTools.Node as Node
44 import qualified Ganeti.HTools.Instance as Instance
46 import Ganeti.HTools.CLI
47 import Ganeti.HTools.ExtLoader
48 import Ganeti.HTools.Utils
49 import Ganeti.HTools.Types
51 import qualified Ganeti.Luxi as L
52 import qualified Ganeti.OpCodes as OpCodes
55 -- | Options list and functions
81 {- | Start computing the solution at the given depth and recurse until
82 we find a valid solution or we exceed the maximum depth.
-}
85 iterateDepth :: Cluster.Table -- ^ The starting table
86 -> Int -- ^ Round limit, handed unchanged to 'Cluster.tryBalance' and to the recursive call
87 -> Bool -- ^ Allow disk moves
88 -> Int -- ^ Max node name len
89 -> Int -- ^ Max instance name len
90 -> [MoveJob] -- ^ Command list accumulated so far (new moves are prepended)
91 -> Bool -- ^ Whether to be silent
92 -> Score -- ^ Score at which to stop
93 -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table and the accumulated command list
95 iterateDepth ini_tbl max_rounds disk_moves nmlen imlen
96 cmd_strs oneline min_score =
97 let Cluster.Table ini_nl ini_il _ _ = ini_tbl
-- One balancing attempt on the current table; the Nothing alternative at
-- the end of this function is what stops the recursion.
98 m_fin_tbl = Cluster.tryBalance ini_tbl max_rounds disk_moves min_score
104 (Cluster.Table _ _ _ fin_plc) = fin_tbl
105 fin_plc_len = length fin_plc
-- The placement being reported is the head of the placement list.
-- NOTE(review): 'head' is partial; presumably a table returned by
-- tryBalance always has at least one placement - confirm.
106 cur_plc@(idx, _, _, move, _) = head fin_plc
107 (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
108 nmlen imlen cur_plc fin_plc_len
109 afn = Cluster.involvedNodes ini_il cur_plc
-- Prepending means the command list is built newest-first.
110 upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
114 iterateDepth fin_tbl max_rounds disk_moves
115 nmlen imlen upd_cmd_strs oneline min_score
-- tryBalance produced no new table: hand back the table and the command
-- list exactly as they were received.
116 Nothing -> return (ini_tbl, cmd_strs)
-- | Renders the one-line summary: initial score, number of placements,
-- final score and the ratio of initial to final score.
formatOneline :: Double -> Int -> Double -> String
formatOneline ini_cv plc_len fin_cv =
  printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv ratio
  where
    -- When the final score is exactly zero the ratio is reported as 1
    -- instead of dividing by zero.
    ratio :: Double
    ratio
      | fin_cv == 0 = 1
      | otherwise   = ini_cv / fin_cv
-- | Submits a list of jobs (each job being a list of opcodes) through the
-- given LUXI client. The jobs are JSON-encoded with 'showJSON' and passed
-- to 'L.submitManyJobs', whose result is returned unchanged. This function
-- only forwards the submission; polling for completion is done separately
-- by 'waitForJobs'.
execJobs :: L.Client -> [[OpCodes.OpCode]] -> IO (Result [String])
execJobs client jobs = L.submitManyJobs client (showJSON jobs)
128 -- | Polls the status of a set of jobs, sleeping a fixed interval between
129 -- polls for as long as any job status is still at or below 'JobRunning'.
-- A 'Bad' answer from the status query is returned to the caller as-is.
130 waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
131 waitForJobs client jids = do
132 sts <- L.queryJobsStatus client jids
134 Bad x -> return $ Bad x
-- Any status that compares <= JobRunning counts as "not finished yet".
135 Ok s -> if any (<= JobRunning) s
137 -- TODO: replace hardcoded value with a better thing
-- threadDelay takes microseconds, so this sleeps for 15 seconds before
-- querying the same job IDs again.
138 threadDelay (1000000 * 15)
139 waitForJobs client jids
-- | Tells whether every status in the list equals 'JobSuccess'.
-- An empty list counts as success.
checkJobsStatus :: [JobStatus] -> Bool
checkJobsStatus statuses = and [st == JobSuccess | st <- statuses]
146 -- | Executes a list of job sets one after another, in list order.
-- For each job set the moves are converted to opcode jobs with
-- 'Cluster.iMoveToJob' and submitted through a LUXI client obtained from
-- the @master@ argument; the following job set is started only when
-- 'checkJobsStatus' accepts the statuses of the current one. Failures are
-- reported on stderr.
147 execJobSet :: String -> String -> Node.List
148 -> Instance.List -> [JobSet] -> IO ()
-- No job sets left: nothing to do.
149 execJobSet _ _ _ _ [] = return ()
150 execJobSet master csf nl il (js:jss) = do
151 -- map from jobset (htools list of positions) to [[opcodes]]
152 let jobs = map (\(_, idx, move, _) ->
153 Cluster.iMoveToJob csf nl il idx move) js
-- Instance names of this job set, used only for the progress message.
154 let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
155 putStrLn $ "Executing jobset for instances " ++ commaJoin descr
-- bracket pairs L.getClient with L.closeClient, so the client is closed
-- even if the enclosed action throws.
156 jrs <- bracket (L.getClient master) L.closeClient
158 jids <- execJobs client jobs
160 Bad x -> return $ Bad x
162 putStrLn $ "Got job IDs " ++ commaJoin x
167 hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
-- Continue with the remaining job sets only if every job succeeded.
169 Ok x -> if checkJobsStatus x
170 then execJobSet master csf nl il jss
172 hPutStrLn stderr $ "Not all jobs completed successfully: " ++
174 hPutStrLn stderr "Aborting.")
-- Parse the command line. The program accepts options only: any leftover
-- positional argument is reported on stderr and ends the run with exit
-- code 1.
179 cmd_args <- System.getArgs
180 (opts, args) <- parseOpts cmd_args "hbal" options
182 unless (null args) $ do
183 hPutStrLn stderr "Error: this program doesn't take any arguments."
184 exitWith $ ExitFailure 1
186 let oneline = optOneline opts
187 verbose = optVerbose opts
188 shownodes = optShowNodes opts
-- Load the node list, instance list, cluster tags and common name suffix.
190 (fixed_nl, il, ctags, csf) <- loadExternalData opts
-- Check the node names given as offline against the loaded node list;
-- names that match no node are collected in offline_wrong.
192 let offline_names = optOffline opts
193 all_nodes = Container.elems fixed_nl
194 all_names = map Node.name all_nodes
195 offline_wrong = filter (flip notElem all_names) offline_names
196 offline_indices = map Node.idx $
197 filter (\n -> elem (Node.name n) offline_names)
-- Unknown offline node names are fatal (exit code 1).
202 when (length offline_wrong > 0) $ do
203 hPrintf stderr "Wrong node name(s) set as offline: %s\n"
204 (commaJoin offline_wrong)
205 exitWith $ ExitFailure 1
-- Mark the requested nodes offline (nm), then apply the m_dsk / m_cpu
-- values to nodes through Node.setMdsk / Node.setMcpu (nl).
207 let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
208 then Node.setOffline n True
210 nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
213 when (not oneline && verbose > 1) $
214 putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
-- Empty instance list: oneline mode prints an all-zero summary, otherwise
-- a plain message is printed.
216 when (Container.size il == 0) $ do
217 (if oneline then putStrLn $ formatOneline 0 0 0
218 else printf "Cluster is empty, exiting.\n")
221 unless oneline $ printf "Loaded %d nodes, %d instances\n"
225 when (length csf > 0 && not oneline && verbose > 1) $
226 printf "Note: Stripping common suffix of '%s' from names\n" csf
-- Initial check via Cluster.computeBadItems. Bad nodes only trigger the
-- warning below; the run continues.
228 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
229 unless (oneline || verbose == 0) $ printf
230 "Initial check done: %d bad nodes, %d bad instances.\n"
231 (length bad_nodes) (length bad_instances)
233 when (length bad_nodes > 0) $
234 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
235 \that the cluster will end N+1 happy."
237 when (optShowInsts opts) $ do
239 putStrLn "Initial instance map:"
240 putStrLn $ Cluster.printInsts nl il
242 when (isJust shownodes) $
244 putStrLn "Initial cluster status:"
245 putStrLn $ Cluster.printNodes nl (fromJust shownodes)
-- Initial score and the starting table, which carries an empty placement
-- list.
247 let ini_cv = Cluster.compCV nl
248 ini_tbl = Cluster.Table nl il ini_cv []
249 min_cv = optMinScore opts
-- An initial score already below the minimum-score option is reported as
-- "already well balanced".
251 when (ini_cv < min_cv) $ do
253 putStrLn $ formatOneline ini_cv 0 ini_cv
254 else printf "Cluster is already well balanced (initial score %.6g,\n\
255 \minimum score %.6g).\nNothing to do, exiting\n"
259 unless oneline (if verbose > 2 then
260 printf "Initial coefficients: overall %.8f, %s\n"
261 ini_cv (Cluster.printStats nl)
263 printf "Initial score: %.8f\n" ini_cv)
265 unless oneline $ putStrLn "Trying to minimize the CV..."
266 let imlen = Container.maxNameLen il
267 nmlen = Container.maxNameLen nl
-- Run the balancing loop, starting with an empty command list.
269 (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
271 nmlen imlen [] oneline min_cv
-- ord_plc is the final placement list reversed; presumably fin_plc is
-- stored newest-first (iterateDepth reads the latest move with head) -
-- confirm.
272 let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
273 ord_plc = reverse fin_plc
274 sol_msg = if null fin_plc
275 then printf "No solution found\n"
277 then printf "Final coefficients: overall %.8f, %s\n"
278 fin_cv (Cluster.printStats fin_nl)
279 else printf "Cluster score improved from %.8f to %.8f\n"
283 unless oneline $ putStr sol_msg
285 unless (oneline || verbose == 0) $
286 printf "Solution length=%d\n" (length ord_plc)
-- cmd_jobs is what gets executed through execJobSet below; cmd_data is
-- the text written after shTemplate when saving the commands to a file.
288 let cmd_jobs = Cluster.splitJobs cmd_strs
289 cmd_data = Cluster.formatCmds cmd_jobs
-- An output path of "-" prints the commands; any other value is used as
-- the name of the file to write.
291 when (isJust $ optShowCmds opts) $
293 let out_path = fromJust $ optShowCmds opts
295 (if out_path == "-" then
296 printf "Commands to run to reach the above solution:\n%s"
297 (unlines . map (" " ++) .
298 filter (/= " check") .
301 writeFile out_path (shTemplate ++ cmd_data)
302 printf "The commands have been written to file '%s'\n" out_path)
-- Job execution is attempted only when requested and when there is at
-- least one placement; it needs a LUXI master from optLuxi, otherwise the
-- program exits with code 1.
304 when (optExecJobs opts && not (null ord_plc))
305 (case optLuxi opts of
307 hPutStrLn stderr "Execution of commands possible only on LUXI"
308 exitWith $ ExitFailure 1
309 Just master -> execJobSet master csf fin_nl il cmd_jobs)
311 when (optShowInsts opts) $ do
313 putStrLn "Final instance map:"
314 putStr $ Cluster.printInsts fin_nl fin_il
-- Final node status, followed by the csFmem / csFdsk totals computed from
-- the initial and from the final node list.
316 when (isJust shownodes) $
318 let ini_cs = Cluster.totalResources nl
319 fin_cs = Cluster.totalResources fin_nl
321 putStrLn "Final cluster status:"
322 putStrLn $ Cluster.printNodes fin_nl (fromJust shownodes)
325 printf "Original: mem=%d disk=%d\n"
326 (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs)
327 printf "Final: mem=%d disk=%d\n"
328 (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
330 putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv