7 Copyright (C) 2009 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Main (main) where
28 import Control.Concurrent (threadDelay)
29 import Control.Exception (bracket)
31 import Data.Maybe (isJust, fromJust)
33 import System (exitWith, ExitCode(..))
35 import qualified System
37 import Text.Printf (printf, hPrintf)
38 import Text.JSON (showJSON)
40 import qualified Ganeti.HTools.Container as Container
41 import qualified Ganeti.HTools.Cluster as Cluster
42 import qualified Ganeti.HTools.Node as Node
43 import qualified Ganeti.HTools.Instance as Instance
45 import Ganeti.HTools.CLI
46 import Ganeti.HTools.ExtLoader
47 import Ganeti.HTools.Utils
48 import Ganeti.HTools.Types
50 import qualified Ganeti.Luxi as L
51 import qualified Ganeti.OpCodes as OpCodes
54 -- | Options list and functions
81 {- | Start computing the solution at the given depth and recurse until
82 we find a valid solution or we exceed the maximum depth.
85 iterateDepth :: Cluster.Table -- ^ The starting table
86 -> Int -- ^ Remaining length
87 -> Bool -- ^ Allow disk moves
88 -> Int -- ^ Max node name len
89 -> Int -- ^ Max instance name len
90 -> [MoveJob] -- ^ Current command list
91 -> Bool -- ^ Whether to be silent
92 -> Score -- ^ Score at which to stop
93 -> Bool -- ^ Enable evacuation mode
94 -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
96 iterateDepth ini_tbl max_rounds disk_moves nmlen imlen
97 cmd_strs oneline min_score evac_mode =
98 let Cluster.Table ini_nl ini_il _ _ = ini_tbl
99 allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
100 m_fin_tbl = if allowed_next
101 then Cluster.tryBalance ini_tbl disk_moves evac_mode
108 (Cluster.Table _ _ _ fin_plc) = fin_tbl
109 fin_plc_len = length fin_plc
110 cur_plc@(idx, _, _, move, _) = head fin_plc
111 (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
112 nmlen imlen cur_plc fin_plc_len
113 afn = Cluster.involvedNodes ini_il cur_plc
114 upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
118 iterateDepth fin_tbl max_rounds disk_moves
119 nmlen imlen upd_cmd_strs oneline min_score
121 Nothing -> return (ini_tbl, cmd_strs)
123 -- | Formats the solution for the oneline display
124 formatOneline :: Double -> Int -> Double -> String
125 formatOneline ini_cv plc_len fin_cv =
126 printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
127 (if fin_cv == 0 then 1 else ini_cv / fin_cv)
129 -- | Submits a list of jobs and waits for all to finish execution
130 execJobs :: L.Client -> [[OpCodes.OpCode]] -> IO (Result [String])
131 execJobs client = L.submitManyJobs client . showJSON
133 -- | Polls a set of jobs at a fixed interval until all are finished
134 -- one way or another
135 waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
136 waitForJobs client jids = do
137 sts <- L.queryJobsStatus client jids
139 Bad x -> return $ Bad x
140 Ok s -> if any (<= JobRunning) s
142 -- TODO: replace hardcoded value with a better thing
143 threadDelay (1000000 * 15)
144 waitForJobs client jids
147 -- | Check that a set of job statuses is all success
148 checkJobsStatus :: [JobStatus] -> Bool
149 checkJobsStatus = all (== JobSuccess)
151 -- | Execute an entire jobset
152 execJobSet :: String -> String -> Node.List
153 -> Instance.List -> [JobSet] -> IO ()
154 execJobSet _ _ _ _ [] = return ()
155 execJobSet master csf nl il (js:jss) = do
156 -- map from jobset (htools list of positions) to [[opcodes]]
157 let jobs = map (\(_, idx, move, _) ->
158 Cluster.iMoveToJob csf nl il idx move) js
159 let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
160 putStrLn $ "Executing jobset for instances " ++ commaJoin descr
161 jrs <- bracket (L.getClient master) L.closeClient
163 jids <- execJobs client jobs
165 Bad x -> return $ Bad x
167 putStrLn $ "Got job IDs " ++ commaJoin x
172 hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
174 Ok x -> if checkJobsStatus x
175 then execJobSet master csf nl il jss
177 hPutStrLn stderr $ "Not all jobs completed successfully: " ++
179 hPutStrLn stderr "Aborting.")
184 cmd_args <- System.getArgs
185 (opts, args) <- parseOpts cmd_args "hbal" options
187 unless (null args) $ do
188 hPutStrLn stderr "Error: this program doesn't take any arguments."
189 exitWith $ ExitFailure 1
191 let oneline = optOneline opts
192 verbose = optVerbose opts
193 shownodes = optShowNodes opts
195 (fixed_nl, il, ctags, csf) <- loadExternalData opts
197 let offline_names = optOffline opts
198 all_nodes = Container.elems fixed_nl
199 all_names = map Node.name all_nodes
200 offline_wrong = filter (`notElem` all_names) offline_names
201 offline_indices = map Node.idx $
202 filter (\n -> Node.name n `elem` offline_names)
207 when (length offline_wrong > 0) $ do
208 hPrintf stderr "Wrong node name(s) set as offline: %s\n"
209 (commaJoin offline_wrong) :: IO ()
210 exitWith $ ExitFailure 1
212 let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
213 then Node.setOffline n True
215 nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
218 when (not oneline && verbose > 1) $
219 putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
221 when (Container.size il == 0) $ do
222 (if oneline then putStrLn $ formatOneline 0 0 0
223 else printf "Cluster is empty, exiting.\n")
226 unless oneline $ printf "Loaded %d nodes, %d instances\n"
230 when (length csf > 0 && not oneline && verbose > 1) $
231 printf "Note: Stripping common suffix of '%s' from names\n" csf
233 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
234 unless (oneline || verbose == 0) $ printf
235 "Initial check done: %d bad nodes, %d bad instances.\n"
236 (length bad_nodes) (length bad_instances)
238 when (length bad_nodes > 0) $
239 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
240 \that the cluster will end N+1 happy."
242 when (optShowInsts opts) $ do
244 putStrLn "Initial instance map:"
245 putStrLn $ Cluster.printInsts nl il
247 when (isJust shownodes) $
249 putStrLn "Initial cluster status:"
250 putStrLn $ Cluster.printNodes nl (fromJust shownodes)
252 let ini_cv = Cluster.compCV nl
253 ini_tbl = Cluster.Table nl il ini_cv []
254 min_cv = optMinScore opts
256 when (ini_cv < min_cv) $ do
258 putStrLn $ formatOneline ini_cv 0 ini_cv
259 else printf "Cluster is already well balanced (initial score %.6g,\n\
260 \minimum score %.6g).\nNothing to do, exiting\n"
264 unless oneline (if verbose > 2 then
265 printf "Initial coefficients: overall %.8f, %s\n"
266 ini_cv (Cluster.printStats nl)
268 printf "Initial score: %.8f\n" ini_cv)
270 unless oneline $ putStrLn "Trying to minimize the CV..."
271 let imlen = Container.maxNameLen il
272 nmlen = Container.maxNameLen nl
274 (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
276 nmlen imlen [] oneline min_cv (optEvacMode opts)
277 let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
278 ord_plc = reverse fin_plc
279 sol_msg = if null fin_plc
280 then printf "No solution found\n"
282 then printf "Final coefficients: overall %.8f, %s\n"
283 fin_cv (Cluster.printStats fin_nl)
284 else printf "Cluster score improved from %.8f to %.8f\n"
288 unless oneline $ putStr sol_msg
290 unless (oneline || verbose == 0) $
291 printf "Solution length=%d\n" (length ord_plc)
293 let cmd_jobs = Cluster.splitJobs cmd_strs
294 cmd_data = Cluster.formatCmds cmd_jobs
296 when (isJust $ optShowCmds opts) $
298 let out_path = fromJust $ optShowCmds opts
300 (if out_path == "-" then
301 printf "Commands to run to reach the above solution:\n%s"
302 (unlines . map (" " ++) .
303 filter (/= " check") .
306 writeFile out_path (shTemplate ++ cmd_data)
307 printf "The commands have been written to file '%s'\n" out_path)
309 when (optExecJobs opts && not (null ord_plc))
310 (case optLuxi opts of
312 hPutStrLn stderr "Execution of commands possible only on LUXI"
313 exitWith $ ExitFailure 1
314 Just master -> execJobSet master csf fin_nl il cmd_jobs)
316 when (optShowInsts opts) $ do
318 putStrLn "Final instance map:"
319 putStr $ Cluster.printInsts fin_nl fin_il
321 when (isJust shownodes) $
323 let ini_cs = Cluster.totalResources nl
324 fin_cs = Cluster.totalResources fin_nl
326 putStrLn "Final cluster status:"
327 putStrLn $ Cluster.printNodes fin_nl (fromJust shownodes)
330 printf "Original: mem=%d disk=%d\n"
331 (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
332 printf "Final: mem=%d disk=%d\n"
333 (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
335 putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv