1 {-| Cluster rebalancer.
7 Copyright (C) 2009, 2010, 2011 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Ganeti.HTools.Program.Hbal (main) where
28 import Control.Concurrent (threadDelay)
29 import Control.Exception (bracket)
32 import Data.Maybe (isJust, isNothing, fromJust)
34 import System (exitWith, ExitCode(..))
36 import System.Posix.Process
37 import System.Posix.Signals
38 import qualified System
40 import Text.Printf (printf, hPrintf)
42 import qualified Ganeti.HTools.Container as Container
43 import qualified Ganeti.HTools.Cluster as Cluster
44 import qualified Ganeti.HTools.Group as Group
45 import qualified Ganeti.HTools.Node as Node
46 import qualified Ganeti.HTools.Instance as Instance
48 import Ganeti.HTools.CLI
49 import Ganeti.HTools.ExtLoader
50 import Ganeti.HTools.Utils
51 import Ganeti.HTools.Types
52 import Ganeti.HTools.Loader
54 import qualified Ganeti.Luxi as L
57 -- | Options list and functions.
90 {- | Start computing the solution at the given depth and recurse until
91 we find a valid solution or we exceed the maximum depth.
94 iterateDepth :: Cluster.Table -- ^ The starting table
95 -> Int -- ^ Remaining length
96 -> Bool -- ^ Allow disk moves
97 -> Bool -- ^ Allow instance moves
98 -> Int -- ^ Max node name len
99 -> Int -- ^ Max instance name len
100 -> [MoveJob] -- ^ Current command list
101 -> Bool -- ^ Whether to be silent
102 -> Score -- ^ Score at which to stop
103 -> Score -- ^ Min gain limit
104 -> Score -- ^ Min score gain
105 -> Bool -- ^ Enable evacuation mode
106 -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
108 iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
109 cmd_strs oneline min_score mg_limit min_gain evac_mode =
110 let Cluster.Table ini_nl ini_il _ _ = ini_tbl
111 allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
112 m_fin_tbl = if allowed_next
113 then Cluster.tryBalance ini_tbl disk_moves inst_moves
114 evac_mode mg_limit min_gain
121 (Cluster.Table _ _ _ fin_plc) = fin_tbl
122 fin_plc_len = length fin_plc
123 cur_plc@(idx, _, _, move, _) = head fin_plc
124 (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
125 nmlen imlen cur_plc fin_plc_len
126 afn = Cluster.involvedNodes ini_il cur_plc
127 upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
131 iterateDepth fin_tbl max_rounds disk_moves inst_moves
132 nmlen imlen upd_cmd_strs oneline min_score
133 mg_limit min_gain evac_mode
134 Nothing -> return (ini_tbl, cmd_strs)
136 -- | Formats the solution for the oneline display.
137 formatOneline :: Double -> Int -> Double -> String
138 formatOneline ini_cv plc_len fin_cv =
139 printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
140 (if fin_cv == 0 then 1 else ini_cv / fin_cv)
142 -- | Polls a set of jobs at a fixed interval until all are finished
143 -- one way or another.
144 waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
145 waitForJobs client jids = do
146 sts <- L.queryJobsStatus client jids
148 Bad x -> return $ Bad x
149 Ok s -> if any (<= JOB_STATUS_RUNNING) s
151 -- TODO: replace hardcoded value with a better thing
152 threadDelay (1000000 * 15)
153 waitForJobs client jids
156 -- | Check that a set of job statuses is all success.
157 checkJobsStatus :: [JobStatus] -> Bool
158 checkJobsStatus = all (== JOB_STATUS_SUCCESS)
160 -- | Wrapper over execJobSet checking for early termination.
161 execWrapper :: String -> Node.List
162 -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
163 execWrapper _ _ _ _ [] = return True
164 execWrapper master nl il cref alljss = do
165 cancel <- readIORef cref
168 hPrintf stderr "Exiting early due to user request, %d\
169 \ jobset(s) remaining." (length alljss)::IO ()
171 else execJobSet master nl il cref alljss)
173 -- | Execute an entire jobset.
174 execJobSet :: String -> Node.List
175 -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
176 execJobSet _ _ _ _ [] = return True
177 execJobSet master nl il cref (js:jss) = do
178 -- map from jobset (htools list of positions) to [[opcodes]]
179 let jobs = map (\(_, idx, move, _) ->
180 Cluster.iMoveToJob nl il idx move) js
181 let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
182 putStrLn $ "Executing jobset for instances " ++ commaJoin descr
183 jrs <- bracket (L.getClient master) L.closeClient
185 jids <- L.submitManyJobs client jobs
187 Bad x -> return $ Bad x
189 putStrLn $ "Got job IDs " ++ commaJoin x
194 hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
196 Ok x -> if checkJobsStatus x
197 then execWrapper master nl il cref jss
199 hPutStrLn stderr $ "Not all jobs completed successfully: " ++
201 hPutStrLn stderr "Aborting."
204 -- | Signal handler for graceful termination.
205 hangleSigInt :: IORef Int -> IO ()
206 hangleSigInt cref = do
208 putStrLn ("Cancel request registered, will exit at" ++
209 " the end of the current job set...")
211 -- | Signal handler for immediate termination.
212 hangleSigTerm :: IORef Int -> IO ()
213 hangleSigTerm cref = do
214 -- update the cref to 2, just for consistency
216 putStrLn "Double cancel request, exiting now..."
217 exitImmediately $ ExitFailure 2
219 -- | Runs a job set with handling of signals.
220 runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
221 runJobSet master fin_nl il cmd_jobs = do
223 mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
224 [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
225 execWrapper master fin_nl il cref cmd_jobs
230 cmd_args <- System.getArgs
231 (opts, args) <- parseOpts cmd_args "hbal" options
233 unless (null args) $ do
234 hPutStrLn stderr "Error: this program doesn't take any arguments."
235 exitWith $ ExitFailure 1
237 let oneline = optOneline opts
238 verbose = optVerbose opts
239 shownodes = optShowNodes opts
240 showinsts = optShowInsts opts
242 ini_cdata@(ClusterData gl fixed_nl ilf ctags) <- loadExternalData opts
244 let offline_passed = optOffline opts
245 all_nodes = Container.elems fixed_nl
246 offline_lkp = map (lookupName (map Node.name all_nodes)) offline_passed
247 offline_wrong = filter (not . goodLookupResult) offline_lkp
248 offline_names = map lrContent offline_lkp
249 offline_indices = map Node.idx $
250 filter (\n -> Node.name n `elem` offline_names)
254 csf = commonSuffix fixed_nl ilf
256 when (not (null offline_wrong)) $ do
257 hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
258 (commaJoin (map lrContent offline_wrong)) :: IO ()
259 exitWith $ ExitFailure 1
261 let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
262 then Node.setOffline n True
264 nlf = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
267 when (not oneline && verbose > 1) $
268 putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
270 when (Container.size ilf == 0) $ do
271 (if oneline then putStrLn $ formatOneline 0 0 0
272 else printf "Cluster is empty, exiting.\n")
275 let split_insts = Cluster.findSplitInstances nlf ilf
276 unless (null split_insts) $ do
277 hPutStrLn stderr "Found instances belonging to multiple node groups:"
278 mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts
279 hPutStrLn stderr "Aborting."
280 exitWith $ ExitFailure 1
282 let ngroups = Cluster.splitCluster nlf ilf
283 when (length ngroups > 1 && isNothing (optGroup opts)) $ do
284 hPutStrLn stderr "Found multiple node groups:"
285 mapM_ (hPutStrLn stderr . (" " ++) . Group.name .
286 flip Container.find gl . fst) ngroups
287 hPutStrLn stderr "Aborting."
288 exitWith $ ExitFailure 1
290 maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
292 unless oneline $ printf "Loaded %d nodes, %d instances\n"
296 (gname, (nl, il)) <- case optGroup opts of
298 let (gidx, cdata) = head ngroups
299 grp = Container.find gidx gl
300 return (Group.name grp, cdata)
301 Just g -> case Container.findByName gl g of
303 hPutStrLn stderr $ "Node group " ++ g ++
304 " not found. Node group list is:"
305 mapM_ (hPutStrLn stderr . (" " ++) . Group.name ) (Container.elems gl)
306 hPutStrLn stderr "Aborting."
307 exitWith $ ExitFailure 1
309 case lookup (Group.idx grp) ngroups of
311 -- TODO: while this is unlikely to happen, log here the
312 -- actual group data to help debugging
313 hPutStrLn stderr "Internal failure, missing group idx"
314 exitWith $ ExitFailure 1
315 Just cdata -> return (Group.name grp, cdata)
317 unless oneline $ printf "Group size %d nodes, %d instances\n"
321 putStrLn $ "Selected node group: " ++ gname
323 when (length csf > 0 && not oneline && verbose > 1) $
324 printf "Note: Stripping common suffix of '%s' from names\n" csf
326 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
327 unless (oneline || verbose == 0) $ printf
328 "Initial check done: %d bad nodes, %d bad instances.\n"
329 (length bad_nodes) (length bad_instances)
331 when (length bad_nodes > 0) $
332 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
333 \that the cluster will end N+1 happy."
335 maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
337 maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
339 let ini_cv = Cluster.compCV nl
340 ini_tbl = Cluster.Table nl il ini_cv []
341 min_cv = optMinScore opts
343 when (ini_cv < min_cv) $ do
345 putStrLn $ formatOneline ini_cv 0 ini_cv
346 else printf "Cluster is already well balanced (initial score %.6g,\n\
347 \minimum score %.6g).\nNothing to do, exiting\n"
351 unless oneline (if verbose > 2 then
352 printf "Initial coefficients: overall %.8f, %s\n"
353 ini_cv (Cluster.printStats nl)
355 printf "Initial score: %.8f\n" ini_cv)
357 unless oneline $ putStrLn "Trying to minimize the CV..."
358 let imlen = maximum . map (length . Instance.alias) $ Container.elems il
359 nmlen = maximum . map (length . Node.alias) $ Container.elems nl
361 (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
364 nmlen imlen [] oneline min_cv
365 (optMinGainLim opts) (optMinGain opts)
367 let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
368 ord_plc = reverse fin_plc
370 _ | null fin_plc -> printf "No solution found\n"
372 printf "Final coefficients: overall %.8f, %s\n"
373 fin_cv (Cluster.printStats fin_nl)
375 printf "Cluster score improved from %.8f to %.8f\n"
376 ini_cv fin_cv ::String
378 unless oneline $ putStr sol_msg
380 unless (oneline || verbose == 0) $
381 printf "Solution length=%d\n" (length ord_plc)
383 let cmd_jobs = Cluster.splitJobs cmd_strs
384 cmd_data = Cluster.formatCmds cmd_jobs
386 when (isJust $ optShowCmds opts) $
388 let out_path = fromJust $ optShowCmds opts
390 (if out_path == "-" then
391 printf "Commands to run to reach the above solution:\n%s"
392 (unlines . map (" " ++) .
393 filter (/= " check") .
396 writeFile out_path (shTemplate ++ cmd_data)
397 printf "The commands have been written to file '%s'\n" out_path)
399 maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
400 (ClusterData gl fin_nl fin_il ctags)
402 maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
404 maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
406 when (verbose > 3) $ do
407 let ini_cs = Cluster.totalResources nl
408 fin_cs = Cluster.totalResources fin_nl
409 printf "Original: mem=%d disk=%d\n"
410 (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
411 printf "Final: mem=%d disk=%d\n"
412 (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
414 putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv
417 if optExecJobs opts && not (null ord_plc)
418 then (case optLuxi opts of
420 hPutStrLn stderr "Execution of commands possible only on LUXI"
422 Just master -> runJobSet master fin_nl il cmd_jobs)
424 unless eval (exitWith (ExitFailure 1))