1 {-| Cluster rebalancer.
7 Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Ganeti.HTools.Program.Hbal (main, options) where
28 import Control.Concurrent (threadDelay)
29 import Control.Exception (bracket)
32 import Data.Maybe (isJust, isNothing, fromJust)
36 import System.Posix.Process
37 import System.Posix.Signals
39 import Text.Printf (printf, hPrintf)
41 import qualified Ganeti.HTools.Container as Container
42 import qualified Ganeti.HTools.Cluster as Cluster
43 import qualified Ganeti.HTools.Group as Group
44 import qualified Ganeti.HTools.Node as Node
45 import qualified Ganeti.HTools.Instance as Instance
47 import Ganeti.HTools.CLI
48 import Ganeti.HTools.ExtLoader
49 import Ganeti.HTools.Utils
50 import Ganeti.HTools.Types
51 import Ganeti.HTools.Loader
53 import qualified Ganeti.Luxi as L
56 -- | Options list and functions.
89 {- | Start computing the solution at the given depth and recurse until
90 we find a valid solution or we exceed the maximum depth.
93 iterateDepth :: Cluster.Table -- ^ The starting table
94 -> Int -- ^ Remaining length
95 -> Bool -- ^ Allow disk moves
96 -> Bool -- ^ Allow instance moves
97 -> Int -- ^ Max node name len
98 -> Int -- ^ Max instance name len
99 -> [MoveJob] -- ^ Current command list
100 -> Score -- ^ Score at which to stop
101 -> Score -- ^ Min gain limit
102 -> Score -- ^ Min score gain
103 -> Bool -- ^ Enable evacuation mode
104 -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
106 iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
107 cmd_strs min_score mg_limit min_gain evac_mode =
108 let Cluster.Table ini_nl ini_il _ _ = ini_tbl
109 allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
110 m_fin_tbl = if allowed_next
111 then Cluster.tryBalance ini_tbl disk_moves inst_moves
112 evac_mode mg_limit min_gain
117 let (Cluster.Table _ _ _ fin_plc) = fin_tbl
118 fin_plc_len = length fin_plc
119 cur_plc@(idx, _, _, move, _) = head fin_plc
120 (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
121 nmlen imlen cur_plc fin_plc_len
122 afn = Cluster.involvedNodes ini_il cur_plc
123 upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
126 iterateDepth fin_tbl max_rounds disk_moves inst_moves
127 nmlen imlen upd_cmd_strs min_score
128 mg_limit min_gain evac_mode
129 Nothing -> return (ini_tbl, cmd_strs)
131 -- | Displays the cluster stats.
132 printStats :: Node.List -> Node.List -> IO ()
133 printStats ini_nl fin_nl = do
134 let ini_cs = Cluster.totalResources ini_nl
135 fin_cs = Cluster.totalResources fin_nl
136 printf "Original: mem=%d disk=%d\n"
137 (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
138 printf "Final: mem=%d disk=%d\n"
139 (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
141 -- | Saves the rebalance commands to a text file.
142 saveBalanceCommands :: Options -> String -> IO ()
143 saveBalanceCommands opts cmd_data = do
144 let out_path = fromJust $ optShowCmds opts
147 then printf "Commands to run to reach the above solution:\n%s"
148 (unlines . map (" " ++) .
149 filter (/= " check") .
152 writeFile out_path (shTemplate ++ cmd_data)
153 printf "The commands have been written to file '%s'\n" out_path
155 -- | Polls a set of jobs at a fixed interval until all are finished
156 -- one way or another.
157 waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
158 waitForJobs client jids = do
159 sts <- L.queryJobsStatus client jids
161 Bad x -> return $ Bad x
162 Ok s -> if any (<= JOB_STATUS_RUNNING) s
164 -- TODO: replace hardcoded value with a better thing
165 threadDelay (1000000 * 15)
166 waitForJobs client jids
169 -- | Check that a set of job statuses is all success.
170 checkJobsStatus :: [JobStatus] -> Bool
171 checkJobsStatus = all (== JOB_STATUS_SUCCESS)
173 -- | Wrapper over execJobSet checking for early termination.
174 execWrapper :: String -> Node.List
175 -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
176 execWrapper _ _ _ _ [] = return True
177 execWrapper master nl il cref alljss = do
178 cancel <- readIORef cref
181 hPrintf stderr "Exiting early due to user request, %d\
182 \ jobset(s) remaining." (length alljss)::IO ()
184 else execJobSet master nl il cref alljss
186 -- | Execute an entire jobset.
187 execJobSet :: String -> Node.List
188 -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
189 execJobSet _ _ _ _ [] = return True
190 execJobSet master nl il cref (js:jss) = do
191 -- map from jobset (htools list of positions) to [[opcodes]]
192 let jobs = map (\(_, idx, move, _) ->
193 Cluster.iMoveToJob nl il idx move) js
194 let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
195 putStrLn $ "Executing jobset for instances " ++ commaJoin descr
196 jrs <- bracket (L.getClient master) L.closeClient
198 jids <- L.submitManyJobs client jobs
200 Bad x -> return $ Bad x
202 putStrLn $ "Got job IDs " ++ commaJoin x
207 hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
209 Ok x -> if checkJobsStatus x
210 then execWrapper master nl il cref jss
212 hPutStrLn stderr $ "Not all jobs completed successfully: " ++
214 hPutStrLn stderr "Aborting."
217 -- | Executes the jobs, if possible and desired.
218 maybeExecJobs :: Options
224 maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
225 if optExecJobs opts && not (null ord_plc)
226 then (case optLuxi opts of
228 hPutStrLn stderr "Execution of commands possible only on LUXI"
230 Just master -> runJobSet master fin_nl il cmd_jobs)
233 -- | Signal handler for graceful termination.
234 hangleSigInt :: IORef Int -> IO ()
235 hangleSigInt cref = do
237 putStrLn ("Cancel request registered, will exit at" ++
238 " the end of the current job set...")
240 -- | Signal handler for immediate termination.
241 hangleSigTerm :: IORef Int -> IO ()
242 hangleSigTerm cref = do
243 -- update the cref to 2, just for consistency
245 putStrLn "Double cancel request, exiting now..."
246 exitImmediately $ ExitFailure 2
248 -- | Runs a job set with handling of signals.
249 runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
250 runJobSet master fin_nl il cmd_jobs = do
252 mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
253 [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
254 execWrapper master fin_nl il cref cmd_jobs
256 -- | Select the target node group.
257 selectGroup :: Options -> Group.List -> Node.List -> Instance.List
258 -> IO (String, (Node.List, Instance.List))
259 selectGroup opts gl nlf ilf = do
260 let ngroups = Cluster.splitCluster nlf ilf
261 when (length ngroups > 1 && isNothing (optGroup opts)) $ do
262 hPutStrLn stderr "Found multiple node groups:"
263 mapM_ (hPutStrLn stderr . (" " ++) . Group.name .
264 flip Container.find gl . fst) ngroups
265 hPutStrLn stderr "Aborting."
266 exitWith $ ExitFailure 1
268 case optGroup opts of
270 let (gidx, cdata) = head ngroups
271 grp = Container.find gidx gl
272 return (Group.name grp, cdata)
273 Just g -> case Container.findByName gl g of
275 hPutStrLn stderr $ "Node group " ++ g ++
276 " not found. Node group list is:"
277 mapM_ (hPutStrLn stderr . (" " ++) . Group.name ) (Container.elems gl)
278 hPutStrLn stderr "Aborting."
279 exitWith $ ExitFailure 1
281 case lookup (Group.idx grp) ngroups of
283 -- This will only happen if there are no nodes assigned
285 return (Group.name grp, (Container.empty, Container.empty))
286 Just cdata -> return (Group.name grp, cdata)
288 -- | Do a few checks on the cluster data.
289 checkCluster :: Int -> Node.List -> Instance.List -> IO ()
290 checkCluster verbose nl il = do
291 -- nothing to do on an empty cluster
292 when (Container.null il) $ do
293 printf "Cluster is empty, exiting.\n"::IO ()
296 -- hbal doesn't currently handle split clusters
297 let split_insts = Cluster.findSplitInstances nl il
298 unless (null split_insts) $ do
299 hPutStrLn stderr "Found instances belonging to multiple node groups:"
300 mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts
301 hPutStrLn stderr "Aborting."
302 exitWith $ ExitFailure 1
304 printf "Loaded %d nodes, %d instances\n"
306 (Container.size il)::IO ()
308 let csf = commonSuffix nl il
309 when (not (null csf) && verbose > 1) $
310 printf "Note: Stripping common suffix of '%s' from names\n" csf
312 -- | Do a few checks on the selected group data.
313 checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
314 checkGroup verbose gname nl il = do
315 printf "Group size %d nodes, %d instances\n"
317 (Container.size il)::IO ()
319 putStrLn $ "Selected node group: " ++ gname
321 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
322 unless (verbose == 0) $ printf
323 "Initial check done: %d bad nodes, %d bad instances.\n"
324 (length bad_nodes) (length bad_instances)
326 when (length bad_nodes > 0) $
327 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
328 \that the cluster will end N+1 happy."
330 -- | Check that we actually need to rebalance.
331 checkNeedRebalance :: Options -> Score -> IO ()
332 checkNeedRebalance opts ini_cv = do
333 let min_cv = optMinScore opts
334 when (ini_cv < min_cv) $ do
335 printf "Cluster is already well balanced (initial score %.6g,\n\
336 \minimum score %.6g).\nNothing to do, exiting\n"
337 ini_cv min_cv:: IO ()
341 main :: Options -> [String] -> IO ()
343 unless (null args) $ do
344 hPutStrLn stderr "Error: this program doesn't take any arguments."
345 exitWith $ ExitFailure 1
347 let verbose = optVerbose opts
348 shownodes = optShowNodes opts
349 showinsts = optShowInsts opts
351 ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
353 when (verbose > 1) $ do
354 putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
355 putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
357 nlf <- setNodeStatus opts fixed_nl
358 checkCluster verbose nlf ilf
360 maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
362 (gname, (nl, il)) <- selectGroup opts gl nlf ilf
364 checkGroup verbose gname nl il
366 maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
368 maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
370 let ini_cv = Cluster.compCV nl
371 ini_tbl = Cluster.Table nl il ini_cv []
372 min_cv = optMinScore opts
374 checkNeedRebalance opts ini_cv
377 then printf "Initial coefficients: overall %.8f\n%s"
378 ini_cv (Cluster.printStats " " nl)::IO ()
379 else printf "Initial score: %.8f\n" ini_cv
381 putStrLn "Trying to minimize the CV..."
382 let imlen = maximum . map (length . Instance.alias) $ Container.elems il
383 nmlen = maximum . map (length . Node.alias) $ Container.elems nl
385 (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
388 nmlen imlen [] min_cv
389 (optMinGainLim opts) (optMinGain opts)
391 let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
392 ord_plc = reverse fin_plc
394 _ | null fin_plc -> printf "No solution found\n"
396 printf "Final coefficients: overall %.8f\n%s"
397 fin_cv (Cluster.printStats " " fin_nl)
399 printf "Cluster score improved from %.8f to %.8f\n"
400 ini_cv fin_cv ::String
404 unless (verbose == 0) $
405 printf "Solution length=%d\n" (length ord_plc)
407 let cmd_jobs = Cluster.splitJobs cmd_strs
409 when (isJust $ optShowCmds opts) $
410 saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
412 maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
413 ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
415 maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
417 maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
419 when (verbose > 3) $ printStats nl fin_nl
421 eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
422 unless eval (exitWith (ExitFailure 1))