1 {-| Cluster rebalancer.
7 Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Ganeti.HTools.Program.Hbal
33 import Control.Concurrent (threadDelay)
34 import Control.Exception (bracket)
37 import Data.Maybe (isJust, isNothing, fromJust)
41 import System.Posix.Process
42 import System.Posix.Signals
44 import Text.Printf (printf, hPrintf)
46 import qualified Ganeti.HTools.Container as Container
47 import qualified Ganeti.HTools.Cluster as Cluster
48 import qualified Ganeti.HTools.Group as Group
49 import qualified Ganeti.HTools.Node as Node
50 import qualified Ganeti.HTools.Instance as Instance
52 import Ganeti.BasicTypes
55 import Ganeti.HTools.CLI
56 import Ganeti.HTools.ExtLoader
57 import Ganeti.HTools.Types
58 import Ganeti.HTools.Loader
61 import qualified Ganeti.Luxi as L
64 -- | Options list and functions.
65 options :: IO [OptType]
97 -- | The list of arguments supported by the program.
98 arguments :: [ArgCompletion]
101 {- | Start computing the solution at the given depth and recurse until
102 we find a valid solution or we exceed the maximum depth.
105 iterateDepth :: Bool -- ^ Whether to print moves
106 -> Cluster.Table -- ^ The starting table
107 -> Int -- ^ Remaining length
108 -> Bool -- ^ Allow disk moves
109 -> Bool -- ^ Allow instance moves
110 -> Int -- ^ Max node name len
111 -> Int -- ^ Max instance name len
112 -> [MoveJob] -- ^ Current command list
113 -> Score -- ^ Score at which to stop
114 -> Score -- ^ Min gain limit
115 -> Score -- ^ Min score gain
116 -> Bool -- ^ Enable evacuation mode
117 -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
119 iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
120 cmd_strs min_score mg_limit min_gain evac_mode =
121 let Cluster.Table ini_nl ini_il _ _ = ini_tbl
122 allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
123 m_fin_tbl = if allowed_next
124 then Cluster.tryBalance ini_tbl disk_moves inst_moves
125 evac_mode mg_limit min_gain
130 let (Cluster.Table _ _ _ fin_plc) = fin_tbl
131 fin_plc_len = length fin_plc
132 cur_plc@(idx, _, _, move, _) = head fin_plc
133 (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
134 nmlen imlen cur_plc fin_plc_len
135 afn = Cluster.involvedNodes ini_il cur_plc
136 upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
140 iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
141 nmlen imlen upd_cmd_strs min_score
142 mg_limit min_gain evac_mode
143 Nothing -> return (ini_tbl, cmd_strs)
145 -- | Displays the cluster stats.
146 printStats :: Node.List -> Node.List -> IO ()
147 printStats ini_nl fin_nl = do
148 let ini_cs = Cluster.totalResources ini_nl
149 fin_cs = Cluster.totalResources fin_nl
150 printf "Original: mem=%d disk=%d\n"
151 (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
152 printf "Final: mem=%d disk=%d\n"
153 (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
155 -- | Saves the rebalance commands to a text file.
156 saveBalanceCommands :: Options -> String -> IO ()
157 saveBalanceCommands opts cmd_data = do
158 let out_path = fromJust $ optShowCmds opts
161 then printf "Commands to run to reach the above solution:\n%s"
162 (unlines . map (" " ++) .
163 filter (/= " check") .
166 writeFile out_path (shTemplate ++ cmd_data)
167 printf "The commands have been written to file '%s'\n" out_path
169 -- | Polls a set of jobs at a fixed interval until all are finished
170 -- one way or another.
171 waitForJobs :: L.Client -> [L.JobId] -> IO (Result [JobStatus])
172 waitForJobs client jids = do
173 sts <- L.queryJobsStatus client jids
175 Bad e -> return . Bad $ "Checking job status: " ++ formatError e
176 Ok s -> if any (<= JOB_STATUS_RUNNING) s
178 -- TODO: replace hardcoded value with a better thing
179 threadDelay (1000000 * 15)
180 waitForJobs client jids
183 -- | Check that a set of job statuses is all success.
184 checkJobsStatus :: [JobStatus] -> Bool
185 checkJobsStatus = all (== JOB_STATUS_SUCCESS)
187 -- | Wrapper over execJobSet checking for early termination.
188 execWrapper :: String -> Node.List
189 -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
190 execWrapper _ _ _ _ [] = return True
191 execWrapper master nl il cref alljss = do
192 cancel <- readIORef cref
195 hPrintf stderr "Exiting early due to user request, %d\
196 \ jobset(s) remaining." (length alljss)::IO ()
198 else execJobSet master nl il cref alljss
200 -- | Execute an entire jobset.
201 execJobSet :: String -> Node.List
202 -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
203 execJobSet _ _ _ _ [] = return True
204 execJobSet master nl il cref (js:jss) = do
205 -- map from jobset (htools list of positions) to [[opcodes]]
206 let jobs = map (\(_, idx, move, _) ->
207 Cluster.iMoveToJob nl il idx move) js
208 let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
209 putStrLn $ "Executing jobset for instances " ++ commaJoin descr
210 jrs <- bracket (L.getClient master) L.closeClient
212 jids <- L.submitManyJobs client jobs
214 Bad e -> return . Bad $ "Job submission error: " ++ formatError e
216 putStrLn $ "Got job IDs " ++ commaJoin (map show x)
223 Ok x -> if checkJobsStatus x
224 then execWrapper master nl il cref jss
226 hPutStrLn stderr $ "Not all jobs completed successfully: " ++
228 hPutStrLn stderr "Aborting."
231 -- | Executes the jobs, if possible and desired.
232 maybeExecJobs :: Options
238 maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
239 if optExecJobs opts && not (null ord_plc)
240 then (case optLuxi opts of
242 hPutStrLn stderr "Execution of commands possible only on LUXI"
244 Just master -> runJobSet master fin_nl il cmd_jobs)
247 -- | Signal handler for graceful termination.
248 hangleSigInt :: IORef Int -> IO ()
249 hangleSigInt cref = do
251 putStrLn ("Cancel request registered, will exit at" ++
252 " the end of the current job set...")
254 -- | Signal handler for immediate termination.
255 hangleSigTerm :: IORef Int -> IO ()
256 hangleSigTerm cref = do
257 -- update the cref to 2, just for consistency
259 putStrLn "Double cancel request, exiting now..."
260 exitImmediately $ ExitFailure 2
262 -- | Runs a job set with handling of signals.
263 runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
264 runJobSet master fin_nl il cmd_jobs = do
266 mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
267 [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
268 execWrapper master fin_nl il cref cmd_jobs
270 -- | Select the target node group.
271 selectGroup :: Options -> Group.List -> Node.List -> Instance.List
272 -> IO (String, (Node.List, Instance.List))
273 selectGroup opts gl nlf ilf = do
274 let ngroups = Cluster.splitCluster nlf ilf
275 when (length ngroups > 1 && isNothing (optGroup opts)) $ do
276 hPutStrLn stderr "Found multiple node groups:"
277 mapM_ (hPutStrLn stderr . (" " ++) . Group.name .
278 flip Container.find gl . fst) ngroups
281 case optGroup opts of
283 let (gidx, cdata) = head ngroups
284 grp = Container.find gidx gl
285 return (Group.name grp, cdata)
286 Just g -> case Container.findByName gl g of
288 hPutStrLn stderr $ "Node group " ++ g ++
289 " not found. Node group list is:"
290 mapM_ (hPutStrLn stderr . (" " ++) . Group.name ) (Container.elems gl)
293 case lookup (Group.idx grp) ngroups of
295 -- This will only happen if there are no nodes assigned
297 return (Group.name grp, (Container.empty, Container.empty))
298 Just cdata -> return (Group.name grp, cdata)
300 -- | Do a few checks on the cluster data.
301 checkCluster :: Int -> Node.List -> Instance.List -> IO ()
302 checkCluster verbose nl il = do
303 -- nothing to do on an empty cluster
304 when (Container.null il) $ do
305 printf "Cluster is empty, exiting.\n"::IO ()
308 -- hbal doesn't currently handle split clusters
309 let split_insts = Cluster.findSplitInstances nl il
310 unless (null split_insts || verbose <= 1) $ do
311 hPutStrLn stderr "Found instances belonging to multiple node groups:"
312 mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts
313 hPutStrLn stderr "These instances will not be moved."
315 printf "Loaded %d nodes, %d instances\n"
317 (Container.size il)::IO ()
319 let csf = commonSuffix nl il
320 when (not (null csf) && verbose > 1) $
321 printf "Note: Stripping common suffix of '%s' from names\n" csf
323 -- | Do a few checks on the selected group data.
324 checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
325 checkGroup verbose gname nl il = do
326 printf "Group size %d nodes, %d instances\n"
328 (Container.size il)::IO ()
330 putStrLn $ "Selected node group: " ++ gname
332 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
333 unless (verbose == 0) $ printf
334 "Initial check done: %d bad nodes, %d bad instances.\n"
335 (length bad_nodes) (length bad_instances)
337 unless (null bad_nodes) $
338 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
339 \that the cluster will end N+1 happy."
341 -- | Check that we actually need to rebalance.
342 checkNeedRebalance :: Options -> Score -> IO ()
343 checkNeedRebalance opts ini_cv = do
344 let min_cv = optMinScore opts
345 when (ini_cv < min_cv) $ do
346 printf "Cluster is already well balanced (initial score %.6g,\n\
347 \minimum score %.6g).\nNothing to do, exiting\n"
348 ini_cv min_cv:: IO ()
352 main :: Options -> [String] -> IO ()
354 unless (null args) $ exitErr "This program doesn't take any arguments."
356 let verbose = optVerbose opts
357 shownodes = optShowNodes opts
358 showinsts = optShowInsts opts
360 ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
362 when (verbose > 1) $ do
363 putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
364 putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
366 nlf <- setNodeStatus opts fixed_nl
367 checkCluster verbose nlf ilf
369 maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
371 (gname, (nl, il)) <- selectGroup opts gl nlf ilf
373 checkGroup verbose gname nl il
375 maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
377 maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
379 let ini_cv = Cluster.compCV nl
380 ini_tbl = Cluster.Table nl il ini_cv []
381 min_cv = optMinScore opts
383 checkNeedRebalance opts ini_cv
386 then printf "Initial coefficients: overall %.8f\n%s"
387 ini_cv (Cluster.printStats " " nl)::IO ()
388 else printf "Initial score: %.8f\n" ini_cv
390 putStrLn "Trying to minimize the CV..."
391 let imlen = maximum . map (length . Instance.alias) $ Container.elems il
392 nmlen = maximum . map (length . Node.alias) $ Container.elems nl
394 (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
397 nmlen imlen [] min_cv
398 (optMinGainLim opts) (optMinGain opts)
400 let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
401 ord_plc = reverse fin_plc
403 _ | null fin_plc -> printf "No solution found\n"
405 printf "Final coefficients: overall %.8f\n%s"
406 fin_cv (Cluster.printStats " " fin_nl)
408 printf "Cluster score improved from %.8f to %.8f\n"
409 ini_cv fin_cv ::String
413 unless (verbose == 0) $
414 printf "Solution length=%d\n" (length ord_plc)
416 let cmd_jobs = Cluster.splitJobs cmd_strs
418 when (isJust $ optShowCmds opts) .
419 saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
421 maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
422 ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
424 maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
426 maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
428 when (verbose > 3) $ printStats nl fin_nl
430 eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
431 unless eval (exitWith (ExitFailure 1))