1 {-| Cluster rebalancer.
7 Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Ganeti.HTools.Program.Hbal
33 import Control.Concurrent (threadDelay)
34 import Control.Exception (bracket)
37 import Data.Maybe (isJust, isNothing, fromJust)
41 import System.Posix.Process
42 import System.Posix.Signals
44 import Text.Printf (printf, hPrintf)
46 import qualified Ganeti.HTools.Container as Container
47 import qualified Ganeti.HTools.Cluster as Cluster
48 import qualified Ganeti.HTools.Group as Group
49 import qualified Ganeti.HTools.Node as Node
50 import qualified Ganeti.HTools.Instance as Instance
53 import Ganeti.HTools.CLI
54 import Ganeti.HTools.ExtLoader
55 import Ganeti.HTools.Types
56 import Ganeti.HTools.Loader
59 import qualified Ganeti.Luxi as L
62 -- | Options list and functions.
93 -- | The list of arguments supported by the program.
94 arguments :: [ArgCompletion]
97 {- | Start computing the solution at the given depth and recurse until
98 we find a valid solution or we exceed the maximum depth.
101 iterateDepth :: Bool -- ^ Whether to print moves
102 -> Cluster.Table -- ^ The starting table
103 -> Int -- ^ Remaining length
104 -> Bool -- ^ Allow disk moves
105 -> Bool -- ^ Allow instance moves
106 -> Int -- ^ Max node name len
107 -> Int -- ^ Max instance name len
108 -> [MoveJob] -- ^ Current command list
109 -> Score -- ^ Score at which to stop
110 -> Score -- ^ Min gain limit
111 -> Score -- ^ Min score gain
112 -> Bool -- ^ Enable evacuation mode
113 -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
115 iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
116 cmd_strs min_score mg_limit min_gain evac_mode =
117 let Cluster.Table ini_nl ini_il _ _ = ini_tbl
118 allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
119 m_fin_tbl = if allowed_next
120 then Cluster.tryBalance ini_tbl disk_moves inst_moves
121 evac_mode mg_limit min_gain
126 let (Cluster.Table _ _ _ fin_plc) = fin_tbl
127 fin_plc_len = length fin_plc
128 cur_plc@(idx, _, _, move, _) = head fin_plc
129 (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
130 nmlen imlen cur_plc fin_plc_len
131 afn = Cluster.involvedNodes ini_il cur_plc
132 upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
136 iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
137 nmlen imlen upd_cmd_strs min_score
138 mg_limit min_gain evac_mode
139 Nothing -> return (ini_tbl, cmd_strs)
141 -- | Displays the cluster stats.
142 printStats :: Node.List -> Node.List -> IO ()
143 printStats ini_nl fin_nl = do
144 let ini_cs = Cluster.totalResources ini_nl
145 fin_cs = Cluster.totalResources fin_nl
146 printf "Original: mem=%d disk=%d\n"
147 (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
148 printf "Final: mem=%d disk=%d\n"
149 (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
151 -- | Saves the rebalance commands to a text file.
152 saveBalanceCommands :: Options -> String -> IO ()
153 saveBalanceCommands opts cmd_data = do
154 let out_path = fromJust $ optShowCmds opts
157 then printf "Commands to run to reach the above solution:\n%s"
158 (unlines . map (" " ++) .
159 filter (/= " check") .
162 writeFile out_path (shTemplate ++ cmd_data)
163 printf "The commands have been written to file '%s'\n" out_path
165 -- | Polls a set of jobs at a fixed interval until all are finished
166 -- one way or another.
167 waitForJobs :: L.Client -> [L.JobId] -> IO (Result [JobStatus])
168 waitForJobs client jids = do
169 sts <- L.queryJobsStatus client jids
171 Bad x -> return $ Bad x
172 Ok s -> if any (<= JOB_STATUS_RUNNING) s
174 -- TODO: replace hardcoded value with a better thing
175 threadDelay (1000000 * 15)
176 waitForJobs client jids
179 -- | Check that a set of job statuses is all success.
180 checkJobsStatus :: [JobStatus] -> Bool
181 checkJobsStatus = all (== JOB_STATUS_SUCCESS)
183 -- | Wrapper over execJobSet checking for early termination.
184 execWrapper :: String -> Node.List
185 -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
186 execWrapper _ _ _ _ [] = return True
187 execWrapper master nl il cref alljss = do
188 cancel <- readIORef cref
191 hPrintf stderr "Exiting early due to user request, %d\
192 \ jobset(s) remaining." (length alljss)::IO ()
194 else execJobSet master nl il cref alljss
196 -- | Execute an entire jobset.
197 execJobSet :: String -> Node.List
198 -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
199 execJobSet _ _ _ _ [] = return True
200 execJobSet master nl il cref (js:jss) = do
201 -- map from jobset (htools list of positions) to [[opcodes]]
202 let jobs = map (\(_, idx, move, _) ->
203 Cluster.iMoveToJob nl il idx move) js
204 let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
205 putStrLn $ "Executing jobset for instances " ++ commaJoin descr
206 jrs <- bracket (L.getClient master) L.closeClient
208 jids <- L.submitManyJobs client jobs
210 Bad x -> return $ Bad x
212 putStrLn $ "Got job IDs " ++ commaJoin (map show x)
217 hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
219 Ok x -> if checkJobsStatus x
220 then execWrapper master nl il cref jss
222 hPutStrLn stderr $ "Not all jobs completed successfully: " ++
224 hPutStrLn stderr "Aborting."
227 -- | Executes the jobs, if possible and desired.
228 maybeExecJobs :: Options
234 maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
235 if optExecJobs opts && not (null ord_plc)
236 then (case optLuxi opts of
238 hPutStrLn stderr "Execution of commands possible only on LUXI"
240 Just master -> runJobSet master fin_nl il cmd_jobs)
243 -- | Signal handler for graceful termination.
244 hangleSigInt :: IORef Int -> IO ()
245 hangleSigInt cref = do
247 putStrLn ("Cancel request registered, will exit at" ++
248 " the end of the current job set...")
250 -- | Signal handler for immediate termination.
251 hangleSigTerm :: IORef Int -> IO ()
252 hangleSigTerm cref = do
253 -- update the cref to 2, just for consistency
255 putStrLn "Double cancel request, exiting now..."
256 exitImmediately $ ExitFailure 2
258 -- | Runs a job set with handling of signals.
259 runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
260 runJobSet master fin_nl il cmd_jobs = do
262 mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
263 [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
264 execWrapper master fin_nl il cref cmd_jobs
266 -- | Select the target node group.
267 selectGroup :: Options -> Group.List -> Node.List -> Instance.List
268 -> IO (String, (Node.List, Instance.List))
269 selectGroup opts gl nlf ilf = do
270 let ngroups = Cluster.splitCluster nlf ilf
271 when (length ngroups > 1 && isNothing (optGroup opts)) $ do
272 hPutStrLn stderr "Found multiple node groups:"
273 mapM_ (hPutStrLn stderr . (" " ++) . Group.name .
274 flip Container.find gl . fst) ngroups
275 hPutStrLn stderr "Aborting."
276 exitWith $ ExitFailure 1
278 case optGroup opts of
280 let (gidx, cdata) = head ngroups
281 grp = Container.find gidx gl
282 return (Group.name grp, cdata)
283 Just g -> case Container.findByName gl g of
285 hPutStrLn stderr $ "Node group " ++ g ++
286 " not found. Node group list is:"
287 mapM_ (hPutStrLn stderr . (" " ++) . Group.name ) (Container.elems gl)
288 hPutStrLn stderr "Aborting."
289 exitWith $ ExitFailure 1
291 case lookup (Group.idx grp) ngroups of
293 -- This will only happen if there are no nodes assigned
295 return (Group.name grp, (Container.empty, Container.empty))
296 Just cdata -> return (Group.name grp, cdata)
298 -- | Do a few checks on the cluster data.
299 checkCluster :: Int -> Node.List -> Instance.List -> IO ()
300 checkCluster verbose nl il = do
301 -- nothing to do on an empty cluster
302 when (Container.null il) $ do
303 printf "Cluster is empty, exiting.\n"::IO ()
306 -- hbal doesn't currently handle split clusters
307 let split_insts = Cluster.findSplitInstances nl il
308 unless (null split_insts || verbose <= 1) $ do
309 hPutStrLn stderr "Found instances belonging to multiple node groups:"
310 mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts
311 hPutStrLn stderr "These instances will not be moved."
313 printf "Loaded %d nodes, %d instances\n"
315 (Container.size il)::IO ()
317 let csf = commonSuffix nl il
318 when (not (null csf) && verbose > 1) $
319 printf "Note: Stripping common suffix of '%s' from names\n" csf
321 -- | Do a few checks on the selected group data.
322 checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
323 checkGroup verbose gname nl il = do
324 printf "Group size %d nodes, %d instances\n"
326 (Container.size il)::IO ()
328 putStrLn $ "Selected node group: " ++ gname
330 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
331 unless (verbose == 0) $ printf
332 "Initial check done: %d bad nodes, %d bad instances.\n"
333 (length bad_nodes) (length bad_instances)
335 unless (null bad_nodes) $
336 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
337 \that the cluster will end N+1 happy."
339 -- | Check that we actually need to rebalance.
340 checkNeedRebalance :: Options -> Score -> IO ()
341 checkNeedRebalance opts ini_cv = do
342 let min_cv = optMinScore opts
343 when (ini_cv < min_cv) $ do
344 printf "Cluster is already well balanced (initial score %.6g,\n\
345 \minimum score %.6g).\nNothing to do, exiting\n"
346 ini_cv min_cv:: IO ()
350 main :: Options -> [String] -> IO ()
352 unless (null args) $ do
353 hPutStrLn stderr "Error: this program doesn't take any arguments."
354 exitWith $ ExitFailure 1
356 let verbose = optVerbose opts
357 shownodes = optShowNodes opts
358 showinsts = optShowInsts opts
360 ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
362 when (verbose > 1) $ do
363 putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
364 putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
366 nlf <- setNodeStatus opts fixed_nl
367 checkCluster verbose nlf ilf
369 maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
371 (gname, (nl, il)) <- selectGroup opts gl nlf ilf
373 checkGroup verbose gname nl il
375 maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
377 maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
379 let ini_cv = Cluster.compCV nl
380 ini_tbl = Cluster.Table nl il ini_cv []
381 min_cv = optMinScore opts
383 checkNeedRebalance opts ini_cv
386 then printf "Initial coefficients: overall %.8f\n%s"
387 ini_cv (Cluster.printStats " " nl)::IO ()
388 else printf "Initial score: %.8f\n" ini_cv
390 putStrLn "Trying to minimize the CV..."
391 let imlen = maximum . map (length . Instance.alias) $ Container.elems il
392 nmlen = maximum . map (length . Node.alias) $ Container.elems nl
394 (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
397 nmlen imlen [] min_cv
398 (optMinGainLim opts) (optMinGain opts)
400 let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
401 ord_plc = reverse fin_plc
403 _ | null fin_plc -> printf "No solution found\n"
405 printf "Final coefficients: overall %.8f\n%s"
406 fin_cv (Cluster.printStats " " fin_nl)
408 printf "Cluster score improved from %.8f to %.8f\n"
409 ini_cv fin_cv ::String
413 unless (verbose == 0) $
414 printf "Solution length=%d\n" (length ord_plc)
416 let cmd_jobs = Cluster.splitJobs cmd_strs
418 when (isJust $ optShowCmds opts) .
419 saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
421 maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
422 ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
424 maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
426 maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
428 when (verbose > 3) $ printStats nl fin_nl
430 eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
431 unless eval (exitWith (ExitFailure 1))