1 {-| Cluster rebalancer.
7 Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Ganeti.HTools.Program.Hbal
33 import Control.Exception (bracket)
36 import Data.Maybe (isJust, isNothing, fromJust)
40 import System.Posix.Process
41 import System.Posix.Signals
43 import Text.Printf (printf)
45 import qualified Ganeti.HTools.Container as Container
46 import qualified Ganeti.HTools.Cluster as Cluster
47 import qualified Ganeti.HTools.Group as Group
48 import qualified Ganeti.HTools.Node as Node
49 import qualified Ganeti.HTools.Instance as Instance
51 import Ganeti.BasicTypes
53 import Ganeti.HTools.CLI
54 import Ganeti.HTools.ExtLoader
55 import Ganeti.HTools.Types
56 import Ganeti.HTools.Loader
57 import Ganeti.OpCodes (wrapOpCode, setOpComment, setOpPriority,
59 import Ganeti.Jobs as Jobs
63 import qualified Ganeti.Luxi as L
64 import Ganeti.Version (version)
66 -- | Options list and functions.
67 options :: IO [OptType]
103 -- | The list of arguments supported by the program.
104 arguments :: [ArgCompletion]
107 -- | A simple type alias for clearer signature.
108 type Annotator = OpCode -> MetaOpCode
110 -- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment
111 -- about what generated the opcode.
112 annotateOpCode :: Annotator
114 setOpComment ("rebalancing via hbal " ++ version) . wrapOpCode
116 {- | Start computing the solution at the given depth and recurse until
117 we find a valid solution or we exceed the maximum depth.
120 iterateDepth :: Bool -- ^ Whether to print moves
121 -> Cluster.Table -- ^ The starting table
122 -> Int -- ^ Remaining length
123 -> Bool -- ^ Allow disk moves
124 -> Bool -- ^ Allow instance moves
125 -> Int -- ^ Max node name len
126 -> Int -- ^ Max instance name len
127 -> [MoveJob] -- ^ Current command list
128 -> Score -- ^ Score at which to stop
129 -> Score -- ^ Min gain limit
130 -> Score -- ^ Min score gain
131 -> Bool -- ^ Enable evacuation mode
132 -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
134 iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
135 cmd_strs min_score mg_limit min_gain evac_mode =
136 let Cluster.Table ini_nl ini_il _ _ = ini_tbl
137 allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
138 m_fin_tbl = if allowed_next
139 then Cluster.tryBalance ini_tbl disk_moves inst_moves
140 evac_mode mg_limit min_gain
145 let (Cluster.Table _ _ _ fin_plc) = fin_tbl
146 cur_plc@(idx, _, _, move, _) <-
147 exitIfEmpty "Empty placement list returned for solution?!" fin_plc
148 let fin_plc_len = length fin_plc
149 (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
150 nmlen imlen cur_plc fin_plc_len
151 afn = Cluster.involvedNodes ini_il cur_plc
152 upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
156 iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
157 nmlen imlen upd_cmd_strs min_score
158 mg_limit min_gain evac_mode
159 Nothing -> return (ini_tbl, cmd_strs)
161 -- | Displays the cluster stats.
162 printStats :: Node.List -> Node.List -> IO ()
163 printStats ini_nl fin_nl = do
164 let ini_cs = Cluster.totalResources ini_nl
165 fin_cs = Cluster.totalResources fin_nl
166 printf "Original: mem=%d disk=%d\n"
167 (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
168 printf "Final: mem=%d disk=%d\n"
169 (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
171 -- | Saves the rebalance commands to a text file.
172 saveBalanceCommands :: Options -> String -> IO ()
173 saveBalanceCommands opts cmd_data = do
174 let out_path = fromJust $ optShowCmds opts
177 then printf "Commands to run to reach the above solution:\n%s"
178 (unlines . map (" " ++) .
179 filter (/= " check") .
182 writeFile out_path (shTemplate ++ cmd_data)
183 printf "The commands have been written to file '%s'\n" out_path
185 -- | Wrapper over execJobSet checking for early termination via an IORef.
186 execCancelWrapper :: Annotator -> String -> Node.List
187 -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
188 execCancelWrapper _ _ _ _ _ [] = return $ Ok ()
189 execCancelWrapper anno master nl il cref alljss = do
190 cancel <- readIORef cref
193 putStrLn $ "Exiting early due to user request, " ++
194 show (length alljss) ++ " jobset(s) remaining."
196 else execJobSet anno master nl il cref alljss
198 -- | Execute an entire jobset.
199 execJobSet :: Annotator -> String -> Node.List
200 -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
201 execJobSet _ _ _ _ _ [] = return $ Ok ()
202 execJobSet anno master nl il cref (js:jss) = do
203 -- map from jobset (htools list of positions) to [[opcodes]]
204 let jobs = map (\(_, idx, move, _) ->
205 map anno $ Cluster.iMoveToJob nl il idx move) js
206 descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
208 putStrLn . ("Got job IDs " ++) . commaJoin . map (show . fromJobId)
209 putStrLn $ "Executing jobset for instances " ++ commaJoin descr
210 jrs <- bracket (L.getClient master) L.closeClient $
211 Jobs.execJobsWait jobs logfn
213 Bad x -> return $ Bad x
214 Ok x -> if null failures
215 then execCancelWrapper anno master nl il cref jss
216 else return . Bad . unlines $ [
217 "Not all jobs completed successfully: " ++ show failures,
220 failures = filter ((/= JOB_STATUS_SUCCESS) . snd) x
222 -- | Executes the jobs, if possible and desired.
223 maybeExecJobs :: Options
229 maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
230 if optExecJobs opts && not (null ord_plc)
231 then (case optLuxi opts of
233 return $ Bad "Execution of commands possible only on LUXI"
235 let annotator = maybe id setOpPriority (optPriority opts) .
237 in execWithCancel annotator master fin_nl il cmd_jobs)
240 -- | Signal handler for graceful termination.
241 handleSigInt :: IORef Int -> IO ()
242 handleSigInt cref = do
244 putStrLn ("Cancel request registered, will exit at" ++
245 " the end of the current job set...")
247 -- | Signal handler for immediate termination.
248 handleSigTerm :: IORef Int -> IO ()
249 handleSigTerm cref = do
250 -- update the cref to 2, just for consistency
252 putStrLn "Double cancel request, exiting now..."
253 exitImmediately $ ExitFailure 2
255 -- | Prepares to run a set of jobsets with handling of signals and early
257 execWithCancel :: Annotator -> String -> Node.List -> Instance.List -> [JobSet]
259 execWithCancel anno master fin_nl il cmd_jobs = do
261 mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
262 [(handleSigTerm, softwareTermination), (handleSigInt, keyboardSignal)]
263 execCancelWrapper anno master fin_nl il cref cmd_jobs
265 -- | Select the target node group.
266 selectGroup :: Options -> Group.List -> Node.List -> Instance.List
267 -> IO (String, (Node.List, Instance.List))
268 selectGroup opts gl nlf ilf = do
269 let ngroups = Cluster.splitCluster nlf ilf
270 when (length ngroups > 1 && isNothing (optGroup opts)) $ do
271 hPutStrLn stderr "Found multiple node groups:"
272 mapM_ (hPutStrLn stderr . (" " ++) . Group.name .
273 flip Container.find gl . fst) ngroups
276 case optGroup opts of
278 (gidx, cdata) <- exitIfEmpty "No groups found by splitCluster?!" ngroups
279 let grp = Container.find gidx gl
280 return (Group.name grp, cdata)
281 Just g -> case Container.findByName gl g of
283 hPutStrLn stderr $ "Node group " ++ g ++
284 " not found. Node group list is:"
285 mapM_ (hPutStrLn stderr . (" " ++) . Group.name ) (Container.elems gl)
288 case lookup (Group.idx grp) ngroups of
290 -- This will only happen if there are no nodes assigned
292 return (Group.name grp, (Container.empty, Container.empty))
293 Just cdata -> return (Group.name grp, cdata)
295 -- | Do a few checks on the cluster data.
296 checkCluster :: Int -> Node.List -> Instance.List -> IO ()
297 checkCluster verbose nl il = do
298 -- nothing to do on an empty cluster
299 when (Container.null il) $ do
300 printf "Cluster is empty, exiting.\n"::IO ()
303 -- hbal doesn't currently handle split clusters
304 let split_insts = Cluster.findSplitInstances nl il
305 unless (null split_insts || verbose <= 1) $ do
306 hPutStrLn stderr "Found instances belonging to multiple node groups:"
307 mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts
308 hPutStrLn stderr "These instances will not be moved."
310 printf "Loaded %d nodes, %d instances\n"
312 (Container.size il)::IO ()
314 let csf = commonSuffix nl il
315 when (not (null csf) && verbose > 1) $
316 printf "Note: Stripping common suffix of '%s' from names\n" csf
318 -- | Do a few checks on the selected group data.
319 checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
320 checkGroup verbose gname nl il = do
321 printf "Group size %d nodes, %d instances\n"
323 (Container.size il)::IO ()
325 putStrLn $ "Selected node group: " ++ gname
327 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
328 unless (verbose < 1) $ printf
329 "Initial check done: %d bad nodes, %d bad instances.\n"
330 (length bad_nodes) (length bad_instances)
332 unless (null bad_nodes) $
333 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
334 \that the cluster will end N+1 happy."
336 -- | Check that we actually need to rebalance.
337 checkNeedRebalance :: Options -> Score -> IO ()
338 checkNeedRebalance opts ini_cv = do
339 let min_cv = optMinScore opts
340 when (ini_cv < min_cv) $ do
341 printf "Cluster is already well balanced (initial score %.6g,\n\
342 \minimum score %.6g).\nNothing to do, exiting\n"
343 ini_cv min_cv:: IO ()
347 main :: Options -> [String] -> IO ()
349 unless (null args) $ exitErr "This program doesn't take any arguments."
351 let verbose = optVerbose opts
352 shownodes = optShowNodes opts
353 showinsts = optShowInsts opts
355 ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
357 when (verbose > 1) $ do
358 putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
359 putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
361 nlf <- setNodeStatus opts fixed_nl
362 checkCluster verbose nlf ilf
364 maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
366 (gname, (nl, il)) <- selectGroup opts gl nlf ilf
368 checkGroup verbose gname nl il
370 maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
372 maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
374 let ini_cv = Cluster.compCV nl
375 ini_tbl = Cluster.Table nl il ini_cv []
376 min_cv = optMinScore opts
378 checkNeedRebalance opts ini_cv
381 then printf "Initial coefficients: overall %.8f\n%s"
382 ini_cv (Cluster.printStats " " nl)::IO ()
383 else printf "Initial score: %.8f\n" ini_cv
385 putStrLn "Trying to minimize the CV..."
386 let imlen = maximum . map (length . Instance.alias) $ Container.elems il
387 nmlen = maximum . map (length . Node.alias) $ Container.elems nl
389 (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
392 nmlen imlen [] min_cv
393 (optMinGainLim opts) (optMinGain opts)
395 let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
396 ord_plc = reverse fin_plc
398 _ | null fin_plc -> printf "No solution found\n"
400 printf "Final coefficients: overall %.8f\n%s"
401 fin_cv (Cluster.printStats " " fin_nl)
403 printf "Cluster score improved from %.8f to %.8f\n"
404 ini_cv fin_cv ::String
408 unless (verbose < 1) $
409 printf "Solution length=%d\n" (length ord_plc)
411 let cmd_jobs = Cluster.splitJobs cmd_strs
413 when (isJust $ optShowCmds opts) .
414 saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
416 maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
417 ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
419 maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
421 maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
423 when (verbose > 3) $ printStats nl fin_nl
425 exitIfBad "hbal" =<< maybeExecJobs opts ord_plc fin_nl il cmd_jobs