1 {-| Cluster rebalancer.
7 Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Ganeti.HTools.Program.Hbal
33 import Control.Exception (bracket)
36 import Data.Maybe (isJust, isNothing, fromJust)
40 import System.Posix.Process
41 import System.Posix.Signals
43 import Text.Printf (printf)
45 import qualified Ganeti.HTools.Container as Container
46 import qualified Ganeti.HTools.Cluster as Cluster
47 import qualified Ganeti.HTools.Group as Group
48 import qualified Ganeti.HTools.Node as Node
49 import qualified Ganeti.HTools.Instance as Instance
51 import Ganeti.BasicTypes
53 import Ganeti.HTools.CLI
54 import Ganeti.HTools.ExtLoader
55 import Ganeti.HTools.Types
56 import Ganeti.HTools.Loader
57 import Ganeti.OpCodes (wrapOpCode, setOpComment, setOpPriority,
59 import Ganeti.Jobs as Jobs
63 import qualified Ganeti.Luxi as L
64 import Ganeti.Version (version)
66 -- | Options list and functions.
67 options :: IO [OptType]
103 -- | The list of arguments supported by the program.
104 arguments :: [ArgCompletion]
107 -- | A simple type alias for clearer signature.
108 type Annotator = OpCode -> MetaOpCode
110 -- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment
111 -- about what generated the opcode.
112 annotateOpCode :: Annotator
114 setOpComment ("rebalancing via hbal " ++ version) . wrapOpCode
116 {- | Start computing the solution at the given depth and recurse until
117 we find a valid solution or we exceed the maximum depth.
120 iterateDepth :: Bool -- ^ Whether to print moves
121 -> Cluster.Table -- ^ The starting table
122 -> Int -- ^ Remaining length
123 -> Bool -- ^ Allow disk moves
124 -> Bool -- ^ Allow instance moves
125 -> Int -- ^ Max node name len
126 -> Int -- ^ Max instance name len
127 -> [MoveJob] -- ^ Current command list
128 -> Score -- ^ Score at which to stop
129 -> Score -- ^ Min gain limit
130 -> Score -- ^ Min score gain
131 -> Bool -- ^ Enable evacuation mode
132 -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
134 iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
135 cmd_strs min_score mg_limit min_gain evac_mode =
136 let Cluster.Table ini_nl ini_il _ _ = ini_tbl
137 allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
138 m_fin_tbl = if allowed_next
139 then Cluster.tryBalance ini_tbl disk_moves inst_moves
140 evac_mode mg_limit min_gain
145 let (Cluster.Table _ _ _ fin_plc) = fin_tbl
146 cur_plc@(idx, _, _, move, _) <-
147 exitIfEmpty "Empty placement list returned for solution?!" fin_plc
148 let fin_plc_len = length fin_plc
149 (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
150 nmlen imlen cur_plc fin_plc_len
151 afn = Cluster.involvedNodes ini_il cur_plc
152 upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
156 iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
157 nmlen imlen upd_cmd_strs min_score
158 mg_limit min_gain evac_mode
159 Nothing -> return (ini_tbl, cmd_strs)
161 -- | Displays the cluster stats.
162 printStats :: Node.List -> Node.List -> IO ()
163 printStats ini_nl fin_nl = do
164 let ini_cs = Cluster.totalResources ini_nl
165 fin_cs = Cluster.totalResources fin_nl
166 printf "Original: mem=%d disk=%d\n"
167 (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
168 printf "Final: mem=%d disk=%d\n"
169 (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
171 -- | Saves the rebalance commands to a text file.
172 saveBalanceCommands :: Options -> String -> IO ()
173 saveBalanceCommands opts cmd_data = do
174 let out_path = fromJust $ optShowCmds opts
177 then printf "Commands to run to reach the above solution:\n%s"
178 (unlines . map (" " ++) .
179 filter (/= " check") .
182 writeFile out_path (shTemplate ++ cmd_data)
183 printf "The commands have been written to file '%s'\n" out_path
185 -- | Wrapper over execJobSet checking for early termination via an IORef.
186 execCancelWrapper :: Annotator -> String -> Node.List
187 -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
188 execCancelWrapper _ _ _ _ _ [] = return $ Ok ()
189 execCancelWrapper anno master nl il cref alljss = do
190 cancel <- readIORef cref
193 putStrLn $ "Exiting early due to user request, " ++
194 show (length alljss) ++ " jobset(s) remaining."
196 else execJobSet anno master nl il cref alljss
198 -- | Execute an entire jobset.
199 execJobSet :: Annotator -> String -> Node.List
200 -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
201 execJobSet _ _ _ _ _ [] = return $ Ok ()
202 execJobSet anno master nl il cref (js:jss) = do
203 -- map from jobset (htools list of positions) to [[opcodes]]
204 let jobs = map (\(_, idx, move, _) ->
205 map anno $ Cluster.iMoveToJob nl il idx move) js
206 descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
207 logfn = putStrLn . ("Got job IDs" ++) . commaJoin . map (show . fromJobId)
208 putStrLn $ "Executing jobset for instances " ++ commaJoin descr
209 jrs <- bracket (L.getClient master) L.closeClient $
210 Jobs.execJobsWait jobs logfn
212 Bad x -> return $ Bad x
213 Ok x -> if null failures
214 then execCancelWrapper anno master nl il cref jss
215 else return . Bad . unlines $ [
216 "Not all jobs completed successfully: " ++ show failures,
219 failures = filter ((/= JOB_STATUS_SUCCESS) . snd) x
221 -- | Executes the jobs, if possible and desired.
222 maybeExecJobs :: Options
228 maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
229 if optExecJobs opts && not (null ord_plc)
230 then (case optLuxi opts of
232 return $ Bad "Execution of commands possible only on LUXI"
234 let annotator = maybe id setOpPriority (optPriority opts) .
236 in execWithCancel annotator master fin_nl il cmd_jobs)
239 -- | Signal handler for graceful termination.
240 handleSigInt :: IORef Int -> IO ()
241 handleSigInt cref = do
243 putStrLn ("Cancel request registered, will exit at" ++
244 " the end of the current job set...")
246 -- | Signal handler for immediate termination.
247 handleSigTerm :: IORef Int -> IO ()
248 handleSigTerm cref = do
249 -- update the cref to 2, just for consistency
251 putStrLn "Double cancel request, exiting now..."
252 exitImmediately $ ExitFailure 2
254 -- | Prepares to run a set of jobsets with handling of signals and early
256 execWithCancel :: Annotator -> String -> Node.List -> Instance.List -> [JobSet]
258 execWithCancel anno master fin_nl il cmd_jobs = do
260 mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
261 [(handleSigTerm, softwareTermination), (handleSigInt, keyboardSignal)]
262 execCancelWrapper anno master fin_nl il cref cmd_jobs
264 -- | Select the target node group.
265 selectGroup :: Options -> Group.List -> Node.List -> Instance.List
266 -> IO (String, (Node.List, Instance.List))
267 selectGroup opts gl nlf ilf = do
268 let ngroups = Cluster.splitCluster nlf ilf
269 when (length ngroups > 1 && isNothing (optGroup opts)) $ do
270 hPutStrLn stderr "Found multiple node groups:"
271 mapM_ (hPutStrLn stderr . (" " ++) . Group.name .
272 flip Container.find gl . fst) ngroups
275 case optGroup opts of
277 (gidx, cdata) <- exitIfEmpty "No groups found by splitCluster?!" ngroups
278 let grp = Container.find gidx gl
279 return (Group.name grp, cdata)
280 Just g -> case Container.findByName gl g of
282 hPutStrLn stderr $ "Node group " ++ g ++
283 " not found. Node group list is:"
284 mapM_ (hPutStrLn stderr . (" " ++) . Group.name ) (Container.elems gl)
287 case lookup (Group.idx grp) ngroups of
289 -- This will only happen if there are no nodes assigned
291 return (Group.name grp, (Container.empty, Container.empty))
292 Just cdata -> return (Group.name grp, cdata)
294 -- | Do a few checks on the cluster data.
295 checkCluster :: Int -> Node.List -> Instance.List -> IO ()
296 checkCluster verbose nl il = do
297 -- nothing to do on an empty cluster
298 when (Container.null il) $ do
299 printf "Cluster is empty, exiting.\n"::IO ()
302 -- hbal doesn't currently handle split clusters
303 let split_insts = Cluster.findSplitInstances nl il
304 unless (null split_insts || verbose <= 1) $ do
305 hPutStrLn stderr "Found instances belonging to multiple node groups:"
306 mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts
307 hPutStrLn stderr "These instances will not be moved."
309 printf "Loaded %d nodes, %d instances\n"
311 (Container.size il)::IO ()
313 let csf = commonSuffix nl il
314 when (not (null csf) && verbose > 1) $
315 printf "Note: Stripping common suffix of '%s' from names\n" csf
317 -- | Do a few checks on the selected group data.
318 checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
319 checkGroup verbose gname nl il = do
320 printf "Group size %d nodes, %d instances\n"
322 (Container.size il)::IO ()
324 putStrLn $ "Selected node group: " ++ gname
326 let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
327 unless (verbose < 1) $ printf
328 "Initial check done: %d bad nodes, %d bad instances.\n"
329 (length bad_nodes) (length bad_instances)
331 unless (null bad_nodes) $
332 putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
333 \that the cluster will end N+1 happy."
335 -- | Check that we actually need to rebalance.
336 checkNeedRebalance :: Options -> Score -> IO ()
337 checkNeedRebalance opts ini_cv = do
338 let min_cv = optMinScore opts
339 when (ini_cv < min_cv) $ do
340 printf "Cluster is already well balanced (initial score %.6g,\n\
341 \minimum score %.6g).\nNothing to do, exiting\n"
342 ini_cv min_cv:: IO ()
346 main :: Options -> [String] -> IO ()
348 unless (null args) $ exitErr "This program doesn't take any arguments."
350 let verbose = optVerbose opts
351 shownodes = optShowNodes opts
352 showinsts = optShowInsts opts
354 ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
356 when (verbose > 1) $ do
357 putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
358 putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
360 nlf <- setNodeStatus opts fixed_nl
361 checkCluster verbose nlf ilf
363 maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
365 (gname, (nl, il)) <- selectGroup opts gl nlf ilf
367 checkGroup verbose gname nl il
369 maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
371 maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
373 let ini_cv = Cluster.compCV nl
374 ini_tbl = Cluster.Table nl il ini_cv []
375 min_cv = optMinScore opts
377 checkNeedRebalance opts ini_cv
380 then printf "Initial coefficients: overall %.8f\n%s"
381 ini_cv (Cluster.printStats " " nl)::IO ()
382 else printf "Initial score: %.8f\n" ini_cv
384 putStrLn "Trying to minimize the CV..."
385 let imlen = maximum . map (length . Instance.alias) $ Container.elems il
386 nmlen = maximum . map (length . Node.alias) $ Container.elems nl
388 (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
391 nmlen imlen [] min_cv
392 (optMinGainLim opts) (optMinGain opts)
394 let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
395 ord_plc = reverse fin_plc
397 _ | null fin_plc -> printf "No solution found\n"
399 printf "Final coefficients: overall %.8f\n%s"
400 fin_cv (Cluster.printStats " " fin_nl)
402 printf "Cluster score improved from %.8f to %.8f\n"
403 ini_cv fin_cv ::String
407 unless (verbose < 1) $
408 printf "Solution length=%d\n" (length ord_plc)
410 let cmd_jobs = Cluster.splitJobs cmd_strs
412 when (isJust $ optShowCmds opts) .
413 saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
415 maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
416 ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
418 maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
420 maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
422 when (verbose > 3) $ printStats nl fin_nl
424 exitIfBad "hbal" =<< maybeExecJobs opts ord_plc fin_nl il cmd_jobs