root / src / Ganeti / HTools / Program / Hbal.hs @ a81ca843
History | View | Annotate | Download (15 kB)
1 |
{-| Cluster rebalancer. |
---|---|
2 |
|
3 |
-} |
4 |
|
5 |
{- |
6 |
|
7 |
Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. |
8 |
|
9 |
This program is free software; you can redistribute it and/or modify |
10 |
it under the terms of the GNU General Public License as published by |
11 |
the Free Software Foundation; either version 2 of the License, or |
12 |
(at your option) any later version. |
13 |
|
14 |
This program is distributed in the hope that it will be useful, but |
15 |
WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 |
General Public License for more details. |
18 |
|
19 |
You should have received a copy of the GNU General Public License |
20 |
along with this program; if not, write to the Free Software |
21 |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
22 |
02110-1301, USA. |
23 |
|
24 |
-} |
25 |
|
26 |
module Ganeti.HTools.Program.Hbal |
27 |
( main |
28 |
, options |
29 |
, arguments |
30 |
, iterateDepth |
31 |
) where |
32 |
|
33 |
import Control.Exception (bracket) |
34 |
import Control.Monad |
35 |
import Data.List |
36 |
import Data.Maybe (isJust, isNothing, fromJust) |
37 |
import Data.IORef |
38 |
import System.Exit |
39 |
import System.IO |
40 |
import System.Posix.Process |
41 |
import System.Posix.Signals |
42 |
|
43 |
import Text.Printf (printf) |
44 |
|
45 |
import qualified Ganeti.HTools.Container as Container |
46 |
import qualified Ganeti.HTools.Cluster as Cluster |
47 |
import qualified Ganeti.HTools.Group as Group |
48 |
import qualified Ganeti.HTools.Node as Node |
49 |
import qualified Ganeti.HTools.Instance as Instance |
50 |
|
51 |
import Ganeti.BasicTypes |
52 |
import Ganeti.Common |
53 |
import Ganeti.HTools.CLI |
54 |
import Ganeti.HTools.ExtLoader |
55 |
import Ganeti.HTools.Types |
56 |
import Ganeti.HTools.Loader |
57 |
import Ganeti.OpCodes (wrapOpCode, setOpComment, setOpPriority, |
58 |
OpCode, MetaOpCode) |
59 |
import Ganeti.Jobs as Jobs |
60 |
import Ganeti.Types |
61 |
import Ganeti.Utils |
62 |
|
63 |
import qualified Ganeti.Luxi as L |
64 |
import Ganeti.Version (version) |
65 |
|
66 |
-- | Options list and functions. |
67 |
options :: IO [OptType] |
68 |
options = do |
69 |
luxi <- oLuxiSocket |
70 |
return |
71 |
[ oPrintNodes |
72 |
, oPrintInsts |
73 |
, oPrintCommands |
74 |
, oDataFile |
75 |
, oEvacMode |
76 |
, oRapiMaster |
77 |
, luxi |
78 |
, oIAllocSrc |
79 |
, oExecJobs |
80 |
, oGroup |
81 |
, oMaxSolLength |
82 |
, oVerbose |
83 |
, oQuiet |
84 |
, oOfflineNode |
85 |
, oMinScore |
86 |
, oMaxCpu |
87 |
, oMinDisk |
88 |
, oMinGain |
89 |
, oMinGainLim |
90 |
, oDiskMoves |
91 |
, oSelInst |
92 |
, oInstMoves |
93 |
, oDynuFile |
94 |
, oExTags |
95 |
, oExInst |
96 |
, oSaveCluster |
97 |
, oPriority |
98 |
] |
99 |
|
100 |
-- | The list of arguments supported by the program. |
101 |
arguments :: [ArgCompletion] |
102 |
arguments = [] |
103 |
|
104 |
-- | A simple type alias for clearer signature. |
105 |
type Annotator = OpCode -> MetaOpCode |
106 |
|
107 |
-- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment |
108 |
-- about what generated the opcode. |
109 |
annotateOpCode :: Annotator |
110 |
annotateOpCode = |
111 |
setOpComment ("rebalancing via hbal " ++ version) . wrapOpCode |
112 |
|
113 |
{- | Start computing the solution at the given depth and recurse until |
114 |
we find a valid solution or we exceed the maximum depth. |
115 |
|
116 |
-} |
117 |
iterateDepth :: Bool -- ^ Whether to print moves |
118 |
-> Cluster.Table -- ^ The starting table |
119 |
-> Int -- ^ Remaining length |
120 |
-> Bool -- ^ Allow disk moves |
121 |
-> Bool -- ^ Allow instance moves |
122 |
-> Int -- ^ Max node name len |
123 |
-> Int -- ^ Max instance name len |
124 |
-> [MoveJob] -- ^ Current command list |
125 |
-> Score -- ^ Score at which to stop |
126 |
-> Score -- ^ Min gain limit |
127 |
-> Score -- ^ Min score gain |
128 |
-> Bool -- ^ Enable evacuation mode |
129 |
-> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table |
130 |
-- and commands |
131 |
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen |
132 |
cmd_strs min_score mg_limit min_gain evac_mode = |
133 |
let Cluster.Table ini_nl ini_il _ _ = ini_tbl |
134 |
allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score |
135 |
m_fin_tbl = if allowed_next |
136 |
then Cluster.tryBalance ini_tbl disk_moves inst_moves |
137 |
evac_mode mg_limit min_gain |
138 |
else Nothing |
139 |
in case m_fin_tbl of |
140 |
Just fin_tbl -> |
141 |
do |
142 |
let (Cluster.Table _ _ _ fin_plc) = fin_tbl |
143 |
cur_plc@(idx, _, _, move, _) <- |
144 |
exitIfEmpty "Empty placement list returned for solution?!" fin_plc |
145 |
let fin_plc_len = length fin_plc |
146 |
(sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il |
147 |
nmlen imlen cur_plc fin_plc_len |
148 |
afn = Cluster.involvedNodes ini_il cur_plc |
149 |
upd_cmd_strs = (afn, idx, move, cmds):cmd_strs |
150 |
when printmove $ do |
151 |
putStrLn sol_line |
152 |
hFlush stdout |
153 |
iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves |
154 |
nmlen imlen upd_cmd_strs min_score |
155 |
mg_limit min_gain evac_mode |
156 |
Nothing -> return (ini_tbl, cmd_strs) |
157 |
|
158 |
-- | Displays the cluster stats. |
159 |
printStats :: Node.List -> Node.List -> IO () |
160 |
printStats ini_nl fin_nl = do |
161 |
let ini_cs = Cluster.totalResources ini_nl |
162 |
fin_cs = Cluster.totalResources fin_nl |
163 |
printf "Original: mem=%d disk=%d\n" |
164 |
(Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO () |
165 |
printf "Final: mem=%d disk=%d\n" |
166 |
(Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs) |
167 |
|
168 |
-- | Saves the rebalance commands to a text file. |
169 |
saveBalanceCommands :: Options -> String -> IO () |
170 |
saveBalanceCommands opts cmd_data = do |
171 |
let out_path = fromJust $ optShowCmds opts |
172 |
putStrLn "" |
173 |
if out_path == "-" |
174 |
then printf "Commands to run to reach the above solution:\n%s" |
175 |
(unlines . map (" " ++) . |
176 |
filter (/= " check") . |
177 |
lines $ cmd_data) |
178 |
else do |
179 |
writeFile out_path (shTemplate ++ cmd_data) |
180 |
printf "The commands have been written to file '%s'\n" out_path |
181 |
|
182 |
-- | Wrapper over execJobSet checking for early termination via an IORef. |
183 |
execCancelWrapper :: Annotator -> String -> Node.List |
184 |
-> Instance.List -> IORef Int -> [JobSet] -> IO (Result ()) |
185 |
execCancelWrapper _ _ _ _ _ [] = return $ Ok () |
186 |
execCancelWrapper anno master nl il cref alljss = do |
187 |
cancel <- readIORef cref |
188 |
if cancel > 0 |
189 |
then return . Bad $ "Exiting early due to user request, " ++ |
190 |
show (length alljss) ++ " jobset(s) remaining." |
191 |
else execJobSet anno master nl il cref alljss |
192 |
|
193 |
-- | Execute an entire jobset. |
194 |
execJobSet :: Annotator -> String -> Node.List |
195 |
-> Instance.List -> IORef Int -> [JobSet] -> IO (Result ()) |
196 |
execJobSet _ _ _ _ _ [] = return $ Ok () |
197 |
execJobSet anno master nl il cref (js:jss) = do |
198 |
-- map from jobset (htools list of positions) to [[opcodes]] |
199 |
let jobs = map (\(_, idx, move, _) -> |
200 |
map anno $ Cluster.iMoveToJob nl il idx move) js |
201 |
descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js |
202 |
logfn = putStrLn . ("Got job IDs" ++) . commaJoin . map (show . fromJobId) |
203 |
putStrLn $ "Executing jobset for instances " ++ commaJoin descr |
204 |
jrs <- bracket (L.getClient master) L.closeClient $ |
205 |
Jobs.execJobsWait jobs logfn |
206 |
case jrs of |
207 |
Bad x -> return $ Bad x |
208 |
Ok x -> if null failures |
209 |
then execCancelWrapper anno master nl il cref jss |
210 |
else return . Bad . unlines $ [ |
211 |
"Not all jobs completed successfully: " ++ show failures, |
212 |
"Aborting."] |
213 |
where |
214 |
failures = filter ((/= JOB_STATUS_SUCCESS) . snd) x |
215 |
|
216 |
-- | Executes the jobs, if possible and desired. |
217 |
maybeExecJobs :: Options |
218 |
-> [a] |
219 |
-> Node.List |
220 |
-> Instance.List |
221 |
-> [JobSet] |
222 |
-> IO (Result ()) |
223 |
maybeExecJobs opts ord_plc fin_nl il cmd_jobs = |
224 |
if optExecJobs opts && not (null ord_plc) |
225 |
then (case optLuxi opts of |
226 |
Nothing -> |
227 |
return $ Bad "Execution of commands possible only on LUXI" |
228 |
Just master -> |
229 |
let annotator = maybe id setOpPriority (optPriority opts) . |
230 |
annotateOpCode |
231 |
in execWithCancel annotator master fin_nl il cmd_jobs) |
232 |
else return $ Ok () |
233 |
|
234 |
-- | Signal handler for graceful termination. |
235 |
handleSigInt :: IORef Int -> IO () |
236 |
handleSigInt cref = do |
237 |
writeIORef cref 1 |
238 |
putStrLn ("Cancel request registered, will exit at" ++ |
239 |
" the end of the current job set...") |
240 |
|
241 |
-- | Signal handler for immediate termination. |
242 |
handleSigTerm :: IORef Int -> IO () |
243 |
handleSigTerm cref = do |
244 |
-- update the cref to 2, just for consistency |
245 |
writeIORef cref 2 |
246 |
putStrLn "Double cancel request, exiting now..." |
247 |
exitImmediately $ ExitFailure 2 |
248 |
|
249 |
-- | Prepares to run a set of jobsets with handling of signals and early |
250 |
-- termination. |
251 |
execWithCancel :: Annotator -> String -> Node.List -> Instance.List -> [JobSet] |
252 |
-> IO (Result ()) |
253 |
execWithCancel anno master fin_nl il cmd_jobs = do |
254 |
cref <- newIORef 0 |
255 |
mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing) |
256 |
[(handleSigTerm, softwareTermination), (handleSigInt, keyboardSignal)] |
257 |
execCancelWrapper anno master fin_nl il cref cmd_jobs |
258 |
|
259 |
-- | Select the target node group. |
260 |
selectGroup :: Options -> Group.List -> Node.List -> Instance.List |
261 |
-> IO (String, (Node.List, Instance.List)) |
262 |
selectGroup opts gl nlf ilf = do |
263 |
let ngroups = Cluster.splitCluster nlf ilf |
264 |
when (length ngroups > 1 && isNothing (optGroup opts)) $ do |
265 |
hPutStrLn stderr "Found multiple node groups:" |
266 |
mapM_ (hPutStrLn stderr . (" " ++) . Group.name . |
267 |
flip Container.find gl . fst) ngroups |
268 |
exitErr "Aborting." |
269 |
|
270 |
case optGroup opts of |
271 |
Nothing -> do |
272 |
(gidx, cdata) <- exitIfEmpty "No groups found by splitCluster?!" ngroups |
273 |
let grp = Container.find gidx gl |
274 |
return (Group.name grp, cdata) |
275 |
Just g -> case Container.findByName gl g of |
276 |
Nothing -> do |
277 |
hPutStrLn stderr $ "Node group " ++ g ++ |
278 |
" not found. Node group list is:" |
279 |
mapM_ (hPutStrLn stderr . (" " ++) . Group.name ) (Container.elems gl) |
280 |
exitErr "Aborting." |
281 |
Just grp -> |
282 |
case lookup (Group.idx grp) ngroups of |
283 |
Nothing -> |
284 |
-- This will only happen if there are no nodes assigned |
285 |
-- to this group |
286 |
return (Group.name grp, (Container.empty, Container.empty)) |
287 |
Just cdata -> return (Group.name grp, cdata) |
288 |
|
289 |
-- | Do a few checks on the cluster data. |
290 |
checkCluster :: Int -> Node.List -> Instance.List -> IO () |
291 |
checkCluster verbose nl il = do |
292 |
-- nothing to do on an empty cluster |
293 |
when (Container.null il) $ do |
294 |
printf "Cluster is empty, exiting.\n"::IO () |
295 |
exitSuccess |
296 |
|
297 |
-- hbal doesn't currently handle split clusters |
298 |
let split_insts = Cluster.findSplitInstances nl il |
299 |
unless (null split_insts || verbose <= 1) $ do |
300 |
hPutStrLn stderr "Found instances belonging to multiple node groups:" |
301 |
mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts |
302 |
hPutStrLn stderr "These instances will not be moved." |
303 |
|
304 |
printf "Loaded %d nodes, %d instances\n" |
305 |
(Container.size nl) |
306 |
(Container.size il)::IO () |
307 |
|
308 |
let csf = commonSuffix nl il |
309 |
when (not (null csf) && verbose > 1) $ |
310 |
printf "Note: Stripping common suffix of '%s' from names\n" csf |
311 |
|
312 |
-- | Do a few checks on the selected group data. |
313 |
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO () |
314 |
checkGroup verbose gname nl il = do |
315 |
printf "Group size %d nodes, %d instances\n" |
316 |
(Container.size nl) |
317 |
(Container.size il)::IO () |
318 |
|
319 |
putStrLn $ "Selected node group: " ++ gname |
320 |
|
321 |
let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il |
322 |
unless (verbose == 0) $ printf |
323 |
"Initial check done: %d bad nodes, %d bad instances.\n" |
324 |
(length bad_nodes) (length bad_instances) |
325 |
|
326 |
unless (null bad_nodes) $ |
327 |
putStrLn "Cluster is not N+1 happy, continuing but no guarantee \ |
328 |
\that the cluster will end N+1 happy." |
329 |
|
330 |
-- | Check that we actually need to rebalance. |
331 |
checkNeedRebalance :: Options -> Score -> IO () |
332 |
checkNeedRebalance opts ini_cv = do |
333 |
let min_cv = optMinScore opts |
334 |
when (ini_cv < min_cv) $ do |
335 |
printf "Cluster is already well balanced (initial score %.6g,\n\ |
336 |
\minimum score %.6g).\nNothing to do, exiting\n" |
337 |
ini_cv min_cv:: IO () |
338 |
exitSuccess |
339 |
|
340 |
-- | Main function. |
341 |
main :: Options -> [String] -> IO () |
342 |
main opts args = do |
343 |
unless (null args) $ exitErr "This program doesn't take any arguments." |
344 |
|
345 |
let verbose = optVerbose opts |
346 |
shownodes = optShowNodes opts |
347 |
showinsts = optShowInsts opts |
348 |
|
349 |
ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts |
350 |
|
351 |
when (verbose > 1) $ do |
352 |
putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags |
353 |
putStrLn $ "Loaded cluster ipolicy: " ++ show ipol |
354 |
|
355 |
nlf <- setNodeStatus opts fixed_nl |
356 |
checkCluster verbose nlf ilf |
357 |
|
358 |
maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata |
359 |
|
360 |
(gname, (nl, il)) <- selectGroup opts gl nlf ilf |
361 |
|
362 |
checkGroup verbose gname nl il |
363 |
|
364 |
maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il) |
365 |
|
366 |
maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl) |
367 |
|
368 |
let ini_cv = Cluster.compCV nl |
369 |
ini_tbl = Cluster.Table nl il ini_cv [] |
370 |
min_cv = optMinScore opts |
371 |
|
372 |
checkNeedRebalance opts ini_cv |
373 |
|
374 |
if verbose > 2 |
375 |
then printf "Initial coefficients: overall %.8f\n%s" |
376 |
ini_cv (Cluster.printStats " " nl)::IO () |
377 |
else printf "Initial score: %.8f\n" ini_cv |
378 |
|
379 |
putStrLn "Trying to minimize the CV..." |
380 |
let imlen = maximum . map (length . Instance.alias) $ Container.elems il |
381 |
nmlen = maximum . map (length . Node.alias) $ Container.elems nl |
382 |
|
383 |
(fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts) |
384 |
(optDiskMoves opts) |
385 |
(optInstMoves opts) |
386 |
nmlen imlen [] min_cv |
387 |
(optMinGainLim opts) (optMinGain opts) |
388 |
(optEvacMode opts) |
389 |
let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl |
390 |
ord_plc = reverse fin_plc |
391 |
sol_msg = case () of |
392 |
_ | null fin_plc -> printf "No solution found\n" |
393 |
| verbose > 2 -> |
394 |
printf "Final coefficients: overall %.8f\n%s" |
395 |
fin_cv (Cluster.printStats " " fin_nl) |
396 |
| otherwise -> |
397 |
printf "Cluster score improved from %.8f to %.8f\n" |
398 |
ini_cv fin_cv ::String |
399 |
|
400 |
putStr sol_msg |
401 |
|
402 |
unless (verbose == 0) $ |
403 |
printf "Solution length=%d\n" (length ord_plc) |
404 |
|
405 |
let cmd_jobs = Cluster.splitJobs cmd_strs |
406 |
|
407 |
when (isJust $ optShowCmds opts) . |
408 |
saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs |
409 |
|
410 |
maybeSaveData (optSaveCluster opts) "balanced" "after balancing" |
411 |
ini_cdata { cdNodes = fin_nl, cdInstances = fin_il } |
412 |
|
413 |
maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il) |
414 |
|
415 |
maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl) |
416 |
|
417 |
when (verbose > 3) $ printStats nl fin_nl |
418 |
|
419 |
exitIfBad "hbal" =<< maybeExecJobs opts ord_plc fin_nl il cmd_jobs |