root / src / Ganeti / HTools / Program / Hbal.hs @ d605e261
History | View | Annotate | Download (14.5 kB)
1 | 525bfb36 | Iustin Pop | {-| Cluster rebalancer. |
---|---|---|---|
2 | e4f08c46 | Iustin Pop | |
3 | e4f08c46 | Iustin Pop | -} |
4 | e4f08c46 | Iustin Pop | |
5 | e2fa2baf | Iustin Pop | {- |
6 | e2fa2baf | Iustin Pop | |
7 | 72747d91 | Iustin Pop | Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc. |
8 | e2fa2baf | Iustin Pop | |
9 | e2fa2baf | Iustin Pop | This program is free software; you can redistribute it and/or modify |
10 | e2fa2baf | Iustin Pop | it under the terms of the GNU General Public License as published by |
11 | e2fa2baf | Iustin Pop | the Free Software Foundation; either version 2 of the License, or |
12 | e2fa2baf | Iustin Pop | (at your option) any later version. |
13 | e2fa2baf | Iustin Pop | |
14 | e2fa2baf | Iustin Pop | This program is distributed in the hope that it will be useful, but |
15 | e2fa2baf | Iustin Pop | WITHOUT ANY WARRANTY; without even the implied warranty of |
16 | e2fa2baf | Iustin Pop | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 | e2fa2baf | Iustin Pop | General Public License for more details. |
18 | e2fa2baf | Iustin Pop | |
19 | e2fa2baf | Iustin Pop | You should have received a copy of the GNU General Public License |
20 | e2fa2baf | Iustin Pop | along with this program; if not, write to the Free Software |
21 | e2fa2baf | Iustin Pop | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
22 | e2fa2baf | Iustin Pop | 02110-1301, USA. |
23 | e2fa2baf | Iustin Pop | |
24 | e2fa2baf | Iustin Pop | -} |
25 | e2fa2baf | Iustin Pop | |
26 | e6685c53 | Agata Murawska | module Ganeti.HTools.Program.Hbal |
27 | 22278fa7 | Iustin Pop | ( main |
28 | 22278fa7 | Iustin Pop | , options |
29 | 22278fa7 | Iustin Pop | , arguments |
30 | 22278fa7 | Iustin Pop | , iterateDepth |
31 | 22278fa7 | Iustin Pop | ) where |
32 | e4f08c46 | Iustin Pop | |
33 | b2245847 | Iustin Pop | import Control.Exception (bracket) |
34 | cc532bdd | Iustin Pop | import Control.Monad |
35 | e4f08c46 | Iustin Pop | import Data.List |
36 | 7c3a6391 | Klaus Aehlig | import Data.Maybe (isNothing) |
37 | 03cb89f0 | Iustin Pop | import Data.IORef |
38 | 7345b69b | Iustin Pop | import System.Exit |
39 | e4f08c46 | Iustin Pop | import System.IO |
40 | 03cb89f0 | Iustin Pop | import System.Posix.Process |
41 | 03cb89f0 | Iustin Pop | import System.Posix.Signals |
42 | e4f08c46 | Iustin Pop | |
43 | 42afc235 | Dato Simó | import Text.Printf (printf) |
44 | e4f08c46 | Iustin Pop | |
45 | 669d7e3d | Iustin Pop | import qualified Ganeti.HTools.Container as Container |
46 | 669d7e3d | Iustin Pop | import qualified Ganeti.HTools.Cluster as Cluster |
47 | 10ef6b4e | Iustin Pop | import qualified Ganeti.HTools.Group as Group |
48 | ec18dca9 | Iustin Pop | import qualified Ganeti.HTools.Node as Node |
49 | b2245847 | Iustin Pop | import qualified Ganeti.HTools.Instance as Instance |
50 | 040afc35 | Iustin Pop | |
51 | 01e52493 | Iustin Pop | import Ganeti.BasicTypes |
52 | 22278fa7 | Iustin Pop | import Ganeti.Common |
53 | 0427285d | Iustin Pop | import Ganeti.HTools.CLI |
54 | e8f89bb6 | Iustin Pop | import Ganeti.HTools.ExtLoader |
55 | 0e8ae201 | Iustin Pop | import Ganeti.HTools.Types |
56 | 4938fa30 | Guido Trotter | import Ganeti.HTools.Loader |
57 | a81ca843 | Iustin Pop | import Ganeti.OpCodes (wrapOpCode, setOpComment, setOpPriority, |
58 | a81ca843 | Iustin Pop | OpCode, MetaOpCode) |
59 | 369671f4 | Dato Simó | import Ganeti.Jobs as Jobs |
60 | 3bdbe4b3 | Dato Simó | import Ganeti.Types |
61 | 26d62e4c | Iustin Pop | import Ganeti.Utils |
62 | e4f08c46 | Iustin Pop | |
63 | b2245847 | Iustin Pop | import qualified Ganeti.Luxi as L |
64 | 7e723913 | Iustin Pop | import Ganeti.Version (version) |
65 | b2245847 | Iustin Pop | |
66 | 179c0828 | Iustin Pop | -- | Options list and functions. |
67 | d66aa238 | Iustin Pop | options :: IO [OptType] |
68 | 29a30533 | Iustin Pop | options = do |
69 | 29a30533 | Iustin Pop | luxi <- oLuxiSocket |
70 | d66aa238 | Iustin Pop | return |
71 | d66aa238 | Iustin Pop | [ oPrintNodes |
72 | d66aa238 | Iustin Pop | , oPrintInsts |
73 | d66aa238 | Iustin Pop | , oPrintCommands |
74 | d66aa238 | Iustin Pop | , oDataFile |
75 | d66aa238 | Iustin Pop | , oEvacMode |
76 | d66aa238 | Iustin Pop | , oRapiMaster |
77 | 29a30533 | Iustin Pop | , luxi |
78 | d66aa238 | Iustin Pop | , oIAllocSrc |
79 | d66aa238 | Iustin Pop | , oExecJobs |
80 | d66aa238 | Iustin Pop | , oGroup |
81 | d66aa238 | Iustin Pop | , oMaxSolLength |
82 | d66aa238 | Iustin Pop | , oVerbose |
83 | d66aa238 | Iustin Pop | , oQuiet |
84 | d66aa238 | Iustin Pop | , oOfflineNode |
85 | d66aa238 | Iustin Pop | , oMinScore |
86 | d66aa238 | Iustin Pop | , oMaxCpu |
87 | d66aa238 | Iustin Pop | , oMinDisk |
88 | d66aa238 | Iustin Pop | , oMinGain |
89 | d66aa238 | Iustin Pop | , oMinGainLim |
90 | d66aa238 | Iustin Pop | , oDiskMoves |
91 | d66aa238 | Iustin Pop | , oSelInst |
92 | d66aa238 | Iustin Pop | , oInstMoves |
93 | d66aa238 | Iustin Pop | , oDynuFile |
94 | 14b4e045 | Klaus Aehlig | , oIgnoreDyn |
95 | c32c4e4d | Spyros Trigazis | , oMonD |
96 | c62bec27 | Spyros Trigazis | , oMonDDataFile |
97 | d66aa238 | Iustin Pop | , oExTags |
98 | d66aa238 | Iustin Pop | , oExInst |
99 | d66aa238 | Iustin Pop | , oSaveCluster |
100 | a81ca843 | Iustin Pop | , oPriority |
101 | d66aa238 | Iustin Pop | ] |
102 | e4f08c46 | Iustin Pop | |
103 | 22278fa7 | Iustin Pop | -- | The list of arguments supported by the program. |
104 | 22278fa7 | Iustin Pop | arguments :: [ArgCompletion] |
105 | 22278fa7 | Iustin Pop | arguments = [] |
106 | 22278fa7 | Iustin Pop | |
107 | d41f6558 | Iustin Pop | -- | A simple type alias for clearer signature. |
108 | d41f6558 | Iustin Pop | type Annotator = OpCode -> MetaOpCode |
109 | d41f6558 | Iustin Pop | |
110 | 7e723913 | Iustin Pop | -- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment |
111 | 7e723913 | Iustin Pop | -- about what generated the opcode. |
112 | d41f6558 | Iustin Pop | annotateOpCode :: Annotator |
113 | 7e723913 | Iustin Pop | annotateOpCode = |
114 | 7e723913 | Iustin Pop | setOpComment ("rebalancing via hbal " ++ version) . wrapOpCode |
115 | 7e723913 | Iustin Pop | |
116 | 6dc960bc | Iustin Pop | {- | Start computing the solution at the given depth and recurse until |
117 | 6dc960bc | Iustin Pop | we find a valid solution or we exceed the maximum depth. |
118 | 6dc960bc | Iustin Pop | |
119 | 6dc960bc | Iustin Pop | -} |
120 | e6685c53 | Agata Murawska | iterateDepth :: Bool -- ^ Whether to print moves |
121 | e6685c53 | Agata Murawska | -> Cluster.Table -- ^ The starting table |
122 | 6dc960bc | Iustin Pop | -> Int -- ^ Remaining length |
123 | c0501c69 | Iustin Pop | -> Bool -- ^ Allow disk moves |
124 | e7f7c003 | Guido Trotter | -> Bool -- ^ Allow instance moves |
125 | 6dc960bc | Iustin Pop | -> Int -- ^ Max node name len |
126 | 6dc960bc | Iustin Pop | -> Int -- ^ Max instance name len |
127 | 0e8ae201 | Iustin Pop | -> [MoveJob] -- ^ Current command list |
128 | 92e32d76 | Iustin Pop | -> Score -- ^ Score at which to stop |
129 | 848b65c9 | Iustin Pop | -> Score -- ^ Min gain limit |
130 | 848b65c9 | Iustin Pop | -> Score -- ^ Min score gain |
131 | 2e28ac32 | Iustin Pop | -> Bool -- ^ Enable evacuation mode |
132 | 0e8ae201 | Iustin Pop | -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table |
133 | 0e8ae201 | Iustin Pop | -- and commands |
134 | e6685c53 | Agata Murawska | iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen |
135 | e19ee6e4 | Iustin Pop | cmd_strs min_score mg_limit min_gain evac_mode = |
136 | 2ba17362 | Iustin Pop | let Cluster.Table ini_nl ini_il _ _ = ini_tbl |
137 | 2ba17362 | Iustin Pop | allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score |
138 | 2ba17362 | Iustin Pop | m_fin_tbl = if allowed_next |
139 | e08424a8 | Guido Trotter | then Cluster.tryBalance ini_tbl disk_moves inst_moves |
140 | e08424a8 | Guido Trotter | evac_mode mg_limit min_gain |
141 | 5ad86777 | Iustin Pop | else Nothing |
142 | 2ba17362 | Iustin Pop | in case m_fin_tbl of |
143 | 2ba17362 | Iustin Pop | Just fin_tbl -> |
144 | 2ba17362 | Iustin Pop | do |
145 | 2ba17362 | Iustin Pop | let (Cluster.Table _ _ _ fin_plc) = fin_tbl |
146 | 72747d91 | Iustin Pop | cur_plc@(idx, _, _, move, _) <- |
147 | 72747d91 | Iustin Pop | exitIfEmpty "Empty placement list returned for solution?!" fin_plc |
148 | 72747d91 | Iustin Pop | let fin_plc_len = length fin_plc |
149 | 2ba17362 | Iustin Pop | (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il |
150 | 2ba17362 | Iustin Pop | nmlen imlen cur_plc fin_plc_len |
151 | 2ba17362 | Iustin Pop | afn = Cluster.involvedNodes ini_il cur_plc |
152 | 2ba17362 | Iustin Pop | upd_cmd_strs = (afn, idx, move, cmds):cmd_strs |
153 | e6685c53 | Agata Murawska | when printmove $ do |
154 | e6685c53 | Agata Murawska | putStrLn sol_line |
155 | e6685c53 | Agata Murawska | hFlush stdout |
156 | e6685c53 | Agata Murawska | iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves |
157 | 2ba17362 | Iustin Pop | nmlen imlen upd_cmd_strs min_score |
158 | 2ba17362 | Iustin Pop | mg_limit min_gain evac_mode |
159 | 2ba17362 | Iustin Pop | Nothing -> return (ini_tbl, cmd_strs) |
160 | 6dc960bc | Iustin Pop | |
161 | 5dad2589 | Iustin Pop | -- | Displays the cluster stats. |
162 | 5dad2589 | Iustin Pop | printStats :: Node.List -> Node.List -> IO () |
163 | 5dad2589 | Iustin Pop | printStats ini_nl fin_nl = do |
164 | 5dad2589 | Iustin Pop | let ini_cs = Cluster.totalResources ini_nl |
165 | 5dad2589 | Iustin Pop | fin_cs = Cluster.totalResources fin_nl |
166 | 5dad2589 | Iustin Pop | printf "Original: mem=%d disk=%d\n" |
167 | 5dad2589 | Iustin Pop | (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO () |
168 | 5dad2589 | Iustin Pop | printf "Final: mem=%d disk=%d\n" |
169 | 5dad2589 | Iustin Pop | (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs) |
170 | 5dad2589 | Iustin Pop | |
171 | 3b23f238 | Dato Simó | -- | Wrapper over execJobSet checking for early termination via an IORef. |
172 | d41f6558 | Iustin Pop | execCancelWrapper :: Annotator -> String -> Node.List |
173 | 42afc235 | Dato Simó | -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ()) |
174 | d41f6558 | Iustin Pop | execCancelWrapper _ _ _ _ _ [] = return $ Ok () |
175 | d41f6558 | Iustin Pop | execCancelWrapper anno master nl il cref alljss = do |
176 | 23448f82 | Iustin Pop | cancel <- readIORef cref |
177 | 3603605a | Iustin Pop | if cancel > 0 |
178 | 2d6bdcc5 | Iustin Pop | then do |
179 | 2d6bdcc5 | Iustin Pop | putStrLn $ "Exiting early due to user request, " ++ |
180 | 2d6bdcc5 | Iustin Pop | show (length alljss) ++ " jobset(s) remaining." |
181 | 2d6bdcc5 | Iustin Pop | return $ Ok () |
182 | d41f6558 | Iustin Pop | else execJobSet anno master nl il cref alljss |
183 | 23448f82 | Iustin Pop | |
184 | 179c0828 | Iustin Pop | -- | Execute an entire jobset. |
185 | d41f6558 | Iustin Pop | execJobSet :: Annotator -> String -> Node.List |
186 | 42afc235 | Dato Simó | -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ()) |
187 | d41f6558 | Iustin Pop | execJobSet _ _ _ _ _ [] = return $ Ok () |
188 | d41f6558 | Iustin Pop | execJobSet anno master nl il cref (js:jss) = do |
189 | b2245847 | Iustin Pop | -- map from jobset (htools list of positions) to [[opcodes]] |
190 | b2245847 | Iustin Pop | let jobs = map (\(_, idx, move, _) -> |
191 | d41f6558 | Iustin Pop | map anno $ Cluster.iMoveToJob nl il idx move) js |
192 | 369671f4 | Dato Simó | descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js |
193 | 369671f4 | Dato Simó | logfn = putStrLn . ("Got job IDs" ++) . commaJoin . map (show . fromJobId) |
194 | b2245847 | Iustin Pop | putStrLn $ "Executing jobset for instances " ++ commaJoin descr |
195 | d605e261 | Petr Pudlak | jrs <- bracket (L.getLuxiClient master) L.closeClient $ |
196 | 164947cc | Dato Simó | Jobs.execJobsWait jobs logfn |
197 | 3603605a | Iustin Pop | case jrs of |
198 | 42afc235 | Dato Simó | Bad x -> return $ Bad x |
199 | 369671f4 | Dato Simó | Ok x -> if null failures |
200 | d41f6558 | Iustin Pop | then execCancelWrapper anno master nl il cref jss |
201 | 42afc235 | Dato Simó | else return . Bad . unlines $ [ |
202 | 369671f4 | Dato Simó | "Not all jobs completed successfully: " ++ show failures, |
203 | 42afc235 | Dato Simó | "Aborting."] |
204 | 369671f4 | Dato Simó | where |
205 | 369671f4 | Dato Simó | failures = filter ((/= JOB_STATUS_SUCCESS) . snd) x |
206 | b2245847 | Iustin Pop | |
207 | 5dad2589 | Iustin Pop | -- | Executes the jobs, if possible and desired. |
208 | 5dad2589 | Iustin Pop | maybeExecJobs :: Options |
209 | 5dad2589 | Iustin Pop | -> [a] |
210 | 5dad2589 | Iustin Pop | -> Node.List |
211 | 5dad2589 | Iustin Pop | -> Instance.List |
212 | 5dad2589 | Iustin Pop | -> [JobSet] |
213 | 42afc235 | Dato Simó | -> IO (Result ()) |
214 | 5dad2589 | Iustin Pop | maybeExecJobs opts ord_plc fin_nl il cmd_jobs = |
215 | 2ba17362 | Iustin Pop | if optExecJobs opts && not (null ord_plc) |
216 | 5dad2589 | Iustin Pop | then (case optLuxi opts of |
217 | d41f6558 | Iustin Pop | Nothing -> |
218 | d41f6558 | Iustin Pop | return $ Bad "Execution of commands possible only on LUXI" |
219 | d41f6558 | Iustin Pop | Just master -> |
220 | a81ca843 | Iustin Pop | let annotator = maybe id setOpPriority (optPriority opts) . |
221 | a81ca843 | Iustin Pop | annotateOpCode |
222 | a81ca843 | Iustin Pop | in execWithCancel annotator master fin_nl il cmd_jobs) |
223 | 42afc235 | Dato Simó | else return $ Ok () |
224 | 5dad2589 | Iustin Pop | |
225 | 179c0828 | Iustin Pop | -- | Signal handler for graceful termination. |
226 | 9cd4288e | Dato Simó | handleSigInt :: IORef Int -> IO () |
227 | 9cd4288e | Dato Simó | handleSigInt cref = do |
228 | 543e859d | Iustin Pop | writeIORef cref 1 |
229 | 543e859d | Iustin Pop | putStrLn ("Cancel request registered, will exit at" ++ |
230 | 543e859d | Iustin Pop | " the end of the current job set...") |
231 | 543e859d | Iustin Pop | |
232 | 179c0828 | Iustin Pop | -- | Signal handler for immediate termination. |
233 | 9cd4288e | Dato Simó | handleSigTerm :: IORef Int -> IO () |
234 | 9cd4288e | Dato Simó | handleSigTerm cref = do |
235 | 543e859d | Iustin Pop | -- update the cref to 2, just for consistency |
236 | 543e859d | Iustin Pop | writeIORef cref 2 |
237 | 543e859d | Iustin Pop | putStrLn "Double cancel request, exiting now..." |
238 | 543e859d | Iustin Pop | exitImmediately $ ExitFailure 2 |
239 | 03cb89f0 | Iustin Pop | |
240 | 3b23f238 | Dato Simó | -- | Prepares to run a set of jobsets with handling of signals and early |
241 | 3b23f238 | Dato Simó | -- termination. |
242 | d41f6558 | Iustin Pop | execWithCancel :: Annotator -> String -> Node.List -> Instance.List -> [JobSet] |
243 | 42afc235 | Dato Simó | -> IO (Result ()) |
244 | d41f6558 | Iustin Pop | execWithCancel anno master fin_nl il cmd_jobs = do |
245 | 03cb89f0 | Iustin Pop | cref <- newIORef 0 |
246 | 543e859d | Iustin Pop | mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing) |
247 | 9cd4288e | Dato Simó | [(handleSigTerm, softwareTermination), (handleSigInt, keyboardSignal)] |
248 | d41f6558 | Iustin Pop | execCancelWrapper anno master fin_nl il cref cmd_jobs |
249 | 03cb89f0 | Iustin Pop | |
250 | 5dad2589 | Iustin Pop | -- | Select the target node group. |
251 | 5dad2589 | Iustin Pop | selectGroup :: Options -> Group.List -> Node.List -> Instance.List |
252 | 5dad2589 | Iustin Pop | -> IO (String, (Node.List, Instance.List)) |
253 | 5dad2589 | Iustin Pop | selectGroup opts gl nlf ilf = do |
254 | 646aa028 | Iustin Pop | let ngroups = Cluster.splitCluster nlf ilf |
255 | 646aa028 | Iustin Pop | when (length ngroups > 1 && isNothing (optGroup opts)) $ do |
256 | 646aa028 | Iustin Pop | hPutStrLn stderr "Found multiple node groups:" |
257 | e0c85e08 | Iustin Pop | mapM_ (hPutStrLn stderr . (" " ++) . Group.name . |
258 | d5072e4c | Iustin Pop | flip Container.find gl . fst) ngroups |
259 | 707cd3d7 | Helga Velroyen | exitErr "Aborting." |
260 | 646aa028 | Iustin Pop | |
261 | 5dad2589 | Iustin Pop | case optGroup opts of |
262 | 10ef6b4e | Iustin Pop | Nothing -> do |
263 | 72747d91 | Iustin Pop | (gidx, cdata) <- exitIfEmpty "No groups found by splitCluster?!" ngroups |
264 | 72747d91 | Iustin Pop | let grp = Container.find gidx gl |
265 | 2ba17362 | Iustin Pop | return (Group.name grp, cdata) |
266 | 10ef6b4e | Iustin Pop | Just g -> case Container.findByName gl g of |
267 | 646aa028 | Iustin Pop | Nothing -> do |
268 | 646aa028 | Iustin Pop | hPutStrLn stderr $ "Node group " ++ g ++ |
269 | 646aa028 | Iustin Pop | " not found. Node group list is:" |
270 | 10ef6b4e | Iustin Pop | mapM_ (hPutStrLn stderr . (" " ++) . Group.name ) (Container.elems gl) |
271 | 707cd3d7 | Helga Velroyen | exitErr "Aborting." |
272 | 10ef6b4e | Iustin Pop | Just grp -> |
273 | 10ef6b4e | Iustin Pop | case lookup (Group.idx grp) ngroups of |
274 | 3603605a | Iustin Pop | Nothing -> |
275 | 2072221f | Iustin Pop | -- This will only happen if there are no nodes assigned |
276 | 2072221f | Iustin Pop | -- to this group |
277 | 2072221f | Iustin Pop | return (Group.name grp, (Container.empty, Container.empty)) |
278 | 10ef6b4e | Iustin Pop | Just cdata -> return (Group.name grp, cdata) |
279 | 646aa028 | Iustin Pop | |
280 | 5dad2589 | Iustin Pop | -- | Do a few checks on the cluster data. |
281 | e19ee6e4 | Iustin Pop | checkCluster :: Int -> Node.List -> Instance.List -> IO () |
282 | e19ee6e4 | Iustin Pop | checkCluster verbose nl il = do |
283 | 5dad2589 | Iustin Pop | -- nothing to do on an empty cluster |
284 | 5dad2589 | Iustin Pop | when (Container.null il) $ do |
285 | e19ee6e4 | Iustin Pop | printf "Cluster is empty, exiting.\n"::IO () |
286 | 2cdaf225 | Iustin Pop | exitSuccess |
287 | 5dad2589 | Iustin Pop | |
288 | 5dad2589 | Iustin Pop | -- hbal doesn't currently handle split clusters |
289 | 5dad2589 | Iustin Pop | let split_insts = Cluster.findSplitInstances nl il |
290 | a7e1fd89 | Iustin Pop | unless (null split_insts || verbose <= 1) $ do |
291 | 5dad2589 | Iustin Pop | hPutStrLn stderr "Found instances belonging to multiple node groups:" |
292 | 5dad2589 | Iustin Pop | mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts |
293 | a7e1fd89 | Iustin Pop | hPutStrLn stderr "These instances will not be moved." |
294 | 5dad2589 | Iustin Pop | |
295 | e19ee6e4 | Iustin Pop | printf "Loaded %d nodes, %d instances\n" |
296 | 5dad2589 | Iustin Pop | (Container.size nl) |
297 | e19ee6e4 | Iustin Pop | (Container.size il)::IO () |
298 | 5dad2589 | Iustin Pop | |
299 | 5dad2589 | Iustin Pop | let csf = commonSuffix nl il |
300 | e19ee6e4 | Iustin Pop | when (not (null csf) && verbose > 1) $ |
301 | 5dad2589 | Iustin Pop | printf "Note: Stripping common suffix of '%s' from names\n" csf |
302 | 5dad2589 | Iustin Pop | |
303 | 5dad2589 | Iustin Pop | -- | Do a few checks on the selected group data. |
304 | e19ee6e4 | Iustin Pop | checkGroup :: Int -> String -> Node.List -> Instance.List -> IO () |
305 | e19ee6e4 | Iustin Pop | checkGroup verbose gname nl il = do |
306 | e19ee6e4 | Iustin Pop | printf "Group size %d nodes, %d instances\n" |
307 | e4f08c46 | Iustin Pop | (Container.size nl) |
308 | e19ee6e4 | Iustin Pop | (Container.size il)::IO () |
309 | a0529a64 | Iustin Pop | |
310 | 10ef6b4e | Iustin Pop | putStrLn $ "Selected node group: " ++ gname |
311 | 646aa028 | Iustin Pop | |
312 | e4f08c46 | Iustin Pop | let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il |
313 | bfa99f7a | Iustin Pop | unless (verbose < 1) $ printf |
314 | 27f96567 | Iustin Pop | "Initial check done: %d bad nodes, %d bad instances.\n" |
315 | e4f08c46 | Iustin Pop | (length bad_nodes) (length bad_instances) |
316 | e4f08c46 | Iustin Pop | |
317 | 2cdaf225 | Iustin Pop | unless (null bad_nodes) $ |
318 | 289c3835 | Iustin Pop | putStrLn "Cluster is not N+1 happy, continuing but no guarantee \ |
319 | 289c3835 | Iustin Pop | \that the cluster will end N+1 happy." |
320 | e4f08c46 | Iustin Pop | |
321 | 5dad2589 | Iustin Pop | -- | Check that we actually need to rebalance. |
322 | 5dad2589 | Iustin Pop | checkNeedRebalance :: Options -> Score -> IO () |
323 | 5dad2589 | Iustin Pop | checkNeedRebalance opts ini_cv = do |
324 | 5dad2589 | Iustin Pop | let min_cv = optMinScore opts |
325 | 5dad2589 | Iustin Pop | when (ini_cv < min_cv) $ do |
326 | e19ee6e4 | Iustin Pop | printf "Cluster is already well balanced (initial score %.6g,\n\ |
327 | e19ee6e4 | Iustin Pop | \minimum score %.6g).\nNothing to do, exiting\n" |
328 | e19ee6e4 | Iustin Pop | ini_cv min_cv:: IO () |
329 | 2cdaf225 | Iustin Pop | exitSuccess |
330 | 5dad2589 | Iustin Pop | |
331 | 5dad2589 | Iustin Pop | -- | Main function. |
332 | 21839f47 | Iustin Pop | main :: Options -> [String] -> IO () |
333 | 21839f47 | Iustin Pop | main opts args = do |
334 | 707cd3d7 | Helga Velroyen | unless (null args) $ exitErr "This program doesn't take any arguments." |
335 | 5dad2589 | Iustin Pop | |
336 | e19ee6e4 | Iustin Pop | let verbose = optVerbose opts |
337 | 5dad2589 | Iustin Pop | shownodes = optShowNodes opts |
338 | 5dad2589 | Iustin Pop | showinsts = optShowInsts opts |
339 | 5dad2589 | Iustin Pop | |
340 | e77bc89b | Iustin Pop | ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts |
341 | 5dad2589 | Iustin Pop | |
342 | e77bc89b | Iustin Pop | when (verbose > 1) $ do |
343 | 5dad2589 | Iustin Pop | putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags |
344 | e77bc89b | Iustin Pop | putStrLn $ "Loaded cluster ipolicy: " ++ show ipol |
345 | 5dad2589 | Iustin Pop | |
346 | 5296ee23 | Iustin Pop | nlf <- setNodeStatus opts fixed_nl |
347 | e19ee6e4 | Iustin Pop | checkCluster verbose nlf ilf |
348 | 5dad2589 | Iustin Pop | |
349 | 5dad2589 | Iustin Pop | maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata |
350 | 5dad2589 | Iustin Pop | |
351 | 5dad2589 | Iustin Pop | (gname, (nl, il)) <- selectGroup opts gl nlf ilf |
352 | 5dad2589 | Iustin Pop | |
353 | e19ee6e4 | Iustin Pop | checkGroup verbose gname nl il |
354 | 5dad2589 | Iustin Pop | |
355 | 33e44f0c | Iustin Pop | maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il) |
356 | 507fda3f | Iustin Pop | |
357 | 417f6b50 | Iustin Pop | maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl) |
358 | e4f08c46 | Iustin Pop | |
359 | e4f08c46 | Iustin Pop | let ini_cv = Cluster.compCV nl |
360 | e4f08c46 | Iustin Pop | ini_tbl = Cluster.Table nl il ini_cv [] |
361 | b0517d61 | Iustin Pop | min_cv = optMinScore opts |
362 | b0517d61 | Iustin Pop | |
363 | 5dad2589 | Iustin Pop | checkNeedRebalance opts ini_cv |
364 | b0517d61 | Iustin Pop | |
365 | 3603605a | Iustin Pop | if verbose > 2 |
366 | 2922d2c5 | René Nussbaumer | then printf "Initial coefficients: overall %.8f\n%s" |
367 | 2922d2c5 | René Nussbaumer | ini_cv (Cluster.printStats " " nl)::IO () |
368 | 3603605a | Iustin Pop | else printf "Initial score: %.8f\n" ini_cv |
369 | e4f08c46 | Iustin Pop | |
370 | e19ee6e4 | Iustin Pop | putStrLn "Trying to minimize the CV..." |
371 | 14c972c7 | Iustin Pop | let imlen = maximum . map (length . Instance.alias) $ Container.elems il |
372 | 14c972c7 | Iustin Pop | nmlen = maximum . map (length . Node.alias) $ Container.elems nl |
373 | 7dfaafb1 | Iustin Pop | |
374 | e6685c53 | Agata Murawska | (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts) |
375 | c0501c69 | Iustin Pop | (optDiskMoves opts) |
376 | e7f7c003 | Guido Trotter | (optInstMoves opts) |
377 | e19ee6e4 | Iustin Pop | nmlen imlen [] min_cv |
378 | 848b65c9 | Iustin Pop | (optMinGainLim opts) (optMinGain opts) |
379 | 848b65c9 | Iustin Pop | (optEvacMode opts) |
380 | 507fda3f | Iustin Pop | let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl |
381 | e4f08c46 | Iustin Pop | ord_plc = reverse fin_plc |
382 | cc532bdd | Iustin Pop | sol_msg = case () of |
383 | cc532bdd | Iustin Pop | _ | null fin_plc -> printf "No solution found\n" |
384 | cc532bdd | Iustin Pop | | verbose > 2 -> |
385 | 2922d2c5 | René Nussbaumer | printf "Final coefficients: overall %.8f\n%s" |
386 | 2922d2c5 | René Nussbaumer | fin_cv (Cluster.printStats " " fin_nl) |
387 | cc532bdd | Iustin Pop | | otherwise -> |
388 | cc532bdd | Iustin Pop | printf "Cluster score improved from %.8f to %.8f\n" |
389 | cc532bdd | Iustin Pop | ini_cv fin_cv ::String |
390 | e4f08c46 | Iustin Pop | |
391 | e19ee6e4 | Iustin Pop | putStr sol_msg |
392 | 7eff5b09 | Iustin Pop | |
393 | bfa99f7a | Iustin Pop | unless (verbose < 1) $ |
394 | 7eff5b09 | Iustin Pop | printf "Solution length=%d\n" (length ord_plc) |
395 | e4f08c46 | Iustin Pop | |
396 | b2245847 | Iustin Pop | let cmd_jobs = Cluster.splitJobs cmd_strs |
397 | e0eb63f0 | Iustin Pop | |
398 | 7c3a6391 | Klaus Aehlig | maybeSaveCommands "Commands to run to reach the above solution:" opts |
399 | 7c3a6391 | Klaus Aehlig | $ Cluster.formatCmds cmd_jobs |
400 | e0eb63f0 | Iustin Pop | |
401 | 4188449c | Iustin Pop | maybeSaveData (optSaveCluster opts) "balanced" "after balancing" |
402 | 71375ef7 | Iustin Pop | ini_cdata { cdNodes = fin_nl, cdInstances = fin_il } |
403 | b2245847 | Iustin Pop | |
404 | 33e44f0c | Iustin Pop | maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il) |
405 | 507fda3f | Iustin Pop | |
406 | 417f6b50 | Iustin Pop | maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl) |
407 | 417f6b50 | Iustin Pop | |
408 | 5dad2589 | Iustin Pop | when (verbose > 3) $ printStats nl fin_nl |
409 | 5dad2589 | Iustin Pop | |
410 | 42afc235 | Dato Simó | exitIfBad "hbal" =<< maybeExecJobs opts ord_plc fin_nl il cmd_jobs |