Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ ad0e078e

History | View | Annotate | Download (14.7 kB)

1 525bfb36 Iustin Pop
{-| Cluster rebalancer.
2 e4f08c46 Iustin Pop
3 e4f08c46 Iustin Pop
-}
4 e4f08c46 Iustin Pop
5 e2fa2baf Iustin Pop
{-
6 e2fa2baf Iustin Pop
7 21839f47 Iustin Pop
Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
8 e2fa2baf Iustin Pop
9 e2fa2baf Iustin Pop
This program is free software; you can redistribute it and/or modify
10 e2fa2baf Iustin Pop
it under the terms of the GNU General Public License as published by
11 e2fa2baf Iustin Pop
the Free Software Foundation; either version 2 of the License, or
12 e2fa2baf Iustin Pop
(at your option) any later version.
13 e2fa2baf Iustin Pop
14 e2fa2baf Iustin Pop
This program is distributed in the hope that it will be useful, but
15 e2fa2baf Iustin Pop
WITHOUT ANY WARRANTY; without even the implied warranty of
16 e2fa2baf Iustin Pop
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 e2fa2baf Iustin Pop
General Public License for more details.
18 e2fa2baf Iustin Pop
19 e2fa2baf Iustin Pop
You should have received a copy of the GNU General Public License
20 e2fa2baf Iustin Pop
along with this program; if not, write to the Free Software
21 e2fa2baf Iustin Pop
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 e2fa2baf Iustin Pop
02110-1301, USA.
23 e2fa2baf Iustin Pop
24 e2fa2baf Iustin Pop
-}
25 e2fa2baf Iustin Pop
26 21839f47 Iustin Pop
module Ganeti.HTools.Program.Hbal (main, options) where
27 e4f08c46 Iustin Pop
28 b2245847 Iustin Pop
import Control.Concurrent (threadDelay)
29 b2245847 Iustin Pop
import Control.Exception (bracket)
30 cc532bdd Iustin Pop
import Control.Monad
31 e4f08c46 Iustin Pop
import Data.List
32 646aa028 Iustin Pop
import Data.Maybe (isJust, isNothing, fromJust)
33 03cb89f0 Iustin Pop
import Data.IORef
34 7345b69b Iustin Pop
import System.Exit
35 e4f08c46 Iustin Pop
import System.IO
36 03cb89f0 Iustin Pop
import System.Posix.Process
37 03cb89f0 Iustin Pop
import System.Posix.Signals
38 e4f08c46 Iustin Pop
39 2795466b Iustin Pop
import Text.Printf (printf, hPrintf)
40 e4f08c46 Iustin Pop
41 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Container as Container
42 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Cluster as Cluster
43 10ef6b4e Iustin Pop
import qualified Ganeti.HTools.Group as Group
44 ec18dca9 Iustin Pop
import qualified Ganeti.HTools.Node as Node
45 b2245847 Iustin Pop
import qualified Ganeti.HTools.Instance as Instance
46 040afc35 Iustin Pop
47 0427285d Iustin Pop
import Ganeti.HTools.CLI
48 e8f89bb6 Iustin Pop
import Ganeti.HTools.ExtLoader
49 669d7e3d Iustin Pop
import Ganeti.HTools.Utils
50 0e8ae201 Iustin Pop
import Ganeti.HTools.Types
51 4938fa30 Guido Trotter
import Ganeti.HTools.Loader
52 e4f08c46 Iustin Pop
53 b2245847 Iustin Pop
import qualified Ganeti.Luxi as L
54 b2245847 Iustin Pop
import Ganeti.Jobs
55 b2245847 Iustin Pop
56 179c0828 Iustin Pop
-- | Options list and functions.
57 0427285d Iustin Pop
options :: [OptType]
58 e4f08c46 Iustin Pop
options =
59 2ba17362 Iustin Pop
  [ oPrintNodes
60 2ba17362 Iustin Pop
  , oPrintInsts
61 2ba17362 Iustin Pop
  , oPrintCommands
62 2ba17362 Iustin Pop
  , oDataFile
63 2ba17362 Iustin Pop
  , oEvacMode
64 2ba17362 Iustin Pop
  , oRapiMaster
65 2ba17362 Iustin Pop
  , oLuxiSocket
66 2ba17362 Iustin Pop
  , oExecJobs
67 2ba17362 Iustin Pop
  , oGroup
68 2ba17362 Iustin Pop
  , oMaxSolLength
69 2ba17362 Iustin Pop
  , oVerbose
70 2ba17362 Iustin Pop
  , oQuiet
71 2ba17362 Iustin Pop
  , oOfflineNode
72 2ba17362 Iustin Pop
  , oMinScore
73 2ba17362 Iustin Pop
  , oMaxCpu
74 2ba17362 Iustin Pop
  , oMinDisk
75 2ba17362 Iustin Pop
  , oMinGain
76 2ba17362 Iustin Pop
  , oMinGainLim
77 2ba17362 Iustin Pop
  , oDiskMoves
78 2ba17362 Iustin Pop
  , oSelInst
79 2ba17362 Iustin Pop
  , oInstMoves
80 2ba17362 Iustin Pop
  , oDynuFile
81 2ba17362 Iustin Pop
  , oExTags
82 2ba17362 Iustin Pop
  , oExInst
83 2ba17362 Iustin Pop
  , oSaveCluster
84 2ba17362 Iustin Pop
  , oShowVer
85 2ba17362 Iustin Pop
  , oShowHelp
86 2ba17362 Iustin Pop
  ]
87 e4f08c46 Iustin Pop
88 6dc960bc Iustin Pop
{- | Start computing the solution at the given depth and recurse until
89 6dc960bc Iustin Pop
we find a valid solution or we exceed the maximum depth.
90 6dc960bc Iustin Pop
91 6dc960bc Iustin Pop
-}
92 6dc960bc Iustin Pop
iterateDepth :: Cluster.Table    -- ^ The starting table
93 6dc960bc Iustin Pop
             -> Int              -- ^ Remaining length
94 c0501c69 Iustin Pop
             -> Bool             -- ^ Allow disk moves
95 e7f7c003 Guido Trotter
             -> Bool             -- ^ Allow instance moves
96 6dc960bc Iustin Pop
             -> Int              -- ^ Max node name len
97 6dc960bc Iustin Pop
             -> Int              -- ^ Max instance name len
98 0e8ae201 Iustin Pop
             -> [MoveJob]        -- ^ Current command list
99 92e32d76 Iustin Pop
             -> Score            -- ^ Score at which to stop
100 848b65c9 Iustin Pop
             -> Score            -- ^ Min gain limit
101 848b65c9 Iustin Pop
             -> Score            -- ^ Min score gain
102 2e28ac32 Iustin Pop
             -> Bool             -- ^ Enable evacuation mode
103 0e8ae201 Iustin Pop
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
104 0e8ae201 Iustin Pop
                                              -- and commands
105 e7f7c003 Guido Trotter
iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
106 e19ee6e4 Iustin Pop
             cmd_strs min_score mg_limit min_gain evac_mode =
107 2ba17362 Iustin Pop
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
108 2ba17362 Iustin Pop
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
109 2ba17362 Iustin Pop
      m_fin_tbl = if allowed_next
110 e08424a8 Guido Trotter
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
111 e08424a8 Guido Trotter
                         evac_mode mg_limit min_gain
112 5ad86777 Iustin Pop
                    else Nothing
113 2ba17362 Iustin Pop
  in case m_fin_tbl of
114 2ba17362 Iustin Pop
       Just fin_tbl ->
115 2ba17362 Iustin Pop
         do
116 2ba17362 Iustin Pop
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
117 2ba17362 Iustin Pop
               fin_plc_len = length fin_plc
118 2ba17362 Iustin Pop
               cur_plc@(idx, _, _, move, _) = head fin_plc
119 2ba17362 Iustin Pop
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
120 2ba17362 Iustin Pop
                                  nmlen imlen cur_plc fin_plc_len
121 2ba17362 Iustin Pop
               afn = Cluster.involvedNodes ini_il cur_plc
122 2ba17362 Iustin Pop
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
123 2ba17362 Iustin Pop
           putStrLn sol_line
124 2ba17362 Iustin Pop
           hFlush stdout
125 2ba17362 Iustin Pop
           iterateDepth fin_tbl max_rounds disk_moves inst_moves
126 2ba17362 Iustin Pop
                        nmlen imlen upd_cmd_strs min_score
127 2ba17362 Iustin Pop
                        mg_limit min_gain evac_mode
128 2ba17362 Iustin Pop
       Nothing -> return (ini_tbl, cmd_strs)
129 6dc960bc Iustin Pop
130 5dad2589 Iustin Pop
-- | Displays the cluster stats.
131 5dad2589 Iustin Pop
printStats :: Node.List -> Node.List -> IO ()
132 5dad2589 Iustin Pop
printStats ini_nl fin_nl = do
133 5dad2589 Iustin Pop
  let ini_cs = Cluster.totalResources ini_nl
134 5dad2589 Iustin Pop
      fin_cs = Cluster.totalResources fin_nl
135 5dad2589 Iustin Pop
  printf "Original: mem=%d disk=%d\n"
136 5dad2589 Iustin Pop
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
137 5dad2589 Iustin Pop
  printf "Final:    mem=%d disk=%d\n"
138 5dad2589 Iustin Pop
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
139 5dad2589 Iustin Pop
140 5dad2589 Iustin Pop
-- | Saves the rebalance commands to a text file.
141 5dad2589 Iustin Pop
saveBalanceCommands :: Options -> String -> IO ()
142 5dad2589 Iustin Pop
saveBalanceCommands opts cmd_data = do
143 5dad2589 Iustin Pop
  let out_path = fromJust $ optShowCmds opts
144 5dad2589 Iustin Pop
  putStrLn ""
145 3603605a Iustin Pop
  if out_path == "-"
146 3603605a Iustin Pop
    then printf "Commands to run to reach the above solution:\n%s"
147 3603605a Iustin Pop
           (unlines . map ("  " ++) .
148 3603605a Iustin Pop
            filter (/= "  check") .
149 3603605a Iustin Pop
            lines $ cmd_data)
150 3603605a Iustin Pop
    else do
151 3603605a Iustin Pop
      writeFile out_path (shTemplate ++ cmd_data)
152 3603605a Iustin Pop
      printf "The commands have been written to file '%s'\n" out_path
153 5dad2589 Iustin Pop
154 b2245847 Iustin Pop
-- | Polls a set of jobs at a fixed interval until all are finished
155 179c0828 Iustin Pop
-- one way or another.
156 b2245847 Iustin Pop
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
157 b2245847 Iustin Pop
waitForJobs client jids = do
158 b2245847 Iustin Pop
  sts <- L.queryJobsStatus client jids
159 b2245847 Iustin Pop
  case sts of
160 b2245847 Iustin Pop
    Bad x -> return $ Bad x
161 7e98f782 Iustin Pop
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
162 b2245847 Iustin Pop
            then do
163 b2245847 Iustin Pop
              -- TODO: replace hardcoded value with a better thing
164 b2245847 Iustin Pop
              threadDelay (1000000 * 15)
165 b2245847 Iustin Pop
              waitForJobs client jids
166 b2245847 Iustin Pop
            else return $ Ok s
167 b2245847 Iustin Pop
168 179c0828 Iustin Pop
-- | Check that a set of job statuses is all success.
169 b2245847 Iustin Pop
checkJobsStatus :: [JobStatus] -> Bool
170 7e98f782 Iustin Pop
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
171 b2245847 Iustin Pop
172 179c0828 Iustin Pop
-- | Wrapper over execJobSet checking for early termination.
173 23448f82 Iustin Pop
execWrapper :: String -> Node.List
174 5dad2589 Iustin Pop
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
175 23448f82 Iustin Pop
execWrapper _      _  _  _    [] = return True
176 23448f82 Iustin Pop
execWrapper master nl il cref alljss = do
177 23448f82 Iustin Pop
  cancel <- readIORef cref
178 3603605a Iustin Pop
  if cancel > 0
179 3603605a Iustin Pop
    then do
180 3603605a Iustin Pop
      hPrintf stderr "Exiting early due to user request, %d\
181 3603605a Iustin Pop
                     \ jobset(s) remaining." (length alljss)::IO ()
182 3603605a Iustin Pop
      return False
183 3603605a Iustin Pop
    else execJobSet master nl il cref alljss
184 23448f82 Iustin Pop
185 179c0828 Iustin Pop
-- | Execute an entire jobset.
186 3e4480e0 Iustin Pop
execJobSet :: String -> Node.List
187 23448f82 Iustin Pop
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
188 23448f82 Iustin Pop
execJobSet _      _  _  _    [] = return True
189 23448f82 Iustin Pop
execJobSet master nl il cref (js:jss) = do
190 b2245847 Iustin Pop
  -- map from jobset (htools list of positions) to [[opcodes]]
191 b2245847 Iustin Pop
  let jobs = map (\(_, idx, move, _) ->
192 3e4480e0 Iustin Pop
                      Cluster.iMoveToJob nl il idx move) js
193 b2245847 Iustin Pop
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
194 b2245847 Iustin Pop
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
195 b2245847 Iustin Pop
  jrs <- bracket (L.getClient master) L.closeClient
196 b2245847 Iustin Pop
         (\client -> do
197 683b1ca7 Iustin Pop
            jids <- L.submitManyJobs client jobs
198 b2245847 Iustin Pop
            case jids of
199 b2245847 Iustin Pop
              Bad x -> return $ Bad x
200 b2245847 Iustin Pop
              Ok x -> do
201 b2245847 Iustin Pop
                putStrLn $ "Got job IDs " ++ commaJoin x
202 b2245847 Iustin Pop
                waitForJobs client x
203 b2245847 Iustin Pop
         )
204 3603605a Iustin Pop
  case jrs of
205 3603605a Iustin Pop
    Bad x -> do
206 3603605a Iustin Pop
      hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
207 3603605a Iustin Pop
      return False
208 3603605a Iustin Pop
    Ok x -> if checkJobsStatus x
209 3603605a Iustin Pop
              then execWrapper master nl il cref jss
210 3603605a Iustin Pop
              else do
211 3603605a Iustin Pop
                hPutStrLn stderr $ "Not all jobs completed successfully: " ++
212 3603605a Iustin Pop
                          show x
213 3603605a Iustin Pop
                hPutStrLn stderr "Aborting."
214 3603605a Iustin Pop
                return False
215 b2245847 Iustin Pop
216 5dad2589 Iustin Pop
-- | Executes the jobs, if possible and desired.
217 5dad2589 Iustin Pop
maybeExecJobs :: Options
218 5dad2589 Iustin Pop
              -> [a]
219 5dad2589 Iustin Pop
              -> Node.List
220 5dad2589 Iustin Pop
              -> Instance.List
221 5dad2589 Iustin Pop
              -> [JobSet]
222 5dad2589 Iustin Pop
              -> IO Bool
223 5dad2589 Iustin Pop
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
224 2ba17362 Iustin Pop
  if optExecJobs opts && not (null ord_plc)
225 5dad2589 Iustin Pop
    then (case optLuxi opts of
226 5dad2589 Iustin Pop
            Nothing -> do
227 5dad2589 Iustin Pop
              hPutStrLn stderr "Execution of commands possible only on LUXI"
228 5dad2589 Iustin Pop
              return False
229 5dad2589 Iustin Pop
            Just master -> runJobSet master fin_nl il cmd_jobs)
230 5dad2589 Iustin Pop
    else return True
231 5dad2589 Iustin Pop
232 179c0828 Iustin Pop
-- | Signal handler for graceful termination.
233 543e859d Iustin Pop
hangleSigInt :: IORef Int -> IO ()
234 543e859d Iustin Pop
hangleSigInt cref = do
235 543e859d Iustin Pop
  writeIORef cref 1
236 543e859d Iustin Pop
  putStrLn ("Cancel request registered, will exit at" ++
237 543e859d Iustin Pop
            " the end of the current job set...")
238 543e859d Iustin Pop
239 179c0828 Iustin Pop
-- | Signal handler for immediate termination.
240 543e859d Iustin Pop
hangleSigTerm :: IORef Int -> IO ()
241 543e859d Iustin Pop
hangleSigTerm cref = do
242 543e859d Iustin Pop
  -- update the cref to 2, just for consistency
243 543e859d Iustin Pop
  writeIORef cref 2
244 543e859d Iustin Pop
  putStrLn "Double cancel request, exiting now..."
245 543e859d Iustin Pop
  exitImmediately $ ExitFailure 2
246 03cb89f0 Iustin Pop
247 179c0828 Iustin Pop
-- | Runs a job set with handling of signals.
248 23448f82 Iustin Pop
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
249 03cb89f0 Iustin Pop
runJobSet master fin_nl il cmd_jobs = do
250 03cb89f0 Iustin Pop
  cref <- newIORef 0
251 543e859d Iustin Pop
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
252 543e859d Iustin Pop
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
253 23448f82 Iustin Pop
  execWrapper master fin_nl il cref cmd_jobs
254 03cb89f0 Iustin Pop
255 5dad2589 Iustin Pop
-- | Select the target node group.
256 5dad2589 Iustin Pop
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
257 5dad2589 Iustin Pop
            -> IO (String, (Node.List, Instance.List))
258 5dad2589 Iustin Pop
selectGroup opts gl nlf ilf = do
259 646aa028 Iustin Pop
  let ngroups = Cluster.splitCluster nlf ilf
260 646aa028 Iustin Pop
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
261 646aa028 Iustin Pop
    hPutStrLn stderr "Found multiple node groups:"
262 e0c85e08 Iustin Pop
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
263 d5072e4c Iustin Pop
           flip Container.find gl . fst) ngroups
264 646aa028 Iustin Pop
    hPutStrLn stderr "Aborting."
265 646aa028 Iustin Pop
    exitWith $ ExitFailure 1
266 646aa028 Iustin Pop
267 5dad2589 Iustin Pop
  case optGroup opts of
268 10ef6b4e Iustin Pop
    Nothing -> do
269 2ba17362 Iustin Pop
      let (gidx, cdata) = head ngroups
270 2ba17362 Iustin Pop
          grp = Container.find gidx gl
271 2ba17362 Iustin Pop
      return (Group.name grp, cdata)
272 10ef6b4e Iustin Pop
    Just g -> case Container.findByName gl g of
273 646aa028 Iustin Pop
      Nothing -> do
274 646aa028 Iustin Pop
        hPutStrLn stderr $ "Node group " ++ g ++
275 646aa028 Iustin Pop
          " not found. Node group list is:"
276 10ef6b4e Iustin Pop
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
277 646aa028 Iustin Pop
        hPutStrLn stderr "Aborting."
278 646aa028 Iustin Pop
        exitWith $ ExitFailure 1
279 10ef6b4e Iustin Pop
      Just grp ->
280 10ef6b4e Iustin Pop
          case lookup (Group.idx grp) ngroups of
281 3603605a Iustin Pop
            Nothing ->
282 2072221f Iustin Pop
              -- This will only happen if there are no nodes assigned
283 2072221f Iustin Pop
              -- to this group
284 2072221f Iustin Pop
              return (Group.name grp, (Container.empty, Container.empty))
285 10ef6b4e Iustin Pop
            Just cdata -> return (Group.name grp, cdata)
286 646aa028 Iustin Pop
287 5dad2589 Iustin Pop
-- | Do a few checks on the cluster data.
288 e19ee6e4 Iustin Pop
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
289 e19ee6e4 Iustin Pop
checkCluster verbose nl il = do
290 5dad2589 Iustin Pop
  -- nothing to do on an empty cluster
291 5dad2589 Iustin Pop
  when (Container.null il) $ do
292 e19ee6e4 Iustin Pop
         printf "Cluster is empty, exiting.\n"::IO ()
293 5dad2589 Iustin Pop
         exitWith ExitSuccess
294 5dad2589 Iustin Pop
295 5dad2589 Iustin Pop
  -- hbal doesn't currently handle split clusters
296 5dad2589 Iustin Pop
  let split_insts = Cluster.findSplitInstances nl il
297 5dad2589 Iustin Pop
  unless (null split_insts) $ do
298 5dad2589 Iustin Pop
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
299 5dad2589 Iustin Pop
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
300 5dad2589 Iustin Pop
    hPutStrLn stderr "Aborting."
301 5dad2589 Iustin Pop
    exitWith $ ExitFailure 1
302 5dad2589 Iustin Pop
303 e19ee6e4 Iustin Pop
  printf "Loaded %d nodes, %d instances\n"
304 5dad2589 Iustin Pop
             (Container.size nl)
305 e19ee6e4 Iustin Pop
             (Container.size il)::IO ()
306 5dad2589 Iustin Pop
307 5dad2589 Iustin Pop
  let csf = commonSuffix nl il
308 e19ee6e4 Iustin Pop
  when (not (null csf) && verbose > 1) $
309 5dad2589 Iustin Pop
       printf "Note: Stripping common suffix of '%s' from names\n" csf
310 5dad2589 Iustin Pop
311 5dad2589 Iustin Pop
-- | Do a few checks on the selected group data.
312 e19ee6e4 Iustin Pop
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
313 e19ee6e4 Iustin Pop
checkGroup verbose gname nl il = do
314 e19ee6e4 Iustin Pop
  printf "Group size %d nodes, %d instances\n"
315 e4f08c46 Iustin Pop
             (Container.size nl)
316 e19ee6e4 Iustin Pop
             (Container.size il)::IO ()
317 a0529a64 Iustin Pop
318 10ef6b4e Iustin Pop
  putStrLn $ "Selected node group: " ++ gname
319 646aa028 Iustin Pop
320 e4f08c46 Iustin Pop
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
321 e19ee6e4 Iustin Pop
  unless (verbose == 0) $ printf
322 27f96567 Iustin Pop
             "Initial check done: %d bad nodes, %d bad instances.\n"
323 e4f08c46 Iustin Pop
             (length bad_nodes) (length bad_instances)
324 e4f08c46 Iustin Pop
325 9f6dcdea Iustin Pop
  when (length bad_nodes > 0) $
326 289c3835 Iustin Pop
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
327 289c3835 Iustin Pop
                  \that the cluster will end N+1 happy."
328 e4f08c46 Iustin Pop
329 5dad2589 Iustin Pop
-- | Check that we actually need to rebalance.
330 5dad2589 Iustin Pop
checkNeedRebalance :: Options -> Score -> IO ()
331 5dad2589 Iustin Pop
checkNeedRebalance opts ini_cv = do
332 5dad2589 Iustin Pop
  let min_cv = optMinScore opts
333 5dad2589 Iustin Pop
  when (ini_cv < min_cv) $ do
334 e19ee6e4 Iustin Pop
         printf "Cluster is already well balanced (initial score %.6g,\n\
335 e19ee6e4 Iustin Pop
                \minimum score %.6g).\nNothing to do, exiting\n"
336 e19ee6e4 Iustin Pop
                ini_cv min_cv:: IO ()
337 5dad2589 Iustin Pop
         exitWith ExitSuccess
338 5dad2589 Iustin Pop
339 5dad2589 Iustin Pop
-- | Main function.
340 21839f47 Iustin Pop
main :: Options -> [String] -> IO ()
341 21839f47 Iustin Pop
main opts args = do
342 5dad2589 Iustin Pop
  unless (null args) $ do
343 5dad2589 Iustin Pop
         hPutStrLn stderr "Error: this program doesn't take any arguments."
344 5dad2589 Iustin Pop
         exitWith $ ExitFailure 1
345 5dad2589 Iustin Pop
346 e19ee6e4 Iustin Pop
  let verbose = optVerbose opts
347 5dad2589 Iustin Pop
      shownodes = optShowNodes opts
348 5dad2589 Iustin Pop
      showinsts = optShowInsts opts
349 5dad2589 Iustin Pop
350 e77bc89b Iustin Pop
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
351 5dad2589 Iustin Pop
352 e77bc89b Iustin Pop
  when (verbose > 1) $ do
353 5dad2589 Iustin Pop
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
354 e77bc89b Iustin Pop
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
355 5dad2589 Iustin Pop
356 5296ee23 Iustin Pop
  nlf <- setNodeStatus opts fixed_nl
357 e19ee6e4 Iustin Pop
  checkCluster verbose nlf ilf
358 5dad2589 Iustin Pop
359 5dad2589 Iustin Pop
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
360 5dad2589 Iustin Pop
361 5dad2589 Iustin Pop
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
362 5dad2589 Iustin Pop
363 e19ee6e4 Iustin Pop
  checkGroup verbose gname nl il
364 5dad2589 Iustin Pop
365 33e44f0c Iustin Pop
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
366 507fda3f Iustin Pop
367 417f6b50 Iustin Pop
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
368 e4f08c46 Iustin Pop
369 e4f08c46 Iustin Pop
  let ini_cv = Cluster.compCV nl
370 e4f08c46 Iustin Pop
      ini_tbl = Cluster.Table nl il ini_cv []
371 b0517d61 Iustin Pop
      min_cv = optMinScore opts
372 b0517d61 Iustin Pop
373 5dad2589 Iustin Pop
  checkNeedRebalance opts ini_cv
374 b0517d61 Iustin Pop
375 3603605a Iustin Pop
  if verbose > 2
376 3603605a Iustin Pop
    then printf "Initial coefficients: overall %.8f, %s\n"
377 3603605a Iustin Pop
           ini_cv (Cluster.printStats nl)::IO ()
378 3603605a Iustin Pop
    else printf "Initial score: %.8f\n" ini_cv
379 e4f08c46 Iustin Pop
380 e19ee6e4 Iustin Pop
  putStrLn "Trying to minimize the CV..."
381 14c972c7 Iustin Pop
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
382 14c972c7 Iustin Pop
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
383 7dfaafb1 Iustin Pop
384 7dfaafb1 Iustin Pop
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
385 c0501c69 Iustin Pop
                         (optDiskMoves opts)
386 e7f7c003 Guido Trotter
                         (optInstMoves opts)
387 e19ee6e4 Iustin Pop
                         nmlen imlen [] min_cv
388 848b65c9 Iustin Pop
                         (optMinGainLim opts) (optMinGain opts)
389 848b65c9 Iustin Pop
                         (optEvacMode opts)
390 507fda3f Iustin Pop
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
391 e4f08c46 Iustin Pop
      ord_plc = reverse fin_plc
392 cc532bdd Iustin Pop
      sol_msg = case () of
393 cc532bdd Iustin Pop
                  _ | null fin_plc -> printf "No solution found\n"
394 cc532bdd Iustin Pop
                    | verbose > 2 ->
395 cc532bdd Iustin Pop
                        printf "Final coefficients:   overall %.8f, %s\n"
396 cc532bdd Iustin Pop
                        fin_cv (Cluster.printStats fin_nl)
397 cc532bdd Iustin Pop
                    | otherwise ->
398 cc532bdd Iustin Pop
                        printf "Cluster score improved from %.8f to %.8f\n"
399 cc532bdd Iustin Pop
                        ini_cv fin_cv ::String
400 e4f08c46 Iustin Pop
401 e19ee6e4 Iustin Pop
  putStr sol_msg
402 7eff5b09 Iustin Pop
403 e19ee6e4 Iustin Pop
  unless (verbose == 0) $
404 7eff5b09 Iustin Pop
         printf "Solution length=%d\n" (length ord_plc)
405 e4f08c46 Iustin Pop
406 b2245847 Iustin Pop
  let cmd_jobs = Cluster.splitJobs cmd_strs
407 e0eb63f0 Iustin Pop
408 e0eb63f0 Iustin Pop
  when (isJust $ optShowCmds opts) $
409 5dad2589 Iustin Pop
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
410 e0eb63f0 Iustin Pop
411 4188449c Iustin Pop
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
412 71375ef7 Iustin Pop
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
413 b2245847 Iustin Pop
414 33e44f0c Iustin Pop
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
415 507fda3f Iustin Pop
416 417f6b50 Iustin Pop
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
417 417f6b50 Iustin Pop
418 5dad2589 Iustin Pop
  when (verbose > 3) $ printStats nl fin_nl
419 5dad2589 Iustin Pop
420 5dad2589 Iustin Pop
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
421 d5072e4c Iustin Pop
  unless eval (exitWith (ExitFailure 1))