Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ 1f5635a9

History | View | Annotate | Download (14.8 kB)

1 525bfb36 Iustin Pop
{-| Cluster rebalancer.
2 e4f08c46 Iustin Pop
3 e4f08c46 Iustin Pop
-}
4 e4f08c46 Iustin Pop
5 e2fa2baf Iustin Pop
{-
6 e2fa2baf Iustin Pop
7 d5072e4c Iustin Pop
Copyright (C) 2009, 2010, 2011 Google Inc.
8 e2fa2baf Iustin Pop
9 e2fa2baf Iustin Pop
This program is free software; you can redistribute it and/or modify
10 e2fa2baf Iustin Pop
it under the terms of the GNU General Public License as published by
11 e2fa2baf Iustin Pop
the Free Software Foundation; either version 2 of the License, or
12 e2fa2baf Iustin Pop
(at your option) any later version.
13 e2fa2baf Iustin Pop
14 e2fa2baf Iustin Pop
This program is distributed in the hope that it will be useful, but
15 e2fa2baf Iustin Pop
WITHOUT ANY WARRANTY; without even the implied warranty of
16 e2fa2baf Iustin Pop
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 e2fa2baf Iustin Pop
General Public License for more details.
18 e2fa2baf Iustin Pop
19 e2fa2baf Iustin Pop
You should have received a copy of the GNU General Public License
20 e2fa2baf Iustin Pop
along with this program; if not, write to the Free Software
21 e2fa2baf Iustin Pop
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 e2fa2baf Iustin Pop
02110-1301, USA.
23 e2fa2baf Iustin Pop
24 e2fa2baf Iustin Pop
-}
25 e2fa2baf Iustin Pop
26 201b6c34 Iustin Pop
module Ganeti.HTools.Program.Hbal (main) where
27 e4f08c46 Iustin Pop
28 b2245847 Iustin Pop
import Control.Concurrent (threadDelay)
29 b2245847 Iustin Pop
import Control.Exception (bracket)
30 cc532bdd Iustin Pop
import Control.Monad
31 e4f08c46 Iustin Pop
import Data.List
32 646aa028 Iustin Pop
import Data.Maybe (isJust, isNothing, fromJust)
33 03cb89f0 Iustin Pop
import Data.IORef
34 7345b69b Iustin Pop
import System.Environment (getArgs)
35 7345b69b Iustin Pop
import System.Exit
36 e4f08c46 Iustin Pop
import System.IO
37 03cb89f0 Iustin Pop
import System.Posix.Process
38 03cb89f0 Iustin Pop
import System.Posix.Signals
39 e4f08c46 Iustin Pop
40 2795466b Iustin Pop
import Text.Printf (printf, hPrintf)
41 e4f08c46 Iustin Pop
42 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Container as Container
43 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Cluster as Cluster
44 10ef6b4e Iustin Pop
import qualified Ganeti.HTools.Group as Group
45 ec18dca9 Iustin Pop
import qualified Ganeti.HTools.Node as Node
46 b2245847 Iustin Pop
import qualified Ganeti.HTools.Instance as Instance
47 040afc35 Iustin Pop
48 0427285d Iustin Pop
import Ganeti.HTools.CLI
49 e8f89bb6 Iustin Pop
import Ganeti.HTools.ExtLoader
50 669d7e3d Iustin Pop
import Ganeti.HTools.Utils
51 0e8ae201 Iustin Pop
import Ganeti.HTools.Types
52 4938fa30 Guido Trotter
import Ganeti.HTools.Loader
53 e4f08c46 Iustin Pop
54 b2245847 Iustin Pop
import qualified Ganeti.Luxi as L
55 b2245847 Iustin Pop
import Ganeti.Jobs
56 b2245847 Iustin Pop
57 179c0828 Iustin Pop
-- | Options list and functions.
58 0427285d Iustin Pop
options :: [OptType]
59 e4f08c46 Iustin Pop
options =
60 2ba17362 Iustin Pop
  [ oPrintNodes
61 2ba17362 Iustin Pop
  , oPrintInsts
62 2ba17362 Iustin Pop
  , oPrintCommands
63 2ba17362 Iustin Pop
  , oDataFile
64 2ba17362 Iustin Pop
  , oEvacMode
65 2ba17362 Iustin Pop
  , oRapiMaster
66 2ba17362 Iustin Pop
  , oLuxiSocket
67 2ba17362 Iustin Pop
  , oExecJobs
68 2ba17362 Iustin Pop
  , oGroup
69 2ba17362 Iustin Pop
  , oMaxSolLength
70 2ba17362 Iustin Pop
  , oVerbose
71 2ba17362 Iustin Pop
  , oQuiet
72 2ba17362 Iustin Pop
  , oOfflineNode
73 2ba17362 Iustin Pop
  , oMinScore
74 2ba17362 Iustin Pop
  , oMaxCpu
75 2ba17362 Iustin Pop
  , oMinDisk
76 2ba17362 Iustin Pop
  , oMinGain
77 2ba17362 Iustin Pop
  , oMinGainLim
78 2ba17362 Iustin Pop
  , oDiskMoves
79 2ba17362 Iustin Pop
  , oSelInst
80 2ba17362 Iustin Pop
  , oInstMoves
81 2ba17362 Iustin Pop
  , oDynuFile
82 2ba17362 Iustin Pop
  , oExTags
83 2ba17362 Iustin Pop
  , oExInst
84 2ba17362 Iustin Pop
  , oSaveCluster
85 2ba17362 Iustin Pop
  , oShowVer
86 2ba17362 Iustin Pop
  , oShowHelp
87 2ba17362 Iustin Pop
  ]
88 e4f08c46 Iustin Pop
89 6dc960bc Iustin Pop
{- | Start computing the solution at the given depth and recurse until
90 6dc960bc Iustin Pop
we find a valid solution or we exceed the maximum depth.
91 6dc960bc Iustin Pop
92 6dc960bc Iustin Pop
-}
93 6dc960bc Iustin Pop
iterateDepth :: Cluster.Table    -- ^ The starting table
94 6dc960bc Iustin Pop
             -> Int              -- ^ Remaining length
95 c0501c69 Iustin Pop
             -> Bool             -- ^ Allow disk moves
96 e7f7c003 Guido Trotter
             -> Bool             -- ^ Allow instance moves
97 6dc960bc Iustin Pop
             -> Int              -- ^ Max node name len
98 6dc960bc Iustin Pop
             -> Int              -- ^ Max instance name len
99 0e8ae201 Iustin Pop
             -> [MoveJob]        -- ^ Current command list
100 92e32d76 Iustin Pop
             -> Score            -- ^ Score at which to stop
101 848b65c9 Iustin Pop
             -> Score            -- ^ Min gain limit
102 848b65c9 Iustin Pop
             -> Score            -- ^ Min score gain
103 2e28ac32 Iustin Pop
             -> Bool             -- ^ Enable evacuation mode
104 0e8ae201 Iustin Pop
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
105 0e8ae201 Iustin Pop
                                              -- and commands
106 e7f7c003 Guido Trotter
iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
107 e19ee6e4 Iustin Pop
             cmd_strs min_score mg_limit min_gain evac_mode =
108 2ba17362 Iustin Pop
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
109 2ba17362 Iustin Pop
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
110 2ba17362 Iustin Pop
      m_fin_tbl = if allowed_next
111 e08424a8 Guido Trotter
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
112 e08424a8 Guido Trotter
                         evac_mode mg_limit min_gain
113 5ad86777 Iustin Pop
                    else Nothing
114 2ba17362 Iustin Pop
  in case m_fin_tbl of
115 2ba17362 Iustin Pop
       Just fin_tbl ->
116 2ba17362 Iustin Pop
         do
117 2ba17362 Iustin Pop
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
118 2ba17362 Iustin Pop
               fin_plc_len = length fin_plc
119 2ba17362 Iustin Pop
               cur_plc@(idx, _, _, move, _) = head fin_plc
120 2ba17362 Iustin Pop
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
121 2ba17362 Iustin Pop
                                  nmlen imlen cur_plc fin_plc_len
122 2ba17362 Iustin Pop
               afn = Cluster.involvedNodes ini_il cur_plc
123 2ba17362 Iustin Pop
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
124 2ba17362 Iustin Pop
           putStrLn sol_line
125 2ba17362 Iustin Pop
           hFlush stdout
126 2ba17362 Iustin Pop
           iterateDepth fin_tbl max_rounds disk_moves inst_moves
127 2ba17362 Iustin Pop
                        nmlen imlen upd_cmd_strs min_score
128 2ba17362 Iustin Pop
                        mg_limit min_gain evac_mode
129 2ba17362 Iustin Pop
       Nothing -> return (ini_tbl, cmd_strs)
130 6dc960bc Iustin Pop
131 5dad2589 Iustin Pop
-- | Displays the cluster stats.
132 5dad2589 Iustin Pop
printStats :: Node.List -> Node.List -> IO ()
133 5dad2589 Iustin Pop
printStats ini_nl fin_nl = do
134 5dad2589 Iustin Pop
  let ini_cs = Cluster.totalResources ini_nl
135 5dad2589 Iustin Pop
      fin_cs = Cluster.totalResources fin_nl
136 5dad2589 Iustin Pop
  printf "Original: mem=%d disk=%d\n"
137 5dad2589 Iustin Pop
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
138 5dad2589 Iustin Pop
  printf "Final:    mem=%d disk=%d\n"
139 5dad2589 Iustin Pop
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
140 5dad2589 Iustin Pop
141 5dad2589 Iustin Pop
-- | Saves the rebalance commands to a text file.
142 5dad2589 Iustin Pop
saveBalanceCommands :: Options -> String -> IO ()
143 5dad2589 Iustin Pop
saveBalanceCommands opts cmd_data = do
144 5dad2589 Iustin Pop
  let out_path = fromJust $ optShowCmds opts
145 5dad2589 Iustin Pop
  putStrLn ""
146 3603605a Iustin Pop
  if out_path == "-"
147 3603605a Iustin Pop
    then printf "Commands to run to reach the above solution:\n%s"
148 3603605a Iustin Pop
           (unlines . map ("  " ++) .
149 3603605a Iustin Pop
            filter (/= "  check") .
150 3603605a Iustin Pop
            lines $ cmd_data)
151 3603605a Iustin Pop
    else do
152 3603605a Iustin Pop
      writeFile out_path (shTemplate ++ cmd_data)
153 3603605a Iustin Pop
      printf "The commands have been written to file '%s'\n" out_path
154 5dad2589 Iustin Pop
155 b2245847 Iustin Pop
-- | Polls a set of jobs at a fixed interval until all are finished
156 179c0828 Iustin Pop
-- one way or another.
157 b2245847 Iustin Pop
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
158 b2245847 Iustin Pop
waitForJobs client jids = do
159 b2245847 Iustin Pop
  sts <- L.queryJobsStatus client jids
160 b2245847 Iustin Pop
  case sts of
161 b2245847 Iustin Pop
    Bad x -> return $ Bad x
162 7e98f782 Iustin Pop
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
163 b2245847 Iustin Pop
            then do
164 b2245847 Iustin Pop
              -- TODO: replace hardcoded value with a better thing
165 b2245847 Iustin Pop
              threadDelay (1000000 * 15)
166 b2245847 Iustin Pop
              waitForJobs client jids
167 b2245847 Iustin Pop
            else return $ Ok s
168 b2245847 Iustin Pop
169 179c0828 Iustin Pop
-- | Check that a set of job statuses is all success.
170 b2245847 Iustin Pop
checkJobsStatus :: [JobStatus] -> Bool
171 7e98f782 Iustin Pop
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
172 b2245847 Iustin Pop
173 179c0828 Iustin Pop
-- | Wrapper over execJobSet checking for early termination.
174 23448f82 Iustin Pop
execWrapper :: String -> Node.List
175 5dad2589 Iustin Pop
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
176 23448f82 Iustin Pop
execWrapper _      _  _  _    [] = return True
177 23448f82 Iustin Pop
execWrapper master nl il cref alljss = do
178 23448f82 Iustin Pop
  cancel <- readIORef cref
179 3603605a Iustin Pop
  if cancel > 0
180 3603605a Iustin Pop
    then do
181 3603605a Iustin Pop
      hPrintf stderr "Exiting early due to user request, %d\
182 3603605a Iustin Pop
                     \ jobset(s) remaining." (length alljss)::IO ()
183 3603605a Iustin Pop
      return False
184 3603605a Iustin Pop
    else execJobSet master nl il cref alljss
185 23448f82 Iustin Pop
186 179c0828 Iustin Pop
-- | Execute an entire jobset.
187 3e4480e0 Iustin Pop
execJobSet :: String -> Node.List
188 23448f82 Iustin Pop
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
189 23448f82 Iustin Pop
execJobSet _      _  _  _    [] = return True
190 23448f82 Iustin Pop
execJobSet master nl il cref (js:jss) = do
191 b2245847 Iustin Pop
  -- map from jobset (htools list of positions) to [[opcodes]]
192 b2245847 Iustin Pop
  let jobs = map (\(_, idx, move, _) ->
193 3e4480e0 Iustin Pop
                      Cluster.iMoveToJob nl il idx move) js
194 b2245847 Iustin Pop
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
195 b2245847 Iustin Pop
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
196 b2245847 Iustin Pop
  jrs <- bracket (L.getClient master) L.closeClient
197 b2245847 Iustin Pop
         (\client -> do
198 683b1ca7 Iustin Pop
            jids <- L.submitManyJobs client jobs
199 b2245847 Iustin Pop
            case jids of
200 b2245847 Iustin Pop
              Bad x -> return $ Bad x
201 b2245847 Iustin Pop
              Ok x -> do
202 b2245847 Iustin Pop
                putStrLn $ "Got job IDs " ++ commaJoin x
203 b2245847 Iustin Pop
                waitForJobs client x
204 b2245847 Iustin Pop
         )
205 3603605a Iustin Pop
  case jrs of
206 3603605a Iustin Pop
    Bad x -> do
207 3603605a Iustin Pop
      hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
208 3603605a Iustin Pop
      return False
209 3603605a Iustin Pop
    Ok x -> if checkJobsStatus x
210 3603605a Iustin Pop
              then execWrapper master nl il cref jss
211 3603605a Iustin Pop
              else do
212 3603605a Iustin Pop
                hPutStrLn stderr $ "Not all jobs completed successfully: " ++
213 3603605a Iustin Pop
                          show x
214 3603605a Iustin Pop
                hPutStrLn stderr "Aborting."
215 3603605a Iustin Pop
                return False
216 b2245847 Iustin Pop
217 5dad2589 Iustin Pop
-- | Executes the jobs, if possible and desired.
218 5dad2589 Iustin Pop
maybeExecJobs :: Options
219 5dad2589 Iustin Pop
              -> [a]
220 5dad2589 Iustin Pop
              -> Node.List
221 5dad2589 Iustin Pop
              -> Instance.List
222 5dad2589 Iustin Pop
              -> [JobSet]
223 5dad2589 Iustin Pop
              -> IO Bool
224 5dad2589 Iustin Pop
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
225 2ba17362 Iustin Pop
  if optExecJobs opts && not (null ord_plc)
226 5dad2589 Iustin Pop
    then (case optLuxi opts of
227 5dad2589 Iustin Pop
            Nothing -> do
228 5dad2589 Iustin Pop
              hPutStrLn stderr "Execution of commands possible only on LUXI"
229 5dad2589 Iustin Pop
              return False
230 5dad2589 Iustin Pop
            Just master -> runJobSet master fin_nl il cmd_jobs)
231 5dad2589 Iustin Pop
    else return True
232 5dad2589 Iustin Pop
233 179c0828 Iustin Pop
-- | Signal handler for graceful termination.
234 543e859d Iustin Pop
hangleSigInt :: IORef Int -> IO ()
235 543e859d Iustin Pop
hangleSigInt cref = do
236 543e859d Iustin Pop
  writeIORef cref 1
237 543e859d Iustin Pop
  putStrLn ("Cancel request registered, will exit at" ++
238 543e859d Iustin Pop
            " the end of the current job set...")
239 543e859d Iustin Pop
240 179c0828 Iustin Pop
-- | Signal handler for immediate termination.
241 543e859d Iustin Pop
hangleSigTerm :: IORef Int -> IO ()
242 543e859d Iustin Pop
hangleSigTerm cref = do
243 543e859d Iustin Pop
  -- update the cref to 2, just for consistency
244 543e859d Iustin Pop
  writeIORef cref 2
245 543e859d Iustin Pop
  putStrLn "Double cancel request, exiting now..."
246 543e859d Iustin Pop
  exitImmediately $ ExitFailure 2
247 03cb89f0 Iustin Pop
248 179c0828 Iustin Pop
-- | Runs a job set with handling of signals.
249 23448f82 Iustin Pop
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
250 03cb89f0 Iustin Pop
runJobSet master fin_nl il cmd_jobs = do
251 03cb89f0 Iustin Pop
  cref <- newIORef 0
252 543e859d Iustin Pop
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
253 543e859d Iustin Pop
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
254 23448f82 Iustin Pop
  execWrapper master fin_nl il cref cmd_jobs
255 03cb89f0 Iustin Pop
256 5dad2589 Iustin Pop
-- | Select the target node group.
257 5dad2589 Iustin Pop
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
258 5dad2589 Iustin Pop
            -> IO (String, (Node.List, Instance.List))
259 5dad2589 Iustin Pop
selectGroup opts gl nlf ilf = do
260 646aa028 Iustin Pop
  let ngroups = Cluster.splitCluster nlf ilf
261 646aa028 Iustin Pop
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
262 646aa028 Iustin Pop
    hPutStrLn stderr "Found multiple node groups:"
263 e0c85e08 Iustin Pop
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
264 d5072e4c Iustin Pop
           flip Container.find gl . fst) ngroups
265 646aa028 Iustin Pop
    hPutStrLn stderr "Aborting."
266 646aa028 Iustin Pop
    exitWith $ ExitFailure 1
267 646aa028 Iustin Pop
268 5dad2589 Iustin Pop
  case optGroup opts of
269 10ef6b4e Iustin Pop
    Nothing -> do
270 2ba17362 Iustin Pop
      let (gidx, cdata) = head ngroups
271 2ba17362 Iustin Pop
          grp = Container.find gidx gl
272 2ba17362 Iustin Pop
      return (Group.name grp, cdata)
273 10ef6b4e Iustin Pop
    Just g -> case Container.findByName gl g of
274 646aa028 Iustin Pop
      Nothing -> do
275 646aa028 Iustin Pop
        hPutStrLn stderr $ "Node group " ++ g ++
276 646aa028 Iustin Pop
          " not found. Node group list is:"
277 10ef6b4e Iustin Pop
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
278 646aa028 Iustin Pop
        hPutStrLn stderr "Aborting."
279 646aa028 Iustin Pop
        exitWith $ ExitFailure 1
280 10ef6b4e Iustin Pop
      Just grp ->
281 10ef6b4e Iustin Pop
          case lookup (Group.idx grp) ngroups of
282 3603605a Iustin Pop
            Nothing ->
283 2072221f Iustin Pop
              -- This will only happen if there are no nodes assigned
284 2072221f Iustin Pop
              -- to this group
285 2072221f Iustin Pop
              return (Group.name grp, (Container.empty, Container.empty))
286 10ef6b4e Iustin Pop
            Just cdata -> return (Group.name grp, cdata)
287 646aa028 Iustin Pop
288 5dad2589 Iustin Pop
-- | Do a few checks on the cluster data.
289 e19ee6e4 Iustin Pop
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
290 e19ee6e4 Iustin Pop
checkCluster verbose nl il = do
291 5dad2589 Iustin Pop
  -- nothing to do on an empty cluster
292 5dad2589 Iustin Pop
  when (Container.null il) $ do
293 e19ee6e4 Iustin Pop
         printf "Cluster is empty, exiting.\n"::IO ()
294 5dad2589 Iustin Pop
         exitWith ExitSuccess
295 5dad2589 Iustin Pop
296 5dad2589 Iustin Pop
  -- hbal doesn't currently handle split clusters
297 5dad2589 Iustin Pop
  let split_insts = Cluster.findSplitInstances nl il
298 5dad2589 Iustin Pop
  unless (null split_insts) $ do
299 5dad2589 Iustin Pop
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
300 5dad2589 Iustin Pop
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
301 5dad2589 Iustin Pop
    hPutStrLn stderr "Aborting."
302 5dad2589 Iustin Pop
    exitWith $ ExitFailure 1
303 5dad2589 Iustin Pop
304 e19ee6e4 Iustin Pop
  printf "Loaded %d nodes, %d instances\n"
305 5dad2589 Iustin Pop
             (Container.size nl)
306 e19ee6e4 Iustin Pop
             (Container.size il)::IO ()
307 5dad2589 Iustin Pop
308 5dad2589 Iustin Pop
  let csf = commonSuffix nl il
309 e19ee6e4 Iustin Pop
  when (not (null csf) && verbose > 1) $
310 5dad2589 Iustin Pop
       printf "Note: Stripping common suffix of '%s' from names\n" csf
311 5dad2589 Iustin Pop
312 5dad2589 Iustin Pop
-- | Do a few checks on the selected group data.
313 e19ee6e4 Iustin Pop
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
314 e19ee6e4 Iustin Pop
checkGroup verbose gname nl il = do
315 e19ee6e4 Iustin Pop
  printf "Group size %d nodes, %d instances\n"
316 e4f08c46 Iustin Pop
             (Container.size nl)
317 e19ee6e4 Iustin Pop
             (Container.size il)::IO ()
318 a0529a64 Iustin Pop
319 10ef6b4e Iustin Pop
  putStrLn $ "Selected node group: " ++ gname
320 646aa028 Iustin Pop
321 e4f08c46 Iustin Pop
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
322 e19ee6e4 Iustin Pop
  unless (verbose == 0) $ printf
323 27f96567 Iustin Pop
             "Initial check done: %d bad nodes, %d bad instances.\n"
324 e4f08c46 Iustin Pop
             (length bad_nodes) (length bad_instances)
325 e4f08c46 Iustin Pop
326 9f6dcdea Iustin Pop
  when (length bad_nodes > 0) $
327 289c3835 Iustin Pop
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
328 289c3835 Iustin Pop
                  \that the cluster will end N+1 happy."
329 e4f08c46 Iustin Pop
330 5dad2589 Iustin Pop
-- | Check that we actually need to rebalance.
331 5dad2589 Iustin Pop
checkNeedRebalance :: Options -> Score -> IO ()
332 5dad2589 Iustin Pop
checkNeedRebalance opts ini_cv = do
333 5dad2589 Iustin Pop
  let min_cv = optMinScore opts
334 5dad2589 Iustin Pop
  when (ini_cv < min_cv) $ do
335 e19ee6e4 Iustin Pop
         printf "Cluster is already well balanced (initial score %.6g,\n\
336 e19ee6e4 Iustin Pop
                \minimum score %.6g).\nNothing to do, exiting\n"
337 e19ee6e4 Iustin Pop
                ini_cv min_cv:: IO ()
338 5dad2589 Iustin Pop
         exitWith ExitSuccess
339 5dad2589 Iustin Pop
340 5dad2589 Iustin Pop
-- | Main function.
341 5dad2589 Iustin Pop
main :: IO ()
342 5dad2589 Iustin Pop
main = do
343 7345b69b Iustin Pop
  cmd_args <- getArgs
344 5dad2589 Iustin Pop
  (opts, args) <- parseOpts cmd_args "hbal" options
345 5dad2589 Iustin Pop
346 5dad2589 Iustin Pop
  unless (null args) $ do
347 5dad2589 Iustin Pop
         hPutStrLn stderr "Error: this program doesn't take any arguments."
348 5dad2589 Iustin Pop
         exitWith $ ExitFailure 1
349 5dad2589 Iustin Pop
350 e19ee6e4 Iustin Pop
  let verbose = optVerbose opts
351 5dad2589 Iustin Pop
      shownodes = optShowNodes opts
352 5dad2589 Iustin Pop
      showinsts = optShowInsts opts
353 5dad2589 Iustin Pop
354 e77bc89b Iustin Pop
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
355 5dad2589 Iustin Pop
356 e77bc89b Iustin Pop
  when (verbose > 1) $ do
357 5dad2589 Iustin Pop
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
358 e77bc89b Iustin Pop
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
359 5dad2589 Iustin Pop
360 5296ee23 Iustin Pop
  nlf <- setNodeStatus opts fixed_nl
361 e19ee6e4 Iustin Pop
  checkCluster verbose nlf ilf
362 5dad2589 Iustin Pop
363 5dad2589 Iustin Pop
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
364 5dad2589 Iustin Pop
365 5dad2589 Iustin Pop
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
366 5dad2589 Iustin Pop
367 e19ee6e4 Iustin Pop
  checkGroup verbose gname nl il
368 5dad2589 Iustin Pop
369 33e44f0c Iustin Pop
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
370 507fda3f Iustin Pop
371 417f6b50 Iustin Pop
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
372 e4f08c46 Iustin Pop
373 e4f08c46 Iustin Pop
  let ini_cv = Cluster.compCV nl
374 e4f08c46 Iustin Pop
      ini_tbl = Cluster.Table nl il ini_cv []
375 b0517d61 Iustin Pop
      min_cv = optMinScore opts
376 b0517d61 Iustin Pop
377 5dad2589 Iustin Pop
  checkNeedRebalance opts ini_cv
378 b0517d61 Iustin Pop
379 3603605a Iustin Pop
  if verbose > 2
380 3603605a Iustin Pop
    then printf "Initial coefficients: overall %.8f, %s\n"
381 3603605a Iustin Pop
           ini_cv (Cluster.printStats nl)::IO ()
382 3603605a Iustin Pop
    else printf "Initial score: %.8f\n" ini_cv
383 e4f08c46 Iustin Pop
384 e19ee6e4 Iustin Pop
  putStrLn "Trying to minimize the CV..."
385 14c972c7 Iustin Pop
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
386 14c972c7 Iustin Pop
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
387 7dfaafb1 Iustin Pop
388 7dfaafb1 Iustin Pop
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
389 c0501c69 Iustin Pop
                         (optDiskMoves opts)
390 e7f7c003 Guido Trotter
                         (optInstMoves opts)
391 e19ee6e4 Iustin Pop
                         nmlen imlen [] min_cv
392 848b65c9 Iustin Pop
                         (optMinGainLim opts) (optMinGain opts)
393 848b65c9 Iustin Pop
                         (optEvacMode opts)
394 507fda3f Iustin Pop
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
395 e4f08c46 Iustin Pop
      ord_plc = reverse fin_plc
396 cc532bdd Iustin Pop
      sol_msg = case () of
397 cc532bdd Iustin Pop
                  _ | null fin_plc -> printf "No solution found\n"
398 cc532bdd Iustin Pop
                    | verbose > 2 ->
399 cc532bdd Iustin Pop
                        printf "Final coefficients:   overall %.8f, %s\n"
400 cc532bdd Iustin Pop
                        fin_cv (Cluster.printStats fin_nl)
401 cc532bdd Iustin Pop
                    | otherwise ->
402 cc532bdd Iustin Pop
                        printf "Cluster score improved from %.8f to %.8f\n"
403 cc532bdd Iustin Pop
                        ini_cv fin_cv ::String
404 e4f08c46 Iustin Pop
405 e19ee6e4 Iustin Pop
  putStr sol_msg
406 7eff5b09 Iustin Pop
407 e19ee6e4 Iustin Pop
  unless (verbose == 0) $
408 7eff5b09 Iustin Pop
         printf "Solution length=%d\n" (length ord_plc)
409 e4f08c46 Iustin Pop
410 b2245847 Iustin Pop
  let cmd_jobs = Cluster.splitJobs cmd_strs
411 e0eb63f0 Iustin Pop
412 e0eb63f0 Iustin Pop
  when (isJust $ optShowCmds opts) $
413 5dad2589 Iustin Pop
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
414 e0eb63f0 Iustin Pop
415 4188449c Iustin Pop
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
416 71375ef7 Iustin Pop
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
417 b2245847 Iustin Pop
418 33e44f0c Iustin Pop
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
419 507fda3f Iustin Pop
420 417f6b50 Iustin Pop
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
421 417f6b50 Iustin Pop
422 5dad2589 Iustin Pop
  when (verbose > 3) $ printStats nl fin_nl
423 5dad2589 Iustin Pop
424 5dad2589 Iustin Pop
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
425 d5072e4c Iustin Pop
  unless eval (exitWith (ExitFailure 1))