Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ e19ee6e4

History | View | Annotate | Download (16 kB)

1 525bfb36 Iustin Pop
{-| Cluster rebalancer.
2 e4f08c46 Iustin Pop
3 e4f08c46 Iustin Pop
-}
4 e4f08c46 Iustin Pop
5 e2fa2baf Iustin Pop
{-
6 e2fa2baf Iustin Pop
7 d5072e4c Iustin Pop
Copyright (C) 2009, 2010, 2011 Google Inc.
8 e2fa2baf Iustin Pop
9 e2fa2baf Iustin Pop
This program is free software; you can redistribute it and/or modify
10 e2fa2baf Iustin Pop
it under the terms of the GNU General Public License as published by
11 e2fa2baf Iustin Pop
the Free Software Foundation; either version 2 of the License, or
12 e2fa2baf Iustin Pop
(at your option) any later version.
13 e2fa2baf Iustin Pop
14 e2fa2baf Iustin Pop
This program is distributed in the hope that it will be useful, but
15 e2fa2baf Iustin Pop
WITHOUT ANY WARRANTY; without even the implied warranty of
16 e2fa2baf Iustin Pop
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 e2fa2baf Iustin Pop
General Public License for more details.
18 e2fa2baf Iustin Pop
19 e2fa2baf Iustin Pop
You should have received a copy of the GNU General Public License
20 e2fa2baf Iustin Pop
along with this program; if not, write to the Free Software
21 e2fa2baf Iustin Pop
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 e2fa2baf Iustin Pop
02110-1301, USA.
23 e2fa2baf Iustin Pop
24 e2fa2baf Iustin Pop
-}
25 e2fa2baf Iustin Pop
26 201b6c34 Iustin Pop
module Ganeti.HTools.Program.Hbal (main) where
27 e4f08c46 Iustin Pop
28 b2245847 Iustin Pop
import Control.Concurrent (threadDelay)
29 b2245847 Iustin Pop
import Control.Exception (bracket)
30 cc532bdd Iustin Pop
import Control.Monad
31 e4f08c46 Iustin Pop
import Data.List
32 646aa028 Iustin Pop
import Data.Maybe (isJust, isNothing, fromJust)
33 03cb89f0 Iustin Pop
import Data.IORef
34 0903280b Iustin Pop
import System (exitWith, ExitCode(..))
35 e4f08c46 Iustin Pop
import System.IO
36 03cb89f0 Iustin Pop
import System.Posix.Process
37 03cb89f0 Iustin Pop
import System.Posix.Signals
38 e4f08c46 Iustin Pop
import qualified System
39 e4f08c46 Iustin Pop
40 2795466b Iustin Pop
import Text.Printf (printf, hPrintf)
41 e4f08c46 Iustin Pop
42 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Container as Container
43 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Cluster as Cluster
44 10ef6b4e Iustin Pop
import qualified Ganeti.HTools.Group as Group
45 ec18dca9 Iustin Pop
import qualified Ganeti.HTools.Node as Node
46 b2245847 Iustin Pop
import qualified Ganeti.HTools.Instance as Instance
47 040afc35 Iustin Pop
48 0427285d Iustin Pop
import Ganeti.HTools.CLI
49 e8f89bb6 Iustin Pop
import Ganeti.HTools.ExtLoader
50 669d7e3d Iustin Pop
import Ganeti.HTools.Utils
51 0e8ae201 Iustin Pop
import Ganeti.HTools.Types
52 4938fa30 Guido Trotter
import Ganeti.HTools.Loader
53 e4f08c46 Iustin Pop
54 b2245847 Iustin Pop
import qualified Ganeti.Luxi as L
55 b2245847 Iustin Pop
import Ganeti.Jobs
56 b2245847 Iustin Pop
57 179c0828 Iustin Pop
-- | Options list and functions.
58 0427285d Iustin Pop
options :: [OptType]
59 e4f08c46 Iustin Pop
options =
60 0427285d Iustin Pop
    [ oPrintNodes
61 507fda3f Iustin Pop
    , oPrintInsts
62 0427285d Iustin Pop
    , oPrintCommands
63 16c2369c Iustin Pop
    , oDataFile
64 2e28ac32 Iustin Pop
    , oEvacMode
65 0427285d Iustin Pop
    , oRapiMaster
66 0427285d Iustin Pop
    , oLuxiSocket
67 b2245847 Iustin Pop
    , oExecJobs
68 646aa028 Iustin Pop
    , oGroup
69 0427285d Iustin Pop
    , oMaxSolLength
70 0427285d Iustin Pop
    , oVerbose
71 0427285d Iustin Pop
    , oQuiet
72 0427285d Iustin Pop
    , oOfflineNode
73 0427285d Iustin Pop
    , oMinScore
74 0427285d Iustin Pop
    , oMaxCpu
75 0427285d Iustin Pop
    , oMinDisk
76 848b65c9 Iustin Pop
    , oMinGain
77 848b65c9 Iustin Pop
    , oMinGainLim
78 c0501c69 Iustin Pop
    , oDiskMoves
79 ddef0585 Guido Trotter
    , oSelInst
80 e7f7c003 Guido Trotter
    , oInstMoves
81 aa8d2e71 Iustin Pop
    , oDynuFile
82 0f15cc76 Iustin Pop
    , oExTags
83 fcbf0da4 Iustin Pop
    , oExInst
84 748654f7 Iustin Pop
    , oSaveCluster
85 0427285d Iustin Pop
    , oShowVer
86 0427285d Iustin Pop
    , oShowHelp
87 7ef4d93e Iustin Pop
    ]
88 e4f08c46 Iustin Pop
89 6dc960bc Iustin Pop
{- | Start computing the solution at the given depth and recurse until
90 6dc960bc Iustin Pop
we find a valid solution or we exceed the maximum depth.
91 6dc960bc Iustin Pop
92 6dc960bc Iustin Pop
-}
93 6dc960bc Iustin Pop
iterateDepth :: Cluster.Table    -- ^ The starting table
94 6dc960bc Iustin Pop
             -> Int              -- ^ Remaining length
95 c0501c69 Iustin Pop
             -> Bool             -- ^ Allow disk moves
96 e7f7c003 Guido Trotter
             -> Bool             -- ^ Allow instance moves
97 6dc960bc Iustin Pop
             -> Int              -- ^ Max node name len
98 6dc960bc Iustin Pop
             -> Int              -- ^ Max instance name len
99 0e8ae201 Iustin Pop
             -> [MoveJob]        -- ^ Current command list
100 92e32d76 Iustin Pop
             -> Score            -- ^ Score at which to stop
101 848b65c9 Iustin Pop
             -> Score            -- ^ Min gain limit
102 848b65c9 Iustin Pop
             -> Score            -- ^ Min score gain
103 2e28ac32 Iustin Pop
             -> Bool             -- ^ Enable evacuation mode
104 0e8ae201 Iustin Pop
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
105 0e8ae201 Iustin Pop
                                              -- and commands
106 e7f7c003 Guido Trotter
iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
107 e19ee6e4 Iustin Pop
             cmd_strs min_score mg_limit min_gain evac_mode =
108 f25e5aac Iustin Pop
    let Cluster.Table ini_nl ini_il _ _ = ini_tbl
109 5ad86777 Iustin Pop
        allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
110 5ad86777 Iustin Pop
        m_fin_tbl = if allowed_next
111 e08424a8 Guido Trotter
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
112 e08424a8 Guido Trotter
                         evac_mode mg_limit min_gain
113 5ad86777 Iustin Pop
                    else Nothing
114 6dc960bc Iustin Pop
    in
115 f25e5aac Iustin Pop
      case m_fin_tbl of
116 f25e5aac Iustin Pop
        Just fin_tbl ->
117 f25e5aac Iustin Pop
            do
118 f25e5aac Iustin Pop
              let
119 f25e5aac Iustin Pop
                  (Cluster.Table _ _ _ fin_plc) = fin_tbl
120 f25e5aac Iustin Pop
                  fin_plc_len = length fin_plc
121 924f9c16 Iustin Pop
                  cur_plc@(idx, _, _, move, _) = head fin_plc
122 f25e5aac Iustin Pop
                  (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
123 0e8ae201 Iustin Pop
                                     nmlen imlen cur_plc fin_plc_len
124 0e8ae201 Iustin Pop
                  afn = Cluster.involvedNodes ini_il cur_plc
125 924f9c16 Iustin Pop
                  upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
126 e19ee6e4 Iustin Pop
              putStrLn sol_line
127 e19ee6e4 Iustin Pop
              hFlush stdout
128 e7f7c003 Guido Trotter
              iterateDepth fin_tbl max_rounds disk_moves inst_moves
129 e19ee6e4 Iustin Pop
                           nmlen imlen upd_cmd_strs min_score
130 848b65c9 Iustin Pop
                           mg_limit min_gain evac_mode
131 f25e5aac Iustin Pop
        Nothing -> return (ini_tbl, cmd_strs)
132 6dc960bc Iustin Pop
133 5dad2589 Iustin Pop
-- | Displays the cluster stats.
134 5dad2589 Iustin Pop
printStats :: Node.List -> Node.List -> IO ()
135 5dad2589 Iustin Pop
printStats ini_nl fin_nl = do
136 5dad2589 Iustin Pop
  let ini_cs = Cluster.totalResources ini_nl
137 5dad2589 Iustin Pop
      fin_cs = Cluster.totalResources fin_nl
138 5dad2589 Iustin Pop
  printf "Original: mem=%d disk=%d\n"
139 5dad2589 Iustin Pop
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
140 5dad2589 Iustin Pop
  printf "Final:    mem=%d disk=%d\n"
141 5dad2589 Iustin Pop
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
142 5dad2589 Iustin Pop
143 5dad2589 Iustin Pop
-- | Saves the rebalance commands to a text file.
144 5dad2589 Iustin Pop
saveBalanceCommands :: Options -> String -> IO ()
145 5dad2589 Iustin Pop
saveBalanceCommands opts cmd_data = do
146 5dad2589 Iustin Pop
  let out_path = fromJust $ optShowCmds opts
147 5dad2589 Iustin Pop
  putStrLn ""
148 5dad2589 Iustin Pop
  (if out_path == "-" then
149 5dad2589 Iustin Pop
       printf "Commands to run to reach the above solution:\n%s"
150 5dad2589 Iustin Pop
                  (unlines . map ("  " ++) .
151 5dad2589 Iustin Pop
                   filter (/= "  check") .
152 5dad2589 Iustin Pop
                   lines $ cmd_data)
153 5dad2589 Iustin Pop
   else do
154 5dad2589 Iustin Pop
     writeFile out_path (shTemplate ++ cmd_data)
155 5dad2589 Iustin Pop
     printf "The commands have been written to file '%s'\n" out_path)
156 5dad2589 Iustin Pop
157 b2245847 Iustin Pop
-- | Polls a set of jobs at a fixed interval until all are finished
158 179c0828 Iustin Pop
-- one way or another.
159 b2245847 Iustin Pop
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
160 b2245847 Iustin Pop
waitForJobs client jids = do
161 b2245847 Iustin Pop
  sts <- L.queryJobsStatus client jids
162 b2245847 Iustin Pop
  case sts of
163 b2245847 Iustin Pop
    Bad x -> return $ Bad x
164 7e98f782 Iustin Pop
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
165 b2245847 Iustin Pop
            then do
166 b2245847 Iustin Pop
              -- TODO: replace hardcoded value with a better thing
167 b2245847 Iustin Pop
              threadDelay (1000000 * 15)
168 b2245847 Iustin Pop
              waitForJobs client jids
169 b2245847 Iustin Pop
            else return $ Ok s
170 b2245847 Iustin Pop
171 179c0828 Iustin Pop
-- | Check that a set of job statuses is all success.
172 b2245847 Iustin Pop
checkJobsStatus :: [JobStatus] -> Bool
173 7e98f782 Iustin Pop
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
174 b2245847 Iustin Pop
175 179c0828 Iustin Pop
-- | Wrapper over execJobSet checking for early termination.
176 23448f82 Iustin Pop
execWrapper :: String -> Node.List
177 5dad2589 Iustin Pop
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
178 23448f82 Iustin Pop
execWrapper _      _  _  _    [] = return True
179 23448f82 Iustin Pop
execWrapper master nl il cref alljss = do
180 23448f82 Iustin Pop
  cancel <- readIORef cref
181 23448f82 Iustin Pop
  (if cancel > 0
182 23448f82 Iustin Pop
   then do
183 23448f82 Iustin Pop
     hPrintf stderr "Exiting early due to user request, %d\
184 23448f82 Iustin Pop
                    \ jobset(s) remaining." (length alljss)::IO ()
185 23448f82 Iustin Pop
     return False
186 23448f82 Iustin Pop
   else execJobSet master nl il cref alljss)
187 23448f82 Iustin Pop
188 179c0828 Iustin Pop
-- | Execute an entire jobset.
189 3e4480e0 Iustin Pop
execJobSet :: String -> Node.List
190 23448f82 Iustin Pop
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
191 23448f82 Iustin Pop
execJobSet _      _  _  _    [] = return True
192 23448f82 Iustin Pop
execJobSet master nl il cref (js:jss) = do
193 b2245847 Iustin Pop
  -- map from jobset (htools list of positions) to [[opcodes]]
194 b2245847 Iustin Pop
  let jobs = map (\(_, idx, move, _) ->
195 3e4480e0 Iustin Pop
                      Cluster.iMoveToJob nl il idx move) js
196 b2245847 Iustin Pop
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
197 b2245847 Iustin Pop
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
198 b2245847 Iustin Pop
  jrs <- bracket (L.getClient master) L.closeClient
199 b2245847 Iustin Pop
         (\client -> do
200 683b1ca7 Iustin Pop
            jids <- L.submitManyJobs client jobs
201 b2245847 Iustin Pop
            case jids of
202 b2245847 Iustin Pop
              Bad x -> return $ Bad x
203 b2245847 Iustin Pop
              Ok x -> do
204 b2245847 Iustin Pop
                putStrLn $ "Got job IDs " ++ commaJoin x
205 b2245847 Iustin Pop
                waitForJobs client x
206 b2245847 Iustin Pop
         )
207 b2245847 Iustin Pop
  (case jrs of
208 b2245847 Iustin Pop
     Bad x -> do
209 b2245847 Iustin Pop
       hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
210 23448f82 Iustin Pop
       return False
211 b2245847 Iustin Pop
     Ok x -> if checkJobsStatus x
212 23448f82 Iustin Pop
             then execWrapper master nl il cref jss
213 b2245847 Iustin Pop
             else do
214 b2245847 Iustin Pop
               hPutStrLn stderr $ "Not all jobs completed successfully: " ++
215 b2245847 Iustin Pop
                         show x
216 23448f82 Iustin Pop
               hPutStrLn stderr "Aborting."
217 23448f82 Iustin Pop
               return False)
218 b2245847 Iustin Pop
219 5dad2589 Iustin Pop
-- | Executes the jobs, if possible and desired.
220 5dad2589 Iustin Pop
maybeExecJobs :: Options
221 5dad2589 Iustin Pop
              -> [a]
222 5dad2589 Iustin Pop
              -> Node.List
223 5dad2589 Iustin Pop
              -> Instance.List
224 5dad2589 Iustin Pop
              -> [JobSet]
225 5dad2589 Iustin Pop
              -> IO Bool
226 5dad2589 Iustin Pop
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
227 5dad2589 Iustin Pop
    if optExecJobs opts && not (null ord_plc)
228 5dad2589 Iustin Pop
    then (case optLuxi opts of
229 5dad2589 Iustin Pop
            Nothing -> do
230 5dad2589 Iustin Pop
              hPutStrLn stderr "Execution of commands possible only on LUXI"
231 5dad2589 Iustin Pop
              return False
232 5dad2589 Iustin Pop
            Just master -> runJobSet master fin_nl il cmd_jobs)
233 5dad2589 Iustin Pop
    else return True
234 5dad2589 Iustin Pop
235 179c0828 Iustin Pop
-- | Signal handler for graceful termination.
236 543e859d Iustin Pop
hangleSigInt :: IORef Int -> IO ()
237 543e859d Iustin Pop
hangleSigInt cref = do
238 543e859d Iustin Pop
  writeIORef cref 1
239 543e859d Iustin Pop
  putStrLn ("Cancel request registered, will exit at" ++
240 543e859d Iustin Pop
            " the end of the current job set...")
241 543e859d Iustin Pop
242 179c0828 Iustin Pop
-- | Signal handler for immediate termination.
243 543e859d Iustin Pop
hangleSigTerm :: IORef Int -> IO ()
244 543e859d Iustin Pop
hangleSigTerm cref = do
245 543e859d Iustin Pop
  -- update the cref to 2, just for consistency
246 543e859d Iustin Pop
  writeIORef cref 2
247 543e859d Iustin Pop
  putStrLn "Double cancel request, exiting now..."
248 543e859d Iustin Pop
  exitImmediately $ ExitFailure 2
249 03cb89f0 Iustin Pop
250 179c0828 Iustin Pop
-- | Runs a job set with handling of signals.
251 23448f82 Iustin Pop
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
252 03cb89f0 Iustin Pop
runJobSet master fin_nl il cmd_jobs = do
253 03cb89f0 Iustin Pop
  cref <- newIORef 0
254 543e859d Iustin Pop
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
255 543e859d Iustin Pop
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
256 23448f82 Iustin Pop
  execWrapper master fin_nl il cref cmd_jobs
257 03cb89f0 Iustin Pop
258 5dad2589 Iustin Pop
-- | Set node properties based on command line options.
259 5dad2589 Iustin Pop
setNodesStatus :: Options -> Node.List -> IO Node.List
260 5dad2589 Iustin Pop
setNodesStatus opts fixed_nl = do
261 4938fa30 Guido Trotter
  let offline_passed = optOffline opts
262 db1bcfe8 Iustin Pop
      all_nodes = Container.elems fixed_nl
263 4938fa30 Guido Trotter
      offline_lkp = map (lookupName (map Node.name all_nodes)) offline_passed
264 4938fa30 Guido Trotter
      offline_wrong = filter (not . goodLookupResult) offline_lkp
265 4938fa30 Guido Trotter
      offline_names = map lrContent offline_lkp
266 db1bcfe8 Iustin Pop
      offline_indices = map Node.idx $
267 4938fa30 Guido Trotter
                        filter (\n -> Node.name n `elem` offline_names)
268 db1bcfe8 Iustin Pop
                               all_nodes
269 66d67ad4 Iustin Pop
      m_cpu = optMcpu opts
270 66d67ad4 Iustin Pop
      m_dsk = optMdsk opts
271 ec18dca9 Iustin Pop
272 4938fa30 Guido Trotter
  when (not (null offline_wrong)) $ do
273 4938fa30 Guido Trotter
         hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
274 4938fa30 Guido Trotter
                     (commaJoin (map lrContent offline_wrong)) :: IO ()
275 3d7cd10b Iustin Pop
         exitWith $ ExitFailure 1
276 3d7cd10b Iustin Pop
277 5182e970 Iustin Pop
  let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
278 ec18dca9 Iustin Pop
                                then Node.setOffline n True
279 a1c6212e Iustin Pop
                                else n) fixed_nl
280 646aa028 Iustin Pop
      nlf = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
281 646aa028 Iustin Pop
            nm
282 5dad2589 Iustin Pop
  return nlf
283 a30b2f5b Iustin Pop
284 5dad2589 Iustin Pop
-- | Select the target node group.
285 5dad2589 Iustin Pop
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
286 5dad2589 Iustin Pop
            -> IO (String, (Node.List, Instance.List))
287 5dad2589 Iustin Pop
selectGroup opts gl nlf ilf = do
288 646aa028 Iustin Pop
  let ngroups = Cluster.splitCluster nlf ilf
289 646aa028 Iustin Pop
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
290 646aa028 Iustin Pop
    hPutStrLn stderr "Found multiple node groups:"
291 e0c85e08 Iustin Pop
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
292 d5072e4c Iustin Pop
           flip Container.find gl . fst) ngroups
293 646aa028 Iustin Pop
    hPutStrLn stderr "Aborting."
294 646aa028 Iustin Pop
    exitWith $ ExitFailure 1
295 646aa028 Iustin Pop
296 5dad2589 Iustin Pop
  case optGroup opts of
297 10ef6b4e Iustin Pop
    Nothing -> do
298 10ef6b4e Iustin Pop
         let (gidx, cdata) = head ngroups
299 10ef6b4e Iustin Pop
             grp = Container.find gidx gl
300 10ef6b4e Iustin Pop
         return (Group.name grp, cdata)
301 10ef6b4e Iustin Pop
    Just g -> case Container.findByName gl g of
302 646aa028 Iustin Pop
      Nothing -> do
303 646aa028 Iustin Pop
        hPutStrLn stderr $ "Node group " ++ g ++
304 646aa028 Iustin Pop
          " not found. Node group list is:"
305 10ef6b4e Iustin Pop
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
306 646aa028 Iustin Pop
        hPutStrLn stderr "Aborting."
307 646aa028 Iustin Pop
        exitWith $ ExitFailure 1
308 10ef6b4e Iustin Pop
      Just grp ->
309 10ef6b4e Iustin Pop
          case lookup (Group.idx grp) ngroups of
310 10ef6b4e Iustin Pop
            Nothing -> do
311 10ef6b4e Iustin Pop
              -- TODO: while this is unlikely to happen, log here the
312 10ef6b4e Iustin Pop
              -- actual group data to help debugging
313 d5072e4c Iustin Pop
              hPutStrLn stderr "Internal failure, missing group idx"
314 10ef6b4e Iustin Pop
              exitWith $ ExitFailure 1
315 10ef6b4e Iustin Pop
            Just cdata -> return (Group.name grp, cdata)
316 646aa028 Iustin Pop
317 5dad2589 Iustin Pop
-- | Do a few checks on the cluster data.
318 e19ee6e4 Iustin Pop
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
319 e19ee6e4 Iustin Pop
checkCluster verbose nl il = do
320 5dad2589 Iustin Pop
  -- nothing to do on an empty cluster
321 5dad2589 Iustin Pop
  when (Container.null il) $ do
322 e19ee6e4 Iustin Pop
         printf "Cluster is empty, exiting.\n"::IO ()
323 5dad2589 Iustin Pop
         exitWith ExitSuccess
324 5dad2589 Iustin Pop
325 5dad2589 Iustin Pop
  -- hbal doesn't currently handle split clusters
326 5dad2589 Iustin Pop
  let split_insts = Cluster.findSplitInstances nl il
327 5dad2589 Iustin Pop
  unless (null split_insts) $ do
328 5dad2589 Iustin Pop
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
329 5dad2589 Iustin Pop
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
330 5dad2589 Iustin Pop
    hPutStrLn stderr "Aborting."
331 5dad2589 Iustin Pop
    exitWith $ ExitFailure 1
332 5dad2589 Iustin Pop
333 e19ee6e4 Iustin Pop
  printf "Loaded %d nodes, %d instances\n"
334 5dad2589 Iustin Pop
             (Container.size nl)
335 e19ee6e4 Iustin Pop
             (Container.size il)::IO ()
336 5dad2589 Iustin Pop
337 5dad2589 Iustin Pop
  let csf = commonSuffix nl il
338 e19ee6e4 Iustin Pop
  when (not (null csf) && verbose > 1) $
339 5dad2589 Iustin Pop
       printf "Note: Stripping common suffix of '%s' from names\n" csf
340 5dad2589 Iustin Pop
341 5dad2589 Iustin Pop
-- | Do a few checks on the selected group data.
342 e19ee6e4 Iustin Pop
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
343 e19ee6e4 Iustin Pop
checkGroup verbose gname nl il = do
344 e19ee6e4 Iustin Pop
  printf "Group size %d nodes, %d instances\n"
345 e4f08c46 Iustin Pop
             (Container.size nl)
346 e19ee6e4 Iustin Pop
             (Container.size il)::IO ()
347 a0529a64 Iustin Pop
348 10ef6b4e Iustin Pop
  putStrLn $ "Selected node group: " ++ gname
349 646aa028 Iustin Pop
350 e4f08c46 Iustin Pop
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
351 e19ee6e4 Iustin Pop
  unless (verbose == 0) $ printf
352 27f96567 Iustin Pop
             "Initial check done: %d bad nodes, %d bad instances.\n"
353 e4f08c46 Iustin Pop
             (length bad_nodes) (length bad_instances)
354 e4f08c46 Iustin Pop
355 9f6dcdea Iustin Pop
  when (length bad_nodes > 0) $
356 289c3835 Iustin Pop
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
357 289c3835 Iustin Pop
                  \that the cluster will end N+1 happy."
358 e4f08c46 Iustin Pop
359 5dad2589 Iustin Pop
-- | Check that we actually need to rebalance.
360 5dad2589 Iustin Pop
checkNeedRebalance :: Options -> Score -> IO ()
361 5dad2589 Iustin Pop
checkNeedRebalance opts ini_cv = do
362 5dad2589 Iustin Pop
  let min_cv = optMinScore opts
363 5dad2589 Iustin Pop
  when (ini_cv < min_cv) $ do
364 e19ee6e4 Iustin Pop
         printf "Cluster is already well balanced (initial score %.6g,\n\
365 e19ee6e4 Iustin Pop
                \minimum score %.6g).\nNothing to do, exiting\n"
366 e19ee6e4 Iustin Pop
                ini_cv min_cv:: IO ()
367 5dad2589 Iustin Pop
         exitWith ExitSuccess
368 5dad2589 Iustin Pop
369 5dad2589 Iustin Pop
-- | Main function.
370 5dad2589 Iustin Pop
main :: IO ()
371 5dad2589 Iustin Pop
main = do
372 5dad2589 Iustin Pop
  cmd_args <- System.getArgs
373 5dad2589 Iustin Pop
  (opts, args) <- parseOpts cmd_args "hbal" options
374 5dad2589 Iustin Pop
375 5dad2589 Iustin Pop
  unless (null args) $ do
376 5dad2589 Iustin Pop
         hPutStrLn stderr "Error: this program doesn't take any arguments."
377 5dad2589 Iustin Pop
         exitWith $ ExitFailure 1
378 5dad2589 Iustin Pop
379 e19ee6e4 Iustin Pop
  let verbose = optVerbose opts
380 5dad2589 Iustin Pop
      shownodes = optShowNodes opts
381 5dad2589 Iustin Pop
      showinsts = optShowInsts opts
382 5dad2589 Iustin Pop
383 5dad2589 Iustin Pop
  ini_cdata@(ClusterData gl fixed_nl ilf ctags) <- loadExternalData opts
384 5dad2589 Iustin Pop
385 e19ee6e4 Iustin Pop
  when (verbose > 1) $
386 5dad2589 Iustin Pop
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
387 5dad2589 Iustin Pop
388 5dad2589 Iustin Pop
  nlf <- setNodesStatus opts fixed_nl
389 e19ee6e4 Iustin Pop
  checkCluster verbose nlf ilf
390 5dad2589 Iustin Pop
391 5dad2589 Iustin Pop
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
392 5dad2589 Iustin Pop
393 5dad2589 Iustin Pop
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
394 5dad2589 Iustin Pop
395 e19ee6e4 Iustin Pop
  checkGroup verbose gname nl il
396 5dad2589 Iustin Pop
397 33e44f0c Iustin Pop
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
398 507fda3f Iustin Pop
399 417f6b50 Iustin Pop
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
400 e4f08c46 Iustin Pop
401 e4f08c46 Iustin Pop
  let ini_cv = Cluster.compCV nl
402 e4f08c46 Iustin Pop
      ini_tbl = Cluster.Table nl il ini_cv []
403 b0517d61 Iustin Pop
      min_cv = optMinScore opts
404 b0517d61 Iustin Pop
405 5dad2589 Iustin Pop
  checkNeedRebalance opts ini_cv
406 b0517d61 Iustin Pop
407 e19ee6e4 Iustin Pop
  (if verbose > 2
408 e19ee6e4 Iustin Pop
   then printf "Initial coefficients: overall %.8f, %s\n"
409 e19ee6e4 Iustin Pop
        ini_cv (Cluster.printStats nl)::IO ()
410 e19ee6e4 Iustin Pop
   else printf "Initial score: %.8f\n" ini_cv)
411 e4f08c46 Iustin Pop
412 e19ee6e4 Iustin Pop
  putStrLn "Trying to minimize the CV..."
413 14c972c7 Iustin Pop
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
414 14c972c7 Iustin Pop
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
415 7dfaafb1 Iustin Pop
416 7dfaafb1 Iustin Pop
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
417 c0501c69 Iustin Pop
                         (optDiskMoves opts)
418 e7f7c003 Guido Trotter
                         (optInstMoves opts)
419 e19ee6e4 Iustin Pop
                         nmlen imlen [] min_cv
420 848b65c9 Iustin Pop
                         (optMinGainLim opts) (optMinGain opts)
421 848b65c9 Iustin Pop
                         (optEvacMode opts)
422 507fda3f Iustin Pop
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
423 e4f08c46 Iustin Pop
      ord_plc = reverse fin_plc
424 cc532bdd Iustin Pop
      sol_msg = case () of
425 cc532bdd Iustin Pop
                  _ | null fin_plc -> printf "No solution found\n"
426 cc532bdd Iustin Pop
                    | verbose > 2 ->
427 cc532bdd Iustin Pop
                        printf "Final coefficients:   overall %.8f, %s\n"
428 cc532bdd Iustin Pop
                        fin_cv (Cluster.printStats fin_nl)
429 cc532bdd Iustin Pop
                    | otherwise ->
430 cc532bdd Iustin Pop
                        printf "Cluster score improved from %.8f to %.8f\n"
431 cc532bdd Iustin Pop
                        ini_cv fin_cv ::String
432 e4f08c46 Iustin Pop
433 e19ee6e4 Iustin Pop
  putStr sol_msg
434 7eff5b09 Iustin Pop
435 e19ee6e4 Iustin Pop
  unless (verbose == 0) $
436 7eff5b09 Iustin Pop
         printf "Solution length=%d\n" (length ord_plc)
437 e4f08c46 Iustin Pop
438 b2245847 Iustin Pop
  let cmd_jobs = Cluster.splitJobs cmd_strs
439 e0eb63f0 Iustin Pop
440 e0eb63f0 Iustin Pop
  when (isJust $ optShowCmds opts) $
441 5dad2589 Iustin Pop
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
442 e0eb63f0 Iustin Pop
443 4188449c Iustin Pop
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
444 4188449c Iustin Pop
                (ClusterData gl fin_nl fin_il ctags)
445 b2245847 Iustin Pop
446 33e44f0c Iustin Pop
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
447 507fda3f Iustin Pop
448 417f6b50 Iustin Pop
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
449 417f6b50 Iustin Pop
450 5dad2589 Iustin Pop
  when (verbose > 3) $ printStats nl fin_nl
451 5dad2589 Iustin Pop
452 5dad2589 Iustin Pop
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
453 d5072e4c Iustin Pop
  unless eval (exitWith (ExitFailure 1))