Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ 7e723913

History | View | Annotate | Download (15.5 kB)

1 525bfb36 Iustin Pop
{-| Cluster rebalancer.
2 e4f08c46 Iustin Pop
3 e4f08c46 Iustin Pop
-}
4 e4f08c46 Iustin Pop
5 e2fa2baf Iustin Pop
{-
6 e2fa2baf Iustin Pop
7 21839f47 Iustin Pop
Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
8 e2fa2baf Iustin Pop
9 e2fa2baf Iustin Pop
This program is free software; you can redistribute it and/or modify
10 e2fa2baf Iustin Pop
it under the terms of the GNU General Public License as published by
11 e2fa2baf Iustin Pop
the Free Software Foundation; either version 2 of the License, or
12 e2fa2baf Iustin Pop
(at your option) any later version.
13 e2fa2baf Iustin Pop
14 e2fa2baf Iustin Pop
This program is distributed in the hope that it will be useful, but
15 e2fa2baf Iustin Pop
WITHOUT ANY WARRANTY; without even the implied warranty of
16 e2fa2baf Iustin Pop
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 e2fa2baf Iustin Pop
General Public License for more details.
18 e2fa2baf Iustin Pop
19 e2fa2baf Iustin Pop
You should have received a copy of the GNU General Public License
20 e2fa2baf Iustin Pop
along with this program; if not, write to the Free Software
21 e2fa2baf Iustin Pop
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 e2fa2baf Iustin Pop
02110-1301, USA.
23 e2fa2baf Iustin Pop
24 e2fa2baf Iustin Pop
-}
25 e2fa2baf Iustin Pop
26 e6685c53 Agata Murawska
module Ganeti.HTools.Program.Hbal
27 22278fa7 Iustin Pop
  ( main
28 22278fa7 Iustin Pop
  , options
29 22278fa7 Iustin Pop
  , arguments
30 22278fa7 Iustin Pop
  , iterateDepth
31 22278fa7 Iustin Pop
  ) where
32 e4f08c46 Iustin Pop
33 b2245847 Iustin Pop
import Control.Concurrent (threadDelay)
34 b2245847 Iustin Pop
import Control.Exception (bracket)
35 cc532bdd Iustin Pop
import Control.Monad
36 e4f08c46 Iustin Pop
import Data.List
37 646aa028 Iustin Pop
import Data.Maybe (isJust, isNothing, fromJust)
38 03cb89f0 Iustin Pop
import Data.IORef
39 7345b69b Iustin Pop
import System.Exit
40 e4f08c46 Iustin Pop
import System.IO
41 03cb89f0 Iustin Pop
import System.Posix.Process
42 03cb89f0 Iustin Pop
import System.Posix.Signals
43 e4f08c46 Iustin Pop
44 2795466b Iustin Pop
import Text.Printf (printf, hPrintf)
45 e4f08c46 Iustin Pop
46 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Container as Container
47 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Cluster as Cluster
48 10ef6b4e Iustin Pop
import qualified Ganeti.HTools.Group as Group
49 ec18dca9 Iustin Pop
import qualified Ganeti.HTools.Node as Node
50 b2245847 Iustin Pop
import qualified Ganeti.HTools.Instance as Instance
51 040afc35 Iustin Pop
52 01e52493 Iustin Pop
import Ganeti.BasicTypes
53 22278fa7 Iustin Pop
import Ganeti.Common
54 7adb7dff Iustin Pop
import Ganeti.Errors
55 0427285d Iustin Pop
import Ganeti.HTools.CLI
56 e8f89bb6 Iustin Pop
import Ganeti.HTools.ExtLoader
57 0e8ae201 Iustin Pop
import Ganeti.HTools.Types
58 4938fa30 Guido Trotter
import Ganeti.HTools.Loader
59 7e723913 Iustin Pop
import Ganeti.OpCodes (wrapOpCode, setOpComment, OpCode, MetaOpCode)
60 2746a72c Iustin Pop
import Ganeti.Types (fromJobId)
61 26d62e4c Iustin Pop
import Ganeti.Utils
62 e4f08c46 Iustin Pop
63 b2245847 Iustin Pop
import qualified Ganeti.Luxi as L
64 b2245847 Iustin Pop
import Ganeti.Jobs
65 7e723913 Iustin Pop
import Ganeti.Version (version)
66 b2245847 Iustin Pop
67 179c0828 Iustin Pop
-- | Options list and functions.
68 d66aa238 Iustin Pop
options :: IO [OptType]
69 29a30533 Iustin Pop
options = do
70 29a30533 Iustin Pop
  luxi <- oLuxiSocket
71 d66aa238 Iustin Pop
  return
72 d66aa238 Iustin Pop
    [ oPrintNodes
73 d66aa238 Iustin Pop
    , oPrintInsts
74 d66aa238 Iustin Pop
    , oPrintCommands
75 d66aa238 Iustin Pop
    , oDataFile
76 d66aa238 Iustin Pop
    , oEvacMode
77 d66aa238 Iustin Pop
    , oRapiMaster
78 29a30533 Iustin Pop
    , luxi
79 d66aa238 Iustin Pop
    , oIAllocSrc
80 d66aa238 Iustin Pop
    , oExecJobs
81 d66aa238 Iustin Pop
    , oGroup
82 d66aa238 Iustin Pop
    , oMaxSolLength
83 d66aa238 Iustin Pop
    , oVerbose
84 d66aa238 Iustin Pop
    , oQuiet
85 d66aa238 Iustin Pop
    , oOfflineNode
86 d66aa238 Iustin Pop
    , oMinScore
87 d66aa238 Iustin Pop
    , oMaxCpu
88 d66aa238 Iustin Pop
    , oMinDisk
89 d66aa238 Iustin Pop
    , oMinGain
90 d66aa238 Iustin Pop
    , oMinGainLim
91 d66aa238 Iustin Pop
    , oDiskMoves
92 d66aa238 Iustin Pop
    , oSelInst
93 d66aa238 Iustin Pop
    , oInstMoves
94 d66aa238 Iustin Pop
    , oDynuFile
95 d66aa238 Iustin Pop
    , oExTags
96 d66aa238 Iustin Pop
    , oExInst
97 d66aa238 Iustin Pop
    , oSaveCluster
98 d66aa238 Iustin Pop
    ]
99 e4f08c46 Iustin Pop
100 22278fa7 Iustin Pop
-- | The list of arguments supported by the program.
101 22278fa7 Iustin Pop
arguments :: [ArgCompletion]
102 22278fa7 Iustin Pop
arguments = []
103 22278fa7 Iustin Pop
104 7e723913 Iustin Pop
-- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment
105 7e723913 Iustin Pop
-- about what generated the opcode.
106 7e723913 Iustin Pop
annotateOpCode :: OpCode -> MetaOpCode
107 7e723913 Iustin Pop
annotateOpCode =
108 7e723913 Iustin Pop
  setOpComment ("rebalancing via hbal " ++ version) . wrapOpCode
109 7e723913 Iustin Pop
110 6dc960bc Iustin Pop
{- | Start computing the solution at the given depth and recurse until
111 6dc960bc Iustin Pop
we find a valid solution or we exceed the maximum depth.
112 6dc960bc Iustin Pop
113 6dc960bc Iustin Pop
-}
114 e6685c53 Agata Murawska
iterateDepth :: Bool             -- ^ Whether to print moves
115 e6685c53 Agata Murawska
             -> Cluster.Table    -- ^ The starting table
116 6dc960bc Iustin Pop
             -> Int              -- ^ Remaining length
117 c0501c69 Iustin Pop
             -> Bool             -- ^ Allow disk moves
118 e7f7c003 Guido Trotter
             -> Bool             -- ^ Allow instance moves
119 6dc960bc Iustin Pop
             -> Int              -- ^ Max node name len
120 6dc960bc Iustin Pop
             -> Int              -- ^ Max instance name len
121 0e8ae201 Iustin Pop
             -> [MoveJob]        -- ^ Current command list
122 92e32d76 Iustin Pop
             -> Score            -- ^ Score at which to stop
123 848b65c9 Iustin Pop
             -> Score            -- ^ Min gain limit
124 848b65c9 Iustin Pop
             -> Score            -- ^ Min score gain
125 2e28ac32 Iustin Pop
             -> Bool             -- ^ Enable evacuation mode
126 0e8ae201 Iustin Pop
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
127 0e8ae201 Iustin Pop
                                              -- and commands
128 e6685c53 Agata Murawska
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
129 e19ee6e4 Iustin Pop
             cmd_strs min_score mg_limit min_gain evac_mode =
130 2ba17362 Iustin Pop
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
131 2ba17362 Iustin Pop
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
132 2ba17362 Iustin Pop
      m_fin_tbl = if allowed_next
133 e08424a8 Guido Trotter
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
134 e08424a8 Guido Trotter
                         evac_mode mg_limit min_gain
135 5ad86777 Iustin Pop
                    else Nothing
136 2ba17362 Iustin Pop
  in case m_fin_tbl of
137 2ba17362 Iustin Pop
       Just fin_tbl ->
138 2ba17362 Iustin Pop
         do
139 2ba17362 Iustin Pop
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
140 2ba17362 Iustin Pop
               fin_plc_len = length fin_plc
141 2ba17362 Iustin Pop
               cur_plc@(idx, _, _, move, _) = head fin_plc
142 2ba17362 Iustin Pop
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
143 2ba17362 Iustin Pop
                                  nmlen imlen cur_plc fin_plc_len
144 2ba17362 Iustin Pop
               afn = Cluster.involvedNodes ini_il cur_plc
145 2ba17362 Iustin Pop
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
146 e6685c53 Agata Murawska
           when printmove $ do
147 e6685c53 Agata Murawska
               putStrLn sol_line
148 e6685c53 Agata Murawska
               hFlush stdout
149 e6685c53 Agata Murawska
           iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
150 2ba17362 Iustin Pop
                        nmlen imlen upd_cmd_strs min_score
151 2ba17362 Iustin Pop
                        mg_limit min_gain evac_mode
152 2ba17362 Iustin Pop
       Nothing -> return (ini_tbl, cmd_strs)
153 6dc960bc Iustin Pop
154 5dad2589 Iustin Pop
-- | Displays the cluster stats.
155 5dad2589 Iustin Pop
printStats :: Node.List -> Node.List -> IO ()
156 5dad2589 Iustin Pop
printStats ini_nl fin_nl = do
157 5dad2589 Iustin Pop
  let ini_cs = Cluster.totalResources ini_nl
158 5dad2589 Iustin Pop
      fin_cs = Cluster.totalResources fin_nl
159 5dad2589 Iustin Pop
  printf "Original: mem=%d disk=%d\n"
160 5dad2589 Iustin Pop
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
161 5dad2589 Iustin Pop
  printf "Final:    mem=%d disk=%d\n"
162 5dad2589 Iustin Pop
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
163 5dad2589 Iustin Pop
164 5dad2589 Iustin Pop
-- | Saves the rebalance commands to a text file.
165 5dad2589 Iustin Pop
saveBalanceCommands :: Options -> String -> IO ()
166 5dad2589 Iustin Pop
saveBalanceCommands opts cmd_data = do
167 5dad2589 Iustin Pop
  let out_path = fromJust $ optShowCmds opts
168 5dad2589 Iustin Pop
  putStrLn ""
169 3603605a Iustin Pop
  if out_path == "-"
170 3603605a Iustin Pop
    then printf "Commands to run to reach the above solution:\n%s"
171 3603605a Iustin Pop
           (unlines . map ("  " ++) .
172 3603605a Iustin Pop
            filter (/= "  check") .
173 3603605a Iustin Pop
            lines $ cmd_data)
174 3603605a Iustin Pop
    else do
175 3603605a Iustin Pop
      writeFile out_path (shTemplate ++ cmd_data)
176 3603605a Iustin Pop
      printf "The commands have been written to file '%s'\n" out_path
177 5dad2589 Iustin Pop
178 b2245847 Iustin Pop
-- | Polls a set of jobs at a fixed interval until all are finished
179 179c0828 Iustin Pop
-- one way or another.
180 ccc817a2 Iustin Pop
waitForJobs :: L.Client -> [L.JobId] -> IO (Result [JobStatus])
181 b2245847 Iustin Pop
waitForJobs client jids = do
182 b2245847 Iustin Pop
  sts <- L.queryJobsStatus client jids
183 b2245847 Iustin Pop
  case sts of
184 7adb7dff Iustin Pop
    Bad e -> return . Bad $ "Checking job status: " ++ formatError e
185 7e98f782 Iustin Pop
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
186 b2245847 Iustin Pop
            then do
187 b2245847 Iustin Pop
              -- TODO: replace hardcoded value with a better thing
188 b2245847 Iustin Pop
              threadDelay (1000000 * 15)
189 b2245847 Iustin Pop
              waitForJobs client jids
190 b2245847 Iustin Pop
            else return $ Ok s
191 b2245847 Iustin Pop
192 179c0828 Iustin Pop
-- | Check that a set of job statuses is all success.
193 b2245847 Iustin Pop
checkJobsStatus :: [JobStatus] -> Bool
194 7e98f782 Iustin Pop
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
195 b2245847 Iustin Pop
196 179c0828 Iustin Pop
-- | Wrapper over execJobSet checking for early termination.
197 23448f82 Iustin Pop
execWrapper :: String -> Node.List
198 5dad2589 Iustin Pop
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
199 23448f82 Iustin Pop
execWrapper _      _  _  _    [] = return True
200 23448f82 Iustin Pop
execWrapper master nl il cref alljss = do
201 23448f82 Iustin Pop
  cancel <- readIORef cref
202 3603605a Iustin Pop
  if cancel > 0
203 3603605a Iustin Pop
    then do
204 3603605a Iustin Pop
      hPrintf stderr "Exiting early due to user request, %d\
205 3603605a Iustin Pop
                     \ jobset(s) remaining." (length alljss)::IO ()
206 2a2e2610 Iustin Pop
      return True
207 3603605a Iustin Pop
    else execJobSet master nl il cref alljss
208 23448f82 Iustin Pop
209 179c0828 Iustin Pop
-- | Execute an entire jobset.
210 3e4480e0 Iustin Pop
execJobSet :: String -> Node.List
211 23448f82 Iustin Pop
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
212 23448f82 Iustin Pop
execJobSet _      _  _  _    [] = return True
213 23448f82 Iustin Pop
execJobSet master nl il cref (js:jss) = do
214 b2245847 Iustin Pop
  -- map from jobset (htools list of positions) to [[opcodes]]
215 b2245847 Iustin Pop
  let jobs = map (\(_, idx, move, _) ->
216 7e723913 Iustin Pop
                      map annotateOpCode $
217 3e4480e0 Iustin Pop
                      Cluster.iMoveToJob nl il idx move) js
218 b2245847 Iustin Pop
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
219 b2245847 Iustin Pop
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
220 b2245847 Iustin Pop
  jrs <- bracket (L.getClient master) L.closeClient
221 b2245847 Iustin Pop
         (\client -> do
222 683b1ca7 Iustin Pop
            jids <- L.submitManyJobs client jobs
223 b2245847 Iustin Pop
            case jids of
224 7adb7dff Iustin Pop
              Bad e -> return . Bad $ "Job submission error: " ++ formatError e
225 b2245847 Iustin Pop
              Ok x -> do
226 2746a72c Iustin Pop
                putStrLn $ "Got job IDs " ++
227 2746a72c Iustin Pop
                           commaJoin (map (show . fromJobId) x)
228 b2245847 Iustin Pop
                waitForJobs client x
229 b2245847 Iustin Pop
         )
230 3603605a Iustin Pop
  case jrs of
231 3603605a Iustin Pop
    Bad x -> do
232 7adb7dff Iustin Pop
      hPutStrLn stderr x
233 3603605a Iustin Pop
      return False
234 3603605a Iustin Pop
    Ok x -> if checkJobsStatus x
235 3603605a Iustin Pop
              then execWrapper master nl il cref jss
236 3603605a Iustin Pop
              else do
237 3603605a Iustin Pop
                hPutStrLn stderr $ "Not all jobs completed successfully: " ++
238 3603605a Iustin Pop
                          show x
239 3603605a Iustin Pop
                hPutStrLn stderr "Aborting."
240 3603605a Iustin Pop
                return False
241 b2245847 Iustin Pop
242 5dad2589 Iustin Pop
-- | Executes the jobs, if possible and desired.
243 5dad2589 Iustin Pop
maybeExecJobs :: Options
244 5dad2589 Iustin Pop
              -> [a]
245 5dad2589 Iustin Pop
              -> Node.List
246 5dad2589 Iustin Pop
              -> Instance.List
247 5dad2589 Iustin Pop
              -> [JobSet]
248 5dad2589 Iustin Pop
              -> IO Bool
249 5dad2589 Iustin Pop
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
250 2ba17362 Iustin Pop
  if optExecJobs opts && not (null ord_plc)
251 5dad2589 Iustin Pop
    then (case optLuxi opts of
252 5dad2589 Iustin Pop
            Nothing -> do
253 5dad2589 Iustin Pop
              hPutStrLn stderr "Execution of commands possible only on LUXI"
254 5dad2589 Iustin Pop
              return False
255 5dad2589 Iustin Pop
            Just master -> runJobSet master fin_nl il cmd_jobs)
256 5dad2589 Iustin Pop
    else return True
257 5dad2589 Iustin Pop
258 179c0828 Iustin Pop
-- | Signal handler for graceful termination.
259 543e859d Iustin Pop
hangleSigInt :: IORef Int -> IO ()
260 543e859d Iustin Pop
hangleSigInt cref = do
261 543e859d Iustin Pop
  writeIORef cref 1
262 543e859d Iustin Pop
  putStrLn ("Cancel request registered, will exit at" ++
263 543e859d Iustin Pop
            " the end of the current job set...")
264 543e859d Iustin Pop
265 179c0828 Iustin Pop
-- | Signal handler for immediate termination.
266 543e859d Iustin Pop
hangleSigTerm :: IORef Int -> IO ()
267 543e859d Iustin Pop
hangleSigTerm cref = do
268 543e859d Iustin Pop
  -- update the cref to 2, just for consistency
269 543e859d Iustin Pop
  writeIORef cref 2
270 543e859d Iustin Pop
  putStrLn "Double cancel request, exiting now..."
271 543e859d Iustin Pop
  exitImmediately $ ExitFailure 2
272 03cb89f0 Iustin Pop
273 179c0828 Iustin Pop
-- | Runs a job set with handling of signals.
274 23448f82 Iustin Pop
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
275 03cb89f0 Iustin Pop
runJobSet master fin_nl il cmd_jobs = do
276 03cb89f0 Iustin Pop
  cref <- newIORef 0
277 543e859d Iustin Pop
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
278 543e859d Iustin Pop
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
279 23448f82 Iustin Pop
  execWrapper master fin_nl il cref cmd_jobs
280 03cb89f0 Iustin Pop
281 5dad2589 Iustin Pop
-- | Select the target node group.
282 5dad2589 Iustin Pop
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
283 5dad2589 Iustin Pop
            -> IO (String, (Node.List, Instance.List))
284 5dad2589 Iustin Pop
selectGroup opts gl nlf ilf = do
285 646aa028 Iustin Pop
  let ngroups = Cluster.splitCluster nlf ilf
286 646aa028 Iustin Pop
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
287 646aa028 Iustin Pop
    hPutStrLn stderr "Found multiple node groups:"
288 e0c85e08 Iustin Pop
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
289 d5072e4c Iustin Pop
           flip Container.find gl . fst) ngroups
290 707cd3d7 Helga Velroyen
    exitErr "Aborting."
291 646aa028 Iustin Pop
292 5dad2589 Iustin Pop
  case optGroup opts of
293 10ef6b4e Iustin Pop
    Nothing -> do
294 2ba17362 Iustin Pop
      let (gidx, cdata) = head ngroups
295 2ba17362 Iustin Pop
          grp = Container.find gidx gl
296 2ba17362 Iustin Pop
      return (Group.name grp, cdata)
297 10ef6b4e Iustin Pop
    Just g -> case Container.findByName gl g of
298 646aa028 Iustin Pop
      Nothing -> do
299 646aa028 Iustin Pop
        hPutStrLn stderr $ "Node group " ++ g ++
300 646aa028 Iustin Pop
          " not found. Node group list is:"
301 10ef6b4e Iustin Pop
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
302 707cd3d7 Helga Velroyen
        exitErr "Aborting."
303 10ef6b4e Iustin Pop
      Just grp ->
304 10ef6b4e Iustin Pop
          case lookup (Group.idx grp) ngroups of
305 3603605a Iustin Pop
            Nothing ->
306 2072221f Iustin Pop
              -- This will only happen if there are no nodes assigned
307 2072221f Iustin Pop
              -- to this group
308 2072221f Iustin Pop
              return (Group.name grp, (Container.empty, Container.empty))
309 10ef6b4e Iustin Pop
            Just cdata -> return (Group.name grp, cdata)
310 646aa028 Iustin Pop
311 5dad2589 Iustin Pop
-- | Do a few checks on the cluster data.
312 e19ee6e4 Iustin Pop
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
313 e19ee6e4 Iustin Pop
checkCluster verbose nl il = do
314 5dad2589 Iustin Pop
  -- nothing to do on an empty cluster
315 5dad2589 Iustin Pop
  when (Container.null il) $ do
316 e19ee6e4 Iustin Pop
         printf "Cluster is empty, exiting.\n"::IO ()
317 2cdaf225 Iustin Pop
         exitSuccess
318 5dad2589 Iustin Pop
319 5dad2589 Iustin Pop
  -- hbal doesn't currently handle split clusters
320 5dad2589 Iustin Pop
  let split_insts = Cluster.findSplitInstances nl il
321 a7e1fd89 Iustin Pop
  unless (null split_insts || verbose <= 1) $ do
322 5dad2589 Iustin Pop
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
323 5dad2589 Iustin Pop
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
324 a7e1fd89 Iustin Pop
    hPutStrLn stderr "These instances will not be moved."
325 5dad2589 Iustin Pop
326 e19ee6e4 Iustin Pop
  printf "Loaded %d nodes, %d instances\n"
327 5dad2589 Iustin Pop
             (Container.size nl)
328 e19ee6e4 Iustin Pop
             (Container.size il)::IO ()
329 5dad2589 Iustin Pop
330 5dad2589 Iustin Pop
  let csf = commonSuffix nl il
331 e19ee6e4 Iustin Pop
  when (not (null csf) && verbose > 1) $
332 5dad2589 Iustin Pop
       printf "Note: Stripping common suffix of '%s' from names\n" csf
333 5dad2589 Iustin Pop
334 5dad2589 Iustin Pop
-- | Do a few checks on the selected group data.
335 e19ee6e4 Iustin Pop
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
336 e19ee6e4 Iustin Pop
checkGroup verbose gname nl il = do
337 e19ee6e4 Iustin Pop
  printf "Group size %d nodes, %d instances\n"
338 e4f08c46 Iustin Pop
             (Container.size nl)
339 e19ee6e4 Iustin Pop
             (Container.size il)::IO ()
340 a0529a64 Iustin Pop
341 10ef6b4e Iustin Pop
  putStrLn $ "Selected node group: " ++ gname
342 646aa028 Iustin Pop
343 e4f08c46 Iustin Pop
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
344 e19ee6e4 Iustin Pop
  unless (verbose == 0) $ printf
345 27f96567 Iustin Pop
             "Initial check done: %d bad nodes, %d bad instances.\n"
346 e4f08c46 Iustin Pop
             (length bad_nodes) (length bad_instances)
347 e4f08c46 Iustin Pop
348 2cdaf225 Iustin Pop
  unless (null bad_nodes) $
349 289c3835 Iustin Pop
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
350 289c3835 Iustin Pop
                  \that the cluster will end N+1 happy."
351 e4f08c46 Iustin Pop
352 5dad2589 Iustin Pop
-- | Check that we actually need to rebalance.
353 5dad2589 Iustin Pop
checkNeedRebalance :: Options -> Score -> IO ()
354 5dad2589 Iustin Pop
checkNeedRebalance opts ini_cv = do
355 5dad2589 Iustin Pop
  let min_cv = optMinScore opts
356 5dad2589 Iustin Pop
  when (ini_cv < min_cv) $ do
357 e19ee6e4 Iustin Pop
         printf "Cluster is already well balanced (initial score %.6g,\n\
358 e19ee6e4 Iustin Pop
                \minimum score %.6g).\nNothing to do, exiting\n"
359 e19ee6e4 Iustin Pop
                ini_cv min_cv:: IO ()
360 2cdaf225 Iustin Pop
         exitSuccess
361 5dad2589 Iustin Pop
362 5dad2589 Iustin Pop
-- | Main function.
363 21839f47 Iustin Pop
main :: Options -> [String] -> IO ()
364 21839f47 Iustin Pop
main opts args = do
365 707cd3d7 Helga Velroyen
  unless (null args) $ exitErr "This program doesn't take any arguments."
366 5dad2589 Iustin Pop
367 e19ee6e4 Iustin Pop
  let verbose = optVerbose opts
368 5dad2589 Iustin Pop
      shownodes = optShowNodes opts
369 5dad2589 Iustin Pop
      showinsts = optShowInsts opts
370 5dad2589 Iustin Pop
371 e77bc89b Iustin Pop
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
372 5dad2589 Iustin Pop
373 e77bc89b Iustin Pop
  when (verbose > 1) $ do
374 5dad2589 Iustin Pop
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
375 e77bc89b Iustin Pop
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
376 5dad2589 Iustin Pop
377 5296ee23 Iustin Pop
  nlf <- setNodeStatus opts fixed_nl
378 e19ee6e4 Iustin Pop
  checkCluster verbose nlf ilf
379 5dad2589 Iustin Pop
380 5dad2589 Iustin Pop
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
381 5dad2589 Iustin Pop
382 5dad2589 Iustin Pop
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
383 5dad2589 Iustin Pop
384 e19ee6e4 Iustin Pop
  checkGroup verbose gname nl il
385 5dad2589 Iustin Pop
386 33e44f0c Iustin Pop
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
387 507fda3f Iustin Pop
388 417f6b50 Iustin Pop
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
389 e4f08c46 Iustin Pop
390 e4f08c46 Iustin Pop
  let ini_cv = Cluster.compCV nl
391 e4f08c46 Iustin Pop
      ini_tbl = Cluster.Table nl il ini_cv []
392 b0517d61 Iustin Pop
      min_cv = optMinScore opts
393 b0517d61 Iustin Pop
394 5dad2589 Iustin Pop
  checkNeedRebalance opts ini_cv
395 b0517d61 Iustin Pop
396 3603605a Iustin Pop
  if verbose > 2
397 2922d2c5 René Nussbaumer
    then printf "Initial coefficients: overall %.8f\n%s"
398 2922d2c5 René Nussbaumer
           ini_cv (Cluster.printStats "  " nl)::IO ()
399 3603605a Iustin Pop
    else printf "Initial score: %.8f\n" ini_cv
400 e4f08c46 Iustin Pop
401 e19ee6e4 Iustin Pop
  putStrLn "Trying to minimize the CV..."
402 14c972c7 Iustin Pop
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
403 14c972c7 Iustin Pop
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
404 7dfaafb1 Iustin Pop
405 e6685c53 Agata Murawska
  (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
406 c0501c69 Iustin Pop
                         (optDiskMoves opts)
407 e7f7c003 Guido Trotter
                         (optInstMoves opts)
408 e19ee6e4 Iustin Pop
                         nmlen imlen [] min_cv
409 848b65c9 Iustin Pop
                         (optMinGainLim opts) (optMinGain opts)
410 848b65c9 Iustin Pop
                         (optEvacMode opts)
411 507fda3f Iustin Pop
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
412 e4f08c46 Iustin Pop
      ord_plc = reverse fin_plc
413 cc532bdd Iustin Pop
      sol_msg = case () of
414 cc532bdd Iustin Pop
                  _ | null fin_plc -> printf "No solution found\n"
415 cc532bdd Iustin Pop
                    | verbose > 2 ->
416 2922d2c5 René Nussbaumer
                        printf "Final coefficients:   overall %.8f\n%s"
417 2922d2c5 René Nussbaumer
                        fin_cv (Cluster.printStats "  " fin_nl)
418 cc532bdd Iustin Pop
                    | otherwise ->
419 cc532bdd Iustin Pop
                        printf "Cluster score improved from %.8f to %.8f\n"
420 cc532bdd Iustin Pop
                        ini_cv fin_cv ::String
421 e4f08c46 Iustin Pop
422 e19ee6e4 Iustin Pop
  putStr sol_msg
423 7eff5b09 Iustin Pop
424 e19ee6e4 Iustin Pop
  unless (verbose == 0) $
425 7eff5b09 Iustin Pop
         printf "Solution length=%d\n" (length ord_plc)
426 e4f08c46 Iustin Pop
427 b2245847 Iustin Pop
  let cmd_jobs = Cluster.splitJobs cmd_strs
428 e0eb63f0 Iustin Pop
429 2cdaf225 Iustin Pop
  when (isJust $ optShowCmds opts) .
430 5dad2589 Iustin Pop
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
431 e0eb63f0 Iustin Pop
432 4188449c Iustin Pop
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
433 71375ef7 Iustin Pop
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
434 b2245847 Iustin Pop
435 33e44f0c Iustin Pop
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
436 507fda3f Iustin Pop
437 417f6b50 Iustin Pop
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
438 417f6b50 Iustin Pop
439 5dad2589 Iustin Pop
  when (verbose > 3) $ printStats nl fin_nl
440 5dad2589 Iustin Pop
441 5dad2589 Iustin Pop
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
442 d5072e4c Iustin Pop
  unless eval (exitWith (ExitFailure 1))