Statistics
| Branch: | Tag: | Revision:

root / htools / hbal.hs @ 2e5eb96a

History | View | Annotate | Download (15.2 kB)

1 66d67ad4 Iustin Pop
{-| Cluster rebalancer
2 e4f08c46 Iustin Pop
3 e4f08c46 Iustin Pop
-}
4 e4f08c46 Iustin Pop
5 e2fa2baf Iustin Pop
{-
6 e2fa2baf Iustin Pop
7 d5072e4c Iustin Pop
Copyright (C) 2009, 2010, 2011 Google Inc.
8 e2fa2baf Iustin Pop
9 e2fa2baf Iustin Pop
This program is free software; you can redistribute it and/or modify
10 e2fa2baf Iustin Pop
it under the terms of the GNU General Public License as published by
11 e2fa2baf Iustin Pop
the Free Software Foundation; either version 2 of the License, or
12 e2fa2baf Iustin Pop
(at your option) any later version.
13 e2fa2baf Iustin Pop
14 e2fa2baf Iustin Pop
This program is distributed in the hope that it will be useful, but
15 e2fa2baf Iustin Pop
WITHOUT ANY WARRANTY; without even the implied warranty of
16 e2fa2baf Iustin Pop
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 e2fa2baf Iustin Pop
General Public License for more details.
18 e2fa2baf Iustin Pop
19 e2fa2baf Iustin Pop
You should have received a copy of the GNU General Public License
20 e2fa2baf Iustin Pop
along with this program; if not, write to the Free Software
21 e2fa2baf Iustin Pop
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 e2fa2baf Iustin Pop
02110-1301, USA.
23 e2fa2baf Iustin Pop
24 e2fa2baf Iustin Pop
-}
25 e2fa2baf Iustin Pop
26 e4f08c46 Iustin Pop
module Main (main) where
27 e4f08c46 Iustin Pop
28 b2245847 Iustin Pop
import Control.Concurrent (threadDelay)
29 b2245847 Iustin Pop
import Control.Exception (bracket)
30 e4f08c46 Iustin Pop
import Data.List
31 646aa028 Iustin Pop
import Data.Maybe (isJust, isNothing, fromJust)
32 03cb89f0 Iustin Pop
import Data.IORef
33 e4f08c46 Iustin Pop
import Monad
34 0903280b Iustin Pop
import System (exitWith, ExitCode(..))
35 e4f08c46 Iustin Pop
import System.IO
36 03cb89f0 Iustin Pop
import System.Posix.Process
37 03cb89f0 Iustin Pop
import System.Posix.Signals
38 e4f08c46 Iustin Pop
import qualified System
39 e4f08c46 Iustin Pop
40 2795466b Iustin Pop
import Text.Printf (printf, hPrintf)
41 e4f08c46 Iustin Pop
42 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Container as Container
43 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Cluster as Cluster
44 10ef6b4e Iustin Pop
import qualified Ganeti.HTools.Group as Group
45 ec18dca9 Iustin Pop
import qualified Ganeti.HTools.Node as Node
46 b2245847 Iustin Pop
import qualified Ganeti.HTools.Instance as Instance
47 040afc35 Iustin Pop
48 0427285d Iustin Pop
import Ganeti.HTools.CLI
49 e8f89bb6 Iustin Pop
import Ganeti.HTools.ExtLoader
50 669d7e3d Iustin Pop
import Ganeti.HTools.Utils
51 0e8ae201 Iustin Pop
import Ganeti.HTools.Types
52 017a0c3d Iustin Pop
import Ganeti.HTools.Loader (ClusterData(..))
53 e4f08c46 Iustin Pop
54 b2245847 Iustin Pop
import qualified Ganeti.Luxi as L
55 b2245847 Iustin Pop
import Ganeti.Jobs
56 b2245847 Iustin Pop
57 e4f08c46 Iustin Pop
-- | Options list and functions
58 0427285d Iustin Pop
options :: [OptType]
59 e4f08c46 Iustin Pop
options =
60 0427285d Iustin Pop
    [ oPrintNodes
61 507fda3f Iustin Pop
    , oPrintInsts
62 0427285d Iustin Pop
    , oPrintCommands
63 0427285d Iustin Pop
    , oOneline
64 16c2369c Iustin Pop
    , oDataFile
65 2e28ac32 Iustin Pop
    , oEvacMode
66 0427285d Iustin Pop
    , oRapiMaster
67 0427285d Iustin Pop
    , oLuxiSocket
68 b2245847 Iustin Pop
    , oExecJobs
69 646aa028 Iustin Pop
    , oGroup
70 0427285d Iustin Pop
    , oMaxSolLength
71 0427285d Iustin Pop
    , oVerbose
72 0427285d Iustin Pop
    , oQuiet
73 0427285d Iustin Pop
    , oOfflineNode
74 0427285d Iustin Pop
    , oMinScore
75 0427285d Iustin Pop
    , oMaxCpu
76 0427285d Iustin Pop
    , oMinDisk
77 848b65c9 Iustin Pop
    , oMinGain
78 848b65c9 Iustin Pop
    , oMinGainLim
79 c0501c69 Iustin Pop
    , oDiskMoves
80 aa8d2e71 Iustin Pop
    , oDynuFile
81 0f15cc76 Iustin Pop
    , oExTags
82 fcbf0da4 Iustin Pop
    , oExInst
83 748654f7 Iustin Pop
    , oSaveCluster
84 0427285d Iustin Pop
    , oShowVer
85 0427285d Iustin Pop
    , oShowHelp
86 7ef4d93e Iustin Pop
    ]
87 e4f08c46 Iustin Pop
88 6dc960bc Iustin Pop
{- | Start computing the solution at the given depth and recurse until
89 6dc960bc Iustin Pop
we find a valid solution or we exceed the maximum depth.
90 6dc960bc Iustin Pop
91 6dc960bc Iustin Pop
-}
92 6dc960bc Iustin Pop
iterateDepth :: Cluster.Table    -- ^ The starting table
93 6dc960bc Iustin Pop
             -> Int              -- ^ Remaining length
94 c0501c69 Iustin Pop
             -> Bool             -- ^ Allow disk moves
95 6dc960bc Iustin Pop
             -> Int              -- ^ Max node name len
96 6dc960bc Iustin Pop
             -> Int              -- ^ Max instance name len
97 0e8ae201 Iustin Pop
             -> [MoveJob]        -- ^ Current command list
98 24acc2c6 Guido Trotter
             -> Bool             -- ^ Whether to be silent
99 92e32d76 Iustin Pop
             -> Score            -- ^ Score at which to stop
100 848b65c9 Iustin Pop
             -> Score            -- ^ Min gain limit
101 848b65c9 Iustin Pop
             -> Score            -- ^ Min score gain
102 2e28ac32 Iustin Pop
             -> Bool             -- ^ Enable evacuation mode
103 0e8ae201 Iustin Pop
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
104 0e8ae201 Iustin Pop
                                              -- and commands
105 c0501c69 Iustin Pop
iterateDepth ini_tbl max_rounds disk_moves nmlen imlen
106 848b65c9 Iustin Pop
             cmd_strs oneline min_score mg_limit min_gain evac_mode =
107 f25e5aac Iustin Pop
    let Cluster.Table ini_nl ini_il _ _ = ini_tbl
108 5ad86777 Iustin Pop
        allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
109 5ad86777 Iustin Pop
        m_fin_tbl = if allowed_next
110 2e28ac32 Iustin Pop
                    then Cluster.tryBalance ini_tbl disk_moves evac_mode
111 848b65c9 Iustin Pop
                         mg_limit min_gain
112 5ad86777 Iustin Pop
                    else Nothing
113 6dc960bc Iustin Pop
    in
114 f25e5aac Iustin Pop
      case m_fin_tbl of
115 f25e5aac Iustin Pop
        Just fin_tbl ->
116 f25e5aac Iustin Pop
            do
117 f25e5aac Iustin Pop
              let
118 f25e5aac Iustin Pop
                  (Cluster.Table _ _ _ fin_plc) = fin_tbl
119 f25e5aac Iustin Pop
                  fin_plc_len = length fin_plc
120 924f9c16 Iustin Pop
                  cur_plc@(idx, _, _, move, _) = head fin_plc
121 f25e5aac Iustin Pop
                  (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
122 0e8ae201 Iustin Pop
                                     nmlen imlen cur_plc fin_plc_len
123 0e8ae201 Iustin Pop
                  afn = Cluster.involvedNodes ini_il cur_plc
124 924f9c16 Iustin Pop
                  upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
125 f25e5aac Iustin Pop
              unless oneline $ do
126 f25e5aac Iustin Pop
                       putStrLn sol_line
127 f25e5aac Iustin Pop
                       hFlush stdout
128 f25e5aac Iustin Pop
              iterateDepth fin_tbl max_rounds disk_moves
129 f25e5aac Iustin Pop
                           nmlen imlen upd_cmd_strs oneline min_score
130 848b65c9 Iustin Pop
                           mg_limit min_gain evac_mode
131 f25e5aac Iustin Pop
        Nothing -> return (ini_tbl, cmd_strs)
132 6dc960bc Iustin Pop
133 ba6c6006 Iustin Pop
-- | Formats the solution for the oneline display
134 ba6c6006 Iustin Pop
formatOneline :: Double -> Int -> Double -> String
135 ba6c6006 Iustin Pop
formatOneline ini_cv plc_len fin_cv =
136 ba6c6006 Iustin Pop
    printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
137 9f6dcdea Iustin Pop
               (if fin_cv == 0 then 1 else ini_cv / fin_cv)
138 ba6c6006 Iustin Pop
139 b2245847 Iustin Pop
-- | Polls a set of jobs at a fixed interval until all are finished
140 b2245847 Iustin Pop
-- one way or another
141 b2245847 Iustin Pop
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
142 b2245847 Iustin Pop
waitForJobs client jids = do
143 b2245847 Iustin Pop
  sts <- L.queryJobsStatus client jids
144 b2245847 Iustin Pop
  case sts of
145 b2245847 Iustin Pop
    Bad x -> return $ Bad x
146 7e98f782 Iustin Pop
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
147 b2245847 Iustin Pop
            then do
148 b2245847 Iustin Pop
              -- TODO: replace hardcoded value with a better thing
149 b2245847 Iustin Pop
              threadDelay (1000000 * 15)
150 b2245847 Iustin Pop
              waitForJobs client jids
151 b2245847 Iustin Pop
            else return $ Ok s
152 b2245847 Iustin Pop
153 b2245847 Iustin Pop
-- | Check that a set of job statuses is all success
154 b2245847 Iustin Pop
checkJobsStatus :: [JobStatus] -> Bool
155 7e98f782 Iustin Pop
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
156 b2245847 Iustin Pop
157 23448f82 Iustin Pop
-- | Wrapper over execJobSet checking for early termination
158 23448f82 Iustin Pop
execWrapper :: String -> Node.List
159 23448f82 Iustin Pop
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
160 23448f82 Iustin Pop
execWrapper _      _  _  _    [] = return True
161 23448f82 Iustin Pop
execWrapper master nl il cref alljss = do
162 23448f82 Iustin Pop
  cancel <- readIORef cref
163 23448f82 Iustin Pop
  (if cancel > 0
164 23448f82 Iustin Pop
   then do
165 23448f82 Iustin Pop
     hPrintf stderr "Exiting early due to user request, %d\
166 23448f82 Iustin Pop
                    \ jobset(s) remaining." (length alljss)::IO ()
167 23448f82 Iustin Pop
     return False
168 23448f82 Iustin Pop
   else execJobSet master nl il cref alljss)
169 23448f82 Iustin Pop
170 b2245847 Iustin Pop
-- | Execute an entire jobset
171 3e4480e0 Iustin Pop
execJobSet :: String -> Node.List
172 23448f82 Iustin Pop
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
173 23448f82 Iustin Pop
execJobSet _      _  _  _    [] = return True
174 23448f82 Iustin Pop
execJobSet master nl il cref (js:jss) = do
175 b2245847 Iustin Pop
  -- map from jobset (htools list of positions) to [[opcodes]]
176 b2245847 Iustin Pop
  let jobs = map (\(_, idx, move, _) ->
177 3e4480e0 Iustin Pop
                      Cluster.iMoveToJob nl il idx move) js
178 b2245847 Iustin Pop
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
179 b2245847 Iustin Pop
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
180 b2245847 Iustin Pop
  jrs <- bracket (L.getClient master) L.closeClient
181 b2245847 Iustin Pop
         (\client -> do
182 683b1ca7 Iustin Pop
            jids <- L.submitManyJobs client jobs
183 b2245847 Iustin Pop
            case jids of
184 b2245847 Iustin Pop
              Bad x -> return $ Bad x
185 b2245847 Iustin Pop
              Ok x -> do
186 b2245847 Iustin Pop
                putStrLn $ "Got job IDs " ++ commaJoin x
187 b2245847 Iustin Pop
                waitForJobs client x
188 b2245847 Iustin Pop
         )
189 b2245847 Iustin Pop
  (case jrs of
190 b2245847 Iustin Pop
     Bad x -> do
191 b2245847 Iustin Pop
       hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
192 23448f82 Iustin Pop
       return False
193 b2245847 Iustin Pop
     Ok x -> if checkJobsStatus x
194 23448f82 Iustin Pop
             then execWrapper master nl il cref jss
195 b2245847 Iustin Pop
             else do
196 b2245847 Iustin Pop
               hPutStrLn stderr $ "Not all jobs completed successfully: " ++
197 b2245847 Iustin Pop
                         show x
198 23448f82 Iustin Pop
               hPutStrLn stderr "Aborting."
199 23448f82 Iustin Pop
               return False)
200 b2245847 Iustin Pop
201 543e859d Iustin Pop
-- | Signal handler for graceful termination
202 543e859d Iustin Pop
hangleSigInt :: IORef Int -> IO ()
203 543e859d Iustin Pop
hangleSigInt cref = do
204 543e859d Iustin Pop
  writeIORef cref 1
205 543e859d Iustin Pop
  putStrLn ("Cancel request registered, will exit at" ++
206 543e859d Iustin Pop
            " the end of the current job set...")
207 543e859d Iustin Pop
208 543e859d Iustin Pop
-- | Signal handler for immediate termination
209 543e859d Iustin Pop
hangleSigTerm :: IORef Int -> IO ()
210 543e859d Iustin Pop
hangleSigTerm cref = do
211 543e859d Iustin Pop
  -- update the cref to 2, just for consistency
212 543e859d Iustin Pop
  writeIORef cref 2
213 543e859d Iustin Pop
  putStrLn "Double cancel request, exiting now..."
214 543e859d Iustin Pop
  exitImmediately $ ExitFailure 2
215 03cb89f0 Iustin Pop
216 23448f82 Iustin Pop
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
217 03cb89f0 Iustin Pop
runJobSet master fin_nl il cmd_jobs = do
218 03cb89f0 Iustin Pop
  cref <- newIORef 0
219 543e859d Iustin Pop
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
220 543e859d Iustin Pop
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
221 23448f82 Iustin Pop
  execWrapper master fin_nl il cref cmd_jobs
222 03cb89f0 Iustin Pop
223 e4f08c46 Iustin Pop
-- | Main function.
224 e4f08c46 Iustin Pop
main :: IO ()
225 e4f08c46 Iustin Pop
main = do
226 e4f08c46 Iustin Pop
  cmd_args <- System.getArgs
227 0427285d Iustin Pop
  (opts, args) <- parseOpts cmd_args "hbal" options
228 45f01962 Iustin Pop
229 45f01962 Iustin Pop
  unless (null args) $ do
230 45f01962 Iustin Pop
         hPutStrLn stderr "Error: this program doesn't take any arguments."
231 45f01962 Iustin Pop
         exitWith $ ExitFailure 1
232 a30b2f5b Iustin Pop
233 fae371cc Iustin Pop
  let oneline = optOneline opts
234 7eff5b09 Iustin Pop
      verbose = optVerbose opts
235 e98fb766 Iustin Pop
      shownodes = optShowNodes opts
236 33e44f0c Iustin Pop
      showinsts = optShowInsts opts
237 fae371cc Iustin Pop
238 4188449c Iustin Pop
  ini_cdata@(ClusterData gl fixed_nl ilf ctags) <- loadExternalData opts
239 ec18dca9 Iustin Pop
240 ec18dca9 Iustin Pop
  let offline_names = optOffline opts
241 db1bcfe8 Iustin Pop
      all_nodes = Container.elems fixed_nl
242 c854092b Iustin Pop
      all_names = concatMap allNames all_nodes
243 5182e970 Iustin Pop
      offline_wrong = filter (`notElem` all_names) offline_names
244 db1bcfe8 Iustin Pop
      offline_indices = map Node.idx $
245 f9acea10 Iustin Pop
                        filter (\n ->
246 f9acea10 Iustin Pop
                                 Node.name n `elem` offline_names ||
247 f9acea10 Iustin Pop
                                 Node.alias n `elem` offline_names)
248 db1bcfe8 Iustin Pop
                               all_nodes
249 66d67ad4 Iustin Pop
      m_cpu = optMcpu opts
250 66d67ad4 Iustin Pop
      m_dsk = optMdsk opts
251 646aa028 Iustin Pop
      csf = commonSuffix fixed_nl ilf
252 ec18dca9 Iustin Pop
253 3d7cd10b Iustin Pop
  when (length offline_wrong > 0) $ do
254 2795466b Iustin Pop
         hPrintf stderr "Wrong node name(s) set as offline: %s\n"
255 c939b58e Iustin Pop
                     (commaJoin offline_wrong) :: IO ()
256 3d7cd10b Iustin Pop
         exitWith $ ExitFailure 1
257 3d7cd10b Iustin Pop
258 5182e970 Iustin Pop
  let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
259 ec18dca9 Iustin Pop
                                then Node.setOffline n True
260 a1c6212e Iustin Pop
                                else n) fixed_nl
261 646aa028 Iustin Pop
      nlf = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
262 646aa028 Iustin Pop
            nm
263 a30b2f5b Iustin Pop
264 ea017cbc Iustin Pop
  when (not oneline && verbose > 1) $
265 ea017cbc Iustin Pop
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
266 ea017cbc Iustin Pop
267 646aa028 Iustin Pop
  when (Container.size ilf == 0) $ do
268 926c35b1 Iustin Pop
         (if oneline then putStrLn $ formatOneline 0 0 0
269 926c35b1 Iustin Pop
          else printf "Cluster is empty, exiting.\n")
270 dcbcdb58 Iustin Pop
         exitWith ExitSuccess
271 dcbcdb58 Iustin Pop
272 646aa028 Iustin Pop
  let split_insts = Cluster.findSplitInstances nlf ilf
273 646aa028 Iustin Pop
  when (not . null $ split_insts) $ do
274 646aa028 Iustin Pop
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
275 646aa028 Iustin Pop
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
276 646aa028 Iustin Pop
    hPutStrLn stderr "Aborting."
277 646aa028 Iustin Pop
    exitWith $ ExitFailure 1
278 646aa028 Iustin Pop
279 646aa028 Iustin Pop
  let ngroups = Cluster.splitCluster nlf ilf
280 646aa028 Iustin Pop
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
281 646aa028 Iustin Pop
    hPutStrLn stderr "Found multiple node groups:"
282 e0c85e08 Iustin Pop
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
283 d5072e4c Iustin Pop
           flip Container.find gl . fst) ngroups
284 646aa028 Iustin Pop
    hPutStrLn stderr "Aborting."
285 646aa028 Iustin Pop
    exitWith $ ExitFailure 1
286 646aa028 Iustin Pop
287 4188449c Iustin Pop
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
288 4188449c Iustin Pop
289 27f96567 Iustin Pop
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
290 646aa028 Iustin Pop
             (Container.size nlf)
291 646aa028 Iustin Pop
             (Container.size ilf)
292 646aa028 Iustin Pop
293 10ef6b4e Iustin Pop
  (gname, (nl, il)) <- case optGroup opts of
294 10ef6b4e Iustin Pop
    Nothing -> do
295 10ef6b4e Iustin Pop
         let (gidx, cdata) = head ngroups
296 10ef6b4e Iustin Pop
             grp = Container.find gidx gl
297 10ef6b4e Iustin Pop
         return (Group.name grp, cdata)
298 10ef6b4e Iustin Pop
    Just g -> case Container.findByName gl g of
299 646aa028 Iustin Pop
      Nothing -> do
300 646aa028 Iustin Pop
        hPutStrLn stderr $ "Node group " ++ g ++
301 646aa028 Iustin Pop
          " not found. Node group list is:"
302 10ef6b4e Iustin Pop
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
303 646aa028 Iustin Pop
        hPutStrLn stderr "Aborting."
304 646aa028 Iustin Pop
        exitWith $ ExitFailure 1
305 10ef6b4e Iustin Pop
      Just grp ->
306 10ef6b4e Iustin Pop
          case lookup (Group.idx grp) ngroups of
307 10ef6b4e Iustin Pop
            Nothing -> do
308 10ef6b4e Iustin Pop
              -- TODO: while this is unlikely to happen, log here the
309 10ef6b4e Iustin Pop
              -- actual group data to help debugging
310 d5072e4c Iustin Pop
              hPutStrLn stderr "Internal failure, missing group idx"
311 10ef6b4e Iustin Pop
              exitWith $ ExitFailure 1
312 10ef6b4e Iustin Pop
            Just cdata -> return (Group.name grp, cdata)
313 646aa028 Iustin Pop
314 646aa028 Iustin Pop
  unless oneline $ printf "Group size %d nodes, %d instances\n"
315 e4f08c46 Iustin Pop
             (Container.size nl)
316 e4f08c46 Iustin Pop
             (Container.size il)
317 a0529a64 Iustin Pop
318 10ef6b4e Iustin Pop
  putStrLn $ "Selected node group: " ++ gname
319 646aa028 Iustin Pop
320 9f6dcdea Iustin Pop
  when (length csf > 0 && not oneline && verbose > 1) $
321 9f6dcdea Iustin Pop
       printf "Note: Stripping common suffix of '%s' from names\n" csf
322 a0529a64 Iustin Pop
323 e4f08c46 Iustin Pop
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
324 7eff5b09 Iustin Pop
  unless (oneline || verbose == 0) $ printf
325 27f96567 Iustin Pop
             "Initial check done: %d bad nodes, %d bad instances.\n"
326 e4f08c46 Iustin Pop
             (length bad_nodes) (length bad_instances)
327 e4f08c46 Iustin Pop
328 9f6dcdea Iustin Pop
  when (length bad_nodes > 0) $
329 289c3835 Iustin Pop
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
330 289c3835 Iustin Pop
                  \that the cluster will end N+1 happy."
331 e4f08c46 Iustin Pop
332 33e44f0c Iustin Pop
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
333 507fda3f Iustin Pop
334 417f6b50 Iustin Pop
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
335 e4f08c46 Iustin Pop
336 e4f08c46 Iustin Pop
  let ini_cv = Cluster.compCV nl
337 e4f08c46 Iustin Pop
      ini_tbl = Cluster.Table nl il ini_cv []
338 b0517d61 Iustin Pop
      min_cv = optMinScore opts
339 b0517d61 Iustin Pop
340 b0517d61 Iustin Pop
  when (ini_cv < min_cv) $ do
341 b0517d61 Iustin Pop
         (if oneline then
342 ba6c6006 Iustin Pop
              putStrLn $ formatOneline ini_cv 0 ini_cv
343 b0517d61 Iustin Pop
          else printf "Cluster is already well balanced (initial score %.6g,\n\
344 b0517d61 Iustin Pop
                      \minimum score %.6g).\nNothing to do, exiting\n"
345 b0517d61 Iustin Pop
                      ini_cv min_cv)
346 b0517d61 Iustin Pop
         exitWith ExitSuccess
347 b0517d61 Iustin Pop
348 d09b6ed3 Iustin Pop
  unless oneline (if verbose > 2 then
349 7eff5b09 Iustin Pop
                      printf "Initial coefficients: overall %.8f, %s\n"
350 7eff5b09 Iustin Pop
                      ini_cv (Cluster.printStats nl)
351 7eff5b09 Iustin Pop
                  else
352 7eff5b09 Iustin Pop
                      printf "Initial score: %.8f\n" ini_cv)
353 e4f08c46 Iustin Pop
354 27f96567 Iustin Pop
  unless oneline $ putStrLn "Trying to minimize the CV..."
355 14c972c7 Iustin Pop
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
356 14c972c7 Iustin Pop
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
357 7dfaafb1 Iustin Pop
358 7dfaafb1 Iustin Pop
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
359 c0501c69 Iustin Pop
                         (optDiskMoves opts)
360 848b65c9 Iustin Pop
                         nmlen imlen [] oneline min_cv
361 848b65c9 Iustin Pop
                         (optMinGainLim opts) (optMinGain opts)
362 848b65c9 Iustin Pop
                         (optEvacMode opts)
363 507fda3f Iustin Pop
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
364 e4f08c46 Iustin Pop
      ord_plc = reverse fin_plc
365 9f6dcdea Iustin Pop
      sol_msg = if null fin_plc
366 9f6dcdea Iustin Pop
                then printf "No solution found\n"
367 9f6dcdea Iustin Pop
                else if verbose > 2
368 9f6dcdea Iustin Pop
                     then printf "Final coefficients:   overall %.8f, %s\n"
369 9f6dcdea Iustin Pop
                          fin_cv (Cluster.printStats fin_nl)
370 9f6dcdea Iustin Pop
                     else printf "Cluster score improved from %.8f to %.8f\n"
371 9f6dcdea Iustin Pop
                          ini_cv fin_cv
372 9f6dcdea Iustin Pop
                              ::String
373 e4f08c46 Iustin Pop
374 7eff5b09 Iustin Pop
  unless oneline $ putStr sol_msg
375 7eff5b09 Iustin Pop
376 7eff5b09 Iustin Pop
  unless (oneline || verbose == 0) $
377 7eff5b09 Iustin Pop
         printf "Solution length=%d\n" (length ord_plc)
378 e4f08c46 Iustin Pop
379 b2245847 Iustin Pop
  let cmd_jobs = Cluster.splitJobs cmd_strs
380 b2245847 Iustin Pop
      cmd_data = Cluster.formatCmds cmd_jobs
381 e0eb63f0 Iustin Pop
382 e0eb63f0 Iustin Pop
  when (isJust $ optShowCmds opts) $
383 e4f08c46 Iustin Pop
       do
384 e0eb63f0 Iustin Pop
         let out_path = fromJust $ optShowCmds opts
385 e4f08c46 Iustin Pop
         putStrLn ""
386 e0eb63f0 Iustin Pop
         (if out_path == "-" then
387 e0eb63f0 Iustin Pop
              printf "Commands to run to reach the above solution:\n%s"
388 e0eb63f0 Iustin Pop
                     (unlines . map ("  " ++) .
389 0e8ae201 Iustin Pop
                      filter (/= "  check") .
390 e0eb63f0 Iustin Pop
                      lines $ cmd_data)
391 e0eb63f0 Iustin Pop
          else do
392 0427285d Iustin Pop
            writeFile out_path (shTemplate ++ cmd_data)
393 e0eb63f0 Iustin Pop
            printf "The commands have been written to file '%s'\n" out_path)
394 e0eb63f0 Iustin Pop
395 4188449c Iustin Pop
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
396 4188449c Iustin Pop
                (ClusterData gl fin_nl fin_il ctags)
397 b2245847 Iustin Pop
398 33e44f0c Iustin Pop
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
399 507fda3f Iustin Pop
400 417f6b50 Iustin Pop
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
401 417f6b50 Iustin Pop
402 417f6b50 Iustin Pop
  when (verbose > 3) $ do
403 1a7eff0e Iustin Pop
         let ini_cs = Cluster.totalResources nl
404 1a7eff0e Iustin Pop
             fin_cs = Cluster.totalResources fin_nl
405 417f6b50 Iustin Pop
         printf "Original: mem=%d disk=%d\n"
406 417f6b50 Iustin Pop
                    (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
407 417f6b50 Iustin Pop
         printf "Final:    mem=%d disk=%d\n"
408 417f6b50 Iustin Pop
                    (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
409 ba6c6006 Iustin Pop
  when oneline $
410 ba6c6006 Iustin Pop
         putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv
411 748654f7 Iustin Pop
412 23448f82 Iustin Pop
  eval <-
413 23448f82 Iustin Pop
      if optExecJobs opts && not (null ord_plc)
414 23448f82 Iustin Pop
      then (case optLuxi opts of
415 748654f7 Iustin Pop
              Nothing -> do
416 748654f7 Iustin Pop
                hPutStrLn stderr "Execution of commands possible only on LUXI"
417 23448f82 Iustin Pop
                return False
418 03cb89f0 Iustin Pop
              Just master -> runJobSet master fin_nl il cmd_jobs)
419 23448f82 Iustin Pop
      else return True
420 d5072e4c Iustin Pop
  unless eval (exitWith (ExitFailure 1))