Statistics
| Branch: | Tag: | Revision:

root / hbal.hs @ aa8d2e71

History | View | Annotate | Download (11.3 kB)

1 66d67ad4 Iustin Pop
{-| Cluster rebalancer
2 e4f08c46 Iustin Pop
3 e4f08c46 Iustin Pop
-}
4 e4f08c46 Iustin Pop
5 e2fa2baf Iustin Pop
{-
6 e2fa2baf Iustin Pop
7 e2fa2baf Iustin Pop
Copyright (C) 2009 Google Inc.
8 e2fa2baf Iustin Pop
9 e2fa2baf Iustin Pop
This program is free software; you can redistribute it and/or modify
10 e2fa2baf Iustin Pop
it under the terms of the GNU General Public License as published by
11 e2fa2baf Iustin Pop
the Free Software Foundation; either version 2 of the License, or
12 e2fa2baf Iustin Pop
(at your option) any later version.
13 e2fa2baf Iustin Pop
14 e2fa2baf Iustin Pop
This program is distributed in the hope that it will be useful, but
15 e2fa2baf Iustin Pop
WITHOUT ANY WARRANTY; without even the implied warranty of
16 e2fa2baf Iustin Pop
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 e2fa2baf Iustin Pop
General Public License for more details.
18 e2fa2baf Iustin Pop
19 e2fa2baf Iustin Pop
You should have received a copy of the GNU General Public License
20 e2fa2baf Iustin Pop
along with this program; if not, write to the Free Software
21 e2fa2baf Iustin Pop
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 e2fa2baf Iustin Pop
02110-1301, USA.
23 e2fa2baf Iustin Pop
24 e2fa2baf Iustin Pop
-}
25 e2fa2baf Iustin Pop
26 e4f08c46 Iustin Pop
module Main (main) where
27 e4f08c46 Iustin Pop
28 b2245847 Iustin Pop
import Control.Concurrent (threadDelay)
29 b2245847 Iustin Pop
import Control.Exception (bracket)
30 e4f08c46 Iustin Pop
import Data.List
31 e4f08c46 Iustin Pop
import Data.Function
32 0427285d Iustin Pop
import Data.Maybe (isJust, fromJust)
33 e4f08c46 Iustin Pop
import Monad
34 e4f08c46 Iustin Pop
import System
35 e4f08c46 Iustin Pop
import System.IO
36 e4f08c46 Iustin Pop
import qualified System
37 e4f08c46 Iustin Pop
38 2795466b Iustin Pop
import Text.Printf (printf, hPrintf)
39 b2245847 Iustin Pop
import Text.JSON (showJSON)
40 e4f08c46 Iustin Pop
41 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Container as Container
42 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Cluster as Cluster
43 ec18dca9 Iustin Pop
import qualified Ganeti.HTools.Node as Node
44 b2245847 Iustin Pop
import qualified Ganeti.HTools.Instance as Instance
45 040afc35 Iustin Pop
46 0427285d Iustin Pop
import Ganeti.HTools.CLI
47 e8f89bb6 Iustin Pop
import Ganeti.HTools.ExtLoader
48 669d7e3d Iustin Pop
import Ganeti.HTools.Utils
49 0e8ae201 Iustin Pop
import Ganeti.HTools.Types
50 e4f08c46 Iustin Pop
51 b2245847 Iustin Pop
import qualified Ganeti.Luxi as L
52 b2245847 Iustin Pop
import qualified Ganeti.OpCodes as OpCodes
53 b2245847 Iustin Pop
import Ganeti.Jobs
54 b2245847 Iustin Pop
55 e4f08c46 Iustin Pop
-- | Options list and functions
56 0427285d Iustin Pop
options :: [OptType]
57 e4f08c46 Iustin Pop
options =
58 0427285d Iustin Pop
    [ oPrintNodes
59 507fda3f Iustin Pop
    , oPrintInsts
60 0427285d Iustin Pop
    , oPrintCommands
61 0427285d Iustin Pop
    , oOneline
62 0427285d Iustin Pop
    , oNodeFile
63 0427285d Iustin Pop
    , oInstFile
64 0427285d Iustin Pop
    , oRapiMaster
65 0427285d Iustin Pop
    , oLuxiSocket
66 b2245847 Iustin Pop
    , oExecJobs
67 0427285d Iustin Pop
    , oMaxSolLength
68 0427285d Iustin Pop
    , oVerbose
69 0427285d Iustin Pop
    , oQuiet
70 0427285d Iustin Pop
    , oOfflineNode
71 0427285d Iustin Pop
    , oMinScore
72 0427285d Iustin Pop
    , oMaxCpu
73 0427285d Iustin Pop
    , oMinDisk
74 c0501c69 Iustin Pop
    , oDiskMoves
75 aa8d2e71 Iustin Pop
    , oDynuFile
76 0427285d Iustin Pop
    , oShowVer
77 0427285d Iustin Pop
    , oShowHelp
78 7ef4d93e Iustin Pop
    ]
79 e4f08c46 Iustin Pop
80 6dc960bc Iustin Pop
{- | Start computing the solution at the given depth and recurse until
81 6dc960bc Iustin Pop
we find a valid solution or we exceed the maximum depth.
82 6dc960bc Iustin Pop
83 6dc960bc Iustin Pop
-}
84 6dc960bc Iustin Pop
iterateDepth :: Cluster.Table    -- ^ The starting table
85 6dc960bc Iustin Pop
             -> Int              -- ^ Remaining length
86 c0501c69 Iustin Pop
             -> Bool             -- ^ Allow disk moves
87 6dc960bc Iustin Pop
             -> Int              -- ^ Max node name len
88 6dc960bc Iustin Pop
             -> Int              -- ^ Max instance name len
89 0e8ae201 Iustin Pop
             -> [MoveJob]        -- ^ Current command list
90 24acc2c6 Guido Trotter
             -> Bool             -- ^ Whether to be silent
91 92e32d76 Iustin Pop
             -> Score            -- ^ Score at which to stop
92 0e8ae201 Iustin Pop
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
93 0e8ae201 Iustin Pop
                                              -- and commands
94 c0501c69 Iustin Pop
iterateDepth ini_tbl max_rounds disk_moves nmlen imlen
95 b0517d61 Iustin Pop
             cmd_strs oneline min_score =
96 f25e5aac Iustin Pop
    let Cluster.Table ini_nl ini_il _ _ = ini_tbl
97 f25e5aac Iustin Pop
        m_fin_tbl = Cluster.tryBalance ini_tbl max_rounds disk_moves min_score
98 6dc960bc Iustin Pop
    in
99 f25e5aac Iustin Pop
      case m_fin_tbl of
100 f25e5aac Iustin Pop
        Just fin_tbl ->
101 f25e5aac Iustin Pop
            do
102 f25e5aac Iustin Pop
              let
103 f25e5aac Iustin Pop
                  (Cluster.Table _ _ _ fin_plc) = fin_tbl
104 f25e5aac Iustin Pop
                  fin_plc_len = length fin_plc
105 924f9c16 Iustin Pop
                  cur_plc@(idx, _, _, move, _) = head fin_plc
106 f25e5aac Iustin Pop
                  (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
107 0e8ae201 Iustin Pop
                                     nmlen imlen cur_plc fin_plc_len
108 0e8ae201 Iustin Pop
                  afn = Cluster.involvedNodes ini_il cur_plc
109 924f9c16 Iustin Pop
                  upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
110 f25e5aac Iustin Pop
              unless oneline $ do
111 f25e5aac Iustin Pop
                       putStrLn sol_line
112 f25e5aac Iustin Pop
                       hFlush stdout
113 f25e5aac Iustin Pop
              iterateDepth fin_tbl max_rounds disk_moves
114 f25e5aac Iustin Pop
                           nmlen imlen upd_cmd_strs oneline min_score
115 f25e5aac Iustin Pop
        Nothing -> return (ini_tbl, cmd_strs)
116 6dc960bc Iustin Pop
117 ba6c6006 Iustin Pop
-- | Formats the solution for the oneline display
118 ba6c6006 Iustin Pop
formatOneline :: Double -> Int -> Double -> String
119 ba6c6006 Iustin Pop
formatOneline ini_cv plc_len fin_cv =
120 ba6c6006 Iustin Pop
    printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
121 9f6dcdea Iustin Pop
               (if fin_cv == 0 then 1 else ini_cv / fin_cv)
122 ba6c6006 Iustin Pop
123 b2245847 Iustin Pop
-- | Submits a list of jobs and waits for all to finish execution
124 b2245847 Iustin Pop
execJobs :: L.Client -> [[OpCodes.OpCode]] -> IO (Result [String])
125 b2245847 Iustin Pop
execJobs client = L.submitManyJobs client . showJSON
126 b2245847 Iustin Pop
127 b2245847 Iustin Pop
-- | Polls a set of jobs at a fixed interval until all are finished
128 b2245847 Iustin Pop
-- one way or another
129 b2245847 Iustin Pop
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
130 b2245847 Iustin Pop
waitForJobs client jids = do
131 b2245847 Iustin Pop
  sts <- L.queryJobsStatus client jids
132 b2245847 Iustin Pop
  case sts of
133 b2245847 Iustin Pop
    Bad x -> return $ Bad x
134 b2245847 Iustin Pop
    Ok s -> if any (<= JobRunning) s
135 b2245847 Iustin Pop
            then do
136 b2245847 Iustin Pop
              -- TODO: replace hardcoded value with a better thing
137 b2245847 Iustin Pop
              threadDelay (1000000 * 15)
138 b2245847 Iustin Pop
              waitForJobs client jids
139 b2245847 Iustin Pop
            else return $ Ok s
140 b2245847 Iustin Pop
141 b2245847 Iustin Pop
-- | Check that a set of job statuses is all success
142 b2245847 Iustin Pop
checkJobsStatus :: [JobStatus] -> Bool
143 b2245847 Iustin Pop
checkJobsStatus = all (== JobSuccess)
144 b2245847 Iustin Pop
145 b2245847 Iustin Pop
-- | Execute an entire jobset
146 b2245847 Iustin Pop
execJobSet :: String -> String -> Node.List
147 b2245847 Iustin Pop
           -> Instance.List -> [JobSet] -> IO ()
148 b2245847 Iustin Pop
execJobSet _      _   _  _  [] = return ()
149 b2245847 Iustin Pop
execJobSet master csf nl il (js:jss) = do
150 b2245847 Iustin Pop
  -- map from jobset (htools list of positions) to [[opcodes]]
151 b2245847 Iustin Pop
  let jobs = map (\(_, idx, move, _) ->
152 b2245847 Iustin Pop
                      Cluster.iMoveToJob csf nl il idx move) js
153 b2245847 Iustin Pop
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
154 b2245847 Iustin Pop
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
155 b2245847 Iustin Pop
  jrs <- bracket (L.getClient master) L.closeClient
156 b2245847 Iustin Pop
         (\client -> do
157 b2245847 Iustin Pop
            jids <- execJobs client jobs
158 b2245847 Iustin Pop
            case jids of
159 b2245847 Iustin Pop
              Bad x -> return $ Bad x
160 b2245847 Iustin Pop
              Ok x -> do
161 b2245847 Iustin Pop
                putStrLn $ "Got job IDs " ++ commaJoin x
162 b2245847 Iustin Pop
                waitForJobs client x
163 b2245847 Iustin Pop
         )
164 b2245847 Iustin Pop
  (case jrs of
165 b2245847 Iustin Pop
     Bad x -> do
166 b2245847 Iustin Pop
       hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
167 b2245847 Iustin Pop
       return ()
168 b2245847 Iustin Pop
     Ok x -> if checkJobsStatus x
169 b2245847 Iustin Pop
             then execJobSet master csf nl il jss
170 b2245847 Iustin Pop
             else do
171 b2245847 Iustin Pop
               hPutStrLn stderr $ "Not all jobs completed successfully: " ++
172 b2245847 Iustin Pop
                         show x
173 b2245847 Iustin Pop
               hPutStrLn stderr "Aborting.")
174 b2245847 Iustin Pop
175 e4f08c46 Iustin Pop
-- | Main function.
176 e4f08c46 Iustin Pop
main :: IO ()
177 e4f08c46 Iustin Pop
main = do
178 e4f08c46 Iustin Pop
  cmd_args <- System.getArgs
179 0427285d Iustin Pop
  (opts, args) <- parseOpts cmd_args "hbal" options
180 45f01962 Iustin Pop
181 45f01962 Iustin Pop
  unless (null args) $ do
182 45f01962 Iustin Pop
         hPutStrLn stderr "Error: this program doesn't take any arguments."
183 45f01962 Iustin Pop
         exitWith $ ExitFailure 1
184 a30b2f5b Iustin Pop
185 fae371cc Iustin Pop
  let oneline = optOneline opts
186 7eff5b09 Iustin Pop
      verbose = optVerbose opts
187 fae371cc Iustin Pop
188 0427285d Iustin Pop
  (fixed_nl, il, csf) <- loadExternalData opts
189 ec18dca9 Iustin Pop
190 ec18dca9 Iustin Pop
  let offline_names = optOffline opts
191 db1bcfe8 Iustin Pop
      all_nodes = Container.elems fixed_nl
192 db1bcfe8 Iustin Pop
      all_names = map Node.name all_nodes
193 9f6dcdea Iustin Pop
      offline_wrong = filter (flip notElem all_names) offline_names
194 db1bcfe8 Iustin Pop
      offline_indices = map Node.idx $
195 db1bcfe8 Iustin Pop
                        filter (\n -> elem (Node.name n) offline_names)
196 db1bcfe8 Iustin Pop
                               all_nodes
197 66d67ad4 Iustin Pop
      m_cpu = optMcpu opts
198 66d67ad4 Iustin Pop
      m_dsk = optMdsk opts
199 ec18dca9 Iustin Pop
200 3d7cd10b Iustin Pop
  when (length offline_wrong > 0) $ do
201 2795466b Iustin Pop
         hPrintf stderr "Wrong node name(s) set as offline: %s\n"
202 2795466b Iustin Pop
                     (commaJoin offline_wrong)
203 3d7cd10b Iustin Pop
         exitWith $ ExitFailure 1
204 3d7cd10b Iustin Pop
205 66d67ad4 Iustin Pop
  let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
206 ec18dca9 Iustin Pop
                                then Node.setOffline n True
207 a1c6212e Iustin Pop
                                else n) fixed_nl
208 66d67ad4 Iustin Pop
      nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
209 66d67ad4 Iustin Pop
           nm
210 a30b2f5b Iustin Pop
211 dcbcdb58 Iustin Pop
  when (Container.size il == 0) $ do
212 926c35b1 Iustin Pop
         (if oneline then putStrLn $ formatOneline 0 0 0
213 926c35b1 Iustin Pop
          else printf "Cluster is empty, exiting.\n")
214 dcbcdb58 Iustin Pop
         exitWith ExitSuccess
215 dcbcdb58 Iustin Pop
216 27f96567 Iustin Pop
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
217 e4f08c46 Iustin Pop
             (Container.size nl)
218 e4f08c46 Iustin Pop
             (Container.size il)
219 a0529a64 Iustin Pop
220 9f6dcdea Iustin Pop
  when (length csf > 0 && not oneline && verbose > 1) $
221 9f6dcdea Iustin Pop
       printf "Note: Stripping common suffix of '%s' from names\n" csf
222 a0529a64 Iustin Pop
223 e4f08c46 Iustin Pop
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
224 7eff5b09 Iustin Pop
  unless (oneline || verbose == 0) $ printf
225 27f96567 Iustin Pop
             "Initial check done: %d bad nodes, %d bad instances.\n"
226 e4f08c46 Iustin Pop
             (length bad_nodes) (length bad_instances)
227 e4f08c46 Iustin Pop
228 9f6dcdea Iustin Pop
  when (length bad_nodes > 0) $
229 289c3835 Iustin Pop
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
230 289c3835 Iustin Pop
                  \that the cluster will end N+1 happy."
231 e4f08c46 Iustin Pop
232 507fda3f Iustin Pop
  when (optShowInsts opts) $ do
233 507fda3f Iustin Pop
         putStrLn ""
234 507fda3f Iustin Pop
         putStrLn "Initial instance map:"
235 507fda3f Iustin Pop
         putStrLn $ Cluster.printInsts nl il
236 507fda3f Iustin Pop
237 e4f08c46 Iustin Pop
  when (optShowNodes opts) $
238 e4f08c46 Iustin Pop
       do
239 e4f08c46 Iustin Pop
         putStrLn "Initial cluster status:"
240 dbd6700b Iustin Pop
         putStrLn $ Cluster.printNodes nl
241 e4f08c46 Iustin Pop
242 e4f08c46 Iustin Pop
  let ini_cv = Cluster.compCV nl
243 e4f08c46 Iustin Pop
      ini_tbl = Cluster.Table nl il ini_cv []
244 b0517d61 Iustin Pop
      min_cv = optMinScore opts
245 b0517d61 Iustin Pop
246 b0517d61 Iustin Pop
  when (ini_cv < min_cv) $ do
247 b0517d61 Iustin Pop
         (if oneline then
248 ba6c6006 Iustin Pop
              putStrLn $ formatOneline ini_cv 0 ini_cv
249 b0517d61 Iustin Pop
          else printf "Cluster is already well balanced (initial score %.6g,\n\
250 b0517d61 Iustin Pop
                      \minimum score %.6g).\nNothing to do, exiting\n"
251 b0517d61 Iustin Pop
                      ini_cv min_cv)
252 b0517d61 Iustin Pop
         exitWith ExitSuccess
253 b0517d61 Iustin Pop
254 d09b6ed3 Iustin Pop
  unless oneline (if verbose > 2 then
255 7eff5b09 Iustin Pop
                      printf "Initial coefficients: overall %.8f, %s\n"
256 7eff5b09 Iustin Pop
                      ini_cv (Cluster.printStats nl)
257 7eff5b09 Iustin Pop
                  else
258 7eff5b09 Iustin Pop
                      printf "Initial score: %.8f\n" ini_cv)
259 e4f08c46 Iustin Pop
260 27f96567 Iustin Pop
  unless oneline $ putStrLn "Trying to minimize the CV..."
261 262a08a2 Iustin Pop
  let imlen = Container.maxNameLen il
262 262a08a2 Iustin Pop
      nmlen = Container.maxNameLen nl
263 7dfaafb1 Iustin Pop
264 7dfaafb1 Iustin Pop
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
265 c0501c69 Iustin Pop
                         (optDiskMoves opts)
266 db1bcfe8 Iustin Pop
                         nmlen imlen [] oneline min_cv
267 507fda3f Iustin Pop
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
268 e4f08c46 Iustin Pop
      ord_plc = reverse fin_plc
269 9f6dcdea Iustin Pop
      sol_msg = if null fin_plc
270 9f6dcdea Iustin Pop
                then printf "No solution found\n"
271 9f6dcdea Iustin Pop
                else if verbose > 2
272 9f6dcdea Iustin Pop
                     then printf "Final coefficients:   overall %.8f, %s\n"
273 9f6dcdea Iustin Pop
                          fin_cv (Cluster.printStats fin_nl)
274 9f6dcdea Iustin Pop
                     else printf "Cluster score improved from %.8f to %.8f\n"
275 9f6dcdea Iustin Pop
                          ini_cv fin_cv
276 9f6dcdea Iustin Pop
                              ::String
277 e4f08c46 Iustin Pop
278 7eff5b09 Iustin Pop
  unless oneline $ putStr sol_msg
279 7eff5b09 Iustin Pop
280 7eff5b09 Iustin Pop
  unless (oneline || verbose == 0) $
281 7eff5b09 Iustin Pop
         printf "Solution length=%d\n" (length ord_plc)
282 e4f08c46 Iustin Pop
283 b2245847 Iustin Pop
  let cmd_jobs = Cluster.splitJobs cmd_strs
284 b2245847 Iustin Pop
      cmd_data = Cluster.formatCmds cmd_jobs
285 e0eb63f0 Iustin Pop
286 e0eb63f0 Iustin Pop
  when (isJust $ optShowCmds opts) $
287 e4f08c46 Iustin Pop
       do
288 e0eb63f0 Iustin Pop
         let out_path = fromJust $ optShowCmds opts
289 e4f08c46 Iustin Pop
         putStrLn ""
290 e0eb63f0 Iustin Pop
         (if out_path == "-" then
291 e0eb63f0 Iustin Pop
              printf "Commands to run to reach the above solution:\n%s"
292 e0eb63f0 Iustin Pop
                     (unlines . map ("  " ++) .
293 0e8ae201 Iustin Pop
                      filter (/= "  check") .
294 e0eb63f0 Iustin Pop
                      lines $ cmd_data)
295 e0eb63f0 Iustin Pop
          else do
296 0427285d Iustin Pop
            writeFile out_path (shTemplate ++ cmd_data)
297 e0eb63f0 Iustin Pop
            printf "The commands have been written to file '%s'\n" out_path)
298 e0eb63f0 Iustin Pop
299 b2245847 Iustin Pop
  when (optExecJobs opts && not (null ord_plc))
300 b2245847 Iustin Pop
           (case optLuxi opts of
301 b2245847 Iustin Pop
              Nothing -> do
302 b2245847 Iustin Pop
                hPutStrLn stderr "Execution of commands possible only on LUXI"
303 b2245847 Iustin Pop
                exitWith $ ExitFailure 1
304 b2245847 Iustin Pop
              Just master -> execJobSet master csf fin_nl il cmd_jobs)
305 b2245847 Iustin Pop
306 507fda3f Iustin Pop
  when (optShowInsts opts) $ do
307 507fda3f Iustin Pop
         putStrLn ""
308 507fda3f Iustin Pop
         putStrLn "Final instance map:"
309 507fda3f Iustin Pop
         putStr $ Cluster.printInsts fin_nl fin_il
310 507fda3f Iustin Pop
311 e4f08c46 Iustin Pop
  when (optShowNodes opts) $
312 e4f08c46 Iustin Pop
       do
313 1a7eff0e Iustin Pop
         let ini_cs = Cluster.totalResources nl
314 1a7eff0e Iustin Pop
             fin_cs = Cluster.totalResources fin_nl
315 e4f08c46 Iustin Pop
         putStrLn ""
316 e4f08c46 Iustin Pop
         putStrLn "Final cluster status:"
317 dbd6700b Iustin Pop
         putStrLn $ Cluster.printNodes fin_nl
318 d09b6ed3 Iustin Pop
         when (verbose > 3) $
319 7eff5b09 Iustin Pop
              do
320 1a7eff0e Iustin Pop
                printf "Original: mem=%d disk=%d\n"
321 f5b553da Iustin Pop
                       (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs)
322 1a7eff0e Iustin Pop
                printf "Final:    mem=%d disk=%d\n"
323 f5b553da Iustin Pop
                       (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
324 ba6c6006 Iustin Pop
  when oneline $
325 ba6c6006 Iustin Pop
         putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv