Statistics
| Branch: | Tag: | Revision:

root / hbal.hs @ 94e05c32

History | View | Annotate | Download (11.4 kB)

1 66d67ad4 Iustin Pop
{-| Cluster rebalancer
2 e4f08c46 Iustin Pop
3 e4f08c46 Iustin Pop
-}
4 e4f08c46 Iustin Pop
5 e2fa2baf Iustin Pop
{-
6 e2fa2baf Iustin Pop
7 e2fa2baf Iustin Pop
Copyright (C) 2009 Google Inc.
8 e2fa2baf Iustin Pop
9 e2fa2baf Iustin Pop
This program is free software; you can redistribute it and/or modify
10 e2fa2baf Iustin Pop
it under the terms of the GNU General Public License as published by
11 e2fa2baf Iustin Pop
the Free Software Foundation; either version 2 of the License, or
12 e2fa2baf Iustin Pop
(at your option) any later version.
13 e2fa2baf Iustin Pop
14 e2fa2baf Iustin Pop
This program is distributed in the hope that it will be useful, but
15 e2fa2baf Iustin Pop
WITHOUT ANY WARRANTY; without even the implied warranty of
16 e2fa2baf Iustin Pop
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17 e2fa2baf Iustin Pop
General Public License for more details.
18 e2fa2baf Iustin Pop
19 e2fa2baf Iustin Pop
You should have received a copy of the GNU General Public License
20 e2fa2baf Iustin Pop
along with this program; if not, write to the Free Software
21 e2fa2baf Iustin Pop
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 e2fa2baf Iustin Pop
02110-1301, USA.
23 e2fa2baf Iustin Pop
24 e2fa2baf Iustin Pop
-}
25 e2fa2baf Iustin Pop
26 e4f08c46 Iustin Pop
module Main (main) where
27 e4f08c46 Iustin Pop
28 b2245847 Iustin Pop
import Control.Concurrent (threadDelay)
29 b2245847 Iustin Pop
import Control.Exception (bracket)
30 e4f08c46 Iustin Pop
import Data.List
31 e4f08c46 Iustin Pop
import Data.Function
32 0427285d Iustin Pop
import Data.Maybe (isJust, fromJust)
33 e4f08c46 Iustin Pop
import Monad
34 e4f08c46 Iustin Pop
import System
35 e4f08c46 Iustin Pop
import System.IO
36 e4f08c46 Iustin Pop
import qualified System
37 e4f08c46 Iustin Pop
38 2795466b Iustin Pop
import Text.Printf (printf, hPrintf)
39 b2245847 Iustin Pop
import Text.JSON (showJSON)
40 e4f08c46 Iustin Pop
41 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Container as Container
42 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Cluster as Cluster
43 ec18dca9 Iustin Pop
import qualified Ganeti.HTools.Node as Node
44 b2245847 Iustin Pop
import qualified Ganeti.HTools.Instance as Instance
45 040afc35 Iustin Pop
46 0427285d Iustin Pop
import Ganeti.HTools.CLI
47 e8f89bb6 Iustin Pop
import Ganeti.HTools.ExtLoader
48 669d7e3d Iustin Pop
import Ganeti.HTools.Utils
49 0e8ae201 Iustin Pop
import Ganeti.HTools.Types
50 e4f08c46 Iustin Pop
51 b2245847 Iustin Pop
import qualified Ganeti.Luxi as L
52 b2245847 Iustin Pop
import qualified Ganeti.OpCodes as OpCodes
53 b2245847 Iustin Pop
import Ganeti.Jobs
54 b2245847 Iustin Pop
55 e4f08c46 Iustin Pop
-- | Options list and functions
56 0427285d Iustin Pop
options :: [OptType]
57 e4f08c46 Iustin Pop
options =
58 0427285d Iustin Pop
    [ oPrintNodes
59 507fda3f Iustin Pop
    , oPrintInsts
60 0427285d Iustin Pop
    , oPrintCommands
61 0427285d Iustin Pop
    , oOneline
62 0427285d Iustin Pop
    , oNodeFile
63 0427285d Iustin Pop
    , oInstFile
64 0427285d Iustin Pop
    , oRapiMaster
65 0427285d Iustin Pop
    , oLuxiSocket
66 b2245847 Iustin Pop
    , oExecJobs
67 0427285d Iustin Pop
    , oMaxSolLength
68 0427285d Iustin Pop
    , oVerbose
69 0427285d Iustin Pop
    , oQuiet
70 0427285d Iustin Pop
    , oOfflineNode
71 0427285d Iustin Pop
    , oMinScore
72 0427285d Iustin Pop
    , oMaxCpu
73 0427285d Iustin Pop
    , oMinDisk
74 c0501c69 Iustin Pop
    , oDiskMoves
75 aa8d2e71 Iustin Pop
    , oDynuFile
76 0f15cc76 Iustin Pop
    , oExTags
77 0427285d Iustin Pop
    , oShowVer
78 0427285d Iustin Pop
    , oShowHelp
79 7ef4d93e Iustin Pop
    ]
80 e4f08c46 Iustin Pop
81 6dc960bc Iustin Pop
{- | Start computing the solution at the given depth and recurse until
82 6dc960bc Iustin Pop
we find a valid solution or we exceed the maximum depth.
83 6dc960bc Iustin Pop
84 6dc960bc Iustin Pop
-}
85 6dc960bc Iustin Pop
iterateDepth :: Cluster.Table    -- ^ The starting table
86 6dc960bc Iustin Pop
             -> Int              -- ^ Remaining length
87 c0501c69 Iustin Pop
             -> Bool             -- ^ Allow disk moves
88 6dc960bc Iustin Pop
             -> Int              -- ^ Max node name len
89 6dc960bc Iustin Pop
             -> Int              -- ^ Max instance name len
90 0e8ae201 Iustin Pop
             -> [MoveJob]        -- ^ Current command list
91 24acc2c6 Guido Trotter
             -> Bool             -- ^ Whether to be silent
92 92e32d76 Iustin Pop
             -> Score            -- ^ Score at which to stop
93 0e8ae201 Iustin Pop
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
94 0e8ae201 Iustin Pop
                                              -- and commands
95 c0501c69 Iustin Pop
iterateDepth ini_tbl max_rounds disk_moves nmlen imlen
96 b0517d61 Iustin Pop
             cmd_strs oneline min_score =
97 f25e5aac Iustin Pop
    let Cluster.Table ini_nl ini_il _ _ = ini_tbl
98 f25e5aac Iustin Pop
        m_fin_tbl = Cluster.tryBalance ini_tbl max_rounds disk_moves min_score
99 6dc960bc Iustin Pop
    in
100 f25e5aac Iustin Pop
      case m_fin_tbl of
101 f25e5aac Iustin Pop
        Just fin_tbl ->
102 f25e5aac Iustin Pop
            do
103 f25e5aac Iustin Pop
              let
104 f25e5aac Iustin Pop
                  (Cluster.Table _ _ _ fin_plc) = fin_tbl
105 f25e5aac Iustin Pop
                  fin_plc_len = length fin_plc
106 924f9c16 Iustin Pop
                  cur_plc@(idx, _, _, move, _) = head fin_plc
107 f25e5aac Iustin Pop
                  (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
108 0e8ae201 Iustin Pop
                                     nmlen imlen cur_plc fin_plc_len
109 0e8ae201 Iustin Pop
                  afn = Cluster.involvedNodes ini_il cur_plc
110 924f9c16 Iustin Pop
                  upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
111 f25e5aac Iustin Pop
              unless oneline $ do
112 f25e5aac Iustin Pop
                       putStrLn sol_line
113 f25e5aac Iustin Pop
                       hFlush stdout
114 f25e5aac Iustin Pop
              iterateDepth fin_tbl max_rounds disk_moves
115 f25e5aac Iustin Pop
                           nmlen imlen upd_cmd_strs oneline min_score
116 f25e5aac Iustin Pop
        Nothing -> return (ini_tbl, cmd_strs)
117 6dc960bc Iustin Pop
118 ba6c6006 Iustin Pop
-- | Formats the solution for the oneline display
119 ba6c6006 Iustin Pop
formatOneline :: Double -> Int -> Double -> String
120 ba6c6006 Iustin Pop
formatOneline ini_cv plc_len fin_cv =
121 ba6c6006 Iustin Pop
    printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
122 9f6dcdea Iustin Pop
               (if fin_cv == 0 then 1 else ini_cv / fin_cv)
123 ba6c6006 Iustin Pop
124 b2245847 Iustin Pop
-- | Submits a list of jobs and waits for all to finish execution
125 b2245847 Iustin Pop
execJobs :: L.Client -> [[OpCodes.OpCode]] -> IO (Result [String])
126 b2245847 Iustin Pop
execJobs client = L.submitManyJobs client . showJSON
127 b2245847 Iustin Pop
128 b2245847 Iustin Pop
-- | Polls a set of jobs at a fixed interval until all are finished
129 b2245847 Iustin Pop
-- one way or another
130 b2245847 Iustin Pop
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
131 b2245847 Iustin Pop
waitForJobs client jids = do
132 b2245847 Iustin Pop
  sts <- L.queryJobsStatus client jids
133 b2245847 Iustin Pop
  case sts of
134 b2245847 Iustin Pop
    Bad x -> return $ Bad x
135 b2245847 Iustin Pop
    Ok s -> if any (<= JobRunning) s
136 b2245847 Iustin Pop
            then do
137 b2245847 Iustin Pop
              -- TODO: replace hardcoded value with a better thing
138 b2245847 Iustin Pop
              threadDelay (1000000 * 15)
139 b2245847 Iustin Pop
              waitForJobs client jids
140 b2245847 Iustin Pop
            else return $ Ok s
141 b2245847 Iustin Pop
142 b2245847 Iustin Pop
-- | Check that a set of job statuses is all success
143 b2245847 Iustin Pop
checkJobsStatus :: [JobStatus] -> Bool
144 b2245847 Iustin Pop
checkJobsStatus = all (== JobSuccess)
145 b2245847 Iustin Pop
146 b2245847 Iustin Pop
-- | Execute an entire jobset
147 b2245847 Iustin Pop
execJobSet :: String -> String -> Node.List
148 b2245847 Iustin Pop
           -> Instance.List -> [JobSet] -> IO ()
149 b2245847 Iustin Pop
execJobSet _      _   _  _  [] = return ()
150 b2245847 Iustin Pop
execJobSet master csf nl il (js:jss) = do
151 b2245847 Iustin Pop
  -- map from jobset (htools list of positions) to [[opcodes]]
152 b2245847 Iustin Pop
  let jobs = map (\(_, idx, move, _) ->
153 b2245847 Iustin Pop
                      Cluster.iMoveToJob csf nl il idx move) js
154 b2245847 Iustin Pop
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
155 b2245847 Iustin Pop
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
156 b2245847 Iustin Pop
  jrs <- bracket (L.getClient master) L.closeClient
157 b2245847 Iustin Pop
         (\client -> do
158 b2245847 Iustin Pop
            jids <- execJobs client jobs
159 b2245847 Iustin Pop
            case jids of
160 b2245847 Iustin Pop
              Bad x -> return $ Bad x
161 b2245847 Iustin Pop
              Ok x -> do
162 b2245847 Iustin Pop
                putStrLn $ "Got job IDs " ++ commaJoin x
163 b2245847 Iustin Pop
                waitForJobs client x
164 b2245847 Iustin Pop
         )
165 b2245847 Iustin Pop
  (case jrs of
166 b2245847 Iustin Pop
     Bad x -> do
167 b2245847 Iustin Pop
       hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
168 b2245847 Iustin Pop
       return ()
169 b2245847 Iustin Pop
     Ok x -> if checkJobsStatus x
170 b2245847 Iustin Pop
             then execJobSet master csf nl il jss
171 b2245847 Iustin Pop
             else do
172 b2245847 Iustin Pop
               hPutStrLn stderr $ "Not all jobs completed successfully: " ++
173 b2245847 Iustin Pop
                         show x
174 b2245847 Iustin Pop
               hPutStrLn stderr "Aborting.")
175 b2245847 Iustin Pop
176 e4f08c46 Iustin Pop
-- | Main function.
177 e4f08c46 Iustin Pop
main :: IO ()
178 e4f08c46 Iustin Pop
main = do
179 e4f08c46 Iustin Pop
  cmd_args <- System.getArgs
180 0427285d Iustin Pop
  (opts, args) <- parseOpts cmd_args "hbal" options
181 45f01962 Iustin Pop
182 45f01962 Iustin Pop
  unless (null args) $ do
183 45f01962 Iustin Pop
         hPutStrLn stderr "Error: this program doesn't take any arguments."
184 45f01962 Iustin Pop
         exitWith $ ExitFailure 1
185 a30b2f5b Iustin Pop
186 fae371cc Iustin Pop
  let oneline = optOneline opts
187 7eff5b09 Iustin Pop
      verbose = optVerbose opts
188 e98fb766 Iustin Pop
      shownodes = optShowNodes opts
189 fae371cc Iustin Pop
190 94e05c32 Iustin Pop
  (fixed_nl, il, _, csf) <- loadExternalData opts
191 ec18dca9 Iustin Pop
192 ec18dca9 Iustin Pop
  let offline_names = optOffline opts
193 db1bcfe8 Iustin Pop
      all_nodes = Container.elems fixed_nl
194 db1bcfe8 Iustin Pop
      all_names = map Node.name all_nodes
195 9f6dcdea Iustin Pop
      offline_wrong = filter (flip notElem all_names) offline_names
196 db1bcfe8 Iustin Pop
      offline_indices = map Node.idx $
197 db1bcfe8 Iustin Pop
                        filter (\n -> elem (Node.name n) offline_names)
198 db1bcfe8 Iustin Pop
                               all_nodes
199 66d67ad4 Iustin Pop
      m_cpu = optMcpu opts
200 66d67ad4 Iustin Pop
      m_dsk = optMdsk opts
201 ec18dca9 Iustin Pop
202 3d7cd10b Iustin Pop
  when (length offline_wrong > 0) $ do
203 2795466b Iustin Pop
         hPrintf stderr "Wrong node name(s) set as offline: %s\n"
204 2795466b Iustin Pop
                     (commaJoin offline_wrong)
205 3d7cd10b Iustin Pop
         exitWith $ ExitFailure 1
206 3d7cd10b Iustin Pop
207 66d67ad4 Iustin Pop
  let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
208 ec18dca9 Iustin Pop
                                then Node.setOffline n True
209 a1c6212e Iustin Pop
                                else n) fixed_nl
210 66d67ad4 Iustin Pop
      nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
211 66d67ad4 Iustin Pop
           nm
212 a30b2f5b Iustin Pop
213 dcbcdb58 Iustin Pop
  when (Container.size il == 0) $ do
214 926c35b1 Iustin Pop
         (if oneline then putStrLn $ formatOneline 0 0 0
215 926c35b1 Iustin Pop
          else printf "Cluster is empty, exiting.\n")
216 dcbcdb58 Iustin Pop
         exitWith ExitSuccess
217 dcbcdb58 Iustin Pop
218 27f96567 Iustin Pop
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
219 e4f08c46 Iustin Pop
             (Container.size nl)
220 e4f08c46 Iustin Pop
             (Container.size il)
221 a0529a64 Iustin Pop
222 9f6dcdea Iustin Pop
  when (length csf > 0 && not oneline && verbose > 1) $
223 9f6dcdea Iustin Pop
       printf "Note: Stripping common suffix of '%s' from names\n" csf
224 a0529a64 Iustin Pop
225 e4f08c46 Iustin Pop
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
226 7eff5b09 Iustin Pop
  unless (oneline || verbose == 0) $ printf
227 27f96567 Iustin Pop
             "Initial check done: %d bad nodes, %d bad instances.\n"
228 e4f08c46 Iustin Pop
             (length bad_nodes) (length bad_instances)
229 e4f08c46 Iustin Pop
230 9f6dcdea Iustin Pop
  when (length bad_nodes > 0) $
231 289c3835 Iustin Pop
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
232 289c3835 Iustin Pop
                  \that the cluster will end N+1 happy."
233 e4f08c46 Iustin Pop
234 507fda3f Iustin Pop
  when (optShowInsts opts) $ do
235 507fda3f Iustin Pop
         putStrLn ""
236 507fda3f Iustin Pop
         putStrLn "Initial instance map:"
237 507fda3f Iustin Pop
         putStrLn $ Cluster.printInsts nl il
238 507fda3f Iustin Pop
239 e98fb766 Iustin Pop
  when (isJust shownodes) $
240 e4f08c46 Iustin Pop
       do
241 e4f08c46 Iustin Pop
         putStrLn "Initial cluster status:"
242 e98fb766 Iustin Pop
         putStrLn $ Cluster.printNodes nl (fromJust shownodes)
243 e4f08c46 Iustin Pop
244 e4f08c46 Iustin Pop
  let ini_cv = Cluster.compCV nl
245 e4f08c46 Iustin Pop
      ini_tbl = Cluster.Table nl il ini_cv []
246 b0517d61 Iustin Pop
      min_cv = optMinScore opts
247 b0517d61 Iustin Pop
248 b0517d61 Iustin Pop
  when (ini_cv < min_cv) $ do
249 b0517d61 Iustin Pop
         (if oneline then
250 ba6c6006 Iustin Pop
              putStrLn $ formatOneline ini_cv 0 ini_cv
251 b0517d61 Iustin Pop
          else printf "Cluster is already well balanced (initial score %.6g,\n\
252 b0517d61 Iustin Pop
                      \minimum score %.6g).\nNothing to do, exiting\n"
253 b0517d61 Iustin Pop
                      ini_cv min_cv)
254 b0517d61 Iustin Pop
         exitWith ExitSuccess
255 b0517d61 Iustin Pop
256 d09b6ed3 Iustin Pop
  unless oneline (if verbose > 2 then
257 7eff5b09 Iustin Pop
                      printf "Initial coefficients: overall %.8f, %s\n"
258 7eff5b09 Iustin Pop
                      ini_cv (Cluster.printStats nl)
259 7eff5b09 Iustin Pop
                  else
260 7eff5b09 Iustin Pop
                      printf "Initial score: %.8f\n" ini_cv)
261 e4f08c46 Iustin Pop
262 27f96567 Iustin Pop
  unless oneline $ putStrLn "Trying to minimize the CV..."
263 262a08a2 Iustin Pop
  let imlen = Container.maxNameLen il
264 262a08a2 Iustin Pop
      nmlen = Container.maxNameLen nl
265 7dfaafb1 Iustin Pop
266 7dfaafb1 Iustin Pop
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
267 c0501c69 Iustin Pop
                         (optDiskMoves opts)
268 db1bcfe8 Iustin Pop
                         nmlen imlen [] oneline min_cv
269 507fda3f Iustin Pop
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
270 e4f08c46 Iustin Pop
      ord_plc = reverse fin_plc
271 9f6dcdea Iustin Pop
      sol_msg = if null fin_plc
272 9f6dcdea Iustin Pop
                then printf "No solution found\n"
273 9f6dcdea Iustin Pop
                else if verbose > 2
274 9f6dcdea Iustin Pop
                     then printf "Final coefficients:   overall %.8f, %s\n"
275 9f6dcdea Iustin Pop
                          fin_cv (Cluster.printStats fin_nl)
276 9f6dcdea Iustin Pop
                     else printf "Cluster score improved from %.8f to %.8f\n"
277 9f6dcdea Iustin Pop
                          ini_cv fin_cv
278 9f6dcdea Iustin Pop
                              ::String
279 e4f08c46 Iustin Pop
280 7eff5b09 Iustin Pop
  unless oneline $ putStr sol_msg
281 7eff5b09 Iustin Pop
282 7eff5b09 Iustin Pop
  unless (oneline || verbose == 0) $
283 7eff5b09 Iustin Pop
         printf "Solution length=%d\n" (length ord_plc)
284 e4f08c46 Iustin Pop
285 b2245847 Iustin Pop
  let cmd_jobs = Cluster.splitJobs cmd_strs
286 b2245847 Iustin Pop
      cmd_data = Cluster.formatCmds cmd_jobs
287 e0eb63f0 Iustin Pop
288 e0eb63f0 Iustin Pop
  when (isJust $ optShowCmds opts) $
289 e4f08c46 Iustin Pop
       do
290 e0eb63f0 Iustin Pop
         let out_path = fromJust $ optShowCmds opts
291 e4f08c46 Iustin Pop
         putStrLn ""
292 e0eb63f0 Iustin Pop
         (if out_path == "-" then
293 e0eb63f0 Iustin Pop
              printf "Commands to run to reach the above solution:\n%s"
294 e0eb63f0 Iustin Pop
                     (unlines . map ("  " ++) .
295 0e8ae201 Iustin Pop
                      filter (/= "  check") .
296 e0eb63f0 Iustin Pop
                      lines $ cmd_data)
297 e0eb63f0 Iustin Pop
          else do
298 0427285d Iustin Pop
            writeFile out_path (shTemplate ++ cmd_data)
299 e0eb63f0 Iustin Pop
            printf "The commands have been written to file '%s'\n" out_path)
300 e0eb63f0 Iustin Pop
301 b2245847 Iustin Pop
  when (optExecJobs opts && not (null ord_plc))
302 b2245847 Iustin Pop
           (case optLuxi opts of
303 b2245847 Iustin Pop
              Nothing -> do
304 b2245847 Iustin Pop
                hPutStrLn stderr "Execution of commands possible only on LUXI"
305 b2245847 Iustin Pop
                exitWith $ ExitFailure 1
306 b2245847 Iustin Pop
              Just master -> execJobSet master csf fin_nl il cmd_jobs)
307 b2245847 Iustin Pop
308 507fda3f Iustin Pop
  when (optShowInsts opts) $ do
309 507fda3f Iustin Pop
         putStrLn ""
310 507fda3f Iustin Pop
         putStrLn "Final instance map:"
311 507fda3f Iustin Pop
         putStr $ Cluster.printInsts fin_nl fin_il
312 507fda3f Iustin Pop
313 e98fb766 Iustin Pop
  when (isJust shownodes) $
314 e4f08c46 Iustin Pop
       do
315 1a7eff0e Iustin Pop
         let ini_cs = Cluster.totalResources nl
316 1a7eff0e Iustin Pop
             fin_cs = Cluster.totalResources fin_nl
317 e4f08c46 Iustin Pop
         putStrLn ""
318 e4f08c46 Iustin Pop
         putStrLn "Final cluster status:"
319 e98fb766 Iustin Pop
         putStrLn $ Cluster.printNodes fin_nl (fromJust shownodes)
320 d09b6ed3 Iustin Pop
         when (verbose > 3) $
321 7eff5b09 Iustin Pop
              do
322 1a7eff0e Iustin Pop
                printf "Original: mem=%d disk=%d\n"
323 f5b553da Iustin Pop
                       (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs)
324 1a7eff0e Iustin Pop
                printf "Final:    mem=%d disk=%d\n"
325 f5b553da Iustin Pop
                       (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
326 ba6c6006 Iustin Pop
  when oneline $
327 ba6c6006 Iustin Pop
         putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv