Statistics
| Branch: | Tag: | Revision:

root / htools / hbal.hs @ 2e5eb96a

History | View | Annotate | Download (15.2 kB)

1
{-| Cluster rebalancer
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Main (main) where
27

    
28
import Control.Concurrent (threadDelay)
29
import Control.Exception (bracket)
30
import Data.List
31
import Data.Maybe (isJust, isNothing, fromJust)
32
import Data.IORef
33
import Monad
34
import System (exitWith, ExitCode(..))
35
import System.IO
36
import System.Posix.Process
37
import System.Posix.Signals
38
import qualified System
39

    
40
import Text.Printf (printf, hPrintf)
41

    
42
import qualified Ganeti.HTools.Container as Container
43
import qualified Ganeti.HTools.Cluster as Cluster
44
import qualified Ganeti.HTools.Group as Group
45
import qualified Ganeti.HTools.Node as Node
46
import qualified Ganeti.HTools.Instance as Instance
47

    
48
import Ganeti.HTools.CLI
49
import Ganeti.HTools.ExtLoader
50
import Ganeti.HTools.Utils
51
import Ganeti.HTools.Types
52
import Ganeti.HTools.Loader (ClusterData(..))
53

    
54
import qualified Ganeti.Luxi as L
55
import Ganeti.Jobs
56

    
57
-- | Options list and functions
58
options :: [OptType]
59
options =
60
    [ oPrintNodes
61
    , oPrintInsts
62
    , oPrintCommands
63
    , oOneline
64
    , oDataFile
65
    , oEvacMode
66
    , oRapiMaster
67
    , oLuxiSocket
68
    , oExecJobs
69
    , oGroup
70
    , oMaxSolLength
71
    , oVerbose
72
    , oQuiet
73
    , oOfflineNode
74
    , oMinScore
75
    , oMaxCpu
76
    , oMinDisk
77
    , oMinGain
78
    , oMinGainLim
79
    , oDiskMoves
80
    , oDynuFile
81
    , oExTags
82
    , oExInst
83
    , oSaveCluster
84
    , oShowVer
85
    , oShowHelp
86
    ]
87

    
88
{- | Start computing the solution at the given depth and recurse until
89
we find a valid solution or we exceed the maximum depth.
90

    
91
-}
92
iterateDepth :: Cluster.Table    -- ^ The starting table
93
             -> Int              -- ^ Remaining length
94
             -> Bool             -- ^ Allow disk moves
95
             -> Int              -- ^ Max node name len
96
             -> Int              -- ^ Max instance name len
97
             -> [MoveJob]        -- ^ Current command list
98
             -> Bool             -- ^ Whether to be silent
99
             -> Score            -- ^ Score at which to stop
100
             -> Score            -- ^ Min gain limit
101
             -> Score            -- ^ Min score gain
102
             -> Bool             -- ^ Enable evacuation mode
103
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
104
                                              -- and commands
105
iterateDepth ini_tbl max_rounds disk_moves nmlen imlen
106
             cmd_strs oneline min_score mg_limit min_gain evac_mode =
107
    let Cluster.Table ini_nl ini_il _ _ = ini_tbl
108
        allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
109
        m_fin_tbl = if allowed_next
110
                    then Cluster.tryBalance ini_tbl disk_moves evac_mode
111
                         mg_limit min_gain
112
                    else Nothing
113
    in
114
      case m_fin_tbl of
115
        Just fin_tbl ->
116
            do
117
              let
118
                  (Cluster.Table _ _ _ fin_plc) = fin_tbl
119
                  fin_plc_len = length fin_plc
120
                  cur_plc@(idx, _, _, move, _) = head fin_plc
121
                  (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
122
                                     nmlen imlen cur_plc fin_plc_len
123
                  afn = Cluster.involvedNodes ini_il cur_plc
124
                  upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
125
              unless oneline $ do
126
                       putStrLn sol_line
127
                       hFlush stdout
128
              iterateDepth fin_tbl max_rounds disk_moves
129
                           nmlen imlen upd_cmd_strs oneline min_score
130
                           mg_limit min_gain evac_mode
131
        Nothing -> return (ini_tbl, cmd_strs)
132

    
133
-- | Formats the solution for the oneline display
134
formatOneline :: Double -> Int -> Double -> String
135
formatOneline ini_cv plc_len fin_cv =
136
    printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
137
               (if fin_cv == 0 then 1 else ini_cv / fin_cv)
138

    
139
-- | Polls a set of jobs at a fixed interval until all are finished
140
-- one way or another
141
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
142
waitForJobs client jids = do
143
  sts <- L.queryJobsStatus client jids
144
  case sts of
145
    Bad x -> return $ Bad x
146
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
147
            then do
148
              -- TODO: replace hardcoded value with a better thing
149
              threadDelay (1000000 * 15)
150
              waitForJobs client jids
151
            else return $ Ok s
152

    
153
-- | Check that a set of job statuses is all success
154
checkJobsStatus :: [JobStatus] -> Bool
155
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
156

    
157
-- | Wrapper over execJobSet checking for early termination
158
execWrapper :: String -> Node.List
159
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
160
execWrapper _      _  _  _    [] = return True
161
execWrapper master nl il cref alljss = do
162
  cancel <- readIORef cref
163
  (if cancel > 0
164
   then do
165
     hPrintf stderr "Exiting early due to user request, %d\
166
                    \ jobset(s) remaining." (length alljss)::IO ()
167
     return False
168
   else execJobSet master nl il cref alljss)
169

    
170
-- | Execute an entire jobset
171
execJobSet :: String -> Node.List
172
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
173
execJobSet _      _  _  _    [] = return True
174
execJobSet master nl il cref (js:jss) = do
175
  -- map from jobset (htools list of positions) to [[opcodes]]
176
  let jobs = map (\(_, idx, move, _) ->
177
                      Cluster.iMoveToJob nl il idx move) js
178
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
179
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
180
  jrs <- bracket (L.getClient master) L.closeClient
181
         (\client -> do
182
            jids <- L.submitManyJobs client jobs
183
            case jids of
184
              Bad x -> return $ Bad x
185
              Ok x -> do
186
                putStrLn $ "Got job IDs " ++ commaJoin x
187
                waitForJobs client x
188
         )
189
  (case jrs of
190
     Bad x -> do
191
       hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
192
       return False
193
     Ok x -> if checkJobsStatus x
194
             then execWrapper master nl il cref jss
195
             else do
196
               hPutStrLn stderr $ "Not all jobs completed successfully: " ++
197
                         show x
198
               hPutStrLn stderr "Aborting."
199
               return False)
200

    
201
-- | Signal handler for graceful termination
202
hangleSigInt :: IORef Int -> IO ()
203
hangleSigInt cref = do
204
  writeIORef cref 1
205
  putStrLn ("Cancel request registered, will exit at" ++
206
            " the end of the current job set...")
207

    
208
-- | Signal handler for immediate termination
209
hangleSigTerm :: IORef Int -> IO ()
210
hangleSigTerm cref = do
211
  -- update the cref to 2, just for consistency
212
  writeIORef cref 2
213
  putStrLn "Double cancel request, exiting now..."
214
  exitImmediately $ ExitFailure 2
215

    
216
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
217
runJobSet master fin_nl il cmd_jobs = do
218
  cref <- newIORef 0
219
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
220
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
221
  execWrapper master fin_nl il cref cmd_jobs
222

    
223
-- | Main function.
224
main :: IO ()
225
main = do
226
  cmd_args <- System.getArgs
227
  (opts, args) <- parseOpts cmd_args "hbal" options
228

    
229
  unless (null args) $ do
230
         hPutStrLn stderr "Error: this program doesn't take any arguments."
231
         exitWith $ ExitFailure 1
232

    
233
  let oneline = optOneline opts
234
      verbose = optVerbose opts
235
      shownodes = optShowNodes opts
236
      showinsts = optShowInsts opts
237

    
238
  ini_cdata@(ClusterData gl fixed_nl ilf ctags) <- loadExternalData opts
239

    
240
  let offline_names = optOffline opts
241
      all_nodes = Container.elems fixed_nl
242
      all_names = concatMap allNames all_nodes
243
      offline_wrong = filter (`notElem` all_names) offline_names
244
      offline_indices = map Node.idx $
245
                        filter (\n ->
246
                                 Node.name n `elem` offline_names ||
247
                                 Node.alias n `elem` offline_names)
248
                               all_nodes
249
      m_cpu = optMcpu opts
250
      m_dsk = optMdsk opts
251
      csf = commonSuffix fixed_nl ilf
252

    
253
  when (length offline_wrong > 0) $ do
254
         hPrintf stderr "Wrong node name(s) set as offline: %s\n"
255
                     (commaJoin offline_wrong) :: IO ()
256
         exitWith $ ExitFailure 1
257

    
258
  let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
259
                                then Node.setOffline n True
260
                                else n) fixed_nl
261
      nlf = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
262
            nm
263

    
264
  when (not oneline && verbose > 1) $
265
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
266

    
267
  when (Container.size ilf == 0) $ do
268
         (if oneline then putStrLn $ formatOneline 0 0 0
269
          else printf "Cluster is empty, exiting.\n")
270
         exitWith ExitSuccess
271

    
272
  let split_insts = Cluster.findSplitInstances nlf ilf
273
  when (not . null $ split_insts) $ do
274
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
275
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
276
    hPutStrLn stderr "Aborting."
277
    exitWith $ ExitFailure 1
278

    
279
  let ngroups = Cluster.splitCluster nlf ilf
280
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
281
    hPutStrLn stderr "Found multiple node groups:"
282
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
283
           flip Container.find gl . fst) ngroups
284
    hPutStrLn stderr "Aborting."
285
    exitWith $ ExitFailure 1
286

    
287
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
288

    
289
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
290
             (Container.size nlf)
291
             (Container.size ilf)
292

    
293
  (gname, (nl, il)) <- case optGroup opts of
294
    Nothing -> do
295
         let (gidx, cdata) = head ngroups
296
             grp = Container.find gidx gl
297
         return (Group.name grp, cdata)
298
    Just g -> case Container.findByName gl g of
299
      Nothing -> do
300
        hPutStrLn stderr $ "Node group " ++ g ++
301
          " not found. Node group list is:"
302
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
303
        hPutStrLn stderr "Aborting."
304
        exitWith $ ExitFailure 1
305
      Just grp ->
306
          case lookup (Group.idx grp) ngroups of
307
            Nothing -> do
308
              -- TODO: while this is unlikely to happen, log here the
309
              -- actual group data to help debugging
310
              hPutStrLn stderr "Internal failure, missing group idx"
311
              exitWith $ ExitFailure 1
312
            Just cdata -> return (Group.name grp, cdata)
313

    
314
  unless oneline $ printf "Group size %d nodes, %d instances\n"
315
             (Container.size nl)
316
             (Container.size il)
317

    
318
  putStrLn $ "Selected node group: " ++ gname
319

    
320
  when (length csf > 0 && not oneline && verbose > 1) $
321
       printf "Note: Stripping common suffix of '%s' from names\n" csf
322

    
323
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
324
  unless (oneline || verbose == 0) $ printf
325
             "Initial check done: %d bad nodes, %d bad instances.\n"
326
             (length bad_nodes) (length bad_instances)
327

    
328
  when (length bad_nodes > 0) $
329
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
330
                  \that the cluster will end N+1 happy."
331

    
332
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
333

    
334
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
335

    
336
  let ini_cv = Cluster.compCV nl
337
      ini_tbl = Cluster.Table nl il ini_cv []
338
      min_cv = optMinScore opts
339

    
340
  when (ini_cv < min_cv) $ do
341
         (if oneline then
342
              putStrLn $ formatOneline ini_cv 0 ini_cv
343
          else printf "Cluster is already well balanced (initial score %.6g,\n\
344
                      \minimum score %.6g).\nNothing to do, exiting\n"
345
                      ini_cv min_cv)
346
         exitWith ExitSuccess
347

    
348
  unless oneline (if verbose > 2 then
349
                      printf "Initial coefficients: overall %.8f, %s\n"
350
                      ini_cv (Cluster.printStats nl)
351
                  else
352
                      printf "Initial score: %.8f\n" ini_cv)
353

    
354
  unless oneline $ putStrLn "Trying to minimize the CV..."
355
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
356
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
357

    
358
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
359
                         (optDiskMoves opts)
360
                         nmlen imlen [] oneline min_cv
361
                         (optMinGainLim opts) (optMinGain opts)
362
                         (optEvacMode opts)
363
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
364
      ord_plc = reverse fin_plc
365
      sol_msg = if null fin_plc
366
                then printf "No solution found\n"
367
                else if verbose > 2
368
                     then printf "Final coefficients:   overall %.8f, %s\n"
369
                          fin_cv (Cluster.printStats fin_nl)
370
                     else printf "Cluster score improved from %.8f to %.8f\n"
371
                          ini_cv fin_cv
372
                              ::String
373

    
374
  unless oneline $ putStr sol_msg
375

    
376
  unless (oneline || verbose == 0) $
377
         printf "Solution length=%d\n" (length ord_plc)
378

    
379
  let cmd_jobs = Cluster.splitJobs cmd_strs
380
      cmd_data = Cluster.formatCmds cmd_jobs
381

    
382
  when (isJust $ optShowCmds opts) $
383
       do
384
         let out_path = fromJust $ optShowCmds opts
385
         putStrLn ""
386
         (if out_path == "-" then
387
              printf "Commands to run to reach the above solution:\n%s"
388
                     (unlines . map ("  " ++) .
389
                      filter (/= "  check") .
390
                      lines $ cmd_data)
391
          else do
392
            writeFile out_path (shTemplate ++ cmd_data)
393
            printf "The commands have been written to file '%s'\n" out_path)
394

    
395
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
396
                (ClusterData gl fin_nl fin_il ctags)
397

    
398
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
399

    
400
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
401

    
402
  when (verbose > 3) $ do
403
         let ini_cs = Cluster.totalResources nl
404
             fin_cs = Cluster.totalResources fin_nl
405
         printf "Original: mem=%d disk=%d\n"
406
                    (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
407
         printf "Final:    mem=%d disk=%d\n"
408
                    (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
409
  when oneline $
410
         putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv
411

    
412
  eval <-
413
      if optExecJobs opts && not (null ord_plc)
414
      then (case optLuxi opts of
415
              Nothing -> do
416
                hPutStrLn stderr "Execution of commands possible only on LUXI"
417
                return False
418
              Just master -> runJobSet master fin_nl il cmd_jobs)
419
      else return True
420
  unless eval (exitWith (ExitFailure 1))