Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ 5296ee23

History | View | Annotate | Download (14.9 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal (main) where
27

    
28
import Control.Concurrent (threadDelay)
29
import Control.Exception (bracket)
30
import Control.Monad
31
import Data.List
32
import Data.Maybe (isJust, isNothing, fromJust)
33
import Data.IORef
34
import System (exitWith, ExitCode(..))
35
import System.IO
36
import System.Posix.Process
37
import System.Posix.Signals
38
import qualified System
39

    
40
import Text.Printf (printf, hPrintf)
41

    
42
import qualified Ganeti.HTools.Container as Container
43
import qualified Ganeti.HTools.Cluster as Cluster
44
import qualified Ganeti.HTools.Group as Group
45
import qualified Ganeti.HTools.Node as Node
46
import qualified Ganeti.HTools.Instance as Instance
47

    
48
import Ganeti.HTools.CLI
49
import Ganeti.HTools.ExtLoader
50
import Ganeti.HTools.Utils
51
import Ganeti.HTools.Types
52
import Ganeti.HTools.Loader
53

    
54
import qualified Ganeti.Luxi as L
55
import Ganeti.Jobs
56

    
57
-- | Options list and functions.
58
options :: [OptType]
59
options =
60
    [ oPrintNodes
61
    , oPrintInsts
62
    , oPrintCommands
63
    , oDataFile
64
    , oEvacMode
65
    , oRapiMaster
66
    , oLuxiSocket
67
    , oExecJobs
68
    , oGroup
69
    , oMaxSolLength
70
    , oVerbose
71
    , oQuiet
72
    , oOfflineNode
73
    , oMinScore
74
    , oMaxCpu
75
    , oMinDisk
76
    , oMinGain
77
    , oMinGainLim
78
    , oDiskMoves
79
    , oSelInst
80
    , oInstMoves
81
    , oDynuFile
82
    , oExTags
83
    , oExInst
84
    , oSaveCluster
85
    , oShowVer
86
    , oShowHelp
87
    ]
88

    
89
{- | Start computing the solution at the given depth and recurse until
90
we find a valid solution or we exceed the maximum depth.
91

    
92
-}
93
iterateDepth :: Cluster.Table    -- ^ The starting table
94
             -> Int              -- ^ Remaining length
95
             -> Bool             -- ^ Allow disk moves
96
             -> Bool             -- ^ Allow instance moves
97
             -> Int              -- ^ Max node name len
98
             -> Int              -- ^ Max instance name len
99
             -> [MoveJob]        -- ^ Current command list
100
             -> Score            -- ^ Score at which to stop
101
             -> Score            -- ^ Min gain limit
102
             -> Score            -- ^ Min score gain
103
             -> Bool             -- ^ Enable evacuation mode
104
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
105
                                              -- and commands
106
iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
107
             cmd_strs min_score mg_limit min_gain evac_mode =
108
    let Cluster.Table ini_nl ini_il _ _ = ini_tbl
109
        allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
110
        m_fin_tbl = if allowed_next
111
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
112
                         evac_mode mg_limit min_gain
113
                    else Nothing
114
    in
115
      case m_fin_tbl of
116
        Just fin_tbl ->
117
            do
118
              let
119
                  (Cluster.Table _ _ _ fin_plc) = fin_tbl
120
                  fin_plc_len = length fin_plc
121
                  cur_plc@(idx, _, _, move, _) = head fin_plc
122
                  (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
123
                                     nmlen imlen cur_plc fin_plc_len
124
                  afn = Cluster.involvedNodes ini_il cur_plc
125
                  upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
126
              putStrLn sol_line
127
              hFlush stdout
128
              iterateDepth fin_tbl max_rounds disk_moves inst_moves
129
                           nmlen imlen upd_cmd_strs min_score
130
                           mg_limit min_gain evac_mode
131
        Nothing -> return (ini_tbl, cmd_strs)
132

    
133
-- | Displays the cluster stats.
134
printStats :: Node.List -> Node.List -> IO ()
135
printStats ini_nl fin_nl = do
136
  let ini_cs = Cluster.totalResources ini_nl
137
      fin_cs = Cluster.totalResources fin_nl
138
  printf "Original: mem=%d disk=%d\n"
139
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
140
  printf "Final:    mem=%d disk=%d\n"
141
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
142

    
143
-- | Saves the rebalance commands to a text file.
144
saveBalanceCommands :: Options -> String -> IO ()
145
saveBalanceCommands opts cmd_data = do
146
  let out_path = fromJust $ optShowCmds opts
147
  putStrLn ""
148
  (if out_path == "-" then
149
       printf "Commands to run to reach the above solution:\n%s"
150
                  (unlines . map ("  " ++) .
151
                   filter (/= "  check") .
152
                   lines $ cmd_data)
153
   else do
154
     writeFile out_path (shTemplate ++ cmd_data)
155
     printf "The commands have been written to file '%s'\n" out_path)
156

    
157
-- | Polls a set of jobs at a fixed interval until all are finished
158
-- one way or another.
159
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
160
waitForJobs client jids = do
161
  sts <- L.queryJobsStatus client jids
162
  case sts of
163
    Bad x -> return $ Bad x
164
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
165
            then do
166
              -- TODO: replace hardcoded value with a better thing
167
              threadDelay (1000000 * 15)
168
              waitForJobs client jids
169
            else return $ Ok s
170

    
171
-- | Check that a set of job statuses is all success.
172
checkJobsStatus :: [JobStatus] -> Bool
173
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
174

    
175
-- | Wrapper over execJobSet checking for early termination.
176
execWrapper :: String -> Node.List
177
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
178
execWrapper _      _  _  _    [] = return True
179
execWrapper master nl il cref alljss = do
180
  cancel <- readIORef cref
181
  (if cancel > 0
182
   then do
183
     hPrintf stderr "Exiting early due to user request, %d\
184
                    \ jobset(s) remaining." (length alljss)::IO ()
185
     return False
186
   else execJobSet master nl il cref alljss)
187

    
188
-- | Execute an entire jobset.
189
execJobSet :: String -> Node.List
190
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
191
execJobSet _      _  _  _    [] = return True
192
execJobSet master nl il cref (js:jss) = do
193
  -- map from jobset (htools list of positions) to [[opcodes]]
194
  let jobs = map (\(_, idx, move, _) ->
195
                      Cluster.iMoveToJob nl il idx move) js
196
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
197
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
198
  jrs <- bracket (L.getClient master) L.closeClient
199
         (\client -> do
200
            jids <- L.submitManyJobs client jobs
201
            case jids of
202
              Bad x -> return $ Bad x
203
              Ok x -> do
204
                putStrLn $ "Got job IDs " ++ commaJoin x
205
                waitForJobs client x
206
         )
207
  (case jrs of
208
     Bad x -> do
209
       hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
210
       return False
211
     Ok x -> if checkJobsStatus x
212
             then execWrapper master nl il cref jss
213
             else do
214
               hPutStrLn stderr $ "Not all jobs completed successfully: " ++
215
                         show x
216
               hPutStrLn stderr "Aborting."
217
               return False)
218

    
219
-- | Executes the jobs, if possible and desired.
220
maybeExecJobs :: Options
221
              -> [a]
222
              -> Node.List
223
              -> Instance.List
224
              -> [JobSet]
225
              -> IO Bool
226
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
227
    if optExecJobs opts && not (null ord_plc)
228
    then (case optLuxi opts of
229
            Nothing -> do
230
              hPutStrLn stderr "Execution of commands possible only on LUXI"
231
              return False
232
            Just master -> runJobSet master fin_nl il cmd_jobs)
233
    else return True
234

    
235
-- | Signal handler for graceful termination.
236
hangleSigInt :: IORef Int -> IO ()
237
hangleSigInt cref = do
238
  writeIORef cref 1
239
  putStrLn ("Cancel request registered, will exit at" ++
240
            " the end of the current job set...")
241

    
242
-- | Signal handler for immediate termination.
243
hangleSigTerm :: IORef Int -> IO ()
244
hangleSigTerm cref = do
245
  -- update the cref to 2, just for consistency
246
  writeIORef cref 2
247
  putStrLn "Double cancel request, exiting now..."
248
  exitImmediately $ ExitFailure 2
249

    
250
-- | Runs a job set with handling of signals.
251
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
252
runJobSet master fin_nl il cmd_jobs = do
253
  cref <- newIORef 0
254
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
255
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
256
  execWrapper master fin_nl il cref cmd_jobs
257

    
258
-- | Select the target node group.
259
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
260
            -> IO (String, (Node.List, Instance.List))
261
selectGroup opts gl nlf ilf = do
262
  let ngroups = Cluster.splitCluster nlf ilf
263
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
264
    hPutStrLn stderr "Found multiple node groups:"
265
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
266
           flip Container.find gl . fst) ngroups
267
    hPutStrLn stderr "Aborting."
268
    exitWith $ ExitFailure 1
269

    
270
  case optGroup opts of
271
    Nothing -> do
272
         let (gidx, cdata) = head ngroups
273
             grp = Container.find gidx gl
274
         return (Group.name grp, cdata)
275
    Just g -> case Container.findByName gl g of
276
      Nothing -> do
277
        hPutStrLn stderr $ "Node group " ++ g ++
278
          " not found. Node group list is:"
279
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
280
        hPutStrLn stderr "Aborting."
281
        exitWith $ ExitFailure 1
282
      Just grp ->
283
          case lookup (Group.idx grp) ngroups of
284
            Nothing -> do
285
              -- TODO: while this is unlikely to happen, log here the
286
              -- actual group data to help debugging
287
              hPutStrLn stderr "Internal failure, missing group idx"
288
              exitWith $ ExitFailure 1
289
            Just cdata -> return (Group.name grp, cdata)
290

    
291
-- | Do a few checks on the cluster data.
292
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
293
checkCluster verbose nl il = do
294
  -- nothing to do on an empty cluster
295
  when (Container.null il) $ do
296
         printf "Cluster is empty, exiting.\n"::IO ()
297
         exitWith ExitSuccess
298

    
299
  -- hbal doesn't currently handle split clusters
300
  let split_insts = Cluster.findSplitInstances nl il
301
  unless (null split_insts) $ do
302
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
303
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
304
    hPutStrLn stderr "Aborting."
305
    exitWith $ ExitFailure 1
306

    
307
  printf "Loaded %d nodes, %d instances\n"
308
             (Container.size nl)
309
             (Container.size il)::IO ()
310

    
311
  let csf = commonSuffix nl il
312
  when (not (null csf) && verbose > 1) $
313
       printf "Note: Stripping common suffix of '%s' from names\n" csf
314

    
315
-- | Do a few checks on the selected group data.
316
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
317
checkGroup verbose gname nl il = do
318
  printf "Group size %d nodes, %d instances\n"
319
             (Container.size nl)
320
             (Container.size il)::IO ()
321

    
322
  putStrLn $ "Selected node group: " ++ gname
323

    
324
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
325
  unless (verbose == 0) $ printf
326
             "Initial check done: %d bad nodes, %d bad instances.\n"
327
             (length bad_nodes) (length bad_instances)
328

    
329
  when (length bad_nodes > 0) $
330
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
331
                  \that the cluster will end N+1 happy."
332

    
333
-- | Check that we actually need to rebalance.
334
checkNeedRebalance :: Options -> Score -> IO ()
335
checkNeedRebalance opts ini_cv = do
336
  let min_cv = optMinScore opts
337
  when (ini_cv < min_cv) $ do
338
         printf "Cluster is already well balanced (initial score %.6g,\n\
339
                \minimum score %.6g).\nNothing to do, exiting\n"
340
                ini_cv min_cv:: IO ()
341
         exitWith ExitSuccess
342

    
343
-- | Main function.
344
main :: IO ()
345
main = do
346
  cmd_args <- System.getArgs
347
  (opts, args) <- parseOpts cmd_args "hbal" options
348

    
349
  unless (null args) $ do
350
         hPutStrLn stderr "Error: this program doesn't take any arguments."
351
         exitWith $ ExitFailure 1
352

    
353
  let verbose = optVerbose opts
354
      shownodes = optShowNodes opts
355
      showinsts = optShowInsts opts
356

    
357
  ini_cdata@(ClusterData gl fixed_nl ilf ctags) <- loadExternalData opts
358

    
359
  when (verbose > 1) $
360
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
361

    
362
  nlf <- setNodeStatus opts fixed_nl
363
  checkCluster verbose nlf ilf
364

    
365
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
366

    
367
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
368

    
369
  checkGroup verbose gname nl il
370

    
371
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
372

    
373
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
374

    
375
  let ini_cv = Cluster.compCV nl
376
      ini_tbl = Cluster.Table nl il ini_cv []
377
      min_cv = optMinScore opts
378

    
379
  checkNeedRebalance opts ini_cv
380

    
381
  (if verbose > 2
382
   then printf "Initial coefficients: overall %.8f, %s\n"
383
        ini_cv (Cluster.printStats nl)::IO ()
384
   else printf "Initial score: %.8f\n" ini_cv)
385

    
386
  putStrLn "Trying to minimize the CV..."
387
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
388
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
389

    
390
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
391
                         (optDiskMoves opts)
392
                         (optInstMoves opts)
393
                         nmlen imlen [] min_cv
394
                         (optMinGainLim opts) (optMinGain opts)
395
                         (optEvacMode opts)
396
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
397
      ord_plc = reverse fin_plc
398
      sol_msg = case () of
399
                  _ | null fin_plc -> printf "No solution found\n"
400
                    | verbose > 2 ->
401
                        printf "Final coefficients:   overall %.8f, %s\n"
402
                        fin_cv (Cluster.printStats fin_nl)
403
                    | otherwise ->
404
                        printf "Cluster score improved from %.8f to %.8f\n"
405
                        ini_cv fin_cv ::String
406

    
407
  putStr sol_msg
408

    
409
  unless (verbose == 0) $
410
         printf "Solution length=%d\n" (length ord_plc)
411

    
412
  let cmd_jobs = Cluster.splitJobs cmd_strs
413

    
414
  when (isJust $ optShowCmds opts) $
415
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
416

    
417
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
418
                (ClusterData gl fin_nl fin_il ctags)
419

    
420
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
421

    
422
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
423

    
424
  when (verbose > 3) $ printStats nl fin_nl
425

    
426
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
427
  unless eval (exitWith (ExitFailure 1))