Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ e19ee6e4

History | View | Annotate | Download (16 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal (main) where
27

    
28
import Control.Concurrent (threadDelay)
29
import Control.Exception (bracket)
30
import Control.Monad
31
import Data.List
32
import Data.Maybe (isJust, isNothing, fromJust)
33
import Data.IORef
34
import System (exitWith, ExitCode(..))
35
import System.IO
36
import System.Posix.Process
37
import System.Posix.Signals
38
import qualified System
39

    
40
import Text.Printf (printf, hPrintf)
41

    
42
import qualified Ganeti.HTools.Container as Container
43
import qualified Ganeti.HTools.Cluster as Cluster
44
import qualified Ganeti.HTools.Group as Group
45
import qualified Ganeti.HTools.Node as Node
46
import qualified Ganeti.HTools.Instance as Instance
47

    
48
import Ganeti.HTools.CLI
49
import Ganeti.HTools.ExtLoader
50
import Ganeti.HTools.Utils
51
import Ganeti.HTools.Types
52
import Ganeti.HTools.Loader
53

    
54
import qualified Ganeti.Luxi as L
55
import Ganeti.Jobs
56

    
57
-- | Options list and functions.
58
options :: [OptType]
59
options =
60
    [ oPrintNodes
61
    , oPrintInsts
62
    , oPrintCommands
63
    , oDataFile
64
    , oEvacMode
65
    , oRapiMaster
66
    , oLuxiSocket
67
    , oExecJobs
68
    , oGroup
69
    , oMaxSolLength
70
    , oVerbose
71
    , oQuiet
72
    , oOfflineNode
73
    , oMinScore
74
    , oMaxCpu
75
    , oMinDisk
76
    , oMinGain
77
    , oMinGainLim
78
    , oDiskMoves
79
    , oSelInst
80
    , oInstMoves
81
    , oDynuFile
82
    , oExTags
83
    , oExInst
84
    , oSaveCluster
85
    , oShowVer
86
    , oShowHelp
87
    ]
88

    
89
{- | Start computing the solution at the given depth and recurse until
90
we find a valid solution or we exceed the maximum depth.
91

    
92
-}
93
iterateDepth :: Cluster.Table    -- ^ The starting table
94
             -> Int              -- ^ Remaining length
95
             -> Bool             -- ^ Allow disk moves
96
             -> Bool             -- ^ Allow instance moves
97
             -> Int              -- ^ Max node name len
98
             -> Int              -- ^ Max instance name len
99
             -> [MoveJob]        -- ^ Current command list
100
             -> Score            -- ^ Score at which to stop
101
             -> Score            -- ^ Min gain limit
102
             -> Score            -- ^ Min score gain
103
             -> Bool             -- ^ Enable evacuation mode
104
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
105
                                              -- and commands
106
iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
107
             cmd_strs min_score mg_limit min_gain evac_mode =
108
    let Cluster.Table ini_nl ini_il _ _ = ini_tbl
109
        allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
110
        m_fin_tbl = if allowed_next
111
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
112
                         evac_mode mg_limit min_gain
113
                    else Nothing
114
    in
115
      case m_fin_tbl of
116
        Just fin_tbl ->
117
            do
118
              let
119
                  (Cluster.Table _ _ _ fin_plc) = fin_tbl
120
                  fin_plc_len = length fin_plc
121
                  cur_plc@(idx, _, _, move, _) = head fin_plc
122
                  (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
123
                                     nmlen imlen cur_plc fin_plc_len
124
                  afn = Cluster.involvedNodes ini_il cur_plc
125
                  upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
126
              putStrLn sol_line
127
              hFlush stdout
128
              iterateDepth fin_tbl max_rounds disk_moves inst_moves
129
                           nmlen imlen upd_cmd_strs min_score
130
                           mg_limit min_gain evac_mode
131
        Nothing -> return (ini_tbl, cmd_strs)
132

    
133
-- | Displays the cluster stats.
134
printStats :: Node.List -> Node.List -> IO ()
135
printStats ini_nl fin_nl = do
136
  let ini_cs = Cluster.totalResources ini_nl
137
      fin_cs = Cluster.totalResources fin_nl
138
  printf "Original: mem=%d disk=%d\n"
139
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
140
  printf "Final:    mem=%d disk=%d\n"
141
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
142

    
143
-- | Saves the rebalance commands to a text file.
144
saveBalanceCommands :: Options -> String -> IO ()
145
saveBalanceCommands opts cmd_data = do
146
  let out_path = fromJust $ optShowCmds opts
147
  putStrLn ""
148
  (if out_path == "-" then
149
       printf "Commands to run to reach the above solution:\n%s"
150
                  (unlines . map ("  " ++) .
151
                   filter (/= "  check") .
152
                   lines $ cmd_data)
153
   else do
154
     writeFile out_path (shTemplate ++ cmd_data)
155
     printf "The commands have been written to file '%s'\n" out_path)
156

    
157
-- | Polls a set of jobs at a fixed interval until all are finished
158
-- one way or another.
159
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
160
waitForJobs client jids = do
161
  sts <- L.queryJobsStatus client jids
162
  case sts of
163
    Bad x -> return $ Bad x
164
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
165
            then do
166
              -- TODO: replace hardcoded value with a better thing
167
              threadDelay (1000000 * 15)
168
              waitForJobs client jids
169
            else return $ Ok s
170

    
171
-- | Check that a set of job statuses is all success.
172
checkJobsStatus :: [JobStatus] -> Bool
173
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
174

    
175
-- | Wrapper over execJobSet checking for early termination.
176
execWrapper :: String -> Node.List
177
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
178
execWrapper _      _  _  _    [] = return True
179
execWrapper master nl il cref alljss = do
180
  cancel <- readIORef cref
181
  (if cancel > 0
182
   then do
183
     hPrintf stderr "Exiting early due to user request, %d\
184
                    \ jobset(s) remaining." (length alljss)::IO ()
185
     return False
186
   else execJobSet master nl il cref alljss)
187

    
188
-- | Execute an entire jobset.
189
execJobSet :: String -> Node.List
190
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
191
execJobSet _      _  _  _    [] = return True
192
execJobSet master nl il cref (js:jss) = do
193
  -- map from jobset (htools list of positions) to [[opcodes]]
194
  let jobs = map (\(_, idx, move, _) ->
195
                      Cluster.iMoveToJob nl il idx move) js
196
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
197
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
198
  jrs <- bracket (L.getClient master) L.closeClient
199
         (\client -> do
200
            jids <- L.submitManyJobs client jobs
201
            case jids of
202
              Bad x -> return $ Bad x
203
              Ok x -> do
204
                putStrLn $ "Got job IDs " ++ commaJoin x
205
                waitForJobs client x
206
         )
207
  (case jrs of
208
     Bad x -> do
209
       hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
210
       return False
211
     Ok x -> if checkJobsStatus x
212
             then execWrapper master nl il cref jss
213
             else do
214
               hPutStrLn stderr $ "Not all jobs completed successfully: " ++
215
                         show x
216
               hPutStrLn stderr "Aborting."
217
               return False)
218

    
219
-- | Executes the jobs, if possible and desired.
220
maybeExecJobs :: Options
221
              -> [a]
222
              -> Node.List
223
              -> Instance.List
224
              -> [JobSet]
225
              -> IO Bool
226
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
227
    if optExecJobs opts && not (null ord_plc)
228
    then (case optLuxi opts of
229
            Nothing -> do
230
              hPutStrLn stderr "Execution of commands possible only on LUXI"
231
              return False
232
            Just master -> runJobSet master fin_nl il cmd_jobs)
233
    else return True
234

    
235
-- | Signal handler for graceful termination.
236
hangleSigInt :: IORef Int -> IO ()
237
hangleSigInt cref = do
238
  writeIORef cref 1
239
  putStrLn ("Cancel request registered, will exit at" ++
240
            " the end of the current job set...")
241

    
242
-- | Signal handler for immediate termination.
243
hangleSigTerm :: IORef Int -> IO ()
244
hangleSigTerm cref = do
245
  -- update the cref to 2, just for consistency
246
  writeIORef cref 2
247
  putStrLn "Double cancel request, exiting now..."
248
  exitImmediately $ ExitFailure 2
249

    
250
-- | Runs a job set with handling of signals.
251
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
252
runJobSet master fin_nl il cmd_jobs = do
253
  cref <- newIORef 0
254
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
255
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
256
  execWrapper master fin_nl il cref cmd_jobs
257

    
258
-- | Set node properties based on command line options.
259
setNodesStatus :: Options -> Node.List -> IO Node.List
260
setNodesStatus opts fixed_nl = do
261
  let offline_passed = optOffline opts
262
      all_nodes = Container.elems fixed_nl
263
      offline_lkp = map (lookupName (map Node.name all_nodes)) offline_passed
264
      offline_wrong = filter (not . goodLookupResult) offline_lkp
265
      offline_names = map lrContent offline_lkp
266
      offline_indices = map Node.idx $
267
                        filter (\n -> Node.name n `elem` offline_names)
268
                               all_nodes
269
      m_cpu = optMcpu opts
270
      m_dsk = optMdsk opts
271

    
272
  when (not (null offline_wrong)) $ do
273
         hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
274
                     (commaJoin (map lrContent offline_wrong)) :: IO ()
275
         exitWith $ ExitFailure 1
276

    
277
  let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
278
                                then Node.setOffline n True
279
                                else n) fixed_nl
280
      nlf = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
281
            nm
282
  return nlf
283

    
284
-- | Select the target node group.
285
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
286
            -> IO (String, (Node.List, Instance.List))
287
selectGroup opts gl nlf ilf = do
288
  let ngroups = Cluster.splitCluster nlf ilf
289
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
290
    hPutStrLn stderr "Found multiple node groups:"
291
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
292
           flip Container.find gl . fst) ngroups
293
    hPutStrLn stderr "Aborting."
294
    exitWith $ ExitFailure 1
295

    
296
  case optGroup opts of
297
    Nothing -> do
298
         let (gidx, cdata) = head ngroups
299
             grp = Container.find gidx gl
300
         return (Group.name grp, cdata)
301
    Just g -> case Container.findByName gl g of
302
      Nothing -> do
303
        hPutStrLn stderr $ "Node group " ++ g ++
304
          " not found. Node group list is:"
305
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
306
        hPutStrLn stderr "Aborting."
307
        exitWith $ ExitFailure 1
308
      Just grp ->
309
          case lookup (Group.idx grp) ngroups of
310
            Nothing -> do
311
              -- TODO: while this is unlikely to happen, log here the
312
              -- actual group data to help debugging
313
              hPutStrLn stderr "Internal failure, missing group idx"
314
              exitWith $ ExitFailure 1
315
            Just cdata -> return (Group.name grp, cdata)
316

    
317
-- | Do a few checks on the cluster data.
318
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
319
checkCluster verbose nl il = do
320
  -- nothing to do on an empty cluster
321
  when (Container.null il) $ do
322
         printf "Cluster is empty, exiting.\n"::IO ()
323
         exitWith ExitSuccess
324

    
325
  -- hbal doesn't currently handle split clusters
326
  let split_insts = Cluster.findSplitInstances nl il
327
  unless (null split_insts) $ do
328
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
329
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
330
    hPutStrLn stderr "Aborting."
331
    exitWith $ ExitFailure 1
332

    
333
  printf "Loaded %d nodes, %d instances\n"
334
             (Container.size nl)
335
             (Container.size il)::IO ()
336

    
337
  let csf = commonSuffix nl il
338
  when (not (null csf) && verbose > 1) $
339
       printf "Note: Stripping common suffix of '%s' from names\n" csf
340

    
341
-- | Do a few checks on the selected group data.
342
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
343
checkGroup verbose gname nl il = do
344
  printf "Group size %d nodes, %d instances\n"
345
             (Container.size nl)
346
             (Container.size il)::IO ()
347

    
348
  putStrLn $ "Selected node group: " ++ gname
349

    
350
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
351
  unless (verbose == 0) $ printf
352
             "Initial check done: %d bad nodes, %d bad instances.\n"
353
             (length bad_nodes) (length bad_instances)
354

    
355
  when (length bad_nodes > 0) $
356
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
357
                  \that the cluster will end N+1 happy."
358

    
359
-- | Check that we actually need to rebalance.
360
checkNeedRebalance :: Options -> Score -> IO ()
361
checkNeedRebalance opts ini_cv = do
362
  let min_cv = optMinScore opts
363
  when (ini_cv < min_cv) $ do
364
         printf "Cluster is already well balanced (initial score %.6g,\n\
365
                \minimum score %.6g).\nNothing to do, exiting\n"
366
                ini_cv min_cv:: IO ()
367
         exitWith ExitSuccess
368

    
369
-- | Main function.
370
main :: IO ()
371
main = do
372
  cmd_args <- System.getArgs
373
  (opts, args) <- parseOpts cmd_args "hbal" options
374

    
375
  unless (null args) $ do
376
         hPutStrLn stderr "Error: this program doesn't take any arguments."
377
         exitWith $ ExitFailure 1
378

    
379
  let verbose = optVerbose opts
380
      shownodes = optShowNodes opts
381
      showinsts = optShowInsts opts
382

    
383
  ini_cdata@(ClusterData gl fixed_nl ilf ctags) <- loadExternalData opts
384

    
385
  when (verbose > 1) $
386
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
387

    
388
  nlf <- setNodesStatus opts fixed_nl
389
  checkCluster verbose nlf ilf
390

    
391
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
392

    
393
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
394

    
395
  checkGroup verbose gname nl il
396

    
397
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
398

    
399
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
400

    
401
  let ini_cv = Cluster.compCV nl
402
      ini_tbl = Cluster.Table nl il ini_cv []
403
      min_cv = optMinScore opts
404

    
405
  checkNeedRebalance opts ini_cv
406

    
407
  (if verbose > 2
408
   then printf "Initial coefficients: overall %.8f, %s\n"
409
        ini_cv (Cluster.printStats nl)::IO ()
410
   else printf "Initial score: %.8f\n" ini_cv)
411

    
412
  putStrLn "Trying to minimize the CV..."
413
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
414
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
415

    
416
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
417
                         (optDiskMoves opts)
418
                         (optInstMoves opts)
419
                         nmlen imlen [] min_cv
420
                         (optMinGainLim opts) (optMinGain opts)
421
                         (optEvacMode opts)
422
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
423
      ord_plc = reverse fin_plc
424
      sol_msg = case () of
425
                  _ | null fin_plc -> printf "No solution found\n"
426
                    | verbose > 2 ->
427
                        printf "Final coefficients:   overall %.8f, %s\n"
428
                        fin_cv (Cluster.printStats fin_nl)
429
                    | otherwise ->
430
                        printf "Cluster score improved from %.8f to %.8f\n"
431
                        ini_cv fin_cv ::String
432

    
433
  putStr sol_msg
434

    
435
  unless (verbose == 0) $
436
         printf "Solution length=%d\n" (length ord_plc)
437

    
438
  let cmd_jobs = Cluster.splitJobs cmd_strs
439

    
440
  when (isJust $ optShowCmds opts) $
441
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
442

    
443
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
444
                (ClusterData gl fin_nl fin_il ctags)
445

    
446
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
447

    
448
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
449

    
450
  when (verbose > 3) $ printStats nl fin_nl
451

    
452
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
453
  unless eval (exitWith (ExitFailure 1))