Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ a7e1fd89

History | View | Annotate | Download (14.9 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal
27
    ( main
28
    , options
29
    , iterateDepth
30
    ) where
31

    
32
import Control.Concurrent (threadDelay)
33
import Control.Exception (bracket)
34
import Control.Monad
35
import Data.List
36
import Data.Maybe (isJust, isNothing, fromJust)
37
import Data.IORef
38
import System.Exit
39
import System.IO
40
import System.Posix.Process
41
import System.Posix.Signals
42

    
43
import Text.Printf (printf, hPrintf)
44

    
45
import qualified Ganeti.HTools.Container as Container
46
import qualified Ganeti.HTools.Cluster as Cluster
47
import qualified Ganeti.HTools.Group as Group
48
import qualified Ganeti.HTools.Node as Node
49
import qualified Ganeti.HTools.Instance as Instance
50

    
51
import Ganeti.HTools.CLI
52
import Ganeti.HTools.ExtLoader
53
import Ganeti.HTools.Utils
54
import Ganeti.HTools.Types
55
import Ganeti.HTools.Loader
56

    
57
import qualified Ganeti.Luxi as L
58
import Ganeti.Jobs
59

    
60
-- | Options list and functions.
61
options :: [OptType]
62
options =
63
  [ oPrintNodes
64
  , oPrintInsts
65
  , oPrintCommands
66
  , oDataFile
67
  , oEvacMode
68
  , oRapiMaster
69
  , oLuxiSocket
70
  , oIAllocSrc
71
  , oExecJobs
72
  , oGroup
73
  , oMaxSolLength
74
  , oVerbose
75
  , oQuiet
76
  , oOfflineNode
77
  , oMinScore
78
  , oMaxCpu
79
  , oMinDisk
80
  , oMinGain
81
  , oMinGainLim
82
  , oDiskMoves
83
  , oSelInst
84
  , oInstMoves
85
  , oDynuFile
86
  , oExTags
87
  , oExInst
88
  , oSaveCluster
89
  , oShowVer
90
  , oShowHelp
91
  ]
92

    
93
{- | Start computing the solution at the given depth and recurse until
94
we find a valid solution or we exceed the maximum depth.
95

    
96
-}
97
iterateDepth :: Bool             -- ^ Whether to print moves
98
             -> Cluster.Table    -- ^ The starting table
99
             -> Int              -- ^ Remaining length
100
             -> Bool             -- ^ Allow disk moves
101
             -> Bool             -- ^ Allow instance moves
102
             -> Int              -- ^ Max node name len
103
             -> Int              -- ^ Max instance name len
104
             -> [MoveJob]        -- ^ Current command list
105
             -> Score            -- ^ Score at which to stop
106
             -> Score            -- ^ Min gain limit
107
             -> Score            -- ^ Min score gain
108
             -> Bool             -- ^ Enable evacuation mode
109
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
110
                                              -- and commands
111
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
112
             cmd_strs min_score mg_limit min_gain evac_mode =
113
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
114
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
115
      m_fin_tbl = if allowed_next
116
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
117
                         evac_mode mg_limit min_gain
118
                    else Nothing
119
  in case m_fin_tbl of
120
       Just fin_tbl ->
121
         do
122
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
123
               fin_plc_len = length fin_plc
124
               cur_plc@(idx, _, _, move, _) = head fin_plc
125
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
126
                                  nmlen imlen cur_plc fin_plc_len
127
               afn = Cluster.involvedNodes ini_il cur_plc
128
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
129
           when printmove $ do
130
               putStrLn sol_line
131
               hFlush stdout
132
           iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
133
                        nmlen imlen upd_cmd_strs min_score
134
                        mg_limit min_gain evac_mode
135
       Nothing -> return (ini_tbl, cmd_strs)
136

    
137
-- | Displays the cluster stats.
138
printStats :: Node.List -> Node.List -> IO ()
139
printStats ini_nl fin_nl = do
140
  let ini_cs = Cluster.totalResources ini_nl
141
      fin_cs = Cluster.totalResources fin_nl
142
  printf "Original: mem=%d disk=%d\n"
143
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
144
  printf "Final:    mem=%d disk=%d\n"
145
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
146

    
147
-- | Saves the rebalance commands to a text file.
148
saveBalanceCommands :: Options -> String -> IO ()
149
saveBalanceCommands opts cmd_data = do
150
  let out_path = fromJust $ optShowCmds opts
151
  putStrLn ""
152
  if out_path == "-"
153
    then printf "Commands to run to reach the above solution:\n%s"
154
           (unlines . map ("  " ++) .
155
            filter (/= "  check") .
156
            lines $ cmd_data)
157
    else do
158
      writeFile out_path (shTemplate ++ cmd_data)
159
      printf "The commands have been written to file '%s'\n" out_path
160

    
161
-- | Polls a set of jobs at a fixed interval until all are finished
162
-- one way or another.
163
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
164
waitForJobs client jids = do
165
  sts <- L.queryJobsStatus client jids
166
  case sts of
167
    Bad x -> return $ Bad x
168
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
169
            then do
170
              -- TODO: replace hardcoded value with a better thing
171
              threadDelay (1000000 * 15)
172
              waitForJobs client jids
173
            else return $ Ok s
174

    
175
-- | Check that a set of job statuses is all success.
176
checkJobsStatus :: [JobStatus] -> Bool
177
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
178

    
179
-- | Wrapper over execJobSet checking for early termination.
180
execWrapper :: String -> Node.List
181
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
182
execWrapper _      _  _  _    [] = return True
183
execWrapper master nl il cref alljss = do
184
  cancel <- readIORef cref
185
  if cancel > 0
186
    then do
187
      hPrintf stderr "Exiting early due to user request, %d\
188
                     \ jobset(s) remaining." (length alljss)::IO ()
189
      return False
190
    else execJobSet master nl il cref alljss
191

    
192
-- | Execute an entire jobset.
193
execJobSet :: String -> Node.List
194
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
195
execJobSet _      _  _  _    [] = return True
196
execJobSet master nl il cref (js:jss) = do
197
  -- map from jobset (htools list of positions) to [[opcodes]]
198
  let jobs = map (\(_, idx, move, _) ->
199
                      Cluster.iMoveToJob nl il idx move) js
200
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
201
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
202
  jrs <- bracket (L.getClient master) L.closeClient
203
         (\client -> do
204
            jids <- L.submitManyJobs client jobs
205
            case jids of
206
              Bad x -> return $ Bad x
207
              Ok x -> do
208
                putStrLn $ "Got job IDs " ++ commaJoin x
209
                waitForJobs client x
210
         )
211
  case jrs of
212
    Bad x -> do
213
      hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
214
      return False
215
    Ok x -> if checkJobsStatus x
216
              then execWrapper master nl il cref jss
217
              else do
218
                hPutStrLn stderr $ "Not all jobs completed successfully: " ++
219
                          show x
220
                hPutStrLn stderr "Aborting."
221
                return False
222

    
223
-- | Executes the jobs, if possible and desired.
224
maybeExecJobs :: Options
225
              -> [a]
226
              -> Node.List
227
              -> Instance.List
228
              -> [JobSet]
229
              -> IO Bool
230
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
231
  if optExecJobs opts && not (null ord_plc)
232
    then (case optLuxi opts of
233
            Nothing -> do
234
              hPutStrLn stderr "Execution of commands possible only on LUXI"
235
              return False
236
            Just master -> runJobSet master fin_nl il cmd_jobs)
237
    else return True
238

    
239
-- | Signal handler for graceful termination.
240
hangleSigInt :: IORef Int -> IO ()
241
hangleSigInt cref = do
242
  writeIORef cref 1
243
  putStrLn ("Cancel request registered, will exit at" ++
244
            " the end of the current job set...")
245

    
246
-- | Signal handler for immediate termination.
247
hangleSigTerm :: IORef Int -> IO ()
248
hangleSigTerm cref = do
249
  -- update the cref to 2, just for consistency
250
  writeIORef cref 2
251
  putStrLn "Double cancel request, exiting now..."
252
  exitImmediately $ ExitFailure 2
253

    
254
-- | Runs a job set with handling of signals.
255
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
256
runJobSet master fin_nl il cmd_jobs = do
257
  cref <- newIORef 0
258
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
259
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
260
  execWrapper master fin_nl il cref cmd_jobs
261

    
262
-- | Select the target node group.
263
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
264
            -> IO (String, (Node.List, Instance.List))
265
selectGroup opts gl nlf ilf = do
266
  let ngroups = Cluster.splitCluster nlf ilf
267
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
268
    hPutStrLn stderr "Found multiple node groups:"
269
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
270
           flip Container.find gl . fst) ngroups
271
    hPutStrLn stderr "Aborting."
272
    exitWith $ ExitFailure 1
273

    
274
  case optGroup opts of
275
    Nothing -> do
276
      let (gidx, cdata) = head ngroups
277
          grp = Container.find gidx gl
278
      return (Group.name grp, cdata)
279
    Just g -> case Container.findByName gl g of
280
      Nothing -> do
281
        hPutStrLn stderr $ "Node group " ++ g ++
282
          " not found. Node group list is:"
283
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
284
        hPutStrLn stderr "Aborting."
285
        exitWith $ ExitFailure 1
286
      Just grp ->
287
          case lookup (Group.idx grp) ngroups of
288
            Nothing ->
289
              -- This will only happen if there are no nodes assigned
290
              -- to this group
291
              return (Group.name grp, (Container.empty, Container.empty))
292
            Just cdata -> return (Group.name grp, cdata)
293

    
294
-- | Do a few checks on the cluster data.
295
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
296
checkCluster verbose nl il = do
297
  -- nothing to do on an empty cluster
298
  when (Container.null il) $ do
299
         printf "Cluster is empty, exiting.\n"::IO ()
300
         exitWith ExitSuccess
301

    
302
  -- hbal doesn't currently handle split clusters
303
  let split_insts = Cluster.findSplitInstances nl il
304
  unless (null split_insts || verbose <= 1) $ do
305
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
306
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
307
    hPutStrLn stderr "These instances will not be moved."
308

    
309
  printf "Loaded %d nodes, %d instances\n"
310
             (Container.size nl)
311
             (Container.size il)::IO ()
312

    
313
  let csf = commonSuffix nl il
314
  when (not (null csf) && verbose > 1) $
315
       printf "Note: Stripping common suffix of '%s' from names\n" csf
316

    
317
-- | Do a few checks on the selected group data.
318
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
319
checkGroup verbose gname nl il = do
320
  printf "Group size %d nodes, %d instances\n"
321
             (Container.size nl)
322
             (Container.size il)::IO ()
323

    
324
  putStrLn $ "Selected node group: " ++ gname
325

    
326
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
327
  unless (verbose == 0) $ printf
328
             "Initial check done: %d bad nodes, %d bad instances.\n"
329
             (length bad_nodes) (length bad_instances)
330

    
331
  when (length bad_nodes > 0) $
332
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
333
                  \that the cluster will end N+1 happy."
334

    
335
-- | Check that we actually need to rebalance.
336
checkNeedRebalance :: Options -> Score -> IO ()
337
checkNeedRebalance opts ini_cv = do
338
  let min_cv = optMinScore opts
339
  when (ini_cv < min_cv) $ do
340
         printf "Cluster is already well balanced (initial score %.6g,\n\
341
                \minimum score %.6g).\nNothing to do, exiting\n"
342
                ini_cv min_cv:: IO ()
343
         exitWith ExitSuccess
344

    
345
-- | Main function.
346
main :: Options -> [String] -> IO ()
347
main opts args = do
348
  unless (null args) $ do
349
         hPutStrLn stderr "Error: this program doesn't take any arguments."
350
         exitWith $ ExitFailure 1
351

    
352
  let verbose = optVerbose opts
353
      shownodes = optShowNodes opts
354
      showinsts = optShowInsts opts
355

    
356
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
357

    
358
  when (verbose > 1) $ do
359
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
360
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
361

    
362
  nlf <- setNodeStatus opts fixed_nl
363
  checkCluster verbose nlf ilf
364

    
365
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
366

    
367
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
368

    
369
  checkGroup verbose gname nl il
370

    
371
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
372

    
373
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
374

    
375
  let ini_cv = Cluster.compCV nl
376
      ini_tbl = Cluster.Table nl il ini_cv []
377
      min_cv = optMinScore opts
378

    
379
  checkNeedRebalance opts ini_cv
380

    
381
  if verbose > 2
382
    then printf "Initial coefficients: overall %.8f\n%s"
383
           ini_cv (Cluster.printStats "  " nl)::IO ()
384
    else printf "Initial score: %.8f\n" ini_cv
385

    
386
  putStrLn "Trying to minimize the CV..."
387
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
388
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
389

    
390
  (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
391
                         (optDiskMoves opts)
392
                         (optInstMoves opts)
393
                         nmlen imlen [] min_cv
394
                         (optMinGainLim opts) (optMinGain opts)
395
                         (optEvacMode opts)
396
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
397
      ord_plc = reverse fin_plc
398
      sol_msg = case () of
399
                  _ | null fin_plc -> printf "No solution found\n"
400
                    | verbose > 2 ->
401
                        printf "Final coefficients:   overall %.8f\n%s"
402
                        fin_cv (Cluster.printStats "  " fin_nl)
403
                    | otherwise ->
404
                        printf "Cluster score improved from %.8f to %.8f\n"
405
                        ini_cv fin_cv ::String
406

    
407
  putStr sol_msg
408

    
409
  unless (verbose == 0) $
410
         printf "Solution length=%d\n" (length ord_plc)
411

    
412
  let cmd_jobs = Cluster.splitJobs cmd_strs
413

    
414
  when (isJust $ optShowCmds opts) $
415
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
416

    
417
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
418
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
419

    
420
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
421

    
422
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
423

    
424
  when (verbose > 3) $ printStats nl fin_nl
425

    
426
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
427
  unless eval (exitWith (ExitFailure 1))