Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ 2922d2c5

History | View | Annotate | Download (14.7 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal (main, options) where
27

    
28
import Control.Concurrent (threadDelay)
29
import Control.Exception (bracket)
30
import Control.Monad
31
import Data.List
32
import Data.Maybe (isJust, isNothing, fromJust)
33
import Data.IORef
34
import System.Exit
35
import System.IO
36
import System.Posix.Process
37
import System.Posix.Signals
38

    
39
import Text.Printf (printf, hPrintf)
40

    
41
import qualified Ganeti.HTools.Container as Container
42
import qualified Ganeti.HTools.Cluster as Cluster
43
import qualified Ganeti.HTools.Group as Group
44
import qualified Ganeti.HTools.Node as Node
45
import qualified Ganeti.HTools.Instance as Instance
46

    
47
import Ganeti.HTools.CLI
48
import Ganeti.HTools.ExtLoader
49
import Ganeti.HTools.Utils
50
import Ganeti.HTools.Types
51
import Ganeti.HTools.Loader
52

    
53
import qualified Ganeti.Luxi as L
54
import Ganeti.Jobs
55

    
56
-- | Options list and functions.
57
options :: [OptType]
58
options =
59
  [ oPrintNodes
60
  , oPrintInsts
61
  , oPrintCommands
62
  , oDataFile
63
  , oEvacMode
64
  , oRapiMaster
65
  , oLuxiSocket
66
  , oExecJobs
67
  , oGroup
68
  , oMaxSolLength
69
  , oVerbose
70
  , oQuiet
71
  , oOfflineNode
72
  , oMinScore
73
  , oMaxCpu
74
  , oMinDisk
75
  , oMinGain
76
  , oMinGainLim
77
  , oDiskMoves
78
  , oSelInst
79
  , oInstMoves
80
  , oDynuFile
81
  , oExTags
82
  , oExInst
83
  , oSaveCluster
84
  , oShowVer
85
  , oShowHelp
86
  ]
87

    
88
{- | Start computing the solution at the given depth and recurse until
89
we find a valid solution or we exceed the maximum depth.
90

    
91
-}
92
iterateDepth :: Cluster.Table    -- ^ The starting table
93
             -> Int              -- ^ Remaining length
94
             -> Bool             -- ^ Allow disk moves
95
             -> Bool             -- ^ Allow instance moves
96
             -> Int              -- ^ Max node name len
97
             -> Int              -- ^ Max instance name len
98
             -> [MoveJob]        -- ^ Current command list
99
             -> Score            -- ^ Score at which to stop
100
             -> Score            -- ^ Min gain limit
101
             -> Score            -- ^ Min score gain
102
             -> Bool             -- ^ Enable evacuation mode
103
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
104
                                              -- and commands
105
iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
106
             cmd_strs min_score mg_limit min_gain evac_mode =
107
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
108
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
109
      m_fin_tbl = if allowed_next
110
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
111
                         evac_mode mg_limit min_gain
112
                    else Nothing
113
  in case m_fin_tbl of
114
       Just fin_tbl ->
115
         do
116
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
117
               fin_plc_len = length fin_plc
118
               cur_plc@(idx, _, _, move, _) = head fin_plc
119
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
120
                                  nmlen imlen cur_plc fin_plc_len
121
               afn = Cluster.involvedNodes ini_il cur_plc
122
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
123
           putStrLn sol_line
124
           hFlush stdout
125
           iterateDepth fin_tbl max_rounds disk_moves inst_moves
126
                        nmlen imlen upd_cmd_strs min_score
127
                        mg_limit min_gain evac_mode
128
       Nothing -> return (ini_tbl, cmd_strs)
129

    
130
-- | Displays the cluster stats.
131
printStats :: Node.List -> Node.List -> IO ()
132
printStats ini_nl fin_nl = do
133
  let ini_cs = Cluster.totalResources ini_nl
134
      fin_cs = Cluster.totalResources fin_nl
135
  printf "Original: mem=%d disk=%d\n"
136
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
137
  printf "Final:    mem=%d disk=%d\n"
138
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
139

    
140
-- | Saves the rebalance commands to a text file.
141
saveBalanceCommands :: Options -> String -> IO ()
142
saveBalanceCommands opts cmd_data = do
143
  let out_path = fromJust $ optShowCmds opts
144
  putStrLn ""
145
  if out_path == "-"
146
    then printf "Commands to run to reach the above solution:\n%s"
147
           (unlines . map ("  " ++) .
148
            filter (/= "  check") .
149
            lines $ cmd_data)
150
    else do
151
      writeFile out_path (shTemplate ++ cmd_data)
152
      printf "The commands have been written to file '%s'\n" out_path
153

    
154
-- | Polls a set of jobs at a fixed interval until all are finished
155
-- one way or another.
156
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
157
waitForJobs client jids = do
158
  sts <- L.queryJobsStatus client jids
159
  case sts of
160
    Bad x -> return $ Bad x
161
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
162
            then do
163
              -- TODO: replace hardcoded value with a better thing
164
              threadDelay (1000000 * 15)
165
              waitForJobs client jids
166
            else return $ Ok s
167

    
168
-- | Check that a set of job statuses is all success.
169
checkJobsStatus :: [JobStatus] -> Bool
170
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
171

    
172
-- | Wrapper over execJobSet checking for early termination.
173
execWrapper :: String -> Node.List
174
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
175
execWrapper _      _  _  _    [] = return True
176
execWrapper master nl il cref alljss = do
177
  cancel <- readIORef cref
178
  if cancel > 0
179
    then do
180
      hPrintf stderr "Exiting early due to user request, %d\
181
                     \ jobset(s) remaining." (length alljss)::IO ()
182
      return False
183
    else execJobSet master nl il cref alljss
184

    
185
-- | Execute an entire jobset.
186
execJobSet :: String -> Node.List
187
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
188
execJobSet _      _  _  _    [] = return True
189
execJobSet master nl il cref (js:jss) = do
190
  -- map from jobset (htools list of positions) to [[opcodes]]
191
  let jobs = map (\(_, idx, move, _) ->
192
                      Cluster.iMoveToJob nl il idx move) js
193
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
194
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
195
  jrs <- bracket (L.getClient master) L.closeClient
196
         (\client -> do
197
            jids <- L.submitManyJobs client jobs
198
            case jids of
199
              Bad x -> return $ Bad x
200
              Ok x -> do
201
                putStrLn $ "Got job IDs " ++ commaJoin x
202
                waitForJobs client x
203
         )
204
  case jrs of
205
    Bad x -> do
206
      hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
207
      return False
208
    Ok x -> if checkJobsStatus x
209
              then execWrapper master nl il cref jss
210
              else do
211
                hPutStrLn stderr $ "Not all jobs completed successfully: " ++
212
                          show x
213
                hPutStrLn stderr "Aborting."
214
                return False
215

    
216
-- | Executes the jobs, if possible and desired.
217
maybeExecJobs :: Options
218
              -> [a]
219
              -> Node.List
220
              -> Instance.List
221
              -> [JobSet]
222
              -> IO Bool
223
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
224
  if optExecJobs opts && not (null ord_plc)
225
    then (case optLuxi opts of
226
            Nothing -> do
227
              hPutStrLn stderr "Execution of commands possible only on LUXI"
228
              return False
229
            Just master -> runJobSet master fin_nl il cmd_jobs)
230
    else return True
231

    
232
-- | Signal handler for graceful termination.
233
hangleSigInt :: IORef Int -> IO ()
234
hangleSigInt cref = do
235
  writeIORef cref 1
236
  putStrLn ("Cancel request registered, will exit at" ++
237
            " the end of the current job set...")
238

    
239
-- | Signal handler for immediate termination.
240
hangleSigTerm :: IORef Int -> IO ()
241
hangleSigTerm cref = do
242
  -- update the cref to 2, just for consistency
243
  writeIORef cref 2
244
  putStrLn "Double cancel request, exiting now..."
245
  exitImmediately $ ExitFailure 2
246

    
247
-- | Runs a job set with handling of signals.
248
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
249
runJobSet master fin_nl il cmd_jobs = do
250
  cref <- newIORef 0
251
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
252
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
253
  execWrapper master fin_nl il cref cmd_jobs
254

    
255
-- | Select the target node group.
256
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
257
            -> IO (String, (Node.List, Instance.List))
258
selectGroup opts gl nlf ilf = do
259
  let ngroups = Cluster.splitCluster nlf ilf
260
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
261
    hPutStrLn stderr "Found multiple node groups:"
262
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
263
           flip Container.find gl . fst) ngroups
264
    hPutStrLn stderr "Aborting."
265
    exitWith $ ExitFailure 1
266

    
267
  case optGroup opts of
268
    Nothing -> do
269
      let (gidx, cdata) = head ngroups
270
          grp = Container.find gidx gl
271
      return (Group.name grp, cdata)
272
    Just g -> case Container.findByName gl g of
273
      Nothing -> do
274
        hPutStrLn stderr $ "Node group " ++ g ++
275
          " not found. Node group list is:"
276
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
277
        hPutStrLn stderr "Aborting."
278
        exitWith $ ExitFailure 1
279
      Just grp ->
280
          case lookup (Group.idx grp) ngroups of
281
            Nothing ->
282
              -- This will only happen if there are no nodes assigned
283
              -- to this group
284
              return (Group.name grp, (Container.empty, Container.empty))
285
            Just cdata -> return (Group.name grp, cdata)
286

    
287
-- | Do a few checks on the cluster data.
288
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
289
checkCluster verbose nl il = do
290
  -- nothing to do on an empty cluster
291
  when (Container.null il) $ do
292
         printf "Cluster is empty, exiting.\n"::IO ()
293
         exitWith ExitSuccess
294

    
295
  -- hbal doesn't currently handle split clusters
296
  let split_insts = Cluster.findSplitInstances nl il
297
  unless (null split_insts) $ do
298
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
299
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
300
    hPutStrLn stderr "Aborting."
301
    exitWith $ ExitFailure 1
302

    
303
  printf "Loaded %d nodes, %d instances\n"
304
             (Container.size nl)
305
             (Container.size il)::IO ()
306

    
307
  let csf = commonSuffix nl il
308
  when (not (null csf) && verbose > 1) $
309
       printf "Note: Stripping common suffix of '%s' from names\n" csf
310

    
311
-- | Do a few checks on the selected group data.
312
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
313
checkGroup verbose gname nl il = do
314
  printf "Group size %d nodes, %d instances\n"
315
             (Container.size nl)
316
             (Container.size il)::IO ()
317

    
318
  putStrLn $ "Selected node group: " ++ gname
319

    
320
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
321
  unless (verbose == 0) $ printf
322
             "Initial check done: %d bad nodes, %d bad instances.\n"
323
             (length bad_nodes) (length bad_instances)
324

    
325
  when (length bad_nodes > 0) $
326
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
327
                  \that the cluster will end N+1 happy."
328

    
329
-- | Check that we actually need to rebalance.
330
checkNeedRebalance :: Options -> Score -> IO ()
331
checkNeedRebalance opts ini_cv = do
332
  let min_cv = optMinScore opts
333
  when (ini_cv < min_cv) $ do
334
         printf "Cluster is already well balanced (initial score %.6g,\n\
335
                \minimum score %.6g).\nNothing to do, exiting\n"
336
                ini_cv min_cv:: IO ()
337
         exitWith ExitSuccess
338

    
339
-- | Main function.
340
main :: Options -> [String] -> IO ()
341
main opts args = do
342
  unless (null args) $ do
343
         hPutStrLn stderr "Error: this program doesn't take any arguments."
344
         exitWith $ ExitFailure 1
345

    
346
  let verbose = optVerbose opts
347
      shownodes = optShowNodes opts
348
      showinsts = optShowInsts opts
349

    
350
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
351

    
352
  when (verbose > 1) $ do
353
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
354
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
355

    
356
  nlf <- setNodeStatus opts fixed_nl
357
  checkCluster verbose nlf ilf
358

    
359
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
360

    
361
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
362

    
363
  checkGroup verbose gname nl il
364

    
365
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
366

    
367
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
368

    
369
  let ini_cv = Cluster.compCV nl
370
      ini_tbl = Cluster.Table nl il ini_cv []
371
      min_cv = optMinScore opts
372

    
373
  checkNeedRebalance opts ini_cv
374

    
375
  if verbose > 2
376
    then printf "Initial coefficients: overall %.8f\n%s"
377
           ini_cv (Cluster.printStats "  " nl)::IO ()
378
    else printf "Initial score: %.8f\n" ini_cv
379

    
380
  putStrLn "Trying to minimize the CV..."
381
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
382
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
383

    
384
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
385
                         (optDiskMoves opts)
386
                         (optInstMoves opts)
387
                         nmlen imlen [] min_cv
388
                         (optMinGainLim opts) (optMinGain opts)
389
                         (optEvacMode opts)
390
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
391
      ord_plc = reverse fin_plc
392
      sol_msg = case () of
393
                  _ | null fin_plc -> printf "No solution found\n"
394
                    | verbose > 2 ->
395
                        printf "Final coefficients:   overall %.8f\n%s"
396
                        fin_cv (Cluster.printStats "  " fin_nl)
397
                    | otherwise ->
398
                        printf "Cluster score improved from %.8f to %.8f\n"
399
                        ini_cv fin_cv ::String
400

    
401
  putStr sol_msg
402

    
403
  unless (verbose == 0) $
404
         printf "Solution length=%d\n" (length ord_plc)
405

    
406
  let cmd_jobs = Cluster.splitJobs cmd_strs
407

    
408
  when (isJust $ optShowCmds opts) $
409
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
410

    
411
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
412
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
413

    
414
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
415

    
416
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
417

    
418
  when (verbose > 3) $ printStats nl fin_nl
419

    
420
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
421
  unless eval (exitWith (ExitFailure 1))