Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ 22278fa7

History | View | Annotate | Download (15 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal
27
  ( main
28
  , options
29
  , arguments
30
  , iterateDepth
31
  ) where
32

    
33
import Control.Concurrent (threadDelay)
34
import Control.Exception (bracket)
35
import Control.Monad
36
import Data.List
37
import Data.Maybe (isJust, isNothing, fromJust)
38
import Data.IORef
39
import System.Exit
40
import System.IO
41
import System.Posix.Process
42
import System.Posix.Signals
43

    
44
import Text.Printf (printf, hPrintf)
45

    
46
import qualified Ganeti.HTools.Container as Container
47
import qualified Ganeti.HTools.Cluster as Cluster
48
import qualified Ganeti.HTools.Group as Group
49
import qualified Ganeti.HTools.Node as Node
50
import qualified Ganeti.HTools.Instance as Instance
51

    
52
import Ganeti.Common
53
import Ganeti.HTools.CLI
54
import Ganeti.HTools.ExtLoader
55
import Ganeti.HTools.Types
56
import Ganeti.HTools.Loader
57
import Ganeti.Utils
58

    
59
import qualified Ganeti.Luxi as L
60
import Ganeti.Jobs
61

    
62
-- | Options list and functions.
63
options :: [OptType]
64
options =
65
  [ oPrintNodes
66
  , oPrintInsts
67
  , oPrintCommands
68
  , oDataFile
69
  , oEvacMode
70
  , oRapiMaster
71
  , oLuxiSocket
72
  , oIAllocSrc
73
  , oExecJobs
74
  , oGroup
75
  , oMaxSolLength
76
  , oVerbose
77
  , oQuiet
78
  , oOfflineNode
79
  , oMinScore
80
  , oMaxCpu
81
  , oMinDisk
82
  , oMinGain
83
  , oMinGainLim
84
  , oDiskMoves
85
  , oSelInst
86
  , oInstMoves
87
  , oDynuFile
88
  , oExTags
89
  , oExInst
90
  , oSaveCluster
91
  ]
92

    
93
-- | The list of arguments supported by the program.
94
arguments :: [ArgCompletion]
95
arguments = []
96

    
97
{- | Start computing the solution at the given depth and recurse until
98
we find a valid solution or we exceed the maximum depth.
99

    
100
-}
101
iterateDepth :: Bool             -- ^ Whether to print moves
102
             -> Cluster.Table    -- ^ The starting table
103
             -> Int              -- ^ Remaining length
104
             -> Bool             -- ^ Allow disk moves
105
             -> Bool             -- ^ Allow instance moves
106
             -> Int              -- ^ Max node name len
107
             -> Int              -- ^ Max instance name len
108
             -> [MoveJob]        -- ^ Current command list
109
             -> Score            -- ^ Score at which to stop
110
             -> Score            -- ^ Min gain limit
111
             -> Score            -- ^ Min score gain
112
             -> Bool             -- ^ Enable evacuation mode
113
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
114
                                              -- and commands
115
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
116
             cmd_strs min_score mg_limit min_gain evac_mode =
117
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
118
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
119
      m_fin_tbl = if allowed_next
120
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
121
                         evac_mode mg_limit min_gain
122
                    else Nothing
123
  in case m_fin_tbl of
124
       Just fin_tbl ->
125
         do
126
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
127
               fin_plc_len = length fin_plc
128
               cur_plc@(idx, _, _, move, _) = head fin_plc
129
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
130
                                  nmlen imlen cur_plc fin_plc_len
131
               afn = Cluster.involvedNodes ini_il cur_plc
132
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
133
           when printmove $ do
134
               putStrLn sol_line
135
               hFlush stdout
136
           iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
137
                        nmlen imlen upd_cmd_strs min_score
138
                        mg_limit min_gain evac_mode
139
       Nothing -> return (ini_tbl, cmd_strs)
140

    
141
-- | Displays the cluster stats.
142
printStats :: Node.List -> Node.List -> IO ()
143
printStats ini_nl fin_nl = do
144
  let ini_cs = Cluster.totalResources ini_nl
145
      fin_cs = Cluster.totalResources fin_nl
146
  printf "Original: mem=%d disk=%d\n"
147
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
148
  printf "Final:    mem=%d disk=%d\n"
149
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
150

    
151
-- | Saves the rebalance commands to a text file.
152
saveBalanceCommands :: Options -> String -> IO ()
153
saveBalanceCommands opts cmd_data = do
154
  let out_path = fromJust $ optShowCmds opts
155
  putStrLn ""
156
  if out_path == "-"
157
    then printf "Commands to run to reach the above solution:\n%s"
158
           (unlines . map ("  " ++) .
159
            filter (/= "  check") .
160
            lines $ cmd_data)
161
    else do
162
      writeFile out_path (shTemplate ++ cmd_data)
163
      printf "The commands have been written to file '%s'\n" out_path
164

    
165
-- | Polls a set of jobs at a fixed interval until all are finished
166
-- one way or another.
167
waitForJobs :: L.Client -> [L.JobId] -> IO (Result [JobStatus])
168
waitForJobs client jids = do
169
  sts <- L.queryJobsStatus client jids
170
  case sts of
171
    Bad x -> return $ Bad x
172
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
173
            then do
174
              -- TODO: replace hardcoded value with a better thing
175
              threadDelay (1000000 * 15)
176
              waitForJobs client jids
177
            else return $ Ok s
178

    
179
-- | Check that a set of job statuses is all success.
180
checkJobsStatus :: [JobStatus] -> Bool
181
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
182

    
183
-- | Wrapper over execJobSet checking for early termination.
184
execWrapper :: String -> Node.List
185
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
186
execWrapper _      _  _  _    [] = return True
187
execWrapper master nl il cref alljss = do
188
  cancel <- readIORef cref
189
  if cancel > 0
190
    then do
191
      hPrintf stderr "Exiting early due to user request, %d\
192
                     \ jobset(s) remaining." (length alljss)::IO ()
193
      return True
194
    else execJobSet master nl il cref alljss
195

    
196
-- | Execute an entire jobset.
197
execJobSet :: String -> Node.List
198
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
199
execJobSet _      _  _  _    [] = return True
200
execJobSet master nl il cref (js:jss) = do
201
  -- map from jobset (htools list of positions) to [[opcodes]]
202
  let jobs = map (\(_, idx, move, _) ->
203
                      Cluster.iMoveToJob nl il idx move) js
204
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
205
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
206
  jrs <- bracket (L.getClient master) L.closeClient
207
         (\client -> do
208
            jids <- L.submitManyJobs client jobs
209
            case jids of
210
              Bad x -> return $ Bad x
211
              Ok x -> do
212
                putStrLn $ "Got job IDs " ++ commaJoin (map show x)
213
                waitForJobs client x
214
         )
215
  case jrs of
216
    Bad x -> do
217
      hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
218
      return False
219
    Ok x -> if checkJobsStatus x
220
              then execWrapper master nl il cref jss
221
              else do
222
                hPutStrLn stderr $ "Not all jobs completed successfully: " ++
223
                          show x
224
                hPutStrLn stderr "Aborting."
225
                return False
226

    
227
-- | Executes the jobs, if possible and desired.
228
maybeExecJobs :: Options
229
              -> [a]
230
              -> Node.List
231
              -> Instance.List
232
              -> [JobSet]
233
              -> IO Bool
234
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
235
  if optExecJobs opts && not (null ord_plc)
236
    then (case optLuxi opts of
237
            Nothing -> do
238
              hPutStrLn stderr "Execution of commands possible only on LUXI"
239
              return False
240
            Just master -> runJobSet master fin_nl il cmd_jobs)
241
    else return True
242

    
243
-- | Signal handler for graceful termination.
244
hangleSigInt :: IORef Int -> IO ()
245
hangleSigInt cref = do
246
  writeIORef cref 1
247
  putStrLn ("Cancel request registered, will exit at" ++
248
            " the end of the current job set...")
249

    
250
-- | Signal handler for immediate termination.
251
hangleSigTerm :: IORef Int -> IO ()
252
hangleSigTerm cref = do
253
  -- update the cref to 2, just for consistency
254
  writeIORef cref 2
255
  putStrLn "Double cancel request, exiting now..."
256
  exitImmediately $ ExitFailure 2
257

    
258
-- | Runs a job set with handling of signals.
259
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
260
runJobSet master fin_nl il cmd_jobs = do
261
  cref <- newIORef 0
262
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
263
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
264
  execWrapper master fin_nl il cref cmd_jobs
265

    
266
-- | Select the target node group.
267
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
268
            -> IO (String, (Node.List, Instance.List))
269
selectGroup opts gl nlf ilf = do
270
  let ngroups = Cluster.splitCluster nlf ilf
271
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
272
    hPutStrLn stderr "Found multiple node groups:"
273
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
274
           flip Container.find gl . fst) ngroups
275
    hPutStrLn stderr "Aborting."
276
    exitWith $ ExitFailure 1
277

    
278
  case optGroup opts of
279
    Nothing -> do
280
      let (gidx, cdata) = head ngroups
281
          grp = Container.find gidx gl
282
      return (Group.name grp, cdata)
283
    Just g -> case Container.findByName gl g of
284
      Nothing -> do
285
        hPutStrLn stderr $ "Node group " ++ g ++
286
          " not found. Node group list is:"
287
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
288
        hPutStrLn stderr "Aborting."
289
        exitWith $ ExitFailure 1
290
      Just grp ->
291
          case lookup (Group.idx grp) ngroups of
292
            Nothing ->
293
              -- This will only happen if there are no nodes assigned
294
              -- to this group
295
              return (Group.name grp, (Container.empty, Container.empty))
296
            Just cdata -> return (Group.name grp, cdata)
297

    
298
-- | Do a few checks on the cluster data.
299
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
300
checkCluster verbose nl il = do
301
  -- nothing to do on an empty cluster
302
  when (Container.null il) $ do
303
         printf "Cluster is empty, exiting.\n"::IO ()
304
         exitSuccess
305

    
306
  -- hbal doesn't currently handle split clusters
307
  let split_insts = Cluster.findSplitInstances nl il
308
  unless (null split_insts || verbose <= 1) $ do
309
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
310
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
311
    hPutStrLn stderr "These instances will not be moved."
312

    
313
  printf "Loaded %d nodes, %d instances\n"
314
             (Container.size nl)
315
             (Container.size il)::IO ()
316

    
317
  let csf = commonSuffix nl il
318
  when (not (null csf) && verbose > 1) $
319
       printf "Note: Stripping common suffix of '%s' from names\n" csf
320

    
321
-- | Do a few checks on the selected group data.
322
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
323
checkGroup verbose gname nl il = do
324
  printf "Group size %d nodes, %d instances\n"
325
             (Container.size nl)
326
             (Container.size il)::IO ()
327

    
328
  putStrLn $ "Selected node group: " ++ gname
329

    
330
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
331
  unless (verbose == 0) $ printf
332
             "Initial check done: %d bad nodes, %d bad instances.\n"
333
             (length bad_nodes) (length bad_instances)
334

    
335
  unless (null bad_nodes) $
336
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
337
                  \that the cluster will end N+1 happy."
338

    
339
-- | Check that we actually need to rebalance.
340
checkNeedRebalance :: Options -> Score -> IO ()
341
checkNeedRebalance opts ini_cv = do
342
  let min_cv = optMinScore opts
343
  when (ini_cv < min_cv) $ do
344
         printf "Cluster is already well balanced (initial score %.6g,\n\
345
                \minimum score %.6g).\nNothing to do, exiting\n"
346
                ini_cv min_cv:: IO ()
347
         exitSuccess
348

    
349
-- | Main function.
350
main :: Options -> [String] -> IO ()
351
main opts args = do
352
  unless (null args) $ do
353
         hPutStrLn stderr "Error: this program doesn't take any arguments."
354
         exitWith $ ExitFailure 1
355

    
356
  let verbose = optVerbose opts
357
      shownodes = optShowNodes opts
358
      showinsts = optShowInsts opts
359

    
360
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
361

    
362
  when (verbose > 1) $ do
363
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
364
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
365

    
366
  nlf <- setNodeStatus opts fixed_nl
367
  checkCluster verbose nlf ilf
368

    
369
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
370

    
371
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
372

    
373
  checkGroup verbose gname nl il
374

    
375
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
376

    
377
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
378

    
379
  let ini_cv = Cluster.compCV nl
380
      ini_tbl = Cluster.Table nl il ini_cv []
381
      min_cv = optMinScore opts
382

    
383
  checkNeedRebalance opts ini_cv
384

    
385
  if verbose > 2
386
    then printf "Initial coefficients: overall %.8f\n%s"
387
           ini_cv (Cluster.printStats "  " nl)::IO ()
388
    else printf "Initial score: %.8f\n" ini_cv
389

    
390
  putStrLn "Trying to minimize the CV..."
391
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
392
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
393

    
394
  (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
395
                         (optDiskMoves opts)
396
                         (optInstMoves opts)
397
                         nmlen imlen [] min_cv
398
                         (optMinGainLim opts) (optMinGain opts)
399
                         (optEvacMode opts)
400
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
401
      ord_plc = reverse fin_plc
402
      sol_msg = case () of
403
                  _ | null fin_plc -> printf "No solution found\n"
404
                    | verbose > 2 ->
405
                        printf "Final coefficients:   overall %.8f\n%s"
406
                        fin_cv (Cluster.printStats "  " fin_nl)
407
                    | otherwise ->
408
                        printf "Cluster score improved from %.8f to %.8f\n"
409
                        ini_cv fin_cv ::String
410

    
411
  putStr sol_msg
412

    
413
  unless (verbose == 0) $
414
         printf "Solution length=%d\n" (length ord_plc)
415

    
416
  let cmd_jobs = Cluster.splitJobs cmd_strs
417

    
418
  when (isJust $ optShowCmds opts) .
419
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
420

    
421
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
422
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
423

    
424
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
425

    
426
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
427

    
428
  when (verbose > 3) $ printStats nl fin_nl
429

    
430
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
431
  unless eval (exitWith (ExitFailure 1))