Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ d66aa238

History | View | Annotate | Download (15 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal
27
  ( main
28
  , options
29
  , arguments
30
  , iterateDepth
31
  ) where
32

    
33
import Control.Concurrent (threadDelay)
34
import Control.Exception (bracket)
35
import Control.Monad
36
import Data.List
37
import Data.Maybe (isJust, isNothing, fromJust)
38
import Data.IORef
39
import System.Exit
40
import System.IO
41
import System.Posix.Process
42
import System.Posix.Signals
43

    
44
import Text.Printf (printf, hPrintf)
45

    
46
import qualified Ganeti.HTools.Container as Container
47
import qualified Ganeti.HTools.Cluster as Cluster
48
import qualified Ganeti.HTools.Group as Group
49
import qualified Ganeti.HTools.Node as Node
50
import qualified Ganeti.HTools.Instance as Instance
51

    
52
import Ganeti.BasicTypes
53
import Ganeti.Common
54
import Ganeti.Errors
55
import Ganeti.HTools.CLI
56
import Ganeti.HTools.ExtLoader
57
import Ganeti.HTools.Types
58
import Ganeti.HTools.Loader
59
import Ganeti.Utils
60

    
61
import qualified Ganeti.Luxi as L
62
import Ganeti.Jobs
63

    
64
-- | Options list and functions.
65
options :: IO [OptType]
66
options =
67
  return
68
    [ oPrintNodes
69
    , oPrintInsts
70
    , oPrintCommands
71
    , oDataFile
72
    , oEvacMode
73
    , oRapiMaster
74
    , oLuxiSocket
75
    , oIAllocSrc
76
    , oExecJobs
77
    , oGroup
78
    , oMaxSolLength
79
    , oVerbose
80
    , oQuiet
81
    , oOfflineNode
82
    , oMinScore
83
    , oMaxCpu
84
    , oMinDisk
85
    , oMinGain
86
    , oMinGainLim
87
    , oDiskMoves
88
    , oSelInst
89
    , oInstMoves
90
    , oDynuFile
91
    , oExTags
92
    , oExInst
93
    , oSaveCluster
94
    ]
95

    
96
-- | The list of arguments supported by the program.
97
arguments :: [ArgCompletion]
98
arguments = []
99

    
100
{- | Start computing the solution at the given depth and recurse until
101
we find a valid solution or we exceed the maximum depth.
102

    
103
-}
104
iterateDepth :: Bool             -- ^ Whether to print moves
105
             -> Cluster.Table    -- ^ The starting table
106
             -> Int              -- ^ Remaining length
107
             -> Bool             -- ^ Allow disk moves
108
             -> Bool             -- ^ Allow instance moves
109
             -> Int              -- ^ Max node name len
110
             -> Int              -- ^ Max instance name len
111
             -> [MoveJob]        -- ^ Current command list
112
             -> Score            -- ^ Score at which to stop
113
             -> Score            -- ^ Min gain limit
114
             -> Score            -- ^ Min score gain
115
             -> Bool             -- ^ Enable evacuation mode
116
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
117
                                              -- and commands
118
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
119
             cmd_strs min_score mg_limit min_gain evac_mode =
120
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
121
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
122
      m_fin_tbl = if allowed_next
123
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
124
                         evac_mode mg_limit min_gain
125
                    else Nothing
126
  in case m_fin_tbl of
127
       Just fin_tbl ->
128
         do
129
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
130
               fin_plc_len = length fin_plc
131
               cur_plc@(idx, _, _, move, _) = head fin_plc
132
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
133
                                  nmlen imlen cur_plc fin_plc_len
134
               afn = Cluster.involvedNodes ini_il cur_plc
135
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
136
           when printmove $ do
137
               putStrLn sol_line
138
               hFlush stdout
139
           iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
140
                        nmlen imlen upd_cmd_strs min_score
141
                        mg_limit min_gain evac_mode
142
       Nothing -> return (ini_tbl, cmd_strs)
143

    
144
-- | Displays the cluster stats.
145
printStats :: Node.List -> Node.List -> IO ()
146
printStats ini_nl fin_nl = do
147
  let ini_cs = Cluster.totalResources ini_nl
148
      fin_cs = Cluster.totalResources fin_nl
149
  printf "Original: mem=%d disk=%d\n"
150
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
151
  printf "Final:    mem=%d disk=%d\n"
152
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
153

    
154
-- | Saves the rebalance commands to a text file.
155
saveBalanceCommands :: Options -> String -> IO ()
156
saveBalanceCommands opts cmd_data = do
157
  let out_path = fromJust $ optShowCmds opts
158
  putStrLn ""
159
  if out_path == "-"
160
    then printf "Commands to run to reach the above solution:\n%s"
161
           (unlines . map ("  " ++) .
162
            filter (/= "  check") .
163
            lines $ cmd_data)
164
    else do
165
      writeFile out_path (shTemplate ++ cmd_data)
166
      printf "The commands have been written to file '%s'\n" out_path
167

    
168
-- | Polls a set of jobs at a fixed interval until all are finished
169
-- one way or another.
170
waitForJobs :: L.Client -> [L.JobId] -> IO (Result [JobStatus])
171
waitForJobs client jids = do
172
  sts <- L.queryJobsStatus client jids
173
  case sts of
174
    Bad e -> return . Bad $ "Checking job status: " ++ formatError e
175
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
176
            then do
177
              -- TODO: replace hardcoded value with a better thing
178
              threadDelay (1000000 * 15)
179
              waitForJobs client jids
180
            else return $ Ok s
181

    
182
-- | Check that a set of job statuses is all success.
183
checkJobsStatus :: [JobStatus] -> Bool
184
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
185

    
186
-- | Wrapper over execJobSet checking for early termination.
187
execWrapper :: String -> Node.List
188
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
189
execWrapper _      _  _  _    [] = return True
190
execWrapper master nl il cref alljss = do
191
  cancel <- readIORef cref
192
  if cancel > 0
193
    then do
194
      hPrintf stderr "Exiting early due to user request, %d\
195
                     \ jobset(s) remaining." (length alljss)::IO ()
196
      return True
197
    else execJobSet master nl il cref alljss
198

    
199
-- | Execute an entire jobset.
200
execJobSet :: String -> Node.List
201
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
202
execJobSet _      _  _  _    [] = return True
203
execJobSet master nl il cref (js:jss) = do
204
  -- map from jobset (htools list of positions) to [[opcodes]]
205
  let jobs = map (\(_, idx, move, _) ->
206
                      Cluster.iMoveToJob nl il idx move) js
207
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
208
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
209
  jrs <- bracket (L.getClient master) L.closeClient
210
         (\client -> do
211
            jids <- L.submitManyJobs client jobs
212
            case jids of
213
              Bad e -> return . Bad $ "Job submission error: " ++ formatError e
214
              Ok x -> do
215
                putStrLn $ "Got job IDs " ++ commaJoin (map show x)
216
                waitForJobs client x
217
         )
218
  case jrs of
219
    Bad x -> do
220
      hPutStrLn stderr x
221
      return False
222
    Ok x -> if checkJobsStatus x
223
              then execWrapper master nl il cref jss
224
              else do
225
                hPutStrLn stderr $ "Not all jobs completed successfully: " ++
226
                          show x
227
                hPutStrLn stderr "Aborting."
228
                return False
229

    
230
-- | Executes the jobs, if possible and desired.
231
maybeExecJobs :: Options
232
              -> [a]
233
              -> Node.List
234
              -> Instance.List
235
              -> [JobSet]
236
              -> IO Bool
237
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
238
  if optExecJobs opts && not (null ord_plc)
239
    then (case optLuxi opts of
240
            Nothing -> do
241
              hPutStrLn stderr "Execution of commands possible only on LUXI"
242
              return False
243
            Just master -> runJobSet master fin_nl il cmd_jobs)
244
    else return True
245

    
246
-- | Signal handler for graceful termination.
247
hangleSigInt :: IORef Int -> IO ()
248
hangleSigInt cref = do
249
  writeIORef cref 1
250
  putStrLn ("Cancel request registered, will exit at" ++
251
            " the end of the current job set...")
252

    
253
-- | Signal handler for immediate termination.
254
hangleSigTerm :: IORef Int -> IO ()
255
hangleSigTerm cref = do
256
  -- update the cref to 2, just for consistency
257
  writeIORef cref 2
258
  putStrLn "Double cancel request, exiting now..."
259
  exitImmediately $ ExitFailure 2
260

    
261
-- | Runs a job set with handling of signals.
262
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
263
runJobSet master fin_nl il cmd_jobs = do
264
  cref <- newIORef 0
265
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
266
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
267
  execWrapper master fin_nl il cref cmd_jobs
268

    
269
-- | Select the target node group.
270
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
271
            -> IO (String, (Node.List, Instance.List))
272
selectGroup opts gl nlf ilf = do
273
  let ngroups = Cluster.splitCluster nlf ilf
274
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
275
    hPutStrLn stderr "Found multiple node groups:"
276
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
277
           flip Container.find gl . fst) ngroups
278
    exitErr "Aborting."
279

    
280
  case optGroup opts of
281
    Nothing -> do
282
      let (gidx, cdata) = head ngroups
283
          grp = Container.find gidx gl
284
      return (Group.name grp, cdata)
285
    Just g -> case Container.findByName gl g of
286
      Nothing -> do
287
        hPutStrLn stderr $ "Node group " ++ g ++
288
          " not found. Node group list is:"
289
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
290
        exitErr "Aborting."
291
      Just grp ->
292
          case lookup (Group.idx grp) ngroups of
293
            Nothing ->
294
              -- This will only happen if there are no nodes assigned
295
              -- to this group
296
              return (Group.name grp, (Container.empty, Container.empty))
297
            Just cdata -> return (Group.name grp, cdata)
298

    
299
-- | Do a few checks on the cluster data.
300
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
301
checkCluster verbose nl il = do
302
  -- nothing to do on an empty cluster
303
  when (Container.null il) $ do
304
         printf "Cluster is empty, exiting.\n"::IO ()
305
         exitSuccess
306

    
307
  -- hbal doesn't currently handle split clusters
308
  let split_insts = Cluster.findSplitInstances nl il
309
  unless (null split_insts || verbose <= 1) $ do
310
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
311
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
312
    hPutStrLn stderr "These instances will not be moved."
313

    
314
  printf "Loaded %d nodes, %d instances\n"
315
             (Container.size nl)
316
             (Container.size il)::IO ()
317

    
318
  let csf = commonSuffix nl il
319
  when (not (null csf) && verbose > 1) $
320
       printf "Note: Stripping common suffix of '%s' from names\n" csf
321

    
322
-- | Do a few checks on the selected group data.
323
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
324
checkGroup verbose gname nl il = do
325
  printf "Group size %d nodes, %d instances\n"
326
             (Container.size nl)
327
             (Container.size il)::IO ()
328

    
329
  putStrLn $ "Selected node group: " ++ gname
330

    
331
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
332
  unless (verbose == 0) $ printf
333
             "Initial check done: %d bad nodes, %d bad instances.\n"
334
             (length bad_nodes) (length bad_instances)
335

    
336
  unless (null bad_nodes) $
337
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
338
                  \that the cluster will end N+1 happy."
339

    
340
-- | Check that we actually need to rebalance.
341
checkNeedRebalance :: Options -> Score -> IO ()
342
checkNeedRebalance opts ini_cv = do
343
  let min_cv = optMinScore opts
344
  when (ini_cv < min_cv) $ do
345
         printf "Cluster is already well balanced (initial score %.6g,\n\
346
                \minimum score %.6g).\nNothing to do, exiting\n"
347
                ini_cv min_cv:: IO ()
348
         exitSuccess
349

    
350
-- | Main function.
351
main :: Options -> [String] -> IO ()
352
main opts args = do
353
  unless (null args) $ exitErr "This program doesn't take any arguments."
354

    
355
  let verbose = optVerbose opts
356
      shownodes = optShowNodes opts
357
      showinsts = optShowInsts opts
358

    
359
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
360

    
361
  when (verbose > 1) $ do
362
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
363
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
364

    
365
  nlf <- setNodeStatus opts fixed_nl
366
  checkCluster verbose nlf ilf
367

    
368
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
369

    
370
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
371

    
372
  checkGroup verbose gname nl il
373

    
374
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
375

    
376
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
377

    
378
  let ini_cv = Cluster.compCV nl
379
      ini_tbl = Cluster.Table nl il ini_cv []
380
      min_cv = optMinScore opts
381

    
382
  checkNeedRebalance opts ini_cv
383

    
384
  if verbose > 2
385
    then printf "Initial coefficients: overall %.8f\n%s"
386
           ini_cv (Cluster.printStats "  " nl)::IO ()
387
    else printf "Initial score: %.8f\n" ini_cv
388

    
389
  putStrLn "Trying to minimize the CV..."
390
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
391
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
392

    
393
  (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
394
                         (optDiskMoves opts)
395
                         (optInstMoves opts)
396
                         nmlen imlen [] min_cv
397
                         (optMinGainLim opts) (optMinGain opts)
398
                         (optEvacMode opts)
399
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
400
      ord_plc = reverse fin_plc
401
      sol_msg = case () of
402
                  _ | null fin_plc -> printf "No solution found\n"
403
                    | verbose > 2 ->
404
                        printf "Final coefficients:   overall %.8f\n%s"
405
                        fin_cv (Cluster.printStats "  " fin_nl)
406
                    | otherwise ->
407
                        printf "Cluster score improved from %.8f to %.8f\n"
408
                        ini_cv fin_cv ::String
409

    
410
  putStr sol_msg
411

    
412
  unless (verbose == 0) $
413
         printf "Solution length=%d\n" (length ord_plc)
414

    
415
  let cmd_jobs = Cluster.splitJobs cmd_strs
416

    
417
  when (isJust $ optShowCmds opts) .
418
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
419

    
420
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
421
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
422

    
423
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
424

    
425
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
426

    
427
  when (verbose > 3) $ printStats nl fin_nl
428

    
429
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
430
  unless eval (exitWith (ExitFailure 1))