Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ 0547d608

History | View | Annotate | Download (14.8 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal (main, options) where
27

    
28
import Control.Concurrent (threadDelay)
29
import Control.Exception (bracket)
30
import Control.Monad
31
import Data.List
32
import Data.Maybe (isJust, isNothing, fromJust)
33
import Data.IORef
34
import System.Exit
35
import System.IO
36
import System.Posix.Process
37
import System.Posix.Signals
38

    
39
import Text.Printf (printf, hPrintf)
40

    
41
import qualified Ganeti.HTools.Container as Container
42
import qualified Ganeti.HTools.Cluster as Cluster
43
import qualified Ganeti.HTools.Group as Group
44
import qualified Ganeti.HTools.Node as Node
45
import qualified Ganeti.HTools.Instance as Instance
46

    
47
import Ganeti.HTools.CLI
48
import Ganeti.HTools.ExtLoader
49
import Ganeti.HTools.Utils
50
import Ganeti.HTools.Types
51
import Ganeti.HTools.Loader
52

    
53
import qualified Ganeti.Luxi as L
54
import Ganeti.Jobs
55

    
56
-- | Options list and functions.
57
options :: [OptType]
58
options =
59
  [ oPrintNodes
60
  , oPrintInsts
61
  , oPrintCommands
62
  , oDataFile
63
  , oEvacMode
64
  , oRapiMaster
65
  , oLuxiSocket
66
  , oIAllocSrc
67
  , oExecJobs
68
  , oGroup
69
  , oMaxSolLength
70
  , oVerbose
71
  , oQuiet
72
  , oOfflineNode
73
  , oMinScore
74
  , oMaxCpu
75
  , oMinDisk
76
  , oMinGain
77
  , oMinGainLim
78
  , oDiskMoves
79
  , oSelInst
80
  , oInstMoves
81
  , oDynuFile
82
  , oExTags
83
  , oExInst
84
  , oSaveCluster
85
  , oShowVer
86
  , oShowHelp
87
  ]
88

    
89
{- | Start computing the solution at the given depth and recurse until
90
we find a valid solution or we exceed the maximum depth.
91

    
92
-}
93
iterateDepth :: Cluster.Table    -- ^ The starting table
94
             -> Int              -- ^ Remaining length
95
             -> Bool             -- ^ Allow disk moves
96
             -> Bool             -- ^ Allow instance moves
97
             -> Int              -- ^ Max node name len
98
             -> Int              -- ^ Max instance name len
99
             -> [MoveJob]        -- ^ Current command list
100
             -> Score            -- ^ Score at which to stop
101
             -> Score            -- ^ Min gain limit
102
             -> Score            -- ^ Min score gain
103
             -> Bool             -- ^ Enable evacuation mode
104
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
105
                                              -- and commands
106
iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
107
             cmd_strs min_score mg_limit min_gain evac_mode =
108
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
109
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
110
      m_fin_tbl = if allowed_next
111
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
112
                         evac_mode mg_limit min_gain
113
                    else Nothing
114
  in case m_fin_tbl of
115
       Just fin_tbl ->
116
         do
117
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
118
               fin_plc_len = length fin_plc
119
               cur_plc@(idx, _, _, move, _) = head fin_plc
120
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
121
                                  nmlen imlen cur_plc fin_plc_len
122
               afn = Cluster.involvedNodes ini_il cur_plc
123
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
124
           putStrLn sol_line
125
           hFlush stdout
126
           iterateDepth fin_tbl max_rounds disk_moves inst_moves
127
                        nmlen imlen upd_cmd_strs min_score
128
                        mg_limit min_gain evac_mode
129
       Nothing -> return (ini_tbl, cmd_strs)
130

    
131
-- | Displays the cluster stats.
132
printStats :: Node.List -> Node.List -> IO ()
133
printStats ini_nl fin_nl = do
134
  let ini_cs = Cluster.totalResources ini_nl
135
      fin_cs = Cluster.totalResources fin_nl
136
  printf "Original: mem=%d disk=%d\n"
137
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
138
  printf "Final:    mem=%d disk=%d\n"
139
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
140

    
141
-- | Saves the rebalance commands to a text file.
142
saveBalanceCommands :: Options -> String -> IO ()
143
saveBalanceCommands opts cmd_data = do
144
  let out_path = fromJust $ optShowCmds opts
145
  putStrLn ""
146
  if out_path == "-"
147
    then printf "Commands to run to reach the above solution:\n%s"
148
           (unlines . map ("  " ++) .
149
            filter (/= "  check") .
150
            lines $ cmd_data)
151
    else do
152
      writeFile out_path (shTemplate ++ cmd_data)
153
      printf "The commands have been written to file '%s'\n" out_path
154

    
155
-- | Polls a set of jobs at a fixed interval until all are finished
156
-- one way or another.
157
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
158
waitForJobs client jids = do
159
  sts <- L.queryJobsStatus client jids
160
  case sts of
161
    Bad x -> return $ Bad x
162
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
163
            then do
164
              -- TODO: replace hardcoded value with a better thing
165
              threadDelay (1000000 * 15)
166
              waitForJobs client jids
167
            else return $ Ok s
168

    
169
-- | Check that a set of job statuses is all success.
170
checkJobsStatus :: [JobStatus] -> Bool
171
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
172

    
173
-- | Wrapper over execJobSet checking for early termination.
174
execWrapper :: String -> Node.List
175
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
176
execWrapper _      _  _  _    [] = return True
177
execWrapper master nl il cref alljss = do
178
  cancel <- readIORef cref
179
  if cancel > 0
180
    then do
181
      hPrintf stderr "Exiting early due to user request, %d\
182
                     \ jobset(s) remaining." (length alljss)::IO ()
183
      return False
184
    else execJobSet master nl il cref alljss
185

    
186
-- | Execute an entire jobset.
187
execJobSet :: String -> Node.List
188
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
189
execJobSet _      _  _  _    [] = return True
190
execJobSet master nl il cref (js:jss) = do
191
  -- map from jobset (htools list of positions) to [[opcodes]]
192
  let jobs = map (\(_, idx, move, _) ->
193
                      Cluster.iMoveToJob nl il idx move) js
194
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
195
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
196
  jrs <- bracket (L.getClient master) L.closeClient
197
         (\client -> do
198
            jids <- L.submitManyJobs client jobs
199
            case jids of
200
              Bad x -> return $ Bad x
201
              Ok x -> do
202
                putStrLn $ "Got job IDs " ++ commaJoin x
203
                waitForJobs client x
204
         )
205
  case jrs of
206
    Bad x -> do
207
      hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
208
      return False
209
    Ok x -> if checkJobsStatus x
210
              then execWrapper master nl il cref jss
211
              else do
212
                hPutStrLn stderr $ "Not all jobs completed successfully: " ++
213
                          show x
214
                hPutStrLn stderr "Aborting."
215
                return False
216

    
217
-- | Executes the jobs, if possible and desired.
218
maybeExecJobs :: Options
219
              -> [a]
220
              -> Node.List
221
              -> Instance.List
222
              -> [JobSet]
223
              -> IO Bool
224
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
225
  if optExecJobs opts && not (null ord_plc)
226
    then (case optLuxi opts of
227
            Nothing -> do
228
              hPutStrLn stderr "Execution of commands possible only on LUXI"
229
              return False
230
            Just master -> runJobSet master fin_nl il cmd_jobs)
231
    else return True
232

    
233
-- | Signal handler for graceful termination.
234
hangleSigInt :: IORef Int -> IO ()
235
hangleSigInt cref = do
236
  writeIORef cref 1
237
  putStrLn ("Cancel request registered, will exit at" ++
238
            " the end of the current job set...")
239

    
240
-- | Signal handler for immediate termination.
241
hangleSigTerm :: IORef Int -> IO ()
242
hangleSigTerm cref = do
243
  -- update the cref to 2, just for consistency
244
  writeIORef cref 2
245
  putStrLn "Double cancel request, exiting now..."
246
  exitImmediately $ ExitFailure 2
247

    
248
-- | Runs a job set with handling of signals.
249
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
250
runJobSet master fin_nl il cmd_jobs = do
251
  cref <- newIORef 0
252
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
253
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
254
  execWrapper master fin_nl il cref cmd_jobs
255

    
256
-- | Select the target node group.
257
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
258
            -> IO (String, (Node.List, Instance.List))
259
selectGroup opts gl nlf ilf = do
260
  let ngroups = Cluster.splitCluster nlf ilf
261
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
262
    hPutStrLn stderr "Found multiple node groups:"
263
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
264
           flip Container.find gl . fst) ngroups
265
    hPutStrLn stderr "Aborting."
266
    exitWith $ ExitFailure 1
267

    
268
  case optGroup opts of
269
    Nothing -> do
270
      let (gidx, cdata) = head ngroups
271
          grp = Container.find gidx gl
272
      return (Group.name grp, cdata)
273
    Just g -> case Container.findByName gl g of
274
      Nothing -> do
275
        hPutStrLn stderr $ "Node group " ++ g ++
276
          " not found. Node group list is:"
277
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
278
        hPutStrLn stderr "Aborting."
279
        exitWith $ ExitFailure 1
280
      Just grp ->
281
          case lookup (Group.idx grp) ngroups of
282
            Nothing ->
283
              -- This will only happen if there are no nodes assigned
284
              -- to this group
285
              return (Group.name grp, (Container.empty, Container.empty))
286
            Just cdata -> return (Group.name grp, cdata)
287

    
288
-- | Do a few checks on the cluster data.
289
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
290
checkCluster verbose nl il = do
291
  -- nothing to do on an empty cluster
292
  when (Container.null il) $ do
293
         printf "Cluster is empty, exiting.\n"::IO ()
294
         exitWith ExitSuccess
295

    
296
  -- hbal doesn't currently handle split clusters
297
  let split_insts = Cluster.findSplitInstances nl il
298
  unless (null split_insts) $ do
299
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
300
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
301
    hPutStrLn stderr "Aborting."
302
    exitWith $ ExitFailure 1
303

    
304
  printf "Loaded %d nodes, %d instances\n"
305
             (Container.size nl)
306
             (Container.size il)::IO ()
307

    
308
  let csf = commonSuffix nl il
309
  when (not (null csf) && verbose > 1) $
310
       printf "Note: Stripping common suffix of '%s' from names\n" csf
311

    
312
-- | Do a few checks on the selected group data.
313
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
314
checkGroup verbose gname nl il = do
315
  printf "Group size %d nodes, %d instances\n"
316
             (Container.size nl)
317
             (Container.size il)::IO ()
318

    
319
  putStrLn $ "Selected node group: " ++ gname
320

    
321
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
322
  unless (verbose == 0) $ printf
323
             "Initial check done: %d bad nodes, %d bad instances.\n"
324
             (length bad_nodes) (length bad_instances)
325

    
326
  when (length bad_nodes > 0) $
327
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
328
                  \that the cluster will end N+1 happy."
329

    
330
-- | Check that we actually need to rebalance.
331
checkNeedRebalance :: Options -> Score -> IO ()
332
checkNeedRebalance opts ini_cv = do
333
  let min_cv = optMinScore opts
334
  when (ini_cv < min_cv) $ do
335
         printf "Cluster is already well balanced (initial score %.6g,\n\
336
                \minimum score %.6g).\nNothing to do, exiting\n"
337
                ini_cv min_cv:: IO ()
338
         exitWith ExitSuccess
339

    
340
-- | Main function.
341
main :: Options -> [String] -> IO ()
342
main opts args = do
343
  unless (null args) $ do
344
         hPutStrLn stderr "Error: this program doesn't take any arguments."
345
         exitWith $ ExitFailure 1
346

    
347
  let verbose = optVerbose opts
348
      shownodes = optShowNodes opts
349
      showinsts = optShowInsts opts
350

    
351
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
352

    
353
  when (verbose > 1) $ do
354
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
355
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
356

    
357
  nlf <- setNodeStatus opts fixed_nl
358
  checkCluster verbose nlf ilf
359

    
360
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
361

    
362
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
363

    
364
  checkGroup verbose gname nl il
365

    
366
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
367

    
368
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
369

    
370
  let ini_cv = Cluster.compCV nl
371
      ini_tbl = Cluster.Table nl il ini_cv []
372
      min_cv = optMinScore opts
373

    
374
  checkNeedRebalance opts ini_cv
375

    
376
  if verbose > 2
377
    then printf "Initial coefficients: overall %.8f\n%s"
378
           ini_cv (Cluster.printStats "  " nl)::IO ()
379
    else printf "Initial score: %.8f\n" ini_cv
380

    
381
  putStrLn "Trying to minimize the CV..."
382
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
383
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
384

    
385
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
386
                         (optDiskMoves opts)
387
                         (optInstMoves opts)
388
                         nmlen imlen [] min_cv
389
                         (optMinGainLim opts) (optMinGain opts)
390
                         (optEvacMode opts)
391
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
392
      ord_plc = reverse fin_plc
393
      sol_msg = case () of
394
                  _ | null fin_plc -> printf "No solution found\n"
395
                    | verbose > 2 ->
396
                        printf "Final coefficients:   overall %.8f\n%s"
397
                        fin_cv (Cluster.printStats "  " fin_nl)
398
                    | otherwise ->
399
                        printf "Cluster score improved from %.8f to %.8f\n"
400
                        ini_cv fin_cv ::String
401

    
402
  putStr sol_msg
403

    
404
  unless (verbose == 0) $
405
         printf "Solution length=%d\n" (length ord_plc)
406

    
407
  let cmd_jobs = Cluster.splitJobs cmd_strs
408

    
409
  when (isJust $ optShowCmds opts) $
410
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
411

    
412
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
413
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
414

    
415
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
416

    
417
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
418

    
419
  when (verbose > 3) $ printStats nl fin_nl
420

    
421
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
422
  unless eval (exitWith (ExitFailure 1))