Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ 29a30533

History | View | Annotate | Download (15 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal
27
  ( main
28
  , options
29
  , arguments
30
  , iterateDepth
31
  ) where
32

    
33
import Control.Concurrent (threadDelay)
34
import Control.Exception (bracket)
35
import Control.Monad
36
import Data.List
37
import Data.Maybe (isJust, isNothing, fromJust)
38
import Data.IORef
39
import System.Exit
40
import System.IO
41
import System.Posix.Process
42
import System.Posix.Signals
43

    
44
import Text.Printf (printf, hPrintf)
45

    
46
import qualified Ganeti.HTools.Container as Container
47
import qualified Ganeti.HTools.Cluster as Cluster
48
import qualified Ganeti.HTools.Group as Group
49
import qualified Ganeti.HTools.Node as Node
50
import qualified Ganeti.HTools.Instance as Instance
51

    
52
import Ganeti.BasicTypes
53
import Ganeti.Common
54
import Ganeti.Errors
55
import Ganeti.HTools.CLI
56
import Ganeti.HTools.ExtLoader
57
import Ganeti.HTools.Types
58
import Ganeti.HTools.Loader
59
import Ganeti.Utils
60

    
61
import qualified Ganeti.Luxi as L
62
import Ganeti.Jobs
63

    
64
-- | Options list and functions.
65
options :: IO [OptType]
66
options = do
67
  luxi <- oLuxiSocket
68
  return
69
    [ oPrintNodes
70
    , oPrintInsts
71
    , oPrintCommands
72
    , oDataFile
73
    , oEvacMode
74
    , oRapiMaster
75
    , luxi
76
    , oIAllocSrc
77
    , oExecJobs
78
    , oGroup
79
    , oMaxSolLength
80
    , oVerbose
81
    , oQuiet
82
    , oOfflineNode
83
    , oMinScore
84
    , oMaxCpu
85
    , oMinDisk
86
    , oMinGain
87
    , oMinGainLim
88
    , oDiskMoves
89
    , oSelInst
90
    , oInstMoves
91
    , oDynuFile
92
    , oExTags
93
    , oExInst
94
    , oSaveCluster
95
    ]
96

    
97
-- | The list of arguments supported by the program.
98
arguments :: [ArgCompletion]
99
arguments = []
100

    
101
{- | Start computing the solution at the given depth and recurse until
102
we find a valid solution or we exceed the maximum depth.
103

    
104
-}
105
iterateDepth :: Bool             -- ^ Whether to print moves
106
             -> Cluster.Table    -- ^ The starting table
107
             -> Int              -- ^ Remaining length
108
             -> Bool             -- ^ Allow disk moves
109
             -> Bool             -- ^ Allow instance moves
110
             -> Int              -- ^ Max node name len
111
             -> Int              -- ^ Max instance name len
112
             -> [MoveJob]        -- ^ Current command list
113
             -> Score            -- ^ Score at which to stop
114
             -> Score            -- ^ Min gain limit
115
             -> Score            -- ^ Min score gain
116
             -> Bool             -- ^ Enable evacuation mode
117
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
118
                                              -- and commands
119
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
120
             cmd_strs min_score mg_limit min_gain evac_mode =
121
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
122
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
123
      m_fin_tbl = if allowed_next
124
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
125
                         evac_mode mg_limit min_gain
126
                    else Nothing
127
  in case m_fin_tbl of
128
       Just fin_tbl ->
129
         do
130
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
131
               fin_plc_len = length fin_plc
132
               cur_plc@(idx, _, _, move, _) = head fin_plc
133
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
134
                                  nmlen imlen cur_plc fin_plc_len
135
               afn = Cluster.involvedNodes ini_il cur_plc
136
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
137
           when printmove $ do
138
               putStrLn sol_line
139
               hFlush stdout
140
           iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
141
                        nmlen imlen upd_cmd_strs min_score
142
                        mg_limit min_gain evac_mode
143
       Nothing -> return (ini_tbl, cmd_strs)
144

    
145
-- | Displays the cluster stats.
146
printStats :: Node.List -> Node.List -> IO ()
147
printStats ini_nl fin_nl = do
148
  let ini_cs = Cluster.totalResources ini_nl
149
      fin_cs = Cluster.totalResources fin_nl
150
  printf "Original: mem=%d disk=%d\n"
151
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
152
  printf "Final:    mem=%d disk=%d\n"
153
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
154

    
155
-- | Saves the rebalance commands to a text file.
156
saveBalanceCommands :: Options -> String -> IO ()
157
saveBalanceCommands opts cmd_data = do
158
  let out_path = fromJust $ optShowCmds opts
159
  putStrLn ""
160
  if out_path == "-"
161
    then printf "Commands to run to reach the above solution:\n%s"
162
           (unlines . map ("  " ++) .
163
            filter (/= "  check") .
164
            lines $ cmd_data)
165
    else do
166
      writeFile out_path (shTemplate ++ cmd_data)
167
      printf "The commands have been written to file '%s'\n" out_path
168

    
169
-- | Polls a set of jobs at a fixed interval until all are finished
170
-- one way or another.
171
waitForJobs :: L.Client -> [L.JobId] -> IO (Result [JobStatus])
172
waitForJobs client jids = do
173
  sts <- L.queryJobsStatus client jids
174
  case sts of
175
    Bad e -> return . Bad $ "Checking job status: " ++ formatError e
176
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
177
            then do
178
              -- TODO: replace hardcoded value with a better thing
179
              threadDelay (1000000 * 15)
180
              waitForJobs client jids
181
            else return $ Ok s
182

    
183
-- | Check that a set of job statuses is all success.
184
checkJobsStatus :: [JobStatus] -> Bool
185
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
186

    
187
-- | Wrapper over execJobSet checking for early termination.
188
execWrapper :: String -> Node.List
189
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
190
execWrapper _      _  _  _    [] = return True
191
execWrapper master nl il cref alljss = do
192
  cancel <- readIORef cref
193
  if cancel > 0
194
    then do
195
      hPrintf stderr "Exiting early due to user request, %d\
196
                     \ jobset(s) remaining." (length alljss)::IO ()
197
      return True
198
    else execJobSet master nl il cref alljss
199

    
200
-- | Execute an entire jobset.
201
execJobSet :: String -> Node.List
202
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
203
execJobSet _      _  _  _    [] = return True
204
execJobSet master nl il cref (js:jss) = do
205
  -- map from jobset (htools list of positions) to [[opcodes]]
206
  let jobs = map (\(_, idx, move, _) ->
207
                      Cluster.iMoveToJob nl il idx move) js
208
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
209
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
210
  jrs <- bracket (L.getClient master) L.closeClient
211
         (\client -> do
212
            jids <- L.submitManyJobs client jobs
213
            case jids of
214
              Bad e -> return . Bad $ "Job submission error: " ++ formatError e
215
              Ok x -> do
216
                putStrLn $ "Got job IDs " ++ commaJoin (map show x)
217
                waitForJobs client x
218
         )
219
  case jrs of
220
    Bad x -> do
221
      hPutStrLn stderr x
222
      return False
223
    Ok x -> if checkJobsStatus x
224
              then execWrapper master nl il cref jss
225
              else do
226
                hPutStrLn stderr $ "Not all jobs completed successfully: " ++
227
                          show x
228
                hPutStrLn stderr "Aborting."
229
                return False
230

    
231
-- | Executes the jobs, if possible and desired.
232
maybeExecJobs :: Options
233
              -> [a]
234
              -> Node.List
235
              -> Instance.List
236
              -> [JobSet]
237
              -> IO Bool
238
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
239
  if optExecJobs opts && not (null ord_plc)
240
    then (case optLuxi opts of
241
            Nothing -> do
242
              hPutStrLn stderr "Execution of commands possible only on LUXI"
243
              return False
244
            Just master -> runJobSet master fin_nl il cmd_jobs)
245
    else return True
246

    
247
-- | Signal handler for graceful termination.
248
hangleSigInt :: IORef Int -> IO ()
249
hangleSigInt cref = do
250
  writeIORef cref 1
251
  putStrLn ("Cancel request registered, will exit at" ++
252
            " the end of the current job set...")
253

    
254
-- | Signal handler for immediate termination.
255
hangleSigTerm :: IORef Int -> IO ()
256
hangleSigTerm cref = do
257
  -- update the cref to 2, just for consistency
258
  writeIORef cref 2
259
  putStrLn "Double cancel request, exiting now..."
260
  exitImmediately $ ExitFailure 2
261

    
262
-- | Runs a job set with handling of signals.
263
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
264
runJobSet master fin_nl il cmd_jobs = do
265
  cref <- newIORef 0
266
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
267
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
268
  execWrapper master fin_nl il cref cmd_jobs
269

    
270
-- | Select the target node group.
271
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
272
            -> IO (String, (Node.List, Instance.List))
273
selectGroup opts gl nlf ilf = do
274
  let ngroups = Cluster.splitCluster nlf ilf
275
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
276
    hPutStrLn stderr "Found multiple node groups:"
277
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
278
           flip Container.find gl . fst) ngroups
279
    exitErr "Aborting."
280

    
281
  case optGroup opts of
282
    Nothing -> do
283
      let (gidx, cdata) = head ngroups
284
          grp = Container.find gidx gl
285
      return (Group.name grp, cdata)
286
    Just g -> case Container.findByName gl g of
287
      Nothing -> do
288
        hPutStrLn stderr $ "Node group " ++ g ++
289
          " not found. Node group list is:"
290
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
291
        exitErr "Aborting."
292
      Just grp ->
293
          case lookup (Group.idx grp) ngroups of
294
            Nothing ->
295
              -- This will only happen if there are no nodes assigned
296
              -- to this group
297
              return (Group.name grp, (Container.empty, Container.empty))
298
            Just cdata -> return (Group.name grp, cdata)
299

    
300
-- | Do a few checks on the cluster data.
301
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
302
checkCluster verbose nl il = do
303
  -- nothing to do on an empty cluster
304
  when (Container.null il) $ do
305
         printf "Cluster is empty, exiting.\n"::IO ()
306
         exitSuccess
307

    
308
  -- hbal doesn't currently handle split clusters
309
  let split_insts = Cluster.findSplitInstances nl il
310
  unless (null split_insts || verbose <= 1) $ do
311
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
312
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
313
    hPutStrLn stderr "These instances will not be moved."
314

    
315
  printf "Loaded %d nodes, %d instances\n"
316
             (Container.size nl)
317
             (Container.size il)::IO ()
318

    
319
  let csf = commonSuffix nl il
320
  when (not (null csf) && verbose > 1) $
321
       printf "Note: Stripping common suffix of '%s' from names\n" csf
322

    
323
-- | Do a few checks on the selected group data.
324
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
325
checkGroup verbose gname nl il = do
326
  printf "Group size %d nodes, %d instances\n"
327
             (Container.size nl)
328
             (Container.size il)::IO ()
329

    
330
  putStrLn $ "Selected node group: " ++ gname
331

    
332
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
333
  unless (verbose == 0) $ printf
334
             "Initial check done: %d bad nodes, %d bad instances.\n"
335
             (length bad_nodes) (length bad_instances)
336

    
337
  unless (null bad_nodes) $
338
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
339
                  \that the cluster will end N+1 happy."
340

    
341
-- | Check that we actually need to rebalance.
342
checkNeedRebalance :: Options -> Score -> IO ()
343
checkNeedRebalance opts ini_cv = do
344
  let min_cv = optMinScore opts
345
  when (ini_cv < min_cv) $ do
346
         printf "Cluster is already well balanced (initial score %.6g,\n\
347
                \minimum score %.6g).\nNothing to do, exiting\n"
348
                ini_cv min_cv:: IO ()
349
         exitSuccess
350

    
351
-- | Main function.
352
main :: Options -> [String] -> IO ()
353
main opts args = do
354
  unless (null args) $ exitErr "This program doesn't take any arguments."
355

    
356
  let verbose = optVerbose opts
357
      shownodes = optShowNodes opts
358
      showinsts = optShowInsts opts
359

    
360
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
361

    
362
  when (verbose > 1) $ do
363
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
364
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
365

    
366
  nlf <- setNodeStatus opts fixed_nl
367
  checkCluster verbose nlf ilf
368

    
369
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
370

    
371
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
372

    
373
  checkGroup verbose gname nl il
374

    
375
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
376

    
377
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
378

    
379
  let ini_cv = Cluster.compCV nl
380
      ini_tbl = Cluster.Table nl il ini_cv []
381
      min_cv = optMinScore opts
382

    
383
  checkNeedRebalance opts ini_cv
384

    
385
  if verbose > 2
386
    then printf "Initial coefficients: overall %.8f\n%s"
387
           ini_cv (Cluster.printStats "  " nl)::IO ()
388
    else printf "Initial score: %.8f\n" ini_cv
389

    
390
  putStrLn "Trying to minimize the CV..."
391
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
392
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
393

    
394
  (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
395
                         (optDiskMoves opts)
396
                         (optInstMoves opts)
397
                         nmlen imlen [] min_cv
398
                         (optMinGainLim opts) (optMinGain opts)
399
                         (optEvacMode opts)
400
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
401
      ord_plc = reverse fin_plc
402
      sol_msg = case () of
403
                  _ | null fin_plc -> printf "No solution found\n"
404
                    | verbose > 2 ->
405
                        printf "Final coefficients:   overall %.8f\n%s"
406
                        fin_cv (Cluster.printStats "  " fin_nl)
407
                    | otherwise ->
408
                        printf "Cluster score improved from %.8f to %.8f\n"
409
                        ini_cv fin_cv ::String
410

    
411
  putStr sol_msg
412

    
413
  unless (verbose == 0) $
414
         printf "Solution length=%d\n" (length ord_plc)
415

    
416
  let cmd_jobs = Cluster.splitJobs cmd_strs
417

    
418
  when (isJust $ optShowCmds opts) .
419
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
420

    
421
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
422
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
423

    
424
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
425

    
426
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
427

    
428
  when (verbose > 3) $ printStats nl fin_nl
429

    
430
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
431
  unless eval (exitWith (ExitFailure 1))