Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ 707cd3d7

History | View | Annotate | Download (14.9 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal
27
  ( main
28
  , options
29
  , arguments
30
  , iterateDepth
31
  ) where
32

    
33
import Control.Concurrent (threadDelay)
34
import Control.Exception (bracket)
35
import Control.Monad
36
import Data.List
37
import Data.Maybe (isJust, isNothing, fromJust)
38
import Data.IORef
39
import System.Exit
40
import System.IO
41
import System.Posix.Process
42
import System.Posix.Signals
43

    
44
import Text.Printf (printf, hPrintf)
45

    
46
import qualified Ganeti.HTools.Container as Container
47
import qualified Ganeti.HTools.Cluster as Cluster
48
import qualified Ganeti.HTools.Group as Group
49
import qualified Ganeti.HTools.Node as Node
50
import qualified Ganeti.HTools.Instance as Instance
51

    
52
import Ganeti.BasicTypes
53
import Ganeti.Common
54
import Ganeti.HTools.CLI
55
import Ganeti.HTools.ExtLoader
56
import Ganeti.HTools.Types
57
import Ganeti.HTools.Loader
58
import Ganeti.Utils
59

    
60
import qualified Ganeti.Luxi as L
61
import Ganeti.Jobs
62

    
63
-- | Options list and functions.
64
options :: [OptType]
65
options =
66
  [ oPrintNodes
67
  , oPrintInsts
68
  , oPrintCommands
69
  , oDataFile
70
  , oEvacMode
71
  , oRapiMaster
72
  , oLuxiSocket
73
  , oIAllocSrc
74
  , oExecJobs
75
  , oGroup
76
  , oMaxSolLength
77
  , oVerbose
78
  , oQuiet
79
  , oOfflineNode
80
  , oMinScore
81
  , oMaxCpu
82
  , oMinDisk
83
  , oMinGain
84
  , oMinGainLim
85
  , oDiskMoves
86
  , oSelInst
87
  , oInstMoves
88
  , oDynuFile
89
  , oExTags
90
  , oExInst
91
  , oSaveCluster
92
  ]
93

    
94
-- | The list of arguments supported by the program.
95
arguments :: [ArgCompletion]
96
arguments = []
97

    
98
{- | Start computing the solution at the given depth and recurse until
99
we find a valid solution or we exceed the maximum depth.
100

    
101
-}
102
iterateDepth :: Bool             -- ^ Whether to print moves
103
             -> Cluster.Table    -- ^ The starting table
104
             -> Int              -- ^ Remaining length
105
             -> Bool             -- ^ Allow disk moves
106
             -> Bool             -- ^ Allow instance moves
107
             -> Int              -- ^ Max node name len
108
             -> Int              -- ^ Max instance name len
109
             -> [MoveJob]        -- ^ Current command list
110
             -> Score            -- ^ Score at which to stop
111
             -> Score            -- ^ Min gain limit
112
             -> Score            -- ^ Min score gain
113
             -> Bool             -- ^ Enable evacuation mode
114
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
115
                                              -- and commands
116
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
117
             cmd_strs min_score mg_limit min_gain evac_mode =
118
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
119
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
120
      m_fin_tbl = if allowed_next
121
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
122
                         evac_mode mg_limit min_gain
123
                    else Nothing
124
  in case m_fin_tbl of
125
       Just fin_tbl ->
126
         do
127
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
128
               fin_plc_len = length fin_plc
129
               cur_plc@(idx, _, _, move, _) = head fin_plc
130
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
131
                                  nmlen imlen cur_plc fin_plc_len
132
               afn = Cluster.involvedNodes ini_il cur_plc
133
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
134
           when printmove $ do
135
               putStrLn sol_line
136
               hFlush stdout
137
           iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
138
                        nmlen imlen upd_cmd_strs min_score
139
                        mg_limit min_gain evac_mode
140
       Nothing -> return (ini_tbl, cmd_strs)
141

    
142
-- | Displays the cluster stats.
143
printStats :: Node.List -> Node.List -> IO ()
144
printStats ini_nl fin_nl = do
145
  let ini_cs = Cluster.totalResources ini_nl
146
      fin_cs = Cluster.totalResources fin_nl
147
  printf "Original: mem=%d disk=%d\n"
148
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
149
  printf "Final:    mem=%d disk=%d\n"
150
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
151

    
152
-- | Saves the rebalance commands to a text file.
153
saveBalanceCommands :: Options -> String -> IO ()
154
saveBalanceCommands opts cmd_data = do
155
  let out_path = fromJust $ optShowCmds opts
156
  putStrLn ""
157
  if out_path == "-"
158
    then printf "Commands to run to reach the above solution:\n%s"
159
           (unlines . map ("  " ++) .
160
            filter (/= "  check") .
161
            lines $ cmd_data)
162
    else do
163
      writeFile out_path (shTemplate ++ cmd_data)
164
      printf "The commands have been written to file '%s'\n" out_path
165

    
166
-- | Polls a set of jobs at a fixed interval until all are finished
167
-- one way or another.
168
waitForJobs :: L.Client -> [L.JobId] -> IO (Result [JobStatus])
169
waitForJobs client jids = do
170
  sts <- L.queryJobsStatus client jids
171
  case sts of
172
    Bad x -> return $ Bad x
173
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
174
            then do
175
              -- TODO: replace hardcoded value with a better thing
176
              threadDelay (1000000 * 15)
177
              waitForJobs client jids
178
            else return $ Ok s
179

    
180
-- | Check that a set of job statuses is all success.
181
checkJobsStatus :: [JobStatus] -> Bool
182
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
183

    
184
-- | Wrapper over execJobSet checking for early termination.
185
execWrapper :: String -> Node.List
186
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
187
execWrapper _      _  _  _    [] = return True
188
execWrapper master nl il cref alljss = do
189
  cancel <- readIORef cref
190
  if cancel > 0
191
    then do
192
      hPrintf stderr "Exiting early due to user request, %d\
193
                     \ jobset(s) remaining." (length alljss)::IO ()
194
      return True
195
    else execJobSet master nl il cref alljss
196

    
197
-- | Execute an entire jobset.
198
execJobSet :: String -> Node.List
199
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
200
execJobSet _      _  _  _    [] = return True
201
execJobSet master nl il cref (js:jss) = do
202
  -- map from jobset (htools list of positions) to [[opcodes]]
203
  let jobs = map (\(_, idx, move, _) ->
204
                      Cluster.iMoveToJob nl il idx move) js
205
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
206
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
207
  jrs <- bracket (L.getClient master) L.closeClient
208
         (\client -> do
209
            jids <- L.submitManyJobs client jobs
210
            case jids of
211
              Bad x -> return $ Bad x
212
              Ok x -> do
213
                putStrLn $ "Got job IDs " ++ commaJoin (map show x)
214
                waitForJobs client x
215
         )
216
  case jrs of
217
    Bad x -> do
218
      hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
219
      return False
220
    Ok x -> if checkJobsStatus x
221
              then execWrapper master nl il cref jss
222
              else do
223
                hPutStrLn stderr $ "Not all jobs completed successfully: " ++
224
                          show x
225
                hPutStrLn stderr "Aborting."
226
                return False
227

    
228
-- | Executes the jobs, if possible and desired.
229
maybeExecJobs :: Options
230
              -> [a]
231
              -> Node.List
232
              -> Instance.List
233
              -> [JobSet]
234
              -> IO Bool
235
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
236
  if optExecJobs opts && not (null ord_plc)
237
    then (case optLuxi opts of
238
            Nothing -> do
239
              hPutStrLn stderr "Execution of commands possible only on LUXI"
240
              return False
241
            Just master -> runJobSet master fin_nl il cmd_jobs)
242
    else return True
243

    
244
-- | Signal handler for graceful termination.
245
hangleSigInt :: IORef Int -> IO ()
246
hangleSigInt cref = do
247
  writeIORef cref 1
248
  putStrLn ("Cancel request registered, will exit at" ++
249
            " the end of the current job set...")
250

    
251
-- | Signal handler for immediate termination.
252
hangleSigTerm :: IORef Int -> IO ()
253
hangleSigTerm cref = do
254
  -- update the cref to 2, just for consistency
255
  writeIORef cref 2
256
  putStrLn "Double cancel request, exiting now..."
257
  exitImmediately $ ExitFailure 2
258

    
259
-- | Runs a job set with handling of signals.
260
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
261
runJobSet master fin_nl il cmd_jobs = do
262
  cref <- newIORef 0
263
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
264
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
265
  execWrapper master fin_nl il cref cmd_jobs
266

    
267
-- | Select the target node group.
268
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
269
            -> IO (String, (Node.List, Instance.List))
270
selectGroup opts gl nlf ilf = do
271
  let ngroups = Cluster.splitCluster nlf ilf
272
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
273
    hPutStrLn stderr "Found multiple node groups:"
274
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
275
           flip Container.find gl . fst) ngroups
276
    exitErr "Aborting."
277

    
278
  case optGroup opts of
279
    Nothing -> do
280
      let (gidx, cdata) = head ngroups
281
          grp = Container.find gidx gl
282
      return (Group.name grp, cdata)
283
    Just g -> case Container.findByName gl g of
284
      Nothing -> do
285
        hPutStrLn stderr $ "Node group " ++ g ++
286
          " not found. Node group list is:"
287
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
288
        exitErr "Aborting."
289
      Just grp ->
290
          case lookup (Group.idx grp) ngroups of
291
            Nothing ->
292
              -- This will only happen if there are no nodes assigned
293
              -- to this group
294
              return (Group.name grp, (Container.empty, Container.empty))
295
            Just cdata -> return (Group.name grp, cdata)
296

    
297
-- | Do a few checks on the cluster data.
298
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
299
checkCluster verbose nl il = do
300
  -- nothing to do on an empty cluster
301
  when (Container.null il) $ do
302
         printf "Cluster is empty, exiting.\n"::IO ()
303
         exitSuccess
304

    
305
  -- hbal doesn't currently handle split clusters
306
  let split_insts = Cluster.findSplitInstances nl il
307
  unless (null split_insts || verbose <= 1) $ do
308
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
309
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
310
    hPutStrLn stderr "These instances will not be moved."
311

    
312
  printf "Loaded %d nodes, %d instances\n"
313
             (Container.size nl)
314
             (Container.size il)::IO ()
315

    
316
  let csf = commonSuffix nl il
317
  when (not (null csf) && verbose > 1) $
318
       printf "Note: Stripping common suffix of '%s' from names\n" csf
319

    
320
-- | Do a few checks on the selected group data.
321
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
322
checkGroup verbose gname nl il = do
323
  printf "Group size %d nodes, %d instances\n"
324
             (Container.size nl)
325
             (Container.size il)::IO ()
326

    
327
  putStrLn $ "Selected node group: " ++ gname
328

    
329
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
330
  unless (verbose == 0) $ printf
331
             "Initial check done: %d bad nodes, %d bad instances.\n"
332
             (length bad_nodes) (length bad_instances)
333

    
334
  unless (null bad_nodes) $
335
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
336
                  \that the cluster will end N+1 happy."
337

    
338
-- | Check that we actually need to rebalance.
339
checkNeedRebalance :: Options -> Score -> IO ()
340
checkNeedRebalance opts ini_cv = do
341
  let min_cv = optMinScore opts
342
  when (ini_cv < min_cv) $ do
343
         printf "Cluster is already well balanced (initial score %.6g,\n\
344
                \minimum score %.6g).\nNothing to do, exiting\n"
345
                ini_cv min_cv:: IO ()
346
         exitSuccess
347

    
348
-- | Main function.
349
main :: Options -> [String] -> IO ()
350
main opts args = do
351
  unless (null args) $ exitErr "This program doesn't take any arguments."
352

    
353
  let verbose = optVerbose opts
354
      shownodes = optShowNodes opts
355
      showinsts = optShowInsts opts
356

    
357
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
358

    
359
  when (verbose > 1) $ do
360
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
361
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
362

    
363
  nlf <- setNodeStatus opts fixed_nl
364
  checkCluster verbose nlf ilf
365

    
366
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
367

    
368
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
369

    
370
  checkGroup verbose gname nl il
371

    
372
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
373

    
374
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
375

    
376
  let ini_cv = Cluster.compCV nl
377
      ini_tbl = Cluster.Table nl il ini_cv []
378
      min_cv = optMinScore opts
379

    
380
  checkNeedRebalance opts ini_cv
381

    
382
  if verbose > 2
383
    then printf "Initial coefficients: overall %.8f\n%s"
384
           ini_cv (Cluster.printStats "  " nl)::IO ()
385
    else printf "Initial score: %.8f\n" ini_cv
386

    
387
  putStrLn "Trying to minimize the CV..."
388
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
389
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
390

    
391
  (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
392
                         (optDiskMoves opts)
393
                         (optInstMoves opts)
394
                         nmlen imlen [] min_cv
395
                         (optMinGainLim opts) (optMinGain opts)
396
                         (optEvacMode opts)
397
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
398
      ord_plc = reverse fin_plc
399
      sol_msg = case () of
400
                  _ | null fin_plc -> printf "No solution found\n"
401
                    | verbose > 2 ->
402
                        printf "Final coefficients:   overall %.8f\n%s"
403
                        fin_cv (Cluster.printStats "  " fin_nl)
404
                    | otherwise ->
405
                        printf "Cluster score improved from %.8f to %.8f\n"
406
                        ini_cv fin_cv ::String
407

    
408
  putStr sol_msg
409

    
410
  unless (verbose == 0) $
411
         printf "Solution length=%d\n" (length ord_plc)
412

    
413
  let cmd_jobs = Cluster.splitJobs cmd_strs
414

    
415
  when (isJust $ optShowCmds opts) .
416
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
417

    
418
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
419
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
420

    
421
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
422

    
423
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
424

    
425
  when (verbose > 3) $ printStats nl fin_nl
426

    
427
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
428
  unless eval (exitWith (ExitFailure 1))