Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ 7e723913

History | View | Annotate | Download (15.5 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal
27
  ( main
28
  , options
29
  , arguments
30
  , iterateDepth
31
  ) where
32

    
33
import Control.Concurrent (threadDelay)
34
import Control.Exception (bracket)
35
import Control.Monad
36
import Data.List
37
import Data.Maybe (isJust, isNothing, fromJust)
38
import Data.IORef
39
import System.Exit
40
import System.IO
41
import System.Posix.Process
42
import System.Posix.Signals
43

    
44
import Text.Printf (printf, hPrintf)
45

    
46
import qualified Ganeti.HTools.Container as Container
47
import qualified Ganeti.HTools.Cluster as Cluster
48
import qualified Ganeti.HTools.Group as Group
49
import qualified Ganeti.HTools.Node as Node
50
import qualified Ganeti.HTools.Instance as Instance
51

    
52
import Ganeti.BasicTypes
53
import Ganeti.Common
54
import Ganeti.Errors
55
import Ganeti.HTools.CLI
56
import Ganeti.HTools.ExtLoader
57
import Ganeti.HTools.Types
58
import Ganeti.HTools.Loader
59
import Ganeti.OpCodes (wrapOpCode, setOpComment, OpCode, MetaOpCode)
60
import Ganeti.Types (fromJobId)
61
import Ganeti.Utils
62

    
63
import qualified Ganeti.Luxi as L
64
import Ganeti.Jobs
65
import Ganeti.Version (version)
66

    
67
-- | Options list and functions.
68
options :: IO [OptType]
69
options = do
70
  luxi <- oLuxiSocket
71
  return
72
    [ oPrintNodes
73
    , oPrintInsts
74
    , oPrintCommands
75
    , oDataFile
76
    , oEvacMode
77
    , oRapiMaster
78
    , luxi
79
    , oIAllocSrc
80
    , oExecJobs
81
    , oGroup
82
    , oMaxSolLength
83
    , oVerbose
84
    , oQuiet
85
    , oOfflineNode
86
    , oMinScore
87
    , oMaxCpu
88
    , oMinDisk
89
    , oMinGain
90
    , oMinGainLim
91
    , oDiskMoves
92
    , oSelInst
93
    , oInstMoves
94
    , oDynuFile
95
    , oExTags
96
    , oExInst
97
    , oSaveCluster
98
    ]
99

    
100
-- | The list of arguments supported by the program.
101
arguments :: [ArgCompletion]
102
arguments = []
103

    
104
-- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment
105
-- about what generated the opcode.
106
annotateOpCode :: OpCode -> MetaOpCode
107
annotateOpCode =
108
  setOpComment ("rebalancing via hbal " ++ version) . wrapOpCode
109

    
110
{- | Start computing the solution at the given depth and recurse until
111
we find a valid solution or we exceed the maximum depth.
112

    
113
-}
114
iterateDepth :: Bool             -- ^ Whether to print moves
115
             -> Cluster.Table    -- ^ The starting table
116
             -> Int              -- ^ Remaining length
117
             -> Bool             -- ^ Allow disk moves
118
             -> Bool             -- ^ Allow instance moves
119
             -> Int              -- ^ Max node name len
120
             -> Int              -- ^ Max instance name len
121
             -> [MoveJob]        -- ^ Current command list
122
             -> Score            -- ^ Score at which to stop
123
             -> Score            -- ^ Min gain limit
124
             -> Score            -- ^ Min score gain
125
             -> Bool             -- ^ Enable evacuation mode
126
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
127
                                              -- and commands
128
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
129
             cmd_strs min_score mg_limit min_gain evac_mode =
130
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
131
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
132
      m_fin_tbl = if allowed_next
133
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
134
                         evac_mode mg_limit min_gain
135
                    else Nothing
136
  in case m_fin_tbl of
137
       Just fin_tbl ->
138
         do
139
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
140
               fin_plc_len = length fin_plc
141
               cur_plc@(idx, _, _, move, _) = head fin_plc
142
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
143
                                  nmlen imlen cur_plc fin_plc_len
144
               afn = Cluster.involvedNodes ini_il cur_plc
145
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
146
           when printmove $ do
147
               putStrLn sol_line
148
               hFlush stdout
149
           iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
150
                        nmlen imlen upd_cmd_strs min_score
151
                        mg_limit min_gain evac_mode
152
       Nothing -> return (ini_tbl, cmd_strs)
153

    
154
-- | Displays the cluster stats.
155
printStats :: Node.List -> Node.List -> IO ()
156
printStats ini_nl fin_nl = do
157
  let ini_cs = Cluster.totalResources ini_nl
158
      fin_cs = Cluster.totalResources fin_nl
159
  printf "Original: mem=%d disk=%d\n"
160
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
161
  printf "Final:    mem=%d disk=%d\n"
162
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
163

    
164
-- | Saves the rebalance commands to a text file.
165
saveBalanceCommands :: Options -> String -> IO ()
166
saveBalanceCommands opts cmd_data = do
167
  let out_path = fromJust $ optShowCmds opts
168
  putStrLn ""
169
  if out_path == "-"
170
    then printf "Commands to run to reach the above solution:\n%s"
171
           (unlines . map ("  " ++) .
172
            filter (/= "  check") .
173
            lines $ cmd_data)
174
    else do
175
      writeFile out_path (shTemplate ++ cmd_data)
176
      printf "The commands have been written to file '%s'\n" out_path
177

    
178
-- | Polls a set of jobs at a fixed interval until all are finished
179
-- one way or another.
180
waitForJobs :: L.Client -> [L.JobId] -> IO (Result [JobStatus])
181
waitForJobs client jids = do
182
  sts <- L.queryJobsStatus client jids
183
  case sts of
184
    Bad e -> return . Bad $ "Checking job status: " ++ formatError e
185
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
186
            then do
187
              -- TODO: replace hardcoded value with a better thing
188
              threadDelay (1000000 * 15)
189
              waitForJobs client jids
190
            else return $ Ok s
191

    
192
-- | Check that a set of job statuses is all success.
193
checkJobsStatus :: [JobStatus] -> Bool
194
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
195

    
196
-- | Wrapper over execJobSet checking for early termination.
197
execWrapper :: String -> Node.List
198
            -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
199
execWrapper _      _  _  _    [] = return True
200
execWrapper master nl il cref alljss = do
201
  cancel <- readIORef cref
202
  if cancel > 0
203
    then do
204
      hPrintf stderr "Exiting early due to user request, %d\
205
                     \ jobset(s) remaining." (length alljss)::IO ()
206
      return True
207
    else execJobSet master nl il cref alljss
208

    
209
-- | Execute an entire jobset.
210
execJobSet :: String -> Node.List
211
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
212
execJobSet _      _  _  _    [] = return True
213
execJobSet master nl il cref (js:jss) = do
214
  -- map from jobset (htools list of positions) to [[opcodes]]
215
  let jobs = map (\(_, idx, move, _) ->
216
                      map annotateOpCode $
217
                      Cluster.iMoveToJob nl il idx move) js
218
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
219
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
220
  jrs <- bracket (L.getClient master) L.closeClient
221
         (\client -> do
222
            jids <- L.submitManyJobs client jobs
223
            case jids of
224
              Bad e -> return . Bad $ "Job submission error: " ++ formatError e
225
              Ok x -> do
226
                putStrLn $ "Got job IDs " ++
227
                           commaJoin (map (show . fromJobId) x)
228
                waitForJobs client x
229
         )
230
  case jrs of
231
    Bad x -> do
232
      hPutStrLn stderr x
233
      return False
234
    Ok x -> if checkJobsStatus x
235
              then execWrapper master nl il cref jss
236
              else do
237
                hPutStrLn stderr $ "Not all jobs completed successfully: " ++
238
                          show x
239
                hPutStrLn stderr "Aborting."
240
                return False
241

    
242
-- | Executes the jobs, if possible and desired.
243
maybeExecJobs :: Options
244
              -> [a]
245
              -> Node.List
246
              -> Instance.List
247
              -> [JobSet]
248
              -> IO Bool
249
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
250
  if optExecJobs opts && not (null ord_plc)
251
    then (case optLuxi opts of
252
            Nothing -> do
253
              hPutStrLn stderr "Execution of commands possible only on LUXI"
254
              return False
255
            Just master -> runJobSet master fin_nl il cmd_jobs)
256
    else return True
257

    
258
-- | Signal handler for graceful termination.
259
hangleSigInt :: IORef Int -> IO ()
260
hangleSigInt cref = do
261
  writeIORef cref 1
262
  putStrLn ("Cancel request registered, will exit at" ++
263
            " the end of the current job set...")
264

    
265
-- | Signal handler for immediate termination.
266
hangleSigTerm :: IORef Int -> IO ()
267
hangleSigTerm cref = do
268
  -- update the cref to 2, just for consistency
269
  writeIORef cref 2
270
  putStrLn "Double cancel request, exiting now..."
271
  exitImmediately $ ExitFailure 2
272

    
273
-- | Runs a job set with handling of signals.
274
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
275
runJobSet master fin_nl il cmd_jobs = do
276
  cref <- newIORef 0
277
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
278
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
279
  execWrapper master fin_nl il cref cmd_jobs
280

    
281
-- | Select the target node group.
282
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
283
            -> IO (String, (Node.List, Instance.List))
284
selectGroup opts gl nlf ilf = do
285
  let ngroups = Cluster.splitCluster nlf ilf
286
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
287
    hPutStrLn stderr "Found multiple node groups:"
288
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
289
           flip Container.find gl . fst) ngroups
290
    exitErr "Aborting."
291

    
292
  case optGroup opts of
293
    Nothing -> do
294
      let (gidx, cdata) = head ngroups
295
          grp = Container.find gidx gl
296
      return (Group.name grp, cdata)
297
    Just g -> case Container.findByName gl g of
298
      Nothing -> do
299
        hPutStrLn stderr $ "Node group " ++ g ++
300
          " not found. Node group list is:"
301
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
302
        exitErr "Aborting."
303
      Just grp ->
304
          case lookup (Group.idx grp) ngroups of
305
            Nothing ->
306
              -- This will only happen if there are no nodes assigned
307
              -- to this group
308
              return (Group.name grp, (Container.empty, Container.empty))
309
            Just cdata -> return (Group.name grp, cdata)
310

    
311
-- | Do a few checks on the cluster data.
312
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
313
checkCluster verbose nl il = do
314
  -- nothing to do on an empty cluster
315
  when (Container.null il) $ do
316
         printf "Cluster is empty, exiting.\n"::IO ()
317
         exitSuccess
318

    
319
  -- hbal doesn't currently handle split clusters
320
  let split_insts = Cluster.findSplitInstances nl il
321
  unless (null split_insts || verbose <= 1) $ do
322
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
323
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
324
    hPutStrLn stderr "These instances will not be moved."
325

    
326
  printf "Loaded %d nodes, %d instances\n"
327
             (Container.size nl)
328
             (Container.size il)::IO ()
329

    
330
  let csf = commonSuffix nl il
331
  when (not (null csf) && verbose > 1) $
332
       printf "Note: Stripping common suffix of '%s' from names\n" csf
333

    
334
-- | Do a few checks on the selected group data.
335
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
336
checkGroup verbose gname nl il = do
337
  printf "Group size %d nodes, %d instances\n"
338
             (Container.size nl)
339
             (Container.size il)::IO ()
340

    
341
  putStrLn $ "Selected node group: " ++ gname
342

    
343
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
344
  unless (verbose == 0) $ printf
345
             "Initial check done: %d bad nodes, %d bad instances.\n"
346
             (length bad_nodes) (length bad_instances)
347

    
348
  unless (null bad_nodes) $
349
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
350
                  \that the cluster will end N+1 happy."
351

    
352
-- | Check that we actually need to rebalance.
353
checkNeedRebalance :: Options -> Score -> IO ()
354
checkNeedRebalance opts ini_cv = do
355
  let min_cv = optMinScore opts
356
  when (ini_cv < min_cv) $ do
357
         printf "Cluster is already well balanced (initial score %.6g,\n\
358
                \minimum score %.6g).\nNothing to do, exiting\n"
359
                ini_cv min_cv:: IO ()
360
         exitSuccess
361

    
362
-- | Main function.
363
main :: Options -> [String] -> IO ()
364
main opts args = do
365
  unless (null args) $ exitErr "This program doesn't take any arguments."
366

    
367
  let verbose = optVerbose opts
368
      shownodes = optShowNodes opts
369
      showinsts = optShowInsts opts
370

    
371
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
372

    
373
  when (verbose > 1) $ do
374
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
375
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
376

    
377
  nlf <- setNodeStatus opts fixed_nl
378
  checkCluster verbose nlf ilf
379

    
380
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
381

    
382
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
383

    
384
  checkGroup verbose gname nl il
385

    
386
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
387

    
388
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
389

    
390
  let ini_cv = Cluster.compCV nl
391
      ini_tbl = Cluster.Table nl il ini_cv []
392
      min_cv = optMinScore opts
393

    
394
  checkNeedRebalance opts ini_cv
395

    
396
  if verbose > 2
397
    then printf "Initial coefficients: overall %.8f\n%s"
398
           ini_cv (Cluster.printStats "  " nl)::IO ()
399
    else printf "Initial score: %.8f\n" ini_cv
400

    
401
  putStrLn "Trying to minimize the CV..."
402
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
403
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
404

    
405
  (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
406
                         (optDiskMoves opts)
407
                         (optInstMoves opts)
408
                         nmlen imlen [] min_cv
409
                         (optMinGainLim opts) (optMinGain opts)
410
                         (optEvacMode opts)
411
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
412
      ord_plc = reverse fin_plc
413
      sol_msg = case () of
414
                  _ | null fin_plc -> printf "No solution found\n"
415
                    | verbose > 2 ->
416
                        printf "Final coefficients:   overall %.8f\n%s"
417
                        fin_cv (Cluster.printStats "  " fin_nl)
418
                    | otherwise ->
419
                        printf "Cluster score improved from %.8f to %.8f\n"
420
                        ini_cv fin_cv ::String
421

    
422
  putStr sol_msg
423

    
424
  unless (verbose == 0) $
425
         printf "Solution length=%d\n" (length ord_plc)
426

    
427
  let cmd_jobs = Cluster.splitJobs cmd_strs
428

    
429
  when (isJust $ optShowCmds opts) .
430
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
431

    
432
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
433
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
434

    
435
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
436

    
437
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
438

    
439
  when (verbose > 3) $ printStats nl fin_nl
440

    
441
  eval <- maybeExecJobs opts ord_plc fin_nl il cmd_jobs
442
  unless eval (exitWith (ExitFailure 1))