Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Hbal.hs @ a81ca843

History | View | Annotate | Download (15 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal
27
  ( main
28
  , options
29
  , arguments
30
  , iterateDepth
31
  ) where
32

    
33
import Control.Exception (bracket)
34
import Control.Monad
35
import Data.List
36
import Data.Maybe (isJust, isNothing, fromJust)
37
import Data.IORef
38
import System.Exit
39
import System.IO
40
import System.Posix.Process
41
import System.Posix.Signals
42

    
43
import Text.Printf (printf)
44

    
45
import qualified Ganeti.HTools.Container as Container
46
import qualified Ganeti.HTools.Cluster as Cluster
47
import qualified Ganeti.HTools.Group as Group
48
import qualified Ganeti.HTools.Node as Node
49
import qualified Ganeti.HTools.Instance as Instance
50

    
51
import Ganeti.BasicTypes
52
import Ganeti.Common
53
import Ganeti.HTools.CLI
54
import Ganeti.HTools.ExtLoader
55
import Ganeti.HTools.Types
56
import Ganeti.HTools.Loader
57
import Ganeti.OpCodes (wrapOpCode, setOpComment, setOpPriority,
58
                       OpCode, MetaOpCode)
59
import Ganeti.Jobs as Jobs
60
import Ganeti.Types
61
import Ganeti.Utils
62

    
63
import qualified Ganeti.Luxi as L
64
import Ganeti.Version (version)
65

    
66
-- | Options list and functions.
67
options :: IO [OptType]
68
options = do
69
  luxi <- oLuxiSocket
70
  return
71
    [ oPrintNodes
72
    , oPrintInsts
73
    , oPrintCommands
74
    , oDataFile
75
    , oEvacMode
76
    , oRapiMaster
77
    , luxi
78
    , oIAllocSrc
79
    , oExecJobs
80
    , oGroup
81
    , oMaxSolLength
82
    , oVerbose
83
    , oQuiet
84
    , oOfflineNode
85
    , oMinScore
86
    , oMaxCpu
87
    , oMinDisk
88
    , oMinGain
89
    , oMinGainLim
90
    , oDiskMoves
91
    , oSelInst
92
    , oInstMoves
93
    , oDynuFile
94
    , oExTags
95
    , oExInst
96
    , oSaveCluster
97
    , oPriority
98
    ]
99

    
100
-- | The list of arguments supported by the program.
101
arguments :: [ArgCompletion]
102
arguments = []
103

    
104
-- | A simple type alias for clearer signature.
105
type Annotator = OpCode -> MetaOpCode
106

    
107
-- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment
108
-- about what generated the opcode.
109
annotateOpCode :: Annotator
110
annotateOpCode =
111
  setOpComment ("rebalancing via hbal " ++ version) . wrapOpCode
112

    
113
{- | Start computing the solution at the given depth and recurse until
114
we find a valid solution or we exceed the maximum depth.
115

    
116
-}
117
iterateDepth :: Bool             -- ^ Whether to print moves
118
             -> Cluster.Table    -- ^ The starting table
119
             -> Int              -- ^ Remaining length
120
             -> Bool             -- ^ Allow disk moves
121
             -> Bool             -- ^ Allow instance moves
122
             -> Int              -- ^ Max node name len
123
             -> Int              -- ^ Max instance name len
124
             -> [MoveJob]        -- ^ Current command list
125
             -> Score            -- ^ Score at which to stop
126
             -> Score            -- ^ Min gain limit
127
             -> Score            -- ^ Min score gain
128
             -> Bool             -- ^ Enable evacuation mode
129
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
130
                                              -- and commands
131
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
132
             cmd_strs min_score mg_limit min_gain evac_mode =
133
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
134
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
135
      m_fin_tbl = if allowed_next
136
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
137
                         evac_mode mg_limit min_gain
138
                    else Nothing
139
  in case m_fin_tbl of
140
       Just fin_tbl ->
141
         do
142
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
143
           cur_plc@(idx, _, _, move, _) <-
144
             exitIfEmpty "Empty placement list returned for solution?!" fin_plc
145
           let fin_plc_len = length fin_plc
146
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
147
                                  nmlen imlen cur_plc fin_plc_len
148
               afn = Cluster.involvedNodes ini_il cur_plc
149
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
150
           when printmove $ do
151
               putStrLn sol_line
152
               hFlush stdout
153
           iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
154
                        nmlen imlen upd_cmd_strs min_score
155
                        mg_limit min_gain evac_mode
156
       Nothing -> return (ini_tbl, cmd_strs)
157

    
158
-- | Displays the cluster stats.
159
printStats :: Node.List -> Node.List -> IO ()
160
printStats ini_nl fin_nl = do
161
  let ini_cs = Cluster.totalResources ini_nl
162
      fin_cs = Cluster.totalResources fin_nl
163
  printf "Original: mem=%d disk=%d\n"
164
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
165
  printf "Final:    mem=%d disk=%d\n"
166
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
167

    
168
-- | Saves the rebalance commands to a text file.
169
saveBalanceCommands :: Options -> String -> IO ()
170
saveBalanceCommands opts cmd_data = do
171
  let out_path = fromJust $ optShowCmds opts
172
  putStrLn ""
173
  if out_path == "-"
174
    then printf "Commands to run to reach the above solution:\n%s"
175
           (unlines . map ("  " ++) .
176
            filter (/= "  check") .
177
            lines $ cmd_data)
178
    else do
179
      writeFile out_path (shTemplate ++ cmd_data)
180
      printf "The commands have been written to file '%s'\n" out_path
181

    
182
-- | Wrapper over execJobSet checking for early termination via an IORef.
183
execCancelWrapper :: Annotator -> String -> Node.List
184
                  -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
185
execCancelWrapper _    _      _  _  _    [] = return $ Ok ()
186
execCancelWrapper anno master nl il cref alljss = do
187
  cancel <- readIORef cref
188
  if cancel > 0
189
    then return . Bad $ "Exiting early due to user request, " ++
190
                        show (length alljss) ++ " jobset(s) remaining."
191
    else execJobSet anno master nl il cref alljss
192

    
193
-- | Execute an entire jobset.
194
execJobSet :: Annotator -> String -> Node.List
195
           -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
196
execJobSet _    _      _  _  _    [] = return $ Ok ()
197
execJobSet anno master nl il cref (js:jss) = do
198
  -- map from jobset (htools list of positions) to [[opcodes]]
199
  let jobs = map (\(_, idx, move, _) ->
200
                    map anno $ Cluster.iMoveToJob nl il idx move) js
201
      descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
202
      logfn = putStrLn . ("Got job IDs" ++) . commaJoin . map (show . fromJobId)
203
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
204
  jrs <- bracket (L.getClient master) L.closeClient $
205
         Jobs.execJobsWait jobs logfn
206
  case jrs of
207
    Bad x -> return $ Bad x
208
    Ok x -> if null failures
209
              then execCancelWrapper anno master nl il cref jss
210
              else return . Bad . unlines $ [
211
                "Not all jobs completed successfully: " ++ show failures,
212
                "Aborting."]
213
      where
214
        failures = filter ((/= JOB_STATUS_SUCCESS) . snd) x
215

    
216
-- | Executes the jobs, if possible and desired.
217
maybeExecJobs :: Options
218
              -> [a]
219
              -> Node.List
220
              -> Instance.List
221
              -> [JobSet]
222
              -> IO (Result ())
223
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
224
  if optExecJobs opts && not (null ord_plc)
225
    then (case optLuxi opts of
226
            Nothing ->
227
              return $ Bad "Execution of commands possible only on LUXI"
228
            Just master ->
229
              let annotator = maybe id setOpPriority (optPriority opts) .
230
                              annotateOpCode
231
              in execWithCancel annotator master fin_nl il cmd_jobs)
232
    else return $ Ok ()
233

    
234
-- | Signal handler for graceful termination.
235
handleSigInt :: IORef Int -> IO ()
236
handleSigInt cref = do
237
  writeIORef cref 1
238
  putStrLn ("Cancel request registered, will exit at" ++
239
            " the end of the current job set...")
240

    
241
-- | Signal handler for immediate termination.
242
handleSigTerm :: IORef Int -> IO ()
243
handleSigTerm cref = do
244
  -- update the cref to 2, just for consistency
245
  writeIORef cref 2
246
  putStrLn "Double cancel request, exiting now..."
247
  exitImmediately $ ExitFailure 2
248

    
249
-- | Prepares to run a set of jobsets with handling of signals and early
250
-- termination.
251
execWithCancel :: Annotator -> String -> Node.List -> Instance.List -> [JobSet]
252
               -> IO (Result ())
253
execWithCancel anno master fin_nl il cmd_jobs = do
254
  cref <- newIORef 0
255
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
256
    [(handleSigTerm, softwareTermination), (handleSigInt, keyboardSignal)]
257
  execCancelWrapper anno master fin_nl il cref cmd_jobs
258

    
259
-- | Select the target node group.
260
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
261
            -> IO (String, (Node.List, Instance.List))
262
selectGroup opts gl nlf ilf = do
263
  let ngroups = Cluster.splitCluster nlf ilf
264
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
265
    hPutStrLn stderr "Found multiple node groups:"
266
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
267
           flip Container.find gl . fst) ngroups
268
    exitErr "Aborting."
269

    
270
  case optGroup opts of
271
    Nothing -> do
272
      (gidx, cdata) <- exitIfEmpty "No groups found by splitCluster?!" ngroups
273
      let grp = Container.find gidx gl
274
      return (Group.name grp, cdata)
275
    Just g -> case Container.findByName gl g of
276
      Nothing -> do
277
        hPutStrLn stderr $ "Node group " ++ g ++
278
          " not found. Node group list is:"
279
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
280
        exitErr "Aborting."
281
      Just grp ->
282
          case lookup (Group.idx grp) ngroups of
283
            Nothing ->
284
              -- This will only happen if there are no nodes assigned
285
              -- to this group
286
              return (Group.name grp, (Container.empty, Container.empty))
287
            Just cdata -> return (Group.name grp, cdata)
288

    
289
-- | Do a few checks on the cluster data.
290
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
291
checkCluster verbose nl il = do
292
  -- nothing to do on an empty cluster
293
  when (Container.null il) $ do
294
         printf "Cluster is empty, exiting.\n"::IO ()
295
         exitSuccess
296

    
297
  -- hbal doesn't currently handle split clusters
298
  let split_insts = Cluster.findSplitInstances nl il
299
  unless (null split_insts || verbose <= 1) $ do
300
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
301
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
302
    hPutStrLn stderr "These instances will not be moved."
303

    
304
  printf "Loaded %d nodes, %d instances\n"
305
             (Container.size nl)
306
             (Container.size il)::IO ()
307

    
308
  let csf = commonSuffix nl il
309
  when (not (null csf) && verbose > 1) $
310
       printf "Note: Stripping common suffix of '%s' from names\n" csf
311

    
312
-- | Do a few checks on the selected group data.
313
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
314
checkGroup verbose gname nl il = do
315
  printf "Group size %d nodes, %d instances\n"
316
             (Container.size nl)
317
             (Container.size il)::IO ()
318

    
319
  putStrLn $ "Selected node group: " ++ gname
320

    
321
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
322
  unless (verbose == 0) $ printf
323
             "Initial check done: %d bad nodes, %d bad instances.\n"
324
             (length bad_nodes) (length bad_instances)
325

    
326
  unless (null bad_nodes) $
327
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
328
                  \that the cluster will end N+1 happy."
329

    
330
-- | Check that we actually need to rebalance.
331
checkNeedRebalance :: Options -> Score -> IO ()
332
checkNeedRebalance opts ini_cv = do
333
  let min_cv = optMinScore opts
334
  when (ini_cv < min_cv) $ do
335
         printf "Cluster is already well balanced (initial score %.6g,\n\
336
                \minimum score %.6g).\nNothing to do, exiting\n"
337
                ini_cv min_cv:: IO ()
338
         exitSuccess
339

    
340
-- | Main function.
341
main :: Options -> [String] -> IO ()
342
main opts args = do
343
  unless (null args) $ exitErr "This program doesn't take any arguments."
344

    
345
  let verbose = optVerbose opts
346
      shownodes = optShowNodes opts
347
      showinsts = optShowInsts opts
348

    
349
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
350

    
351
  when (verbose > 1) $ do
352
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
353
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
354

    
355
  nlf <- setNodeStatus opts fixed_nl
356
  checkCluster verbose nlf ilf
357

    
358
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
359

    
360
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
361

    
362
  checkGroup verbose gname nl il
363

    
364
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
365

    
366
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
367

    
368
  let ini_cv = Cluster.compCV nl
369
      ini_tbl = Cluster.Table nl il ini_cv []
370
      min_cv = optMinScore opts
371

    
372
  checkNeedRebalance opts ini_cv
373

    
374
  if verbose > 2
375
    then printf "Initial coefficients: overall %.8f\n%s"
376
           ini_cv (Cluster.printStats "  " nl)::IO ()
377
    else printf "Initial score: %.8f\n" ini_cv
378

    
379
  putStrLn "Trying to minimize the CV..."
380
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
381
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
382

    
383
  (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
384
                         (optDiskMoves opts)
385
                         (optInstMoves opts)
386
                         nmlen imlen [] min_cv
387
                         (optMinGainLim opts) (optMinGain opts)
388
                         (optEvacMode opts)
389
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
390
      ord_plc = reverse fin_plc
391
      sol_msg = case () of
392
                  _ | null fin_plc -> printf "No solution found\n"
393
                    | verbose > 2 ->
394
                        printf "Final coefficients:   overall %.8f\n%s"
395
                        fin_cv (Cluster.printStats "  " fin_nl)
396
                    | otherwise ->
397
                        printf "Cluster score improved from %.8f to %.8f\n"
398
                        ini_cv fin_cv ::String
399

    
400
  putStr sol_msg
401

    
402
  unless (verbose == 0) $
403
         printf "Solution length=%d\n" (length ord_plc)
404

    
405
  let cmd_jobs = Cluster.splitJobs cmd_strs
406

    
407
  when (isJust $ optShowCmds opts) .
408
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
409

    
410
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
411
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
412

    
413
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
414

    
415
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
416

    
417
  when (verbose > 3) $ printStats nl fin_nl
418

    
419
  exitIfBad "hbal" =<< maybeExecJobs opts ord_plc fin_nl il cmd_jobs