Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Hbal.hs @ c62bec27

History | View | Annotate | Download (15 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal
27
  ( main
28
  , options
29
  , arguments
30
  , iterateDepth
31
  ) where
32

    
33
import Control.Exception (bracket)
34
import Control.Monad
35
import Data.List
36
import Data.Maybe (isJust, isNothing, fromJust)
37
import Data.IORef
38
import System.Exit
39
import System.IO
40
import System.Posix.Process
41
import System.Posix.Signals
42

    
43
import Text.Printf (printf)
44

    
45
import qualified Ganeti.HTools.Container as Container
46
import qualified Ganeti.HTools.Cluster as Cluster
47
import qualified Ganeti.HTools.Group as Group
48
import qualified Ganeti.HTools.Node as Node
49
import qualified Ganeti.HTools.Instance as Instance
50

    
51
import Ganeti.BasicTypes
52
import Ganeti.Common
53
import Ganeti.HTools.CLI
54
import Ganeti.HTools.ExtLoader
55
import Ganeti.HTools.Types
56
import Ganeti.HTools.Loader
57
import Ganeti.OpCodes (wrapOpCode, setOpComment, setOpPriority,
58
                       OpCode, MetaOpCode)
59
import Ganeti.Jobs as Jobs
60
import Ganeti.Types
61
import Ganeti.Utils
62

    
63
import qualified Ganeti.Luxi as L
64
import Ganeti.Version (version)
65

    
66
-- | Options list and functions.
67
options :: IO [OptType]
68
options = do
69
  luxi <- oLuxiSocket
70
  return
71
    [ oPrintNodes
72
    , oPrintInsts
73
    , oPrintCommands
74
    , oDataFile
75
    , oEvacMode
76
    , oRapiMaster
77
    , luxi
78
    , oIAllocSrc
79
    , oExecJobs
80
    , oGroup
81
    , oMaxSolLength
82
    , oVerbose
83
    , oQuiet
84
    , oOfflineNode
85
    , oMinScore
86
    , oMaxCpu
87
    , oMinDisk
88
    , oMinGain
89
    , oMinGainLim
90
    , oDiskMoves
91
    , oSelInst
92
    , oInstMoves
93
    , oDynuFile
94
    , oIgnoreDyn 
95
    , oMonD
96
    , oMonDDataFile
97
    , oExTags
98
    , oExInst
99
    , oSaveCluster
100
    , oPriority
101
    ]
102

    
103
-- | The list of arguments supported by the program.
104
arguments :: [ArgCompletion]
105
arguments = []
106

    
107
-- | A simple type alias for clearer signature.
108
type Annotator = OpCode -> MetaOpCode
109

    
110
-- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment
111
-- about what generated the opcode.
112
annotateOpCode :: Annotator
113
annotateOpCode =
114
  setOpComment ("rebalancing via hbal " ++ version) . wrapOpCode
115

    
116
{- | Start computing the solution at the given depth and recurse until
117
we find a valid solution or we exceed the maximum depth.
118

    
119
-}
120
iterateDepth :: Bool             -- ^ Whether to print moves
121
             -> Cluster.Table    -- ^ The starting table
122
             -> Int              -- ^ Remaining length
123
             -> Bool             -- ^ Allow disk moves
124
             -> Bool             -- ^ Allow instance moves
125
             -> Int              -- ^ Max node name len
126
             -> Int              -- ^ Max instance name len
127
             -> [MoveJob]        -- ^ Current command list
128
             -> Score            -- ^ Score at which to stop
129
             -> Score            -- ^ Min gain limit
130
             -> Score            -- ^ Min score gain
131
             -> Bool             -- ^ Enable evacuation mode
132
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
133
                                              -- and commands
134
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
135
             cmd_strs min_score mg_limit min_gain evac_mode =
136
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
137
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
138
      m_fin_tbl = if allowed_next
139
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
140
                         evac_mode mg_limit min_gain
141
                    else Nothing
142
  in case m_fin_tbl of
143
       Just fin_tbl ->
144
         do
145
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
146
           cur_plc@(idx, _, _, move, _) <-
147
             exitIfEmpty "Empty placement list returned for solution?!" fin_plc
148
           let fin_plc_len = length fin_plc
149
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
150
                                  nmlen imlen cur_plc fin_plc_len
151
               afn = Cluster.involvedNodes ini_il cur_plc
152
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
153
           when printmove $ do
154
               putStrLn sol_line
155
               hFlush stdout
156
           iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
157
                        nmlen imlen upd_cmd_strs min_score
158
                        mg_limit min_gain evac_mode
159
       Nothing -> return (ini_tbl, cmd_strs)
160

    
161
-- | Displays the cluster stats.
162
printStats :: Node.List -> Node.List -> IO ()
163
printStats ini_nl fin_nl = do
164
  let ini_cs = Cluster.totalResources ini_nl
165
      fin_cs = Cluster.totalResources fin_nl
166
  printf "Original: mem=%d disk=%d\n"
167
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
168
  printf "Final:    mem=%d disk=%d\n"
169
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
170

    
171
-- | Saves the rebalance commands to a text file.
172
saveBalanceCommands :: Options -> String -> IO ()
173
saveBalanceCommands opts cmd_data = do
174
  let out_path = fromJust $ optShowCmds opts
175
  putStrLn ""
176
  if out_path == "-"
177
    then printf "Commands to run to reach the above solution:\n%s"
178
           (unlines . map ("  " ++) .
179
            filter (/= "  check") .
180
            lines $ cmd_data)
181
    else do
182
      writeFile out_path (shTemplate ++ cmd_data)
183
      printf "The commands have been written to file '%s'\n" out_path
184

    
185
-- | Wrapper over execJobSet checking for early termination via an IORef.
186
execCancelWrapper :: Annotator -> String -> Node.List
187
                  -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
188
execCancelWrapper _    _      _  _  _    [] = return $ Ok ()
189
execCancelWrapper anno master nl il cref alljss = do
190
  cancel <- readIORef cref
191
  if cancel > 0
192
    then do
193
      putStrLn $ "Exiting early due to user request, " ++
194
               show (length alljss) ++ " jobset(s) remaining."
195
      return $ Ok ()
196
    else execJobSet anno master nl il cref alljss
197

    
198
-- | Execute an entire jobset.
199
execJobSet :: Annotator -> String -> Node.List
200
           -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
201
execJobSet _    _      _  _  _    [] = return $ Ok ()
202
execJobSet anno master nl il cref (js:jss) = do
203
  -- map from jobset (htools list of positions) to [[opcodes]]
204
  let jobs = map (\(_, idx, move, _) ->
205
                    map anno $ Cluster.iMoveToJob nl il idx move) js
206
      descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
207
      logfn = putStrLn . ("Got job IDs" ++) . commaJoin . map (show . fromJobId)
208
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
209
  jrs <- bracket (L.getClient master) L.closeClient $
210
         Jobs.execJobsWait jobs logfn
211
  case jrs of
212
    Bad x -> return $ Bad x
213
    Ok x -> if null failures
214
              then execCancelWrapper anno master nl il cref jss
215
              else return . Bad . unlines $ [
216
                "Not all jobs completed successfully: " ++ show failures,
217
                "Aborting."]
218
      where
219
        failures = filter ((/= JOB_STATUS_SUCCESS) . snd) x
220

    
221
-- | Executes the jobs, if possible and desired.
222
maybeExecJobs :: Options
223
              -> [a]
224
              -> Node.List
225
              -> Instance.List
226
              -> [JobSet]
227
              -> IO (Result ())
228
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
229
  if optExecJobs opts && not (null ord_plc)
230
    then (case optLuxi opts of
231
            Nothing ->
232
              return $ Bad "Execution of commands possible only on LUXI"
233
            Just master ->
234
              let annotator = maybe id setOpPriority (optPriority opts) .
235
                              annotateOpCode
236
              in execWithCancel annotator master fin_nl il cmd_jobs)
237
    else return $ Ok ()
238

    
239
-- | Signal handler for graceful termination.
240
handleSigInt :: IORef Int -> IO ()
241
handleSigInt cref = do
242
  writeIORef cref 1
243
  putStrLn ("Cancel request registered, will exit at" ++
244
            " the end of the current job set...")
245

    
246
-- | Signal handler for immediate termination.
247
handleSigTerm :: IORef Int -> IO ()
248
handleSigTerm cref = do
249
  -- update the cref to 2, just for consistency
250
  writeIORef cref 2
251
  putStrLn "Double cancel request, exiting now..."
252
  exitImmediately $ ExitFailure 2
253

    
254
-- | Prepares to run a set of jobsets with handling of signals and early
255
-- termination.
256
execWithCancel :: Annotator -> String -> Node.List -> Instance.List -> [JobSet]
257
               -> IO (Result ())
258
execWithCancel anno master fin_nl il cmd_jobs = do
259
  cref <- newIORef 0
260
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
261
    [(handleSigTerm, softwareTermination), (handleSigInt, keyboardSignal)]
262
  execCancelWrapper anno master fin_nl il cref cmd_jobs
263

    
264
-- | Select the target node group.
265
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
266
            -> IO (String, (Node.List, Instance.List))
267
selectGroup opts gl nlf ilf = do
268
  let ngroups = Cluster.splitCluster nlf ilf
269
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
270
    hPutStrLn stderr "Found multiple node groups:"
271
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
272
           flip Container.find gl . fst) ngroups
273
    exitErr "Aborting."
274

    
275
  case optGroup opts of
276
    Nothing -> do
277
      (gidx, cdata) <- exitIfEmpty "No groups found by splitCluster?!" ngroups
278
      let grp = Container.find gidx gl
279
      return (Group.name grp, cdata)
280
    Just g -> case Container.findByName gl g of
281
      Nothing -> do
282
        hPutStrLn stderr $ "Node group " ++ g ++
283
          " not found. Node group list is:"
284
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
285
        exitErr "Aborting."
286
      Just grp ->
287
          case lookup (Group.idx grp) ngroups of
288
            Nothing ->
289
              -- This will only happen if there are no nodes assigned
290
              -- to this group
291
              return (Group.name grp, (Container.empty, Container.empty))
292
            Just cdata -> return (Group.name grp, cdata)
293

    
294
-- | Do a few checks on the cluster data.
295
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
296
checkCluster verbose nl il = do
297
  -- nothing to do on an empty cluster
298
  when (Container.null il) $ do
299
         printf "Cluster is empty, exiting.\n"::IO ()
300
         exitSuccess
301

    
302
  -- hbal doesn't currently handle split clusters
303
  let split_insts = Cluster.findSplitInstances nl il
304
  unless (null split_insts || verbose <= 1) $ do
305
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
306
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
307
    hPutStrLn stderr "These instances will not be moved."
308

    
309
  printf "Loaded %d nodes, %d instances\n"
310
             (Container.size nl)
311
             (Container.size il)::IO ()
312

    
313
  let csf = commonSuffix nl il
314
  when (not (null csf) && verbose > 1) $
315
       printf "Note: Stripping common suffix of '%s' from names\n" csf
316

    
317
-- | Do a few checks on the selected group data.
318
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
319
checkGroup verbose gname nl il = do
320
  printf "Group size %d nodes, %d instances\n"
321
             (Container.size nl)
322
             (Container.size il)::IO ()
323

    
324
  putStrLn $ "Selected node group: " ++ gname
325

    
326
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
327
  unless (verbose < 1) $ printf
328
             "Initial check done: %d bad nodes, %d bad instances.\n"
329
             (length bad_nodes) (length bad_instances)
330

    
331
  unless (null bad_nodes) $
332
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
333
                  \that the cluster will end N+1 happy."
334

    
335
-- | Check that we actually need to rebalance.
336
checkNeedRebalance :: Options -> Score -> IO ()
337
checkNeedRebalance opts ini_cv = do
338
  let min_cv = optMinScore opts
339
  when (ini_cv < min_cv) $ do
340
         printf "Cluster is already well balanced (initial score %.6g,\n\
341
                \minimum score %.6g).\nNothing to do, exiting\n"
342
                ini_cv min_cv:: IO ()
343
         exitSuccess
344

    
345
-- | Main function.
346
main :: Options -> [String] -> IO ()
347
main opts args = do
348
  unless (null args) $ exitErr "This program doesn't take any arguments."
349

    
350
  let verbose = optVerbose opts
351
      shownodes = optShowNodes opts
352
      showinsts = optShowInsts opts
353

    
354
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
355

    
356
  when (verbose > 1) $ do
357
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
358
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
359

    
360
  nlf <- setNodeStatus opts fixed_nl
361
  checkCluster verbose nlf ilf
362

    
363
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
364

    
365
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
366

    
367
  checkGroup verbose gname nl il
368

    
369
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
370

    
371
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
372

    
373
  let ini_cv = Cluster.compCV nl
374
      ini_tbl = Cluster.Table nl il ini_cv []
375
      min_cv = optMinScore opts
376

    
377
  checkNeedRebalance opts ini_cv
378

    
379
  if verbose > 2
380
    then printf "Initial coefficients: overall %.8f\n%s"
381
           ini_cv (Cluster.printStats "  " nl)::IO ()
382
    else printf "Initial score: %.8f\n" ini_cv
383

    
384
  putStrLn "Trying to minimize the CV..."
385
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
386
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
387

    
388
  (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
389
                         (optDiskMoves opts)
390
                         (optInstMoves opts)
391
                         nmlen imlen [] min_cv
392
                         (optMinGainLim opts) (optMinGain opts)
393
                         (optEvacMode opts)
394
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
395
      ord_plc = reverse fin_plc
396
      sol_msg = case () of
397
                  _ | null fin_plc -> printf "No solution found\n"
398
                    | verbose > 2 ->
399
                        printf "Final coefficients:   overall %.8f\n%s"
400
                        fin_cv (Cluster.printStats "  " fin_nl)
401
                    | otherwise ->
402
                        printf "Cluster score improved from %.8f to %.8f\n"
403
                        ini_cv fin_cv ::String
404

    
405
  putStr sol_msg
406

    
407
  unless (verbose < 1) $
408
         printf "Solution length=%d\n" (length ord_plc)
409

    
410
  let cmd_jobs = Cluster.splitJobs cmd_strs
411

    
412
  when (isJust $ optShowCmds opts) .
413
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
414

    
415
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
416
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
417

    
418
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
419

    
420
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
421

    
422
  when (verbose > 3) $ printStats nl fin_nl
423

    
424
  exitIfBad "hbal" =<< maybeExecJobs opts ord_plc fin_nl il cmd_jobs