Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Hbal.hs @ 7c3a6391

History | View | Annotate | Download (14.5 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal
27
  ( main
28
  , options
29
  , arguments
30
  , iterateDepth
31
  ) where
32

    
33
import Control.Exception (bracket)
34
import Control.Monad
35
import Data.List
36
import Data.Maybe (isNothing)
37
import Data.IORef
38
import System.Exit
39
import System.IO
40
import System.Posix.Process
41
import System.Posix.Signals
42

    
43
import Text.Printf (printf)
44

    
45
import qualified Ganeti.HTools.Container as Container
46
import qualified Ganeti.HTools.Cluster as Cluster
47
import qualified Ganeti.HTools.Group as Group
48
import qualified Ganeti.HTools.Node as Node
49
import qualified Ganeti.HTools.Instance as Instance
50

    
51
import Ganeti.BasicTypes
52
import Ganeti.Common
53
import Ganeti.HTools.CLI
54
import Ganeti.HTools.ExtLoader
55
import Ganeti.HTools.Types
56
import Ganeti.HTools.Loader
57
import Ganeti.OpCodes (wrapOpCode, setOpComment, setOpPriority,
58
                       OpCode, MetaOpCode)
59
import Ganeti.Jobs as Jobs
60
import Ganeti.Types
61
import Ganeti.Utils
62

    
63
import qualified Ganeti.Luxi as L
64
import Ganeti.Version (version)
65

    
66
-- | Options list and functions.
67
options :: IO [OptType]
68
options = do
69
  luxi <- oLuxiSocket
70
  return
71
    [ oPrintNodes
72
    , oPrintInsts
73
    , oPrintCommands
74
    , oDataFile
75
    , oEvacMode
76
    , oRapiMaster
77
    , luxi
78
    , oIAllocSrc
79
    , oExecJobs
80
    , oGroup
81
    , oMaxSolLength
82
    , oVerbose
83
    , oQuiet
84
    , oOfflineNode
85
    , oMinScore
86
    , oMaxCpu
87
    , oMinDisk
88
    , oMinGain
89
    , oMinGainLim
90
    , oDiskMoves
91
    , oSelInst
92
    , oInstMoves
93
    , oDynuFile
94
    , oIgnoreDyn 
95
    , oMonD
96
    , oMonDDataFile
97
    , oExTags
98
    , oExInst
99
    , oSaveCluster
100
    , oPriority
101
    ]
102

    
103
-- | The list of arguments supported by the program.
104
arguments :: [ArgCompletion]
105
arguments = []
106

    
107
-- | A simple type alias for clearer signature.
108
type Annotator = OpCode -> MetaOpCode
109

    
110
-- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment
111
-- about what generated the opcode.
112
annotateOpCode :: Annotator
113
annotateOpCode =
114
  setOpComment ("rebalancing via hbal " ++ version) . wrapOpCode
115

    
116
{- | Start computing the solution at the given depth and recurse until
117
we find a valid solution or we exceed the maximum depth.
118

    
119
-}
120
iterateDepth :: Bool             -- ^ Whether to print moves
121
             -> Cluster.Table    -- ^ The starting table
122
             -> Int              -- ^ Remaining length
123
             -> Bool             -- ^ Allow disk moves
124
             -> Bool             -- ^ Allow instance moves
125
             -> Int              -- ^ Max node name len
126
             -> Int              -- ^ Max instance name len
127
             -> [MoveJob]        -- ^ Current command list
128
             -> Score            -- ^ Score at which to stop
129
             -> Score            -- ^ Min gain limit
130
             -> Score            -- ^ Min score gain
131
             -> Bool             -- ^ Enable evacuation mode
132
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
133
                                              -- and commands
134
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
135
             cmd_strs min_score mg_limit min_gain evac_mode =
136
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
137
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
138
      m_fin_tbl = if allowed_next
139
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
140
                         evac_mode mg_limit min_gain
141
                    else Nothing
142
  in case m_fin_tbl of
143
       Just fin_tbl ->
144
         do
145
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
146
           cur_plc@(idx, _, _, move, _) <-
147
             exitIfEmpty "Empty placement list returned for solution?!" fin_plc
148
           let fin_plc_len = length fin_plc
149
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
150
                                  nmlen imlen cur_plc fin_plc_len
151
               afn = Cluster.involvedNodes ini_il cur_plc
152
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
153
           when printmove $ do
154
               putStrLn sol_line
155
               hFlush stdout
156
           iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
157
                        nmlen imlen upd_cmd_strs min_score
158
                        mg_limit min_gain evac_mode
159
       Nothing -> return (ini_tbl, cmd_strs)
160

    
161
-- | Displays the cluster stats.
162
printStats :: Node.List -> Node.List -> IO ()
163
printStats ini_nl fin_nl = do
164
  let ini_cs = Cluster.totalResources ini_nl
165
      fin_cs = Cluster.totalResources fin_nl
166
  printf "Original: mem=%d disk=%d\n"
167
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
168
  printf "Final:    mem=%d disk=%d\n"
169
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
170

    
171
-- | Wrapper over execJobSet checking for early termination via an IORef.
172
execCancelWrapper :: Annotator -> String -> Node.List
173
                  -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
174
execCancelWrapper _    _      _  _  _    [] = return $ Ok ()
175
execCancelWrapper anno master nl il cref alljss = do
176
  cancel <- readIORef cref
177
  if cancel > 0
178
    then do
179
      putStrLn $ "Exiting early due to user request, " ++
180
               show (length alljss) ++ " jobset(s) remaining."
181
      return $ Ok ()
182
    else execJobSet anno master nl il cref alljss
183

    
184
-- | Execute an entire jobset.
185
execJobSet :: Annotator -> String -> Node.List
186
           -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
187
execJobSet _    _      _  _  _    [] = return $ Ok ()
188
execJobSet anno master nl il cref (js:jss) = do
189
  -- map from jobset (htools list of positions) to [[opcodes]]
190
  let jobs = map (\(_, idx, move, _) ->
191
                    map anno $ Cluster.iMoveToJob nl il idx move) js
192
      descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
193
      logfn = putStrLn . ("Got job IDs" ++) . commaJoin . map (show . fromJobId)
194
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
195
  jrs <- bracket (L.getClient master) L.closeClient $
196
         Jobs.execJobsWait jobs logfn
197
  case jrs of
198
    Bad x -> return $ Bad x
199
    Ok x -> if null failures
200
              then execCancelWrapper anno master nl il cref jss
201
              else return . Bad . unlines $ [
202
                "Not all jobs completed successfully: " ++ show failures,
203
                "Aborting."]
204
      where
205
        failures = filter ((/= JOB_STATUS_SUCCESS) . snd) x
206

    
207
-- | Executes the jobs, if possible and desired.
208
maybeExecJobs :: Options
209
              -> [a]
210
              -> Node.List
211
              -> Instance.List
212
              -> [JobSet]
213
              -> IO (Result ())
214
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
215
  if optExecJobs opts && not (null ord_plc)
216
    then (case optLuxi opts of
217
            Nothing ->
218
              return $ Bad "Execution of commands possible only on LUXI"
219
            Just master ->
220
              let annotator = maybe id setOpPriority (optPriority opts) .
221
                              annotateOpCode
222
              in execWithCancel annotator master fin_nl il cmd_jobs)
223
    else return $ Ok ()
224

    
225
-- | Signal handler for graceful termination.
226
handleSigInt :: IORef Int -> IO ()
227
handleSigInt cref = do
228
  writeIORef cref 1
229
  putStrLn ("Cancel request registered, will exit at" ++
230
            " the end of the current job set...")
231

    
232
-- | Signal handler for immediate termination.
233
handleSigTerm :: IORef Int -> IO ()
234
handleSigTerm cref = do
235
  -- update the cref to 2, just for consistency
236
  writeIORef cref 2
237
  putStrLn "Double cancel request, exiting now..."
238
  exitImmediately $ ExitFailure 2
239

    
240
-- | Prepares to run a set of jobsets with handling of signals and early
241
-- termination.
242
execWithCancel :: Annotator -> String -> Node.List -> Instance.List -> [JobSet]
243
               -> IO (Result ())
244
execWithCancel anno master fin_nl il cmd_jobs = do
245
  cref <- newIORef 0
246
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
247
    [(handleSigTerm, softwareTermination), (handleSigInt, keyboardSignal)]
248
  execCancelWrapper anno master fin_nl il cref cmd_jobs
249

    
250
-- | Select the target node group.
251
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
252
            -> IO (String, (Node.List, Instance.List))
253
selectGroup opts gl nlf ilf = do
254
  let ngroups = Cluster.splitCluster nlf ilf
255
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
256
    hPutStrLn stderr "Found multiple node groups:"
257
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
258
           flip Container.find gl . fst) ngroups
259
    exitErr "Aborting."
260

    
261
  case optGroup opts of
262
    Nothing -> do
263
      (gidx, cdata) <- exitIfEmpty "No groups found by splitCluster?!" ngroups
264
      let grp = Container.find gidx gl
265
      return (Group.name grp, cdata)
266
    Just g -> case Container.findByName gl g of
267
      Nothing -> do
268
        hPutStrLn stderr $ "Node group " ++ g ++
269
          " not found. Node group list is:"
270
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
271
        exitErr "Aborting."
272
      Just grp ->
273
          case lookup (Group.idx grp) ngroups of
274
            Nothing ->
275
              -- This will only happen if there are no nodes assigned
276
              -- to this group
277
              return (Group.name grp, (Container.empty, Container.empty))
278
            Just cdata -> return (Group.name grp, cdata)
279

    
280
-- | Do a few checks on the cluster data.
281
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
282
checkCluster verbose nl il = do
283
  -- nothing to do on an empty cluster
284
  when (Container.null il) $ do
285
         printf "Cluster is empty, exiting.\n"::IO ()
286
         exitSuccess
287

    
288
  -- hbal doesn't currently handle split clusters
289
  let split_insts = Cluster.findSplitInstances nl il
290
  unless (null split_insts || verbose <= 1) $ do
291
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
292
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
293
    hPutStrLn stderr "These instances will not be moved."
294

    
295
  printf "Loaded %d nodes, %d instances\n"
296
             (Container.size nl)
297
             (Container.size il)::IO ()
298

    
299
  let csf = commonSuffix nl il
300
  when (not (null csf) && verbose > 1) $
301
       printf "Note: Stripping common suffix of '%s' from names\n" csf
302

    
303
-- | Do a few checks on the selected group data.
304
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
305
checkGroup verbose gname nl il = do
306
  printf "Group size %d nodes, %d instances\n"
307
             (Container.size nl)
308
             (Container.size il)::IO ()
309

    
310
  putStrLn $ "Selected node group: " ++ gname
311

    
312
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
313
  unless (verbose < 1) $ printf
314
             "Initial check done: %d bad nodes, %d bad instances.\n"
315
             (length bad_nodes) (length bad_instances)
316

    
317
  unless (null bad_nodes) $
318
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
319
                  \that the cluster will end N+1 happy."
320

    
321
-- | Check that we actually need to rebalance.
322
checkNeedRebalance :: Options -> Score -> IO ()
323
checkNeedRebalance opts ini_cv = do
324
  let min_cv = optMinScore opts
325
  when (ini_cv < min_cv) $ do
326
         printf "Cluster is already well balanced (initial score %.6g,\n\
327
                \minimum score %.6g).\nNothing to do, exiting\n"
328
                ini_cv min_cv:: IO ()
329
         exitSuccess
330

    
331
-- | Main function.
332
main :: Options -> [String] -> IO ()
333
main opts args = do
334
  unless (null args) $ exitErr "This program doesn't take any arguments."
335

    
336
  let verbose = optVerbose opts
337
      shownodes = optShowNodes opts
338
      showinsts = optShowInsts opts
339

    
340
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
341

    
342
  when (verbose > 1) $ do
343
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
344
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
345

    
346
  nlf <- setNodeStatus opts fixed_nl
347
  checkCluster verbose nlf ilf
348

    
349
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
350

    
351
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
352

    
353
  checkGroup verbose gname nl il
354

    
355
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
356

    
357
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
358

    
359
  let ini_cv = Cluster.compCV nl
360
      ini_tbl = Cluster.Table nl il ini_cv []
361
      min_cv = optMinScore opts
362

    
363
  checkNeedRebalance opts ini_cv
364

    
365
  if verbose > 2
366
    then printf "Initial coefficients: overall %.8f\n%s"
367
           ini_cv (Cluster.printStats "  " nl)::IO ()
368
    else printf "Initial score: %.8f\n" ini_cv
369

    
370
  putStrLn "Trying to minimize the CV..."
371
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
372
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
373

    
374
  (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
375
                         (optDiskMoves opts)
376
                         (optInstMoves opts)
377
                         nmlen imlen [] min_cv
378
                         (optMinGainLim opts) (optMinGain opts)
379
                         (optEvacMode opts)
380
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
381
      ord_plc = reverse fin_plc
382
      sol_msg = case () of
383
                  _ | null fin_plc -> printf "No solution found\n"
384
                    | verbose > 2 ->
385
                        printf "Final coefficients:   overall %.8f\n%s"
386
                        fin_cv (Cluster.printStats "  " fin_nl)
387
                    | otherwise ->
388
                        printf "Cluster score improved from %.8f to %.8f\n"
389
                        ini_cv fin_cv ::String
390

    
391
  putStr sol_msg
392

    
393
  unless (verbose < 1) $
394
         printf "Solution length=%d\n" (length ord_plc)
395

    
396
  let cmd_jobs = Cluster.splitJobs cmd_strs
397

    
398
  maybeSaveCommands "Commands to run to reach the above solution:" opts
399
    $ Cluster.formatCmds cmd_jobs
400

    
401
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
402
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
403

    
404
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
405

    
406
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
407

    
408
  when (verbose > 3) $ printStats nl fin_nl
409

    
410
  exitIfBad "hbal" =<< maybeExecJobs opts ord_plc fin_nl il cmd_jobs