Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Hbal.hs @ 3add7574

History | View | Annotate | Download (14.5 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal
27
  ( main
28
  , options
29
  , arguments
30
  , iterateDepth
31
  ) where
32

    
33
import Control.Exception (bracket)
34
import Control.Monad
35
import Data.List
36
import Data.Maybe (isJust, isNothing, fromJust)
37
import Data.IORef
38
import System.Exit
39
import System.IO
40
import System.Posix.Process
41
import System.Posix.Signals
42

    
43
import Text.Printf (printf)
44

    
45
import qualified Ganeti.HTools.Container as Container
46
import qualified Ganeti.HTools.Cluster as Cluster
47
import qualified Ganeti.HTools.Group as Group
48
import qualified Ganeti.HTools.Node as Node
49
import qualified Ganeti.HTools.Instance as Instance
50

    
51
import Ganeti.BasicTypes
52
import Ganeti.Common
53
import Ganeti.HTools.CLI
54
import Ganeti.HTools.ExtLoader
55
import Ganeti.HTools.Types
56
import Ganeti.HTools.Loader
57
import Ganeti.OpCodes (wrapOpCode, setOpComment, OpCode, MetaOpCode)
58
import Ganeti.Jobs as Jobs
59
import Ganeti.Types
60
import Ganeti.Utils
61

    
62
import qualified Ganeti.Luxi as L
63
import Ganeti.Version (version)
64

    
65
-- | Options list and functions.
66
options :: IO [OptType]
67
options = do
68
  luxi <- oLuxiSocket
69
  return
70
    [ oPrintNodes
71
    , oPrintInsts
72
    , oPrintCommands
73
    , oDataFile
74
    , oEvacMode
75
    , oRapiMaster
76
    , luxi
77
    , oIAllocSrc
78
    , oExecJobs
79
    , oGroup
80
    , oMaxSolLength
81
    , oVerbose
82
    , oQuiet
83
    , oOfflineNode
84
    , oMinScore
85
    , oMaxCpu
86
    , oMinDisk
87
    , oMinGain
88
    , oMinGainLim
89
    , oDiskMoves
90
    , oSelInst
91
    , oInstMoves
92
    , oDynuFile
93
    , oExTags
94
    , oExInst
95
    , oSaveCluster
96
    ]
97

    
98
-- | The list of arguments supported by the program.
99
arguments :: [ArgCompletion]
100
arguments = []
101

    
102
-- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment
103
-- about what generated the opcode.
104
annotateOpCode :: OpCode -> MetaOpCode
105
annotateOpCode =
106
  setOpComment ("rebalancing via hbal " ++ version) . wrapOpCode
107

    
108
{- | Start computing the solution at the given depth and recurse until
109
we find a valid solution or we exceed the maximum depth.
110

    
111
-}
112
iterateDepth :: Bool             -- ^ Whether to print moves
113
             -> Cluster.Table    -- ^ The starting table
114
             -> Int              -- ^ Remaining length
115
             -> Bool             -- ^ Allow disk moves
116
             -> Bool             -- ^ Allow instance moves
117
             -> Int              -- ^ Max node name len
118
             -> Int              -- ^ Max instance name len
119
             -> [MoveJob]        -- ^ Current command list
120
             -> Score            -- ^ Score at which to stop
121
             -> Score            -- ^ Min gain limit
122
             -> Score            -- ^ Min score gain
123
             -> Bool             -- ^ Enable evacuation mode
124
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
125
                                              -- and commands
126
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
127
             cmd_strs min_score mg_limit min_gain evac_mode =
128
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
129
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
130
      m_fin_tbl = if allowed_next
131
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
132
                         evac_mode mg_limit min_gain
133
                    else Nothing
134
  in case m_fin_tbl of
135
       Just fin_tbl ->
136
         do
137
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
138
               fin_plc_len = length fin_plc
139
               cur_plc@(idx, _, _, move, _) = head fin_plc
140
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
141
                                  nmlen imlen cur_plc fin_plc_len
142
               afn = Cluster.involvedNodes ini_il cur_plc
143
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
144
           when printmove $ do
145
               putStrLn sol_line
146
               hFlush stdout
147
           iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
148
                        nmlen imlen upd_cmd_strs min_score
149
                        mg_limit min_gain evac_mode
150
       Nothing -> return (ini_tbl, cmd_strs)
151

    
152
-- | Displays the cluster stats.
153
printStats :: Node.List -> Node.List -> IO ()
154
printStats ini_nl fin_nl = do
155
  let ini_cs = Cluster.totalResources ini_nl
156
      fin_cs = Cluster.totalResources fin_nl
157
  printf "Original: mem=%d disk=%d\n"
158
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
159
  printf "Final:    mem=%d disk=%d\n"
160
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
161

    
162
-- | Saves the rebalance commands to a text file.
163
saveBalanceCommands :: Options -> String -> IO ()
164
saveBalanceCommands opts cmd_data = do
165
  let out_path = fromJust $ optShowCmds opts
166
  putStrLn ""
167
  if out_path == "-"
168
    then printf "Commands to run to reach the above solution:\n%s"
169
           (unlines . map ("  " ++) .
170
            filter (/= "  check") .
171
            lines $ cmd_data)
172
    else do
173
      writeFile out_path (shTemplate ++ cmd_data)
174
      printf "The commands have been written to file '%s'\n" out_path
175

    
176
-- | Wrapper over execJobSet checking for early termination via an IORef.
177
execCancelWrapper :: String -> Node.List
178
                  -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
179
execCancelWrapper _      _  _  _    [] = return $ Ok ()
180
execCancelWrapper master nl il cref alljss = do
181
  cancel <- readIORef cref
182
  if cancel > 0
183
    then return . Bad $ "Exiting early due to user request, " ++
184
                        show (length alljss) ++ " jobset(s) remaining."
185
    else execJobSet master nl il cref alljss
186

    
187
-- | Execute an entire jobset.
188
execJobSet :: String -> Node.List
189
           -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
190
execJobSet _      _  _  _    [] = return $ Ok ()
191
execJobSet master nl il cref (js:jss) = do
192
  -- map from jobset (htools list of positions) to [[opcodes]]
193
  let jobs = map (\(_, idx, move, _) ->
194
                      map annotateOpCode $
195
                      Cluster.iMoveToJob nl il idx move) js
196
      descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
197
      logfn = putStrLn . ("Got job IDs" ++) . commaJoin . map (show . fromJobId)
198
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
199
  jrs <- bracket (L.getClient master) L.closeClient $
200
         Jobs.execJobsWait jobs logfn
201
  case jrs of
202
    Bad x -> return $ Bad x
203
    Ok x -> if null failures
204
              then execCancelWrapper master nl il cref jss
205
              else return . Bad . unlines $ [
206
                "Not all jobs completed successfully: " ++ show failures,
207
                "Aborting."]
208
      where
209
        failures = filter ((/= JOB_STATUS_SUCCESS) . snd) x
210

    
211
-- | Executes the jobs, if possible and desired.
212
maybeExecJobs :: Options
213
              -> [a]
214
              -> Node.List
215
              -> Instance.List
216
              -> [JobSet]
217
              -> IO (Result ())
218
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
219
  if optExecJobs opts && not (null ord_plc)
220
    then (case optLuxi opts of
221
            Nothing -> return $
222
                       Bad "Execution of commands possible only on LUXI"
223
            Just master -> execWithCancel master fin_nl il cmd_jobs)
224
    else return $ Ok ()
225

    
226
-- | Signal handler for graceful termination.
227
handleSigInt :: IORef Int -> IO ()
228
handleSigInt cref = do
229
  writeIORef cref 1
230
  putStrLn ("Cancel request registered, will exit at" ++
231
            " the end of the current job set...")
232

    
233
-- | Signal handler for immediate termination.
234
handleSigTerm :: IORef Int -> IO ()
235
handleSigTerm cref = do
236
  -- update the cref to 2, just for consistency
237
  writeIORef cref 2
238
  putStrLn "Double cancel request, exiting now..."
239
  exitImmediately $ ExitFailure 2
240

    
241
-- | Prepares to run a set of jobsets with handling of signals and early
242
-- termination.
243
execWithCancel :: String -> Node.List -> Instance.List -> [JobSet]
244
               -> IO (Result ())
245
execWithCancel master fin_nl il cmd_jobs = do
246
  cref <- newIORef 0
247
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
248
    [(handleSigTerm, softwareTermination), (handleSigInt, keyboardSignal)]
249
  execCancelWrapper master fin_nl il cref cmd_jobs
250

    
251
-- | Select the target node group.
252
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
253
            -> IO (String, (Node.List, Instance.List))
254
selectGroup opts gl nlf ilf = do
255
  let ngroups = Cluster.splitCluster nlf ilf
256
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
257
    hPutStrLn stderr "Found multiple node groups:"
258
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
259
           flip Container.find gl . fst) ngroups
260
    exitErr "Aborting."
261

    
262
  case optGroup opts of
263
    Nothing -> do
264
      let (gidx, cdata) = head ngroups
265
          grp = Container.find gidx gl
266
      return (Group.name grp, cdata)
267
    Just g -> case Container.findByName gl g of
268
      Nothing -> do
269
        hPutStrLn stderr $ "Node group " ++ g ++
270
          " not found. Node group list is:"
271
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
272
        exitErr "Aborting."
273
      Just grp ->
274
          case lookup (Group.idx grp) ngroups of
275
            Nothing ->
276
              -- This will only happen if there are no nodes assigned
277
              -- to this group
278
              return (Group.name grp, (Container.empty, Container.empty))
279
            Just cdata -> return (Group.name grp, cdata)
280

    
281
-- | Do a few checks on the cluster data.
282
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
283
checkCluster verbose nl il = do
284
  -- nothing to do on an empty cluster
285
  when (Container.null il) $ do
286
         printf "Cluster is empty, exiting.\n"::IO ()
287
         exitSuccess
288

    
289
  -- hbal doesn't currently handle split clusters
290
  let split_insts = Cluster.findSplitInstances nl il
291
  unless (null split_insts || verbose <= 1) $ do
292
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
293
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
294
    hPutStrLn stderr "These instances will not be moved."
295

    
296
  printf "Loaded %d nodes, %d instances\n"
297
             (Container.size nl)
298
             (Container.size il)::IO ()
299

    
300
  let csf = commonSuffix nl il
301
  when (not (null csf) && verbose > 1) $
302
       printf "Note: Stripping common suffix of '%s' from names\n" csf
303

    
304
-- | Do a few checks on the selected group data.
305
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
306
checkGroup verbose gname nl il = do
307
  printf "Group size %d nodes, %d instances\n"
308
             (Container.size nl)
309
             (Container.size il)::IO ()
310

    
311
  putStrLn $ "Selected node group: " ++ gname
312

    
313
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
314
  unless (verbose == 0) $ printf
315
             "Initial check done: %d bad nodes, %d bad instances.\n"
316
             (length bad_nodes) (length bad_instances)
317

    
318
  unless (null bad_nodes) $
319
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
320
                  \that the cluster will end N+1 happy."
321

    
322
-- | Check that we actually need to rebalance.
323
checkNeedRebalance :: Options -> Score -> IO ()
324
checkNeedRebalance opts ini_cv = do
325
  let min_cv = optMinScore opts
326
  when (ini_cv < min_cv) $ do
327
         printf "Cluster is already well balanced (initial score %.6g,\n\
328
                \minimum score %.6g).\nNothing to do, exiting\n"
329
                ini_cv min_cv:: IO ()
330
         exitSuccess
331

    
332
-- | Main function.
333
main :: Options -> [String] -> IO ()
334
main opts args = do
335
  unless (null args) $ exitErr "This program doesn't take any arguments."
336

    
337
  let verbose = optVerbose opts
338
      shownodes = optShowNodes opts
339
      showinsts = optShowInsts opts
340

    
341
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
342

    
343
  when (verbose > 1) $ do
344
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
345
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
346

    
347
  nlf <- setNodeStatus opts fixed_nl
348
  checkCluster verbose nlf ilf
349

    
350
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
351

    
352
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
353

    
354
  checkGroup verbose gname nl il
355

    
356
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
357

    
358
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
359

    
360
  let ini_cv = Cluster.compCV nl
361
      ini_tbl = Cluster.Table nl il ini_cv []
362
      min_cv = optMinScore opts
363

    
364
  checkNeedRebalance opts ini_cv
365

    
366
  if verbose > 2
367
    then printf "Initial coefficients: overall %.8f\n%s"
368
           ini_cv (Cluster.printStats "  " nl)::IO ()
369
    else printf "Initial score: %.8f\n" ini_cv
370

    
371
  putStrLn "Trying to minimize the CV..."
372
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
373
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
374

    
375
  (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
376
                         (optDiskMoves opts)
377
                         (optInstMoves opts)
378
                         nmlen imlen [] min_cv
379
                         (optMinGainLim opts) (optMinGain opts)
380
                         (optEvacMode opts)
381
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
382
      ord_plc = reverse fin_plc
383
      sol_msg = case () of
384
                  _ | null fin_plc -> printf "No solution found\n"
385
                    | verbose > 2 ->
386
                        printf "Final coefficients:   overall %.8f\n%s"
387
                        fin_cv (Cluster.printStats "  " fin_nl)
388
                    | otherwise ->
389
                        printf "Cluster score improved from %.8f to %.8f\n"
390
                        ini_cv fin_cv ::String
391

    
392
  putStr sol_msg
393

    
394
  unless (verbose == 0) $
395
         printf "Solution length=%d\n" (length ord_plc)
396

    
397
  let cmd_jobs = Cluster.splitJobs cmd_strs
398

    
399
  when (isJust $ optShowCmds opts) .
400
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
401

    
402
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
403
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
404

    
405
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
406

    
407
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
408

    
409
  when (verbose > 3) $ printStats nl fin_nl
410

    
411
  exitIfBad "hbal" =<< maybeExecJobs opts ord_plc fin_nl il cmd_jobs