Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Hbal.hs @ 0e2a45dd

History | View | Annotate | Download (15 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal
27
  ( main
28
  , options
29
  , arguments
30
  , iterateDepth
31
  ) where
32

    
33
import Control.Exception (bracket)
34
import Control.Monad
35
import Data.List
36
import Data.Maybe (isJust, isNothing, fromJust)
37
import Data.IORef
38
import System.Exit
39
import System.IO
40
import System.Posix.Process
41
import System.Posix.Signals
42

    
43
import Text.Printf (printf)
44

    
45
import qualified Ganeti.HTools.Container as Container
46
import qualified Ganeti.HTools.Cluster as Cluster
47
import qualified Ganeti.HTools.Group as Group
48
import qualified Ganeti.HTools.Node as Node
49
import qualified Ganeti.HTools.Instance as Instance
50

    
51
import Ganeti.BasicTypes
52
import Ganeti.Common
53
import Ganeti.HTools.CLI
54
import Ganeti.HTools.ExtLoader
55
import Ganeti.HTools.Types
56
import Ganeti.HTools.Loader
57
import Ganeti.OpCodes (wrapOpCode, setOpComment, setOpPriority,
58
                       OpCode, MetaOpCode)
59
import Ganeti.Jobs as Jobs
60
import Ganeti.Types
61
import Ganeti.Utils
62

    
63
import qualified Ganeti.Luxi as L
64
import Ganeti.Version (version)
65

    
66
-- | Options list and functions.
67
options :: IO [OptType]
68
options = do
69
  luxi <- oLuxiSocket
70
  return
71
    [ oPrintNodes
72
    , oPrintInsts
73
    , oPrintCommands
74
    , oDataFile
75
    , oEvacMode
76
    , oRapiMaster
77
    , luxi
78
    , oIAllocSrc
79
    , oExecJobs
80
    , oGroup
81
    , oMaxSolLength
82
    , oVerbose
83
    , oQuiet
84
    , oOfflineNode
85
    , oMinScore
86
    , oMaxCpu
87
    , oMinDisk
88
    , oMinGain
89
    , oMinGainLim
90
    , oDiskMoves
91
    , oSelInst
92
    , oInstMoves
93
    , oDynuFile
94
    , oExTags
95
    , oExInst
96
    , oSaveCluster
97
    , oPriority
98
    ]
99

    
100
-- | The list of arguments supported by the program.
101
arguments :: [ArgCompletion]
102
arguments = []
103

    
104
-- | A simple type alias for clearer signature.
105
type Annotator = OpCode -> MetaOpCode
106

    
107
-- | Wraps an 'OpCode' in a 'MetaOpCode' while also adding a comment
108
-- about what generated the opcode.
109
annotateOpCode :: Annotator
110
annotateOpCode =
111
  setOpComment ("rebalancing via hbal " ++ version) . wrapOpCode
112

    
113
{- | Start computing the solution at the given depth and recurse until
114
we find a valid solution or we exceed the maximum depth.
115

    
116
-}
117
iterateDepth :: Bool             -- ^ Whether to print moves
118
             -> Cluster.Table    -- ^ The starting table
119
             -> Int              -- ^ Remaining length
120
             -> Bool             -- ^ Allow disk moves
121
             -> Bool             -- ^ Allow instance moves
122
             -> Int              -- ^ Max node name len
123
             -> Int              -- ^ Max instance name len
124
             -> [MoveJob]        -- ^ Current command list
125
             -> Score            -- ^ Score at which to stop
126
             -> Score            -- ^ Min gain limit
127
             -> Score            -- ^ Min score gain
128
             -> Bool             -- ^ Enable evacuation mode
129
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
130
                                              -- and commands
131
iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
132
             cmd_strs min_score mg_limit min_gain evac_mode =
133
  let Cluster.Table ini_nl ini_il _ _ = ini_tbl
134
      allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
135
      m_fin_tbl = if allowed_next
136
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
137
                         evac_mode mg_limit min_gain
138
                    else Nothing
139
  in case m_fin_tbl of
140
       Just fin_tbl ->
141
         do
142
           let (Cluster.Table _ _ _ fin_plc) = fin_tbl
143
           cur_plc@(idx, _, _, move, _) <-
144
             exitIfEmpty "Empty placement list returned for solution?!" fin_plc
145
           let fin_plc_len = length fin_plc
146
               (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
147
                                  nmlen imlen cur_plc fin_plc_len
148
               afn = Cluster.involvedNodes ini_il cur_plc
149
               upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
150
           when printmove $ do
151
               putStrLn sol_line
152
               hFlush stdout
153
           iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
154
                        nmlen imlen upd_cmd_strs min_score
155
                        mg_limit min_gain evac_mode
156
       Nothing -> return (ini_tbl, cmd_strs)
157

    
158
-- | Displays the cluster stats.
159
printStats :: Node.List -> Node.List -> IO ()
160
printStats ini_nl fin_nl = do
161
  let ini_cs = Cluster.totalResources ini_nl
162
      fin_cs = Cluster.totalResources fin_nl
163
  printf "Original: mem=%d disk=%d\n"
164
             (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
165
  printf "Final:    mem=%d disk=%d\n"
166
             (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
167

    
168
-- | Saves the rebalance commands to a text file.
169
saveBalanceCommands :: Options -> String -> IO ()
170
saveBalanceCommands opts cmd_data = do
171
  let out_path = fromJust $ optShowCmds opts
172
  putStrLn ""
173
  if out_path == "-"
174
    then printf "Commands to run to reach the above solution:\n%s"
175
           (unlines . map ("  " ++) .
176
            filter (/= "  check") .
177
            lines $ cmd_data)
178
    else do
179
      writeFile out_path (shTemplate ++ cmd_data)
180
      printf "The commands have been written to file '%s'\n" out_path
181

    
182
-- | Wrapper over execJobSet checking for early termination via an IORef.
183
execCancelWrapper :: Annotator -> String -> Node.List
184
                  -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
185
execCancelWrapper _    _      _  _  _    [] = return $ Ok ()
186
execCancelWrapper anno master nl il cref alljss = do
187
  cancel <- readIORef cref
188
  if cancel > 0
189
    then do
190
      putStrLn $ "Exiting early due to user request, " ++
191
               show (length alljss) ++ " jobset(s) remaining."
192
      return $ Ok ()
193
    else execJobSet anno master nl il cref alljss
194

    
195
-- | Execute an entire jobset.
196
execJobSet :: Annotator -> String -> Node.List
197
           -> Instance.List -> IORef Int -> [JobSet] -> IO (Result ())
198
execJobSet _    _      _  _  _    [] = return $ Ok ()
199
execJobSet anno master nl il cref (js:jss) = do
200
  -- map from jobset (htools list of positions) to [[opcodes]]
201
  let jobs = map (\(_, idx, move, _) ->
202
                    map anno $ Cluster.iMoveToJob nl il idx move) js
203
      descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
204
      logfn =
205
        putStrLn . ("Got job IDs " ++) . commaJoin . map (show . fromJobId)
206
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
207
  jrs <- bracket (L.getClient master) L.closeClient $
208
         Jobs.execJobsWait jobs logfn
209
  case jrs of
210
    Bad x -> return $ Bad x
211
    Ok x -> if null failures
212
              then execCancelWrapper anno master nl il cref jss
213
              else return . Bad . unlines $ [
214
                "Not all jobs completed successfully: " ++ show failures,
215
                "Aborting."]
216
      where
217
        failures = filter ((/= JOB_STATUS_SUCCESS) . snd) x
218

    
219
-- | Executes the jobs, if possible and desired.
220
maybeExecJobs :: Options
221
              -> [a]
222
              -> Node.List
223
              -> Instance.List
224
              -> [JobSet]
225
              -> IO (Result ())
226
maybeExecJobs opts ord_plc fin_nl il cmd_jobs =
227
  if optExecJobs opts && not (null ord_plc)
228
    then (case optLuxi opts of
229
            Nothing ->
230
              return $ Bad "Execution of commands possible only on LUXI"
231
            Just master ->
232
              let annotator = maybe id setOpPriority (optPriority opts) .
233
                              annotateOpCode
234
              in execWithCancel annotator master fin_nl il cmd_jobs)
235
    else return $ Ok ()
236

    
237
-- | Signal handler for graceful termination.
238
handleSigInt :: IORef Int -> IO ()
239
handleSigInt cref = do
240
  writeIORef cref 1
241
  putStrLn ("Cancel request registered, will exit at" ++
242
            " the end of the current job set...")
243

    
244
-- | Signal handler for immediate termination.
245
handleSigTerm :: IORef Int -> IO ()
246
handleSigTerm cref = do
247
  -- update the cref to 2, just for consistency
248
  writeIORef cref 2
249
  putStrLn "Double cancel request, exiting now..."
250
  exitImmediately $ ExitFailure 2
251

    
252
-- | Prepares to run a set of jobsets with handling of signals and early
253
-- termination.
254
execWithCancel :: Annotator -> String -> Node.List -> Instance.List -> [JobSet]
255
               -> IO (Result ())
256
execWithCancel anno master fin_nl il cmd_jobs = do
257
  cref <- newIORef 0
258
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
259
    [(handleSigTerm, softwareTermination), (handleSigInt, keyboardSignal)]
260
  execCancelWrapper anno master fin_nl il cref cmd_jobs
261

    
262
-- | Select the target node group.
263
selectGroup :: Options -> Group.List -> Node.List -> Instance.List
264
            -> IO (String, (Node.List, Instance.List))
265
selectGroup opts gl nlf ilf = do
266
  let ngroups = Cluster.splitCluster nlf ilf
267
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
268
    hPutStrLn stderr "Found multiple node groups:"
269
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
270
           flip Container.find gl . fst) ngroups
271
    exitErr "Aborting."
272

    
273
  case optGroup opts of
274
    Nothing -> do
275
      (gidx, cdata) <- exitIfEmpty "No groups found by splitCluster?!" ngroups
276
      let grp = Container.find gidx gl
277
      return (Group.name grp, cdata)
278
    Just g -> case Container.findByName gl g of
279
      Nothing -> do
280
        hPutStrLn stderr $ "Node group " ++ g ++
281
          " not found. Node group list is:"
282
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
283
        exitErr "Aborting."
284
      Just grp ->
285
          case lookup (Group.idx grp) ngroups of
286
            Nothing ->
287
              -- This will only happen if there are no nodes assigned
288
              -- to this group
289
              return (Group.name grp, (Container.empty, Container.empty))
290
            Just cdata -> return (Group.name grp, cdata)
291

    
292
-- | Do a few checks on the cluster data.
293
checkCluster :: Int -> Node.List -> Instance.List -> IO ()
294
checkCluster verbose nl il = do
295
  -- nothing to do on an empty cluster
296
  when (Container.null il) $ do
297
         printf "Cluster is empty, exiting.\n"::IO ()
298
         exitSuccess
299

    
300
  -- hbal doesn't currently handle split clusters
301
  let split_insts = Cluster.findSplitInstances nl il
302
  unless (null split_insts || verbose <= 1) $ do
303
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
304
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
305
    hPutStrLn stderr "These instances will not be moved."
306

    
307
  printf "Loaded %d nodes, %d instances\n"
308
             (Container.size nl)
309
             (Container.size il)::IO ()
310

    
311
  let csf = commonSuffix nl il
312
  when (not (null csf) && verbose > 1) $
313
       printf "Note: Stripping common suffix of '%s' from names\n" csf
314

    
315
-- | Do a few checks on the selected group data.
316
checkGroup :: Int -> String -> Node.List -> Instance.List -> IO ()
317
checkGroup verbose gname nl il = do
318
  printf "Group size %d nodes, %d instances\n"
319
             (Container.size nl)
320
             (Container.size il)::IO ()
321

    
322
  putStrLn $ "Selected node group: " ++ gname
323

    
324
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
325
  unless (verbose < 1) $ printf
326
             "Initial check done: %d bad nodes, %d bad instances.\n"
327
             (length bad_nodes) (length bad_instances)
328

    
329
  unless (null bad_nodes) $
330
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
331
                  \that the cluster will end N+1 happy."
332

    
333
-- | Check that we actually need to rebalance.
334
checkNeedRebalance :: Options -> Score -> IO ()
335
checkNeedRebalance opts ini_cv = do
336
  let min_cv = optMinScore opts
337
  when (ini_cv < min_cv) $ do
338
         printf "Cluster is already well balanced (initial score %.6g,\n\
339
                \minimum score %.6g).\nNothing to do, exiting\n"
340
                ini_cv min_cv:: IO ()
341
         exitSuccess
342

    
343
-- | Main function.
344
main :: Options -> [String] -> IO ()
345
main opts args = do
346
  unless (null args) $ exitErr "This program doesn't take any arguments."
347

    
348
  let verbose = optVerbose opts
349
      shownodes = optShowNodes opts
350
      showinsts = optShowInsts opts
351

    
352
  ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts
353

    
354
  when (verbose > 1) $ do
355
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
356
       putStrLn $ "Loaded cluster ipolicy: " ++ show ipol
357

    
358
  nlf <- setNodeStatus opts fixed_nl
359
  checkCluster verbose nlf ilf
360

    
361
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
362

    
363
  (gname, (nl, il)) <- selectGroup opts gl nlf ilf
364

    
365
  checkGroup verbose gname nl il
366

    
367
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
368

    
369
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
370

    
371
  let ini_cv = Cluster.compCV nl
372
      ini_tbl = Cluster.Table nl il ini_cv []
373
      min_cv = optMinScore opts
374

    
375
  checkNeedRebalance opts ini_cv
376

    
377
  if verbose > 2
378
    then printf "Initial coefficients: overall %.8f\n%s"
379
           ini_cv (Cluster.printStats "  " nl)::IO ()
380
    else printf "Initial score: %.8f\n" ini_cv
381

    
382
  putStrLn "Trying to minimize the CV..."
383
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
384
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
385

    
386
  (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
387
                         (optDiskMoves opts)
388
                         (optInstMoves opts)
389
                         nmlen imlen [] min_cv
390
                         (optMinGainLim opts) (optMinGain opts)
391
                         (optEvacMode opts)
392
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
393
      ord_plc = reverse fin_plc
394
      sol_msg = case () of
395
                  _ | null fin_plc -> printf "No solution found\n"
396
                    | verbose > 2 ->
397
                        printf "Final coefficients:   overall %.8f\n%s"
398
                        fin_cv (Cluster.printStats "  " fin_nl)
399
                    | otherwise ->
400
                        printf "Cluster score improved from %.8f to %.8f\n"
401
                        ini_cv fin_cv ::String
402

    
403
  putStr sol_msg
404

    
405
  unless (verbose < 1) $
406
         printf "Solution length=%d\n" (length ord_plc)
407

    
408
  let cmd_jobs = Cluster.splitJobs cmd_strs
409

    
410
  when (isJust $ optShowCmds opts) .
411
       saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
412

    
413
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
414
                ini_cdata { cdNodes = fin_nl, cdInstances = fin_il }
415

    
416
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
417

    
418
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
419

    
420
  when (verbose > 3) $ printStats nl fin_nl
421

    
422
  exitIfBad "hbal" =<< maybeExecJobs opts ord_plc fin_nl il cmd_jobs