Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ 4938fa30

History | View | Annotate | Download (15.4 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal (main) where
27

    
28
import Control.Concurrent (threadDelay)
29
import Control.Exception (bracket)
30
import Control.Monad
31
import Data.List
32
import Data.Maybe (isJust, isNothing, fromJust)
33
import Data.IORef
34
import System (exitWith, ExitCode(..))
35
import System.IO
36
import System.Posix.Process
37
import System.Posix.Signals
38
import qualified System
39

    
40
import Text.Printf (printf, hPrintf)
41

    
42
import qualified Ganeti.HTools.Container as Container
43
import qualified Ganeti.HTools.Cluster as Cluster
44
import qualified Ganeti.HTools.Group as Group
45
import qualified Ganeti.HTools.Node as Node
46
import qualified Ganeti.HTools.Instance as Instance
47

    
48
import Ganeti.HTools.CLI
49
import Ganeti.HTools.ExtLoader
50
import Ganeti.HTools.Utils
51
import Ganeti.HTools.Types
52
import Ganeti.HTools.Loader
53

    
54
import qualified Ganeti.Luxi as L
55
import Ganeti.Jobs
56

    
57
-- | Options list and functions
58
options :: [OptType]
59
options =
60
    [ oPrintNodes
61
    , oPrintInsts
62
    , oPrintCommands
63
    , oOneline
64
    , oDataFile
65
    , oEvacMode
66
    , oRapiMaster
67
    , oLuxiSocket
68
    , oExecJobs
69
    , oGroup
70
    , oMaxSolLength
71
    , oVerbose
72
    , oQuiet
73
    , oOfflineNode
74
    , oMinScore
75
    , oMaxCpu
76
    , oMinDisk
77
    , oMinGain
78
    , oMinGainLim
79
    , oDiskMoves
80
    , oSelInst
81
    , oInstMoves
82
    , oDynuFile
83
    , oExTags
84
    , oExInst
85
    , oSaveCluster
86
    , oShowVer
87
    , oShowHelp
88
    ]
89

    
90
{- | Start computing the solution at the given depth and recurse until
91
we find a valid solution or we exceed the maximum depth.
92

    
93
-}
94
iterateDepth :: Cluster.Table    -- ^ The starting table
95
             -> Int              -- ^ Remaining length
96
             -> Bool             -- ^ Allow disk moves
97
             -> Bool             -- ^ Allow instance moves
98
             -> Int              -- ^ Max node name len
99
             -> Int              -- ^ Max instance name len
100
             -> [MoveJob]        -- ^ Current command list
101
             -> Bool             -- ^ Whether to be silent
102
             -> Score            -- ^ Score at which to stop
103
             -> Score            -- ^ Min gain limit
104
             -> Score            -- ^ Min score gain
105
             -> Bool             -- ^ Enable evacuation mode
106
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
107
                                              -- and commands
108
iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
109
             cmd_strs oneline min_score mg_limit min_gain evac_mode =
110
    let Cluster.Table ini_nl ini_il _ _ = ini_tbl
111
        allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
112
        m_fin_tbl = if allowed_next
113
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
114
                         evac_mode mg_limit min_gain
115
                    else Nothing
116
    in
117
      case m_fin_tbl of
118
        Just fin_tbl ->
119
            do
120
              let
121
                  (Cluster.Table _ _ _ fin_plc) = fin_tbl
122
                  fin_plc_len = length fin_plc
123
                  cur_plc@(idx, _, _, move, _) = head fin_plc
124
                  (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
125
                                     nmlen imlen cur_plc fin_plc_len
126
                  afn = Cluster.involvedNodes ini_il cur_plc
127
                  upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
128
              unless oneline $ do
129
                       putStrLn sol_line
130
                       hFlush stdout
131
              iterateDepth fin_tbl max_rounds disk_moves inst_moves
132
                           nmlen imlen upd_cmd_strs oneline min_score
133
                           mg_limit min_gain evac_mode
134
        Nothing -> return (ini_tbl, cmd_strs)
135

    
136
-- | Formats the solution for the oneline display
137
formatOneline :: Double -> Int -> Double -> String
138
formatOneline ini_cv plc_len fin_cv =
139
    printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
140
               (if fin_cv == 0 then 1 else ini_cv / fin_cv)
141

    
142
-- | Polls a set of jobs at a fixed interval until all are finished
143
-- one way or another
144
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
145
waitForJobs client jids = do
146
  sts <- L.queryJobsStatus client jids
147
  case sts of
148
    Bad x -> return $ Bad x
149
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
150
            then do
151
              -- TODO: replace hardcoded value with a better thing
152
              threadDelay (1000000 * 15)
153
              waitForJobs client jids
154
            else return $ Ok s
155

    
156
-- | Check that a set of job statuses is all success
157
checkJobsStatus :: [JobStatus] -> Bool
158
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
159

    
160
-- | Wrapper over execJobSet checking for early termination
161
execWrapper :: String -> Node.List
162
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
163
execWrapper _      _  _  _    [] = return True
164
execWrapper master nl il cref alljss = do
165
  cancel <- readIORef cref
166
  (if cancel > 0
167
   then do
168
     hPrintf stderr "Exiting early due to user request, %d\
169
                    \ jobset(s) remaining." (length alljss)::IO ()
170
     return False
171
   else execJobSet master nl il cref alljss)
172

    
173
-- | Execute an entire jobset
174
execJobSet :: String -> Node.List
175
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
176
execJobSet _      _  _  _    [] = return True
177
execJobSet master nl il cref (js:jss) = do
178
  -- map from jobset (htools list of positions) to [[opcodes]]
179
  let jobs = map (\(_, idx, move, _) ->
180
                      Cluster.iMoveToJob nl il idx move) js
181
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
182
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
183
  jrs <- bracket (L.getClient master) L.closeClient
184
         (\client -> do
185
            jids <- L.submitManyJobs client jobs
186
            case jids of
187
              Bad x -> return $ Bad x
188
              Ok x -> do
189
                putStrLn $ "Got job IDs " ++ commaJoin x
190
                waitForJobs client x
191
         )
192
  (case jrs of
193
     Bad x -> do
194
       hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
195
       return False
196
     Ok x -> if checkJobsStatus x
197
             then execWrapper master nl il cref jss
198
             else do
199
               hPutStrLn stderr $ "Not all jobs completed successfully: " ++
200
                         show x
201
               hPutStrLn stderr "Aborting."
202
               return False)
203

    
204
-- | Signal handler for graceful termination
205
hangleSigInt :: IORef Int -> IO ()
206
hangleSigInt cref = do
207
  writeIORef cref 1
208
  putStrLn ("Cancel request registered, will exit at" ++
209
            " the end of the current job set...")
210

    
211
-- | Signal handler for immediate termination
212
hangleSigTerm :: IORef Int -> IO ()
213
hangleSigTerm cref = do
214
  -- update the cref to 2, just for consistency
215
  writeIORef cref 2
216
  putStrLn "Double cancel request, exiting now..."
217
  exitImmediately $ ExitFailure 2
218

    
219
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
220
runJobSet master fin_nl il cmd_jobs = do
221
  cref <- newIORef 0
222
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
223
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
224
  execWrapper master fin_nl il cref cmd_jobs
225

    
226
-- | Main function.
227
main :: IO ()
228
main = do
229
  cmd_args <- System.getArgs
230
  (opts, args) <- parseOpts cmd_args "hbal" options
231

    
232
  unless (null args) $ do
233
         hPutStrLn stderr "Error: this program doesn't take any arguments."
234
         exitWith $ ExitFailure 1
235

    
236
  let oneline = optOneline opts
237
      verbose = optVerbose opts
238
      shownodes = optShowNodes opts
239
      showinsts = optShowInsts opts
240

    
241
  ini_cdata@(ClusterData gl fixed_nl ilf ctags) <- loadExternalData opts
242

    
243
  let offline_passed = optOffline opts
244
      all_nodes = Container.elems fixed_nl
245
      offline_lkp = map (lookupName (map Node.name all_nodes)) offline_passed
246
      offline_wrong = filter (not . goodLookupResult) offline_lkp
247
      offline_names = map lrContent offline_lkp
248
      offline_indices = map Node.idx $
249
                        filter (\n -> Node.name n `elem` offline_names)
250
                               all_nodes
251
      m_cpu = optMcpu opts
252
      m_dsk = optMdsk opts
253
      csf = commonSuffix fixed_nl ilf
254

    
255
  when (not (null offline_wrong)) $ do
256
         hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
257
                     (commaJoin (map lrContent offline_wrong)) :: IO ()
258
         exitWith $ ExitFailure 1
259

    
260
  let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
261
                                then Node.setOffline n True
262
                                else n) fixed_nl
263
      nlf = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
264
            nm
265

    
266
  when (not oneline && verbose > 1) $
267
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
268

    
269
  when (Container.size ilf == 0) $ do
270
         (if oneline then putStrLn $ formatOneline 0 0 0
271
          else printf "Cluster is empty, exiting.\n")
272
         exitWith ExitSuccess
273

    
274
  let split_insts = Cluster.findSplitInstances nlf ilf
275
  when (not . null $ split_insts) $ do
276
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
277
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
278
    hPutStrLn stderr "Aborting."
279
    exitWith $ ExitFailure 1
280

    
281
  let ngroups = Cluster.splitCluster nlf ilf
282
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
283
    hPutStrLn stderr "Found multiple node groups:"
284
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
285
           flip Container.find gl . fst) ngroups
286
    hPutStrLn stderr "Aborting."
287
    exitWith $ ExitFailure 1
288

    
289
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
290

    
291
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
292
             (Container.size nlf)
293
             (Container.size ilf)
294

    
295
  (gname, (nl, il)) <- case optGroup opts of
296
    Nothing -> do
297
         let (gidx, cdata) = head ngroups
298
             grp = Container.find gidx gl
299
         return (Group.name grp, cdata)
300
    Just g -> case Container.findByName gl g of
301
      Nothing -> do
302
        hPutStrLn stderr $ "Node group " ++ g ++
303
          " not found. Node group list is:"
304
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
305
        hPutStrLn stderr "Aborting."
306
        exitWith $ ExitFailure 1
307
      Just grp ->
308
          case lookup (Group.idx grp) ngroups of
309
            Nothing -> do
310
              -- TODO: while this is unlikely to happen, log here the
311
              -- actual group data to help debugging
312
              hPutStrLn stderr "Internal failure, missing group idx"
313
              exitWith $ ExitFailure 1
314
            Just cdata -> return (Group.name grp, cdata)
315

    
316
  unless oneline $ printf "Group size %d nodes, %d instances\n"
317
             (Container.size nl)
318
             (Container.size il)
319

    
320
  putStrLn $ "Selected node group: " ++ gname
321

    
322
  when (length csf > 0 && not oneline && verbose > 1) $
323
       printf "Note: Stripping common suffix of '%s' from names\n" csf
324

    
325
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
326
  unless (oneline || verbose == 0) $ printf
327
             "Initial check done: %d bad nodes, %d bad instances.\n"
328
             (length bad_nodes) (length bad_instances)
329

    
330
  when (length bad_nodes > 0) $
331
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
332
                  \that the cluster will end N+1 happy."
333

    
334
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
335

    
336
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
337

    
338
  let ini_cv = Cluster.compCV nl
339
      ini_tbl = Cluster.Table nl il ini_cv []
340
      min_cv = optMinScore opts
341

    
342
  when (ini_cv < min_cv) $ do
343
         (if oneline then
344
              putStrLn $ formatOneline ini_cv 0 ini_cv
345
          else printf "Cluster is already well balanced (initial score %.6g,\n\
346
                      \minimum score %.6g).\nNothing to do, exiting\n"
347
                      ini_cv min_cv)
348
         exitWith ExitSuccess
349

    
350
  unless oneline (if verbose > 2 then
351
                      printf "Initial coefficients: overall %.8f, %s\n"
352
                      ini_cv (Cluster.printStats nl)
353
                  else
354
                      printf "Initial score: %.8f\n" ini_cv)
355

    
356
  unless oneline $ putStrLn "Trying to minimize the CV..."
357
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
358
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
359

    
360
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
361
                         (optDiskMoves opts)
362
                         (optInstMoves opts)
363
                         nmlen imlen [] oneline min_cv
364
                         (optMinGainLim opts) (optMinGain opts)
365
                         (optEvacMode opts)
366
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
367
      ord_plc = reverse fin_plc
368
      sol_msg = case () of
369
                  _ | null fin_plc -> printf "No solution found\n"
370
                    | verbose > 2 ->
371
                        printf "Final coefficients:   overall %.8f, %s\n"
372
                        fin_cv (Cluster.printStats fin_nl)
373
                    | otherwise ->
374
                        printf "Cluster score improved from %.8f to %.8f\n"
375
                        ini_cv fin_cv ::String
376

    
377
  unless oneline $ putStr sol_msg
378

    
379
  unless (oneline || verbose == 0) $
380
         printf "Solution length=%d\n" (length ord_plc)
381

    
382
  let cmd_jobs = Cluster.splitJobs cmd_strs
383
      cmd_data = Cluster.formatCmds cmd_jobs
384

    
385
  when (isJust $ optShowCmds opts) $
386
       do
387
         let out_path = fromJust $ optShowCmds opts
388
         putStrLn ""
389
         (if out_path == "-" then
390
              printf "Commands to run to reach the above solution:\n%s"
391
                     (unlines . map ("  " ++) .
392
                      filter (/= "  check") .
393
                      lines $ cmd_data)
394
          else do
395
            writeFile out_path (shTemplate ++ cmd_data)
396
            printf "The commands have been written to file '%s'\n" out_path)
397

    
398
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
399
                (ClusterData gl fin_nl fin_il ctags)
400

    
401
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
402

    
403
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
404

    
405
  when (verbose > 3) $ do
406
         let ini_cs = Cluster.totalResources nl
407
             fin_cs = Cluster.totalResources fin_nl
408
         printf "Original: mem=%d disk=%d\n"
409
                    (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
410
         printf "Final:    mem=%d disk=%d\n"
411
                    (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
412
  when oneline $
413
         putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv
414

    
415
  eval <-
416
      if optExecJobs opts && not (null ord_plc)
417
      then (case optLuxi opts of
418
              Nothing -> do
419
                hPutStrLn stderr "Execution of commands possible only on LUXI"
420
                return False
421
              Just master -> runJobSet master fin_nl il cmd_jobs)
422
      else return True
423
  unless eval (exitWith (ExitFailure 1))