Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hbal.hs @ 179c0828

History | View | Annotate | Download (15.4 kB)

1
{-| Cluster rebalancer.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hbal (main) where
27

    
28
import Control.Concurrent (threadDelay)
29
import Control.Exception (bracket)
30
import Control.Monad
31
import Data.List
32
import Data.Maybe (isJust, isNothing, fromJust)
33
import Data.IORef
34
import System (exitWith, ExitCode(..))
35
import System.IO
36
import System.Posix.Process
37
import System.Posix.Signals
38
import qualified System
39

    
40
import Text.Printf (printf, hPrintf)
41

    
42
import qualified Ganeti.HTools.Container as Container
43
import qualified Ganeti.HTools.Cluster as Cluster
44
import qualified Ganeti.HTools.Group as Group
45
import qualified Ganeti.HTools.Node as Node
46
import qualified Ganeti.HTools.Instance as Instance
47

    
48
import Ganeti.HTools.CLI
49
import Ganeti.HTools.ExtLoader
50
import Ganeti.HTools.Utils
51
import Ganeti.HTools.Types
52
import Ganeti.HTools.Loader
53

    
54
import qualified Ganeti.Luxi as L
55
import Ganeti.Jobs
56

    
57
-- | Options list and functions.
58
options :: [OptType]
59
options =
60
    [ oPrintNodes
61
    , oPrintInsts
62
    , oPrintCommands
63
    , oOneline
64
    , oDataFile
65
    , oEvacMode
66
    , oRapiMaster
67
    , oLuxiSocket
68
    , oExecJobs
69
    , oGroup
70
    , oMaxSolLength
71
    , oVerbose
72
    , oQuiet
73
    , oOfflineNode
74
    , oMinScore
75
    , oMaxCpu
76
    , oMinDisk
77
    , oMinGain
78
    , oMinGainLim
79
    , oDiskMoves
80
    , oSelInst
81
    , oInstMoves
82
    , oDynuFile
83
    , oExTags
84
    , oExInst
85
    , oSaveCluster
86
    , oShowVer
87
    , oShowHelp
88
    ]
89

    
90
{- | Start computing the solution at the given depth and recurse until
91
we find a valid solution or we exceed the maximum depth.
92

    
93
-}
94
iterateDepth :: Cluster.Table    -- ^ The starting table
95
             -> Int              -- ^ Remaining length
96
             -> Bool             -- ^ Allow disk moves
97
             -> Bool             -- ^ Allow instance moves
98
             -> Int              -- ^ Max node name len
99
             -> Int              -- ^ Max instance name len
100
             -> [MoveJob]        -- ^ Current command list
101
             -> Bool             -- ^ Whether to be silent
102
             -> Score            -- ^ Score at which to stop
103
             -> Score            -- ^ Min gain limit
104
             -> Score            -- ^ Min score gain
105
             -> Bool             -- ^ Enable evacuation mode
106
             -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
107
                                              -- and commands
108
iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
109
             cmd_strs oneline min_score mg_limit min_gain evac_mode =
110
    let Cluster.Table ini_nl ini_il _ _ = ini_tbl
111
        allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
112
        m_fin_tbl = if allowed_next
113
                    then Cluster.tryBalance ini_tbl disk_moves inst_moves
114
                         evac_mode mg_limit min_gain
115
                    else Nothing
116
    in
117
      case m_fin_tbl of
118
        Just fin_tbl ->
119
            do
120
              let
121
                  (Cluster.Table _ _ _ fin_plc) = fin_tbl
122
                  fin_plc_len = length fin_plc
123
                  cur_plc@(idx, _, _, move, _) = head fin_plc
124
                  (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
125
                                     nmlen imlen cur_plc fin_plc_len
126
                  afn = Cluster.involvedNodes ini_il cur_plc
127
                  upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
128
              unless oneline $ do
129
                       putStrLn sol_line
130
                       hFlush stdout
131
              iterateDepth fin_tbl max_rounds disk_moves inst_moves
132
                           nmlen imlen upd_cmd_strs oneline min_score
133
                           mg_limit min_gain evac_mode
134
        Nothing -> return (ini_tbl, cmd_strs)
135

    
136
-- | Formats the solution for the oneline display.
137
formatOneline :: Double -> Int -> Double -> String
138
formatOneline ini_cv plc_len fin_cv =
139
    printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
140
               (if fin_cv == 0 then 1 else ini_cv / fin_cv)
141

    
142
-- | Polls a set of jobs at a fixed interval until all are finished
143
-- one way or another.
144
waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
145
waitForJobs client jids = do
146
  sts <- L.queryJobsStatus client jids
147
  case sts of
148
    Bad x -> return $ Bad x
149
    Ok s -> if any (<= JOB_STATUS_RUNNING) s
150
            then do
151
              -- TODO: replace hardcoded value with a better thing
152
              threadDelay (1000000 * 15)
153
              waitForJobs client jids
154
            else return $ Ok s
155

    
156
-- | Check that a set of job statuses is all success.
157
checkJobsStatus :: [JobStatus] -> Bool
158
checkJobsStatus = all (== JOB_STATUS_SUCCESS)
159

    
160
-- | Wrapper over execJobSet checking for early termination.
161
execWrapper :: String -> Node.List
162
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
163
execWrapper _      _  _  _    [] = return True
164
execWrapper master nl il cref alljss = do
165
  cancel <- readIORef cref
166
  (if cancel > 0
167
   then do
168
     hPrintf stderr "Exiting early due to user request, %d\
169
                    \ jobset(s) remaining." (length alljss)::IO ()
170
     return False
171
   else execJobSet master nl il cref alljss)
172

    
173
-- | Execute an entire jobset.
174
execJobSet :: String -> Node.List
175
           -> Instance.List -> IORef Int -> [JobSet] -> IO Bool
176
execJobSet _      _  _  _    [] = return True
177
execJobSet master nl il cref (js:jss) = do
178
  -- map from jobset (htools list of positions) to [[opcodes]]
179
  let jobs = map (\(_, idx, move, _) ->
180
                      Cluster.iMoveToJob nl il idx move) js
181
  let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
182
  putStrLn $ "Executing jobset for instances " ++ commaJoin descr
183
  jrs <- bracket (L.getClient master) L.closeClient
184
         (\client -> do
185
            jids <- L.submitManyJobs client jobs
186
            case jids of
187
              Bad x -> return $ Bad x
188
              Ok x -> do
189
                putStrLn $ "Got job IDs " ++ commaJoin x
190
                waitForJobs client x
191
         )
192
  (case jrs of
193
     Bad x -> do
194
       hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
195
       return False
196
     Ok x -> if checkJobsStatus x
197
             then execWrapper master nl il cref jss
198
             else do
199
               hPutStrLn stderr $ "Not all jobs completed successfully: " ++
200
                         show x
201
               hPutStrLn stderr "Aborting."
202
               return False)
203

    
204
-- | Signal handler for graceful termination.
205
hangleSigInt :: IORef Int -> IO ()
206
hangleSigInt cref = do
207
  writeIORef cref 1
208
  putStrLn ("Cancel request registered, will exit at" ++
209
            " the end of the current job set...")
210

    
211
-- | Signal handler for immediate termination.
212
hangleSigTerm :: IORef Int -> IO ()
213
hangleSigTerm cref = do
214
  -- update the cref to 2, just for consistency
215
  writeIORef cref 2
216
  putStrLn "Double cancel request, exiting now..."
217
  exitImmediately $ ExitFailure 2
218

    
219
-- | Runs a job set with handling of signals.
220
runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO Bool
221
runJobSet master fin_nl il cmd_jobs = do
222
  cref <- newIORef 0
223
  mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing)
224
    [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)]
225
  execWrapper master fin_nl il cref cmd_jobs
226

    
227
-- | Main function.
228
main :: IO ()
229
main = do
230
  cmd_args <- System.getArgs
231
  (opts, args) <- parseOpts cmd_args "hbal" options
232

    
233
  unless (null args) $ do
234
         hPutStrLn stderr "Error: this program doesn't take any arguments."
235
         exitWith $ ExitFailure 1
236

    
237
  let oneline = optOneline opts
238
      verbose = optVerbose opts
239
      shownodes = optShowNodes opts
240
      showinsts = optShowInsts opts
241

    
242
  ini_cdata@(ClusterData gl fixed_nl ilf ctags) <- loadExternalData opts
243

    
244
  let offline_passed = optOffline opts
245
      all_nodes = Container.elems fixed_nl
246
      offline_lkp = map (lookupName (map Node.name all_nodes)) offline_passed
247
      offline_wrong = filter (not . goodLookupResult) offline_lkp
248
      offline_names = map lrContent offline_lkp
249
      offline_indices = map Node.idx $
250
                        filter (\n -> Node.name n `elem` offline_names)
251
                               all_nodes
252
      m_cpu = optMcpu opts
253
      m_dsk = optMdsk opts
254
      csf = commonSuffix fixed_nl ilf
255

    
256
  when (not (null offline_wrong)) $ do
257
         hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
258
                     (commaJoin (map lrContent offline_wrong)) :: IO ()
259
         exitWith $ ExitFailure 1
260

    
261
  let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
262
                                then Node.setOffline n True
263
                                else n) fixed_nl
264
      nlf = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
265
            nm
266

    
267
  when (not oneline && verbose > 1) $
268
       putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
269

    
270
  when (Container.size ilf == 0) $ do
271
         (if oneline then putStrLn $ formatOneline 0 0 0
272
          else printf "Cluster is empty, exiting.\n")
273
         exitWith ExitSuccess
274

    
275
  let split_insts = Cluster.findSplitInstances nlf ilf
276
  when (not . null $ split_insts) $ do
277
    hPutStrLn stderr "Found instances belonging to multiple node groups:"
278
    mapM_ (\i -> hPutStrLn stderr $ "  " ++ Instance.name i) split_insts
279
    hPutStrLn stderr "Aborting."
280
    exitWith $ ExitFailure 1
281

    
282
  let ngroups = Cluster.splitCluster nlf ilf
283
  when (length ngroups > 1 && isNothing (optGroup opts)) $ do
284
    hPutStrLn stderr "Found multiple node groups:"
285
    mapM_ (hPutStrLn stderr . ("  " ++) . Group.name .
286
           flip Container.find gl . fst) ngroups
287
    hPutStrLn stderr "Aborting."
288
    exitWith $ ExitFailure 1
289

    
290
  maybeSaveData (optSaveCluster opts) "original" "before balancing" ini_cdata
291

    
292
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
293
             (Container.size nlf)
294
             (Container.size ilf)
295

    
296
  (gname, (nl, il)) <- case optGroup opts of
297
    Nothing -> do
298
         let (gidx, cdata) = head ngroups
299
             grp = Container.find gidx gl
300
         return (Group.name grp, cdata)
301
    Just g -> case Container.findByName gl g of
302
      Nothing -> do
303
        hPutStrLn stderr $ "Node group " ++ g ++
304
          " not found. Node group list is:"
305
        mapM_ (hPutStrLn stderr . ("  " ++) . Group.name ) (Container.elems gl)
306
        hPutStrLn stderr "Aborting."
307
        exitWith $ ExitFailure 1
308
      Just grp ->
309
          case lookup (Group.idx grp) ngroups of
310
            Nothing -> do
311
              -- TODO: while this is unlikely to happen, log here the
312
              -- actual group data to help debugging
313
              hPutStrLn stderr "Internal failure, missing group idx"
314
              exitWith $ ExitFailure 1
315
            Just cdata -> return (Group.name grp, cdata)
316

    
317
  unless oneline $ printf "Group size %d nodes, %d instances\n"
318
             (Container.size nl)
319
             (Container.size il)
320

    
321
  putStrLn $ "Selected node group: " ++ gname
322

    
323
  when (length csf > 0 && not oneline && verbose > 1) $
324
       printf "Note: Stripping common suffix of '%s' from names\n" csf
325

    
326
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
327
  unless (oneline || verbose == 0) $ printf
328
             "Initial check done: %d bad nodes, %d bad instances.\n"
329
             (length bad_nodes) (length bad_instances)
330

    
331
  when (length bad_nodes > 0) $
332
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
333
                  \that the cluster will end N+1 happy."
334

    
335
  maybePrintInsts showinsts "Initial" (Cluster.printInsts nl il)
336

    
337
  maybePrintNodes shownodes "Initial cluster" (Cluster.printNodes nl)
338

    
339
  let ini_cv = Cluster.compCV nl
340
      ini_tbl = Cluster.Table nl il ini_cv []
341
      min_cv = optMinScore opts
342

    
343
  when (ini_cv < min_cv) $ do
344
         (if oneline then
345
              putStrLn $ formatOneline ini_cv 0 ini_cv
346
          else printf "Cluster is already well balanced (initial score %.6g,\n\
347
                      \minimum score %.6g).\nNothing to do, exiting\n"
348
                      ini_cv min_cv)
349
         exitWith ExitSuccess
350

    
351
  unless oneline (if verbose > 2 then
352
                      printf "Initial coefficients: overall %.8f, %s\n"
353
                      ini_cv (Cluster.printStats nl)
354
                  else
355
                      printf "Initial score: %.8f\n" ini_cv)
356

    
357
  unless oneline $ putStrLn "Trying to minimize the CV..."
358
  let imlen = maximum . map (length . Instance.alias) $ Container.elems il
359
      nmlen = maximum . map (length . Node.alias) $ Container.elems nl
360

    
361
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
362
                         (optDiskMoves opts)
363
                         (optInstMoves opts)
364
                         nmlen imlen [] oneline min_cv
365
                         (optMinGainLim opts) (optMinGain opts)
366
                         (optEvacMode opts)
367
  let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
368
      ord_plc = reverse fin_plc
369
      sol_msg = case () of
370
                  _ | null fin_plc -> printf "No solution found\n"
371
                    | verbose > 2 ->
372
                        printf "Final coefficients:   overall %.8f, %s\n"
373
                        fin_cv (Cluster.printStats fin_nl)
374
                    | otherwise ->
375
                        printf "Cluster score improved from %.8f to %.8f\n"
376
                        ini_cv fin_cv ::String
377

    
378
  unless oneline $ putStr sol_msg
379

    
380
  unless (oneline || verbose == 0) $
381
         printf "Solution length=%d\n" (length ord_plc)
382

    
383
  let cmd_jobs = Cluster.splitJobs cmd_strs
384
      cmd_data = Cluster.formatCmds cmd_jobs
385

    
386
  when (isJust $ optShowCmds opts) $
387
       do
388
         let out_path = fromJust $ optShowCmds opts
389
         putStrLn ""
390
         (if out_path == "-" then
391
              printf "Commands to run to reach the above solution:\n%s"
392
                     (unlines . map ("  " ++) .
393
                      filter (/= "  check") .
394
                      lines $ cmd_data)
395
          else do
396
            writeFile out_path (shTemplate ++ cmd_data)
397
            printf "The commands have been written to file '%s'\n" out_path)
398

    
399
  maybeSaveData (optSaveCluster opts) "balanced" "after balancing"
400
                (ClusterData gl fin_nl fin_il ctags)
401

    
402
  maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)
403

    
404
  maybePrintNodes shownodes "Final cluster" (Cluster.printNodes fin_nl)
405

    
406
  when (verbose > 3) $ do
407
         let ini_cs = Cluster.totalResources nl
408
             fin_cs = Cluster.totalResources fin_nl
409
         printf "Original: mem=%d disk=%d\n"
410
                    (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
411
         printf "Final:    mem=%d disk=%d\n"
412
                    (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
413
  when oneline $
414
         putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv
415

    
416
  eval <-
417
      if optExecJobs opts && not (null ord_plc)
418
      then (case optLuxi opts of
419
              Nothing -> do
420
                hPutStrLn stderr "Execution of commands possible only on LUXI"
421
                return False
422
              Just master -> runJobSet master fin_nl il cmd_jobs)
423
      else return True
424
  unless eval (exitWith (ExitFailure 1))