{-
-Copyright (C) 2009, 2010, 2011 Google Inc.
+Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-}
-module Ganeti.HTools.Program.Hbal (main) where
+module Ganeti.HTools.Program.Hbal
+ ( main
+ , options
+ , arguments
+ , iterateDepth
+ ) where
import Control.Concurrent (threadDelay)
import Control.Exception (bracket)
import Data.List
import Data.Maybe (isJust, isNothing, fromJust)
import Data.IORef
-import System.Environment (getArgs)
import System.Exit
import System.IO
import System.Posix.Process
import qualified Ganeti.HTools.Node as Node
import qualified Ganeti.HTools.Instance as Instance
+import Ganeti.BasicTypes
+import Ganeti.Common
+import Ganeti.Errors
import Ganeti.HTools.CLI
import Ganeti.HTools.ExtLoader
-import Ganeti.HTools.Utils
import Ganeti.HTools.Types
import Ganeti.HTools.Loader
+import Ganeti.Utils
import qualified Ganeti.Luxi as L
import Ganeti.Jobs
-- | Options list and functions.
-options :: [OptType]
-options =
- [ oPrintNodes
- , oPrintInsts
- , oPrintCommands
- , oDataFile
- , oEvacMode
- , oRapiMaster
- , oLuxiSocket
- , oExecJobs
- , oGroup
- , oMaxSolLength
- , oVerbose
- , oQuiet
- , oOfflineNode
- , oMinScore
- , oMaxCpu
- , oMinDisk
- , oMinGain
- , oMinGainLim
- , oDiskMoves
- , oSelInst
- , oInstMoves
- , oDynuFile
- , oExTags
- , oExInst
- , oSaveCluster
- , oShowVer
- , oShowHelp
- ]
+options :: IO [OptType]
+options = do
+ luxi <- oLuxiSocket
+ return
+ [ oPrintNodes
+ , oPrintInsts
+ , oPrintCommands
+ , oDataFile
+ , oEvacMode
+ , oRapiMaster
+ , luxi
+ , oIAllocSrc
+ , oExecJobs
+ , oGroup
+ , oMaxSolLength
+ , oVerbose
+ , oQuiet
+ , oOfflineNode
+ , oMinScore
+ , oMaxCpu
+ , oMinDisk
+ , oMinGain
+ , oMinGainLim
+ , oDiskMoves
+ , oSelInst
+ , oInstMoves
+ , oDynuFile
+ , oExTags
+ , oExInst
+ , oSaveCluster
+ ]
+
+-- | The list of arguments supported by the program.
+arguments :: [ArgCompletion]
+arguments = []
{- | Start computing the solution at the given depth and recurse until
we find a valid solution or we exceed the maximum depth.
-}
-iterateDepth :: Cluster.Table -- ^ The starting table
+iterateDepth :: Bool -- ^ Whether to print moves
+ -> Cluster.Table -- ^ The starting table
-> Int -- ^ Remaining length
-> Bool -- ^ Allow disk moves
-> Bool -- ^ Allow instance moves
-> Bool -- ^ Enable evacuation mode
-> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
-- and commands
-iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen
+iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen
cmd_strs min_score mg_limit min_gain evac_mode =
let Cluster.Table ini_nl ini_il _ _ = ini_tbl
allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
nmlen imlen cur_plc fin_plc_len
afn = Cluster.involvedNodes ini_il cur_plc
upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
- putStrLn sol_line
- hFlush stdout
- iterateDepth fin_tbl max_rounds disk_moves inst_moves
+ when printmove $ do
+ putStrLn sol_line
+ hFlush stdout
+ iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves
nmlen imlen upd_cmd_strs min_score
mg_limit min_gain evac_mode
Nothing -> return (ini_tbl, cmd_strs)
-- | Polls a set of jobs at a fixed interval until all are finished
-- one way or another.
-waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
+waitForJobs :: L.Client -> [L.JobId] -> IO (Result [JobStatus])
waitForJobs client jids = do
sts <- L.queryJobsStatus client jids
case sts of
- Bad x -> return $ Bad x
+ Bad e -> return . Bad $ "Checking job status: " ++ formatError e
Ok s -> if any (<= JOB_STATUS_RUNNING) s
then do
-- TODO: replace hardcoded value with a better thing
then do
hPrintf stderr "Exiting early due to user request, %d\
\ jobset(s) remaining." (length alljss)::IO ()
- return False
+ return True
else execJobSet master nl il cref alljss
-- | Execute an entire jobset.
(\client -> do
jids <- L.submitManyJobs client jobs
case jids of
- Bad x -> return $ Bad x
+ Bad e -> return . Bad $ "Job submission error: " ++ formatError e
Ok x -> do
- putStrLn $ "Got job IDs " ++ commaJoin x
+ putStrLn $ "Got job IDs " ++ commaJoin (map show x)
waitForJobs client x
)
case jrs of
Bad x -> do
- hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
+ hPutStrLn stderr x
return False
Ok x -> if checkJobsStatus x
then execWrapper master nl il cref jss
hPutStrLn stderr "Found multiple node groups:"
mapM_ (hPutStrLn stderr . (" " ++) . Group.name .
flip Container.find gl . fst) ngroups
- hPutStrLn stderr "Aborting."
- exitWith $ ExitFailure 1
+ exitErr "Aborting."
case optGroup opts of
Nothing -> do
hPutStrLn stderr $ "Node group " ++ g ++
" not found. Node group list is:"
mapM_ (hPutStrLn stderr . (" " ++) . Group.name ) (Container.elems gl)
- hPutStrLn stderr "Aborting."
- exitWith $ ExitFailure 1
+ exitErr "Aborting."
Just grp ->
case lookup (Group.idx grp) ngroups of
Nothing ->
-- nothing to do on an empty cluster
when (Container.null il) $ do
printf "Cluster is empty, exiting.\n"::IO ()
- exitWith ExitSuccess
+ exitSuccess
-- hbal doesn't currently handle split clusters
let split_insts = Cluster.findSplitInstances nl il
- unless (null split_insts) $ do
+ unless (null split_insts || verbose <= 1) $ do
hPutStrLn stderr "Found instances belonging to multiple node groups:"
mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts
- hPutStrLn stderr "Aborting."
- exitWith $ ExitFailure 1
+ hPutStrLn stderr "These instances will not be moved."
printf "Loaded %d nodes, %d instances\n"
(Container.size nl)
"Initial check done: %d bad nodes, %d bad instances.\n"
(length bad_nodes) (length bad_instances)
- when (length bad_nodes > 0) $
+ unless (null bad_nodes) $
putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
\that the cluster will end N+1 happy."
printf "Cluster is already well balanced (initial score %.6g,\n\
\minimum score %.6g).\nNothing to do, exiting\n"
ini_cv min_cv:: IO ()
- exitWith ExitSuccess
+ exitSuccess
-- | Main function.
-main :: IO ()
-main = do
- cmd_args <- getArgs
- (opts, args) <- parseOpts cmd_args "hbal" options
-
- unless (null args) $ do
- hPutStrLn stderr "Error: this program doesn't take any arguments."
- exitWith $ ExitFailure 1
+main :: Options -> [String] -> IO ()
+main opts args = do
+ unless (null args) $ exitErr "This program doesn't take any arguments."
let verbose = optVerbose opts
shownodes = optShowNodes opts
checkNeedRebalance opts ini_cv
if verbose > 2
- then printf "Initial coefficients: overall %.8f, %s\n"
- ini_cv (Cluster.printStats nl)::IO ()
+ then printf "Initial coefficients: overall %.8f\n%s"
+ ini_cv (Cluster.printStats " " nl)::IO ()
else printf "Initial score: %.8f\n" ini_cv
putStrLn "Trying to minimize the CV..."
let imlen = maximum . map (length . Instance.alias) $ Container.elems il
nmlen = maximum . map (length . Node.alias) $ Container.elems nl
- (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
+ (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts)
(optDiskMoves opts)
(optInstMoves opts)
nmlen imlen [] min_cv
sol_msg = case () of
_ | null fin_plc -> printf "No solution found\n"
| verbose > 2 ->
- printf "Final coefficients: overall %.8f, %s\n"
- fin_cv (Cluster.printStats fin_nl)
+ printf "Final coefficients: overall %.8f\n%s"
+ fin_cv (Cluster.printStats " " fin_nl)
| otherwise ->
printf "Cluster score improved from %.8f to %.8f\n"
ini_cv fin_cv ::String
let cmd_jobs = Cluster.splitJobs cmd_strs
- when (isJust $ optShowCmds opts) $
+ when (isJust $ optShowCmds opts) .
saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs
maybeSaveData (optSaveCluster opts) "balanced" "after balancing"