X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/2ba173628e5597c8cc6b7dd81a108dcf9c128be2..29a3053378d72e6897799c16fe61d136b28ca1e0:/htools/Ganeti/HTools/Program/Hbal.hs diff --git a/htools/Ganeti/HTools/Program/Hbal.hs b/htools/Ganeti/HTools/Program/Hbal.hs index b881033..fa0728b 100644 --- a/htools/Ganeti/HTools/Program/Hbal.hs +++ b/htools/Ganeti/HTools/Program/Hbal.hs @@ -4,7 +4,7 @@ {- -Copyright (C) 2009, 2010, 2011 Google Inc. +Copyright (C) 2009, 2010, 2011, 2012 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,7 +23,12 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -} -module Ganeti.HTools.Program.Hbal (main) where +module Ganeti.HTools.Program.Hbal + ( main + , options + , arguments + , iterateDepth + ) where import Control.Concurrent (threadDelay) import Control.Exception (bracket) @@ -31,11 +36,10 @@ import Control.Monad import Data.List import Data.Maybe (isJust, isNothing, fromJust) import Data.IORef -import System (exitWith, ExitCode(..)) +import System.Exit import System.IO import System.Posix.Process import System.Posix.Signals -import qualified System import Text.Printf (printf, hPrintf) @@ -45,52 +49,61 @@ import qualified Ganeti.HTools.Group as Group import qualified Ganeti.HTools.Node as Node import qualified Ganeti.HTools.Instance as Instance +import Ganeti.BasicTypes +import Ganeti.Common +import Ganeti.Errors import Ganeti.HTools.CLI import Ganeti.HTools.ExtLoader -import Ganeti.HTools.Utils import Ganeti.HTools.Types import Ganeti.HTools.Loader +import Ganeti.Utils import qualified Ganeti.Luxi as L import Ganeti.Jobs -- | Options list and functions. -options :: [OptType] -options = - [ oPrintNodes - , oPrintInsts - , oPrintCommands - , oDataFile - , oEvacMode - , oRapiMaster - , oLuxiSocket - , oExecJobs - , oGroup - , oMaxSolLength - , oVerbose - , oQuiet - , oOfflineNode - , oMinScore - , oMaxCpu - , oMinDisk - , oMinGain - , oMinGainLim - , oDiskMoves - , oSelInst - , oInstMoves - , oDynuFile - , oExTags - , oExInst - , oSaveCluster - , oShowVer - , oShowHelp - ] +options :: IO [OptType] +options = do + luxi <- oLuxiSocket + return + [ oPrintNodes + , oPrintInsts + , oPrintCommands + , oDataFile + , oEvacMode + , oRapiMaster + , luxi + , oIAllocSrc + , oExecJobs + , oGroup + , oMaxSolLength + , oVerbose + , oQuiet + , oOfflineNode + , oMinScore + , oMaxCpu + , oMinDisk + , oMinGain + , oMinGainLim + , oDiskMoves + , oSelInst + , oInstMoves + , oDynuFile + , oExTags + , oExInst + , oSaveCluster + ] + +-- | The list of arguments supported by the program. +arguments :: [ArgCompletion] +arguments = [] {- | Start computing the solution at the given depth and recurse until we find a valid solution or we exceed the maximum depth. -} -iterateDepth :: Cluster.Table -- ^ The starting table +iterateDepth :: Bool -- ^ Whether to print moves + -> Cluster.Table -- ^ The starting table -> Int -- ^ Remaining length -> Bool -- ^ Allow disk moves -> Bool -- ^ Allow instance moves @@ -103,7 +116,7 @@ iterateDepth :: Cluster.Table -- ^ The starting table -> Bool -- ^ Enable evacuation mode -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table -- and commands -iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen +iterateDepth printmove ini_tbl max_rounds disk_moves inst_moves nmlen imlen cmd_strs min_score mg_limit min_gain evac_mode = let Cluster.Table ini_nl ini_il _ _ = ini_tbl allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score @@ -121,9 +134,10 @@ iterateDepth ini_tbl max_rounds disk_moves inst_moves nmlen imlen nmlen imlen cur_plc fin_plc_len afn = Cluster.involvedNodes ini_il cur_plc upd_cmd_strs = (afn, idx, move, cmds):cmd_strs - putStrLn sol_line - hFlush stdout - iterateDepth fin_tbl max_rounds disk_moves inst_moves + when printmove $ do + putStrLn sol_line + hFlush stdout + iterateDepth printmove fin_tbl max_rounds disk_moves inst_moves nmlen imlen upd_cmd_strs min_score mg_limit min_gain evac_mode Nothing -> return (ini_tbl, cmd_strs) @@ -143,22 +157,22 @@ saveBalanceCommands :: Options -> String -> IO () saveBalanceCommands opts cmd_data = do let out_path = fromJust $ optShowCmds opts putStrLn "" - (if out_path == "-" then - printf "Commands to run to reach the above solution:\n%s" - (unlines . map (" " ++) . - filter (/= " check") . - lines $ cmd_data) - else do - writeFile out_path (shTemplate ++ cmd_data) - printf "The commands have been written to file '%s'\n" out_path) + if out_path == "-" + then printf "Commands to run to reach the above solution:\n%s" + (unlines . map (" " ++) . + filter (/= " check") . + lines $ cmd_data) + else do + writeFile out_path (shTemplate ++ cmd_data) + printf "The commands have been written to file '%s'\n" out_path -- | Polls a set of jobs at a fixed interval until all are finished -- one way or another. -waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus]) +waitForJobs :: L.Client -> [L.JobId] -> IO (Result [JobStatus]) waitForJobs client jids = do sts <- L.queryJobsStatus client jids case sts of - Bad x -> return $ Bad x + Bad e -> return . Bad $ "Checking job status: " ++ formatError e Ok s -> if any (<= JOB_STATUS_RUNNING) s then do -- TODO: replace hardcoded value with a better thing @@ -176,12 +190,12 @@ execWrapper :: String -> Node.List execWrapper _ _ _ _ [] = return True execWrapper master nl il cref alljss = do cancel <- readIORef cref - (if cancel > 0 - then do - hPrintf stderr "Exiting early due to user request, %d\ - \ jobset(s) remaining." (length alljss)::IO () - return False - else execJobSet master nl il cref alljss) + if cancel > 0 + then do + hPrintf stderr "Exiting early due to user request, %d\ + \ jobset(s) remaining." (length alljss)::IO () + return True + else execJobSet master nl il cref alljss -- | Execute an entire jobset. execJobSet :: String -> Node.List @@ -197,22 +211,22 @@ execJobSet master nl il cref (js:jss) = do (\client -> do jids <- L.submitManyJobs client jobs case jids of - Bad x -> return $ Bad x + Bad e -> return . Bad $ "Job submission error: " ++ formatError e Ok x -> do - putStrLn $ "Got job IDs " ++ commaJoin x + putStrLn $ "Got job IDs " ++ commaJoin (map show x) waitForJobs client x ) - (case jrs of - Bad x -> do - hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x - return False - Ok x -> if checkJobsStatus x - then execWrapper master nl il cref jss - else do - hPutStrLn stderr $ "Not all jobs completed successfully: " ++ - show x - hPutStrLn stderr "Aborting." - return False) + case jrs of + Bad x -> do + hPutStrLn stderr x + return False + Ok x -> if checkJobsStatus x + then execWrapper master nl il cref jss + else do + hPutStrLn stderr $ "Not all jobs completed successfully: " ++ + show x + hPutStrLn stderr "Aborting." + return False -- | Executes the jobs, if possible and desired. maybeExecJobs :: Options @@ -262,8 +276,7 @@ selectGroup opts gl nlf ilf = do hPutStrLn stderr "Found multiple node groups:" mapM_ (hPutStrLn stderr . (" " ++) . Group.name . flip Container.find gl . fst) ngroups - hPutStrLn stderr "Aborting." - exitWith $ ExitFailure 1 + exitErr "Aborting." case optGroup opts of Nothing -> do @@ -275,15 +288,13 @@ selectGroup opts gl nlf ilf = do hPutStrLn stderr $ "Node group " ++ g ++ " not found. Node group list is:" mapM_ (hPutStrLn stderr . (" " ++) . Group.name ) (Container.elems gl) - hPutStrLn stderr "Aborting." - exitWith $ ExitFailure 1 + exitErr "Aborting." Just grp -> case lookup (Group.idx grp) ngroups of - Nothing -> do - -- TODO: while this is unlikely to happen, log here the - -- actual group data to help debugging - hPutStrLn stderr "Internal failure, missing group idx" - exitWith $ ExitFailure 1 + Nothing -> + -- This will only happen if there are no nodes assigned + -- to this group + return (Group.name grp, (Container.empty, Container.empty)) Just cdata -> return (Group.name grp, cdata) -- | Do a few checks on the cluster data. @@ -292,15 +303,14 @@ checkCluster verbose nl il = do -- nothing to do on an empty cluster when (Container.null il) $ do printf "Cluster is empty, exiting.\n"::IO () - exitWith ExitSuccess + exitSuccess -- hbal doesn't currently handle split clusters let split_insts = Cluster.findSplitInstances nl il - unless (null split_insts) $ do + unless (null split_insts || verbose <= 1) $ do hPutStrLn stderr "Found instances belonging to multiple node groups:" mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts - hPutStrLn stderr "Aborting." - exitWith $ ExitFailure 1 + hPutStrLn stderr "These instances will not be moved." printf "Loaded %d nodes, %d instances\n" (Container.size nl) @@ -324,7 +334,7 @@ checkGroup verbose gname nl il = do "Initial check done: %d bad nodes, %d bad instances.\n" (length bad_nodes) (length bad_instances) - when (length bad_nodes > 0) $ + unless (null bad_nodes) $ putStrLn "Cluster is not N+1 happy, continuing but no guarantee \ \that the cluster will end N+1 happy." @@ -336,26 +346,22 @@ checkNeedRebalance opts ini_cv = do printf "Cluster is already well balanced (initial score %.6g,\n\ \minimum score %.6g).\nNothing to do, exiting\n" ini_cv min_cv:: IO () - exitWith ExitSuccess + exitSuccess -- | Main function. -main :: IO () -main = do - cmd_args <- System.getArgs - (opts, args) <- parseOpts cmd_args "hbal" options - - unless (null args) $ do - hPutStrLn stderr "Error: this program doesn't take any arguments." - exitWith $ ExitFailure 1 +main :: Options -> [String] -> IO () +main opts args = do + unless (null args) $ exitErr "This program doesn't take any arguments." let verbose = optVerbose opts shownodes = optShowNodes opts showinsts = optShowInsts opts - ini_cdata@(ClusterData gl fixed_nl ilf ctags) <- loadExternalData opts + ini_cdata@(ClusterData gl fixed_nl ilf ctags ipol) <- loadExternalData opts - when (verbose > 1) $ + when (verbose > 1) $ do putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags + putStrLn $ "Loaded cluster ipolicy: " ++ show ipol nlf <- setNodeStatus opts fixed_nl checkCluster verbose nlf ilf @@ -376,16 +382,16 @@ main = do checkNeedRebalance opts ini_cv - (if verbose > 2 - then printf "Initial coefficients: overall %.8f, %s\n" - ini_cv (Cluster.printStats nl)::IO () - else printf "Initial score: %.8f\n" ini_cv) + if verbose > 2 + then printf "Initial coefficients: overall %.8f\n%s" + ini_cv (Cluster.printStats " " nl)::IO () + else printf "Initial score: %.8f\n" ini_cv putStrLn "Trying to minimize the CV..." let imlen = maximum . map (length . Instance.alias) $ Container.elems il nmlen = maximum . map (length . Node.alias) $ Container.elems nl - (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts) + (fin_tbl, cmd_strs) <- iterateDepth True ini_tbl (optMaxLength opts) (optDiskMoves opts) (optInstMoves opts) nmlen imlen [] min_cv @@ -396,8 +402,8 @@ main = do sol_msg = case () of _ | null fin_plc -> printf "No solution found\n" | verbose > 2 -> - printf "Final coefficients: overall %.8f, %s\n" - fin_cv (Cluster.printStats fin_nl) + printf "Final coefficients: overall %.8f\n%s" + fin_cv (Cluster.printStats " " fin_nl) | otherwise -> printf "Cluster score improved from %.8f to %.8f\n" ini_cv fin_cv ::String @@ -409,11 +415,11 @@ main = do let cmd_jobs = Cluster.splitJobs cmd_strs - when (isJust $ optShowCmds opts) $ + when (isJust $ optShowCmds opts) . saveBalanceCommands opts $ Cluster.formatCmds cmd_jobs maybeSaveData (optSaveCluster opts) "balanced" "after balancing" - (ClusterData gl fin_nl fin_il ctags) + ini_cdata { cdNodes = fin_nl, cdInstances = fin_il } maybePrintInsts showinsts "Final" (Cluster.printInsts fin_nl fin_il)