X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/ba6c6006eda12e2b975c38ca37a991cccf8c25db..f4c7d37aa3d8a81bb403381abdfa9d213271fb9e:/hbal.hs diff --git a/hbal.hs b/hbal.hs index efe54db..a83b7fc 100644 --- a/hbal.hs +++ b/hbal.hs @@ -1,107 +1,89 @@ -{-| Solver for N+1 cluster errors +{-| Cluster rebalancer + +-} + +{- + +Copyright (C) 2009, 2010 Google Inc. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301, USA. -} module Main (main) where +import Control.Concurrent (threadDelay) +import Control.Exception (bracket) import Data.List -import Data.Function -import Data.Maybe (isJust, fromJust, fromMaybe) +import Data.Maybe (isJust, isNothing, fromJust) +import Data.IORef import Monad -import System +import System (exitWith, ExitCode(..)) import System.IO -import System.Console.GetOpt +import System.Posix.Process +import System.Posix.Signals import qualified System -import Text.Printf (printf) +import Text.Printf (printf, hPrintf) import qualified Ganeti.HTools.Container as Container import qualified Ganeti.HTools.Cluster as Cluster +import qualified Ganeti.HTools.Group as Group import qualified Ganeti.HTools.Node as Node -import qualified Ganeti.HTools.CLI as CLI -import Ganeti.HTools.Rapi +import qualified Ganeti.HTools.Instance as Instance + +import Ganeti.HTools.CLI +import Ganeti.HTools.ExtLoader import Ganeti.HTools.Utils +import Ganeti.HTools.Types + +import Ganeti.HTools.Text (serializeCluster) --- | Command line options structure. -data Options = Options - { optShowNodes :: Bool -- ^ Whether to show node status - , optShowCmds :: Maybe FilePath -- ^ Whether to show the command list - , optOneline :: Bool -- ^ Switch output to a single line - , optNodef :: FilePath -- ^ Path to the nodes file - , optNodeSet :: Bool -- ^ The nodes have been set by options - , optInstf :: FilePath -- ^ Path to the instances file - , optInstSet :: Bool -- ^ The insts have been set by options - , optMaxLength :: Int -- ^ Stop after this many steps - , optMaster :: String -- ^ Collect data from RAPI - , optVerbose :: Int -- ^ Verbosity level - , optOffline :: [String] -- ^ Names of offline nodes - , optMinScore :: Cluster.Score -- ^ The minimum score we aim for - , optShowVer :: Bool -- ^ Just show the program version - , optShowHelp :: Bool -- ^ Just show the help - } deriving Show - --- | Default values for the command line options. -defaultOptions :: Options -defaultOptions = Options - { optShowNodes = False - , optShowCmds = Nothing - , optOneline = False - , optNodef = "nodes" - , optNodeSet = False - , optInstf = "instances" - , optInstSet = False - , optMaxLength = -1 - , optMaster = "" - , optVerbose = 0 - , optOffline = [] - , optMinScore = 1e-9 - , optShowVer = False - , optShowHelp = False - } +import qualified Ganeti.Luxi as L +import Ganeti.Jobs -- | Options list and functions -options :: [OptDescr (Options -> Options)] +options :: [OptType] options = - [ Option ['p'] ["print-nodes"] - (NoArg (\ opts -> opts { optShowNodes = True })) - "print the final node list" - , Option ['C'] ["print-commands"] - (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-") - "FILE") - "print the ganeti command list for reaching the solution,\ - \if an argument is passed then write the commands to a file named\ - \ as such" - , Option ['o'] ["oneline"] - (NoArg (\ opts -> opts { optOneline = True })) - "print the ganeti command list for reaching the solution" - , Option ['n'] ["nodes"] - (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE") - "the node list FILE" - , Option ['i'] ["instances"] - (ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE") - "the instance list FILE" - , Option ['m'] ["master"] - (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS") - "collect data via RAPI at the given ADDRESS" - , Option ['l'] ["max-length"] - (ReqArg (\ i opts -> opts { optMaxLength = (read i)::Int }) "N") - "cap the solution at this many moves (useful for very unbalanced \ - \clusters)" - , Option ['v'] ["verbose"] - (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 })) - "increase the verbosity level" - , Option ['O'] ["offline"] - (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE") - " set node as offline" - , Option ['e'] ["min-score"] - (ReqArg (\ e opts -> opts { optMinScore = read e }) "EPSILON") - " mininum score to aim for" - , Option ['V'] ["version"] - (NoArg (\ opts -> opts { optShowVer = True})) - "show the version of the program" - , Option ['h'] ["help"] - (NoArg (\ opts -> opts { optShowHelp = True})) - "show help" + [ oPrintNodes + , oPrintInsts + , oPrintCommands + , oOneline + , oDataFile + , oEvacMode + , oRapiMaster + , oLuxiSocket + , oExecJobs + , oGroup + , oMaxSolLength + , oVerbose + , oQuiet + , oOfflineNode + , oMinScore + , oMaxCpu + , oMinDisk + , oMinGain + , oMinGainLim + , oDiskMoves + , oDynuFile + , oExTags + , oExInst + , oSaveCluster + , oShowVer + , oShowHelp ] {- | Start computing the solution at the given depth and recurse until @@ -110,129 +92,242 @@ we find a valid solution or we exceed the maximum depth. -} iterateDepth :: Cluster.Table -- ^ The starting table -> Int -- ^ Remaining length - -> Cluster.NameList -- ^ Node idx to name list - -> Cluster.NameList -- ^ Inst idx to name list + -> Bool -- ^ Allow disk moves -> Int -- ^ Max node name len -> Int -- ^ Max instance name len - -> [[String]] -- ^ Current command list - -> Bool -- ^ Wheter to be silent - -> Cluster.Score -- ^ Score at which to stop - -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and - -- commands -iterateDepth ini_tbl max_rounds ktn kti nmlen imlen - cmd_strs oneline min_score = - let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl - all_inst = Container.elems ini_il - node_idx = map Node.idx . filter (not . Node.offline) $ - Container.elems ini_nl - fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst - (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl - ini_plc_len = length ini_plc - fin_plc_len = length fin_plc - allowed_next = (max_rounds < 0 || length fin_plc < max_rounds) + -> [MoveJob] -- ^ Current command list + -> Bool -- ^ Whether to be silent + -> Score -- ^ Score at which to stop + -> Score -- ^ Min gain limit + -> Score -- ^ Min score gain + -> Bool -- ^ Enable evacuation mode + -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table + -- and commands +iterateDepth ini_tbl max_rounds disk_moves nmlen imlen + cmd_strs oneline min_score mg_limit min_gain evac_mode = + let Cluster.Table ini_nl ini_il _ _ = ini_tbl + allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score + m_fin_tbl = if allowed_next + then Cluster.tryBalance ini_tbl disk_moves evac_mode + mg_limit min_gain + else Nothing in - do - let - (sol_line, cmds) = Cluster.printSolutionLine ini_il ktn kti - nmlen imlen (head fin_plc) fin_plc_len - upd_cmd_strs = cmds:cmd_strs - unless (oneline || fin_plc_len == ini_plc_len) $ do - putStrLn sol_line - hFlush stdout - (if fin_cv < ini_cv then -- this round made success, try deeper - if allowed_next && fin_cv > min_score - then iterateDepth fin_tbl max_rounds ktn kti - nmlen imlen upd_cmd_strs oneline min_score - -- don't go deeper, but return the better solution - else return (fin_tbl, upd_cmd_strs) - else - return (ini_tbl, cmd_strs)) + case m_fin_tbl of + Just fin_tbl -> + do + let + (Cluster.Table _ _ _ fin_plc) = fin_tbl + fin_plc_len = length fin_plc + cur_plc@(idx, _, _, move, _) = head fin_plc + (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il + nmlen imlen cur_plc fin_plc_len + afn = Cluster.involvedNodes ini_il cur_plc + upd_cmd_strs = (afn, idx, move, cmds):cmd_strs + unless oneline $ do + putStrLn sol_line + hFlush stdout + iterateDepth fin_tbl max_rounds disk_moves + nmlen imlen upd_cmd_strs oneline min_score + mg_limit min_gain evac_mode + Nothing -> return (ini_tbl, cmd_strs) -- | Formats the solution for the oneline display formatOneline :: Double -> Int -> Double -> String formatOneline ini_cv plc_len fin_cv = printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv - (if fin_cv == 0 then 1 else (ini_cv / fin_cv)) + (if fin_cv == 0 then 1 else ini_cv / fin_cv) + +-- | Polls a set of jobs at a fixed interval until all are finished +-- one way or another +waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus]) +waitForJobs client jids = do + sts <- L.queryJobsStatus client jids + case sts of + Bad x -> return $ Bad x + Ok s -> if any (<= JOB_STATUS_RUNNING) s + then do + -- TODO: replace hardcoded value with a better thing + threadDelay (1000000 * 15) + waitForJobs client jids + else return $ Ok s + +-- | Check that a set of job statuses is all success +checkJobsStatus :: [JobStatus] -> Bool +checkJobsStatus = all (== JOB_STATUS_SUCCESS) + +-- | Execute an entire jobset +execJobSet :: String -> Node.List + -> Instance.List -> IORef Int -> [JobSet] -> IO () +execJobSet _ _ _ _ [] = return () +execJobSet master nl il cref alljss@(js:jss) = do + -- map from jobset (htools list of positions) to [[opcodes]] + cancel <- readIORef cref + when (cancel > 0) $ do + putStrLn ("Exiting early due to user request, " ++ show (length alljss) ++ + " jobset(s) remaining.") + exitWith $ ExitFailure 1 + + let jobs = map (\(_, idx, move, _) -> + Cluster.iMoveToJob nl il idx move) js + let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js + putStrLn $ "Executing jobset for instances " ++ commaJoin descr + jrs <- bracket (L.getClient master) L.closeClient + (\client -> do + jids <- L.submitManyJobs client jobs + case jids of + Bad x -> return $ Bad x + Ok x -> do + putStrLn $ "Got job IDs " ++ commaJoin x + waitForJobs client x + ) + (case jrs of + Bad x -> do + hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x + return () + Ok x -> if checkJobsStatus x + then execJobSet master nl il cref jss + else do + hPutStrLn stderr $ "Not all jobs completed successfully: " ++ + show x + hPutStrLn stderr "Aborting.") + +-- | Signal handler for graceful termination +hangleSigInt :: IORef Int -> IO () +hangleSigInt cref = do + writeIORef cref 1 + putStrLn ("Cancel request registered, will exit at" ++ + " the end of the current job set...") + +-- | Signal handler for immediate termination +hangleSigTerm :: IORef Int -> IO () +hangleSigTerm cref = do + -- update the cref to 2, just for consistency + writeIORef cref 2 + putStrLn "Double cancel request, exiting now..." + exitImmediately $ ExitFailure 2 + +runJobSet :: String -> Node.List -> Instance.List -> [JobSet] -> IO () +runJobSet master fin_nl il cmd_jobs = do + cref <- newIORef 0 + mapM_ (\(hnd, sig) -> installHandler sig (Catch (hnd cref)) Nothing) + [(hangleSigTerm, softwareTermination), (hangleSigInt, keyboardSignal)] + execJobSet master fin_nl il cref cmd_jobs -- | Main function. main :: IO () main = do cmd_args <- System.getArgs - (opts, args) <- CLI.parseOpts cmd_args "hbal" options - defaultOptions optShowHelp + (opts, args) <- parseOpts cmd_args "hbal" options unless (null args) $ do hPutStrLn stderr "Error: this program doesn't take any arguments." exitWith $ ExitFailure 1 - when (optShowVer opts) $ do - putStr $ CLI.showVersion "hbal" - exitWith ExitSuccess - - (env_node, env_inst) <- CLI.parseEnv () - let nodef = if optNodeSet opts then optNodef opts - else env_node - instf = if optInstSet opts then optInstf opts - else env_inst - oneline = optOneline opts + let oneline = optOneline opts verbose = optVerbose opts - (node_data, inst_data) = - case optMaster opts of - "" -> (readFile nodef, - readFile instf) - host -> (readData getNodes host, - readData getInstances host) + shownodes = optShowNodes opts - (loaded_nl, il, csf, ktn, kti) <- liftM2 Cluster.loadData node_data inst_data - let (fix_msgs, fixed_nl) = Cluster.checkData loaded_nl il ktn kti - - unless (null fix_msgs) $ do - putStrLn "Warning: cluster has inconsistent data:" - putStrLn . unlines . map (\s -> printf " - %s" s) $ fix_msgs + (gl, fixed_nl, ilf, ctags) <- loadExternalData opts let offline_names = optOffline opts - all_names = snd . unzip $ ktn - offline_wrong = filter (\n -> not $ elem n all_names) offline_names - offline_indices = fst . unzip . - filter (\(_, n) -> elem n offline_names) $ ktn + all_nodes = Container.elems fixed_nl + all_names = concatMap allNames all_nodes + offline_wrong = filter (`notElem` all_names) offline_names + offline_indices = map Node.idx $ + filter (\n -> + Node.name n `elem` offline_names || + Node.alias n `elem` offline_names) + all_nodes + m_cpu = optMcpu opts + m_dsk = optMdsk opts + csf = commonSuffix fixed_nl ilf when (length offline_wrong > 0) $ do - printf "Wrong node name(s) set as offline: %s\n" - (commaJoin offline_wrong) + hPrintf stderr "Wrong node name(s) set as offline: %s\n" + (commaJoin offline_wrong) :: IO () exitWith $ ExitFailure 1 - let nl = Container.map (\n -> if elem (Node.idx n) offline_indices + let nm = Container.map (\n -> if Node.idx n `elem` offline_indices then Node.setOffline n True else n) fixed_nl + nlf = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu) + nm - when (Container.size il == 0) $ do - (if oneline then - putStrLn $ formatOneline 0 0 0 - else - printf "Cluster is empty, exiting.\n") + when (not oneline && verbose > 1) $ + putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags + + when (Container.size ilf == 0) $ do + (if oneline then putStrLn $ formatOneline 0 0 0 + else printf "Cluster is empty, exiting.\n") exitWith ExitSuccess + let split_insts = Cluster.findSplitInstances nlf ilf + when (not . null $ split_insts) $ do + hPutStrLn stderr "Found instances belonging to multiple node groups:" + mapM_ (\i -> hPutStrLn stderr $ " " ++ Instance.name i) split_insts + hPutStrLn stderr "Aborting." + exitWith $ ExitFailure 1 + + let ngroups = Cluster.splitCluster nlf ilf + when (length ngroups > 1 && isNothing (optGroup opts)) $ do + hPutStrLn stderr "Found multiple node groups:" + mapM_ (hPutStrLn stderr . (" " ++) . Group.name . + (flip Container.find gl) . fst) ngroups + hPutStrLn stderr "Aborting." + exitWith $ ExitFailure 1 unless oneline $ printf "Loaded %d nodes, %d instances\n" + (Container.size nlf) + (Container.size ilf) + + (gname, (nl, il)) <- case optGroup opts of + Nothing -> do + let (gidx, cdata) = head ngroups + grp = Container.find gidx gl + return (Group.name grp, cdata) + Just g -> case Container.findByName gl g of + Nothing -> do + hPutStrLn stderr $ "Node group " ++ g ++ + " not found. Node group list is:" + mapM_ (hPutStrLn stderr . (" " ++) . Group.name ) (Container.elems gl) + hPutStrLn stderr "Aborting." + exitWith $ ExitFailure 1 + Just grp -> + case lookup (Group.idx grp) ngroups of + Nothing -> do + -- TODO: while this is unlikely to happen, log here the + -- actual group data to help debugging + hPutStrLn stderr $ "Internal failure, missing group idx" + exitWith $ ExitFailure 1 + Just cdata -> return (Group.name grp, cdata) + + unless oneline $ printf "Group size %d nodes, %d instances\n" (Container.size nl) (Container.size il) - when (length csf > 0 && not oneline && verbose > 0) $ do - printf "Note: Stripping common suffix of '%s' from names\n" csf + putStrLn $ "Selected node group: " ++ gname + + when (length csf > 0 && not oneline && verbose > 1) $ + printf "Note: Stripping common suffix of '%s' from names\n" csf let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il unless (oneline || verbose == 0) $ printf "Initial check done: %d bad nodes, %d bad instances.\n" (length bad_nodes) (length bad_instances) - when (length bad_nodes > 0) $ do + when (length bad_nodes > 0) $ putStrLn "Cluster is not N+1 happy, continuing but no guarantee \ \that the cluster will end N+1 happy." - when (optShowNodes opts) $ + when (optShowInsts opts) $ do + putStrLn "" + putStrLn "Initial instance map:" + putStrLn $ Cluster.printInsts nl il + + when (isJust shownodes) $ do putStrLn "Initial cluster status:" - putStrLn $ Cluster.printNodes ktn nl + putStrLn $ Cluster.printNodes nl (fromJust shownodes) let ini_cv = Cluster.compCV nl ini_tbl = Cluster.Table nl il ini_cv [] @@ -246,36 +341,39 @@ main = do ini_cv min_cv) exitWith ExitSuccess - unless oneline (if verbose > 1 then + unless oneline (if verbose > 2 then printf "Initial coefficients: overall %.8f, %s\n" ini_cv (Cluster.printStats nl) else printf "Initial score: %.8f\n" ini_cv) unless oneline $ putStrLn "Trying to minimize the CV..." - let mlen_fn = maximum . (map length) . snd . unzip - imlen = mlen_fn kti - nmlen = mlen_fn ktn + let imlen = maximum . map (length . Instance.alias) $ Container.elems il + nmlen = maximum . map (length . Node.alias) $ Container.elems nl (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts) - ktn kti nmlen imlen [] oneline min_cv - let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl + (optDiskMoves opts) + nmlen imlen [] oneline min_cv + (optMinGainLim opts) (optMinGain opts) + (optEvacMode opts) + let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl ord_plc = reverse fin_plc sol_msg = if null fin_plc then printf "No solution found\n" - else (if verbose > 1 - then printf "Final coefficients: overall %.8f, %s\n" - fin_cv (Cluster.printStats fin_nl) - else printf "Cluster score improved from %.8f to %.8f\n" - ini_cv fin_cv - ) + else if verbose > 2 + then printf "Final coefficients: overall %.8f, %s\n" + fin_cv (Cluster.printStats fin_nl) + else printf "Cluster score improved from %.8f to %.8f\n" + ini_cv fin_cv + ::String unless oneline $ putStr sol_msg unless (oneline || verbose == 0) $ printf "Solution length=%d\n" (length ord_plc) - let cmd_data = Cluster.formatCmds . reverse $ cmd_strs + let cmd_jobs = Cluster.splitJobs cmd_strs + cmd_data = Cluster.formatCmds cmd_jobs when (isJust $ optShowCmds opts) $ do @@ -284,22 +382,43 @@ main = do (if out_path == "-" then printf "Commands to run to reach the above solution:\n%s" (unlines . map (" " ++) . - filter (/= "check") . + filter (/= " check") . lines $ cmd_data) else do - writeFile out_path (CLI.shTemplate ++ cmd_data) + writeFile out_path (shTemplate ++ cmd_data) printf "The commands have been written to file '%s'\n" out_path) - when (optShowNodes opts) $ + when (isJust $ optSaveCluster opts) $ do - let (orig_mem, orig_disk) = Cluster.totalResources nl - (final_mem, final_disk) = Cluster.totalResources fin_nl + let out_path = fromJust $ optSaveCluster opts + adata = serializeCluster gl fin_nl fin_il ctags + writeFile out_path adata + printf "The cluster state has been written to file '%s'\n" out_path + + when (optShowInsts opts) $ do + putStrLn "" + putStrLn "Final instance map:" + putStr $ Cluster.printInsts fin_nl fin_il + + when (isJust shownodes) $ + do + let ini_cs = Cluster.totalResources nl + fin_cs = Cluster.totalResources fin_nl putStrLn "" putStrLn "Final cluster status:" - putStrLn $ Cluster.printNodes ktn fin_nl - when (verbose > 2) $ + putStrLn $ Cluster.printNodes fin_nl (fromJust shownodes) + when (verbose > 3) $ do - printf "Original: mem=%d disk=%d\n" orig_mem orig_disk - printf "Final: mem=%d disk=%d\n" final_mem final_disk + printf "Original: mem=%d disk=%d\n" + (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO () + printf "Final: mem=%d disk=%d\n" + (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs) when oneline $ putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv + + when (optExecJobs opts && not (null ord_plc)) + (case optLuxi opts of + Nothing -> do + hPutStrLn stderr "Execution of commands possible only on LUXI" + exitWith $ ExitFailure 1 + Just master -> runJobSet master fin_nl il cmd_jobs)