module Main (main) where
+import Control.Concurrent (threadDelay)
+import Control.Exception (bracket)
import Data.List
-import Data.Function
-import Data.Maybe (isJust, fromJust, fromMaybe)
+import Data.Maybe (isJust, fromJust)
import Monad
-import System
+import System (exitWith, ExitCode(..))
import System.IO
-import System.Console.GetOpt
import qualified System
-import Text.Printf (printf)
+import Text.Printf (printf, hPrintf)
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Cluster as Cluster
import qualified Ganeti.HTools.Node as Node
-import qualified Ganeti.HTools.CLI as CLI
+import qualified Ganeti.HTools.Instance as Instance
+import Ganeti.HTools.CLI
+import Ganeti.HTools.ExtLoader
import Ganeti.HTools.Utils
+import Ganeti.HTools.Types
--- | Command line options structure.
-data Options = Options
- { optShowNodes :: Bool -- ^ Whether to show node status
- , optShowCmds :: Maybe FilePath -- ^ Whether to show the command list
- , optOneline :: Bool -- ^ Switch output to a single line
- , optNodef :: FilePath -- ^ Path to the nodes file
- , optNodeSet :: Bool -- ^ The nodes have been set by options
- , optInstf :: FilePath -- ^ Path to the instances file
- , optInstSet :: Bool -- ^ The insts have been set by options
- , optMaxLength :: Int -- ^ Stop after this many steps
- , optMaster :: String -- ^ Collect data from RAPI
- , optVerbose :: Int -- ^ Verbosity level
- , optOffline :: [String] -- ^ Names of offline nodes
- , optMinScore :: Cluster.Score -- ^ The minimum score we aim for
- , optMcpu :: Double -- ^ Max cpu ratio for nodes
- , optMdsk :: Double -- ^ Max disk usage ratio for nodes
- , optShowVer :: Bool -- ^ Just show the program version
- , optShowHelp :: Bool -- ^ Just show the help
- } deriving Show
-
-instance CLI.CLIOptions Options where
- showVersion = optShowVer
- showHelp = optShowHelp
-
-instance CLI.EToolOptions Options where
- nodeFile = optNodef
- nodeSet = optNodeSet
- instFile = optInstf
- instSet = optInstSet
- masterName = optMaster
- silent a = (optVerbose a) == 0
-
--- | Default values for the command line options.
-defaultOptions :: Options
-defaultOptions = Options
- { optShowNodes = False
- , optShowCmds = Nothing
- , optOneline = False
- , optNodef = "nodes"
- , optNodeSet = False
- , optInstf = "instances"
- , optInstSet = False
- , optMaxLength = -1
- , optMaster = ""
- , optVerbose = 1
- , optOffline = []
- , optMinScore = 1e-9
- , optMcpu = -1
- , optMdsk = -1
- , optShowVer = False
- , optShowHelp = False
- }
+import qualified Ganeti.Luxi as L
+import Ganeti.Jobs
-- | Options list and functions
-options :: [OptDescr (Options -> Options)]
+options :: [OptType]
options =
- [ Option ['p'] ["print-nodes"]
- (NoArg (\ opts -> opts { optShowNodes = True }))
- "print the final node list"
- , Option ['C'] ["print-commands"]
- (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-")
- "FILE")
- "print the ganeti command list for reaching the solution,\
- \if an argument is passed then write the commands to a file named\
- \ as such"
- , Option ['o'] ["oneline"]
- (NoArg (\ opts -> opts { optOneline = True }))
- "print the ganeti command list for reaching the solution"
- , Option ['n'] ["nodes"]
- (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
- "the node list FILE"
- , Option ['i'] ["instances"]
- (ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE")
- "the instance list FILE"
- , Option ['m'] ["master"]
- (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
- "collect data via RAPI at the given ADDRESS"
- , Option ['l'] ["max-length"]
- (ReqArg (\ i opts -> opts { optMaxLength = (read i)::Int }) "N")
- "cap the solution at this many moves (useful for very unbalanced \
- \clusters)"
- , Option ['v'] ["verbose"]
- (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
- "increase the verbosity level"
- , Option ['q'] ["quiet"]
- (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) - 1 }))
- "decrease the verbosity level"
- , Option ['O'] ["offline"]
- (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
- " set node as offline"
- , Option ['e'] ["min-score"]
- (ReqArg (\ e opts -> opts { optMinScore = read e }) "EPSILON")
- " mininum score to aim for"
- , Option [] ["max-cpu"]
- (ReqArg (\ n opts -> opts { optMcpu = read n }) "RATIO")
- "maximum virtual-to-physical cpu ratio for nodes"
- , Option [] ["min-disk"]
- (ReqArg (\ n opts -> opts { optMdsk = read n }) "RATIO")
- "minimum free disk space for nodes (between 0 and 1)"
- , Option ['V'] ["version"]
- (NoArg (\ opts -> opts { optShowVer = True}))
- "show the version of the program"
- , Option ['h'] ["help"]
- (NoArg (\ opts -> opts { optShowHelp = True}))
- "show help"
+ [ oPrintNodes
+ , oPrintInsts
+ , oPrintCommands
+ , oOneline
+ , oDataFile
+ , oEvacMode
+ , oRapiMaster
+ , oLuxiSocket
+ , oExecJobs
+ , oMaxSolLength
+ , oVerbose
+ , oQuiet
+ , oOfflineNode
+ , oMinScore
+ , oMaxCpu
+ , oMinDisk
+ , oDiskMoves
+ , oDynuFile
+ , oExTags
+ , oExInst
+ , oShowVer
+ , oShowHelp
]
{- | Start computing the solution at the given depth and recurse until
-}
iterateDepth :: Cluster.Table -- ^ The starting table
-> Int -- ^ Remaining length
+ -> Bool -- ^ Allow disk moves
-> Int -- ^ Max node name len
-> Int -- ^ Max instance name len
- -> [[String]] -- ^ Current command list
- -> Bool -- ^ Wheter to be silent
- -> Cluster.Score -- ^ Score at which to stop
- -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
- -- commands
-iterateDepth ini_tbl max_rounds nmlen imlen
- cmd_strs oneline min_score =
- let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
- all_inst = Container.elems ini_il
- node_idx = map Node.idx . filter (not . Node.offline) $
- Container.elems ini_nl
- fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
- (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
- ini_plc_len = length ini_plc
- fin_plc_len = length fin_plc
- allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
+ -> [MoveJob] -- ^ Current command list
+ -> Bool -- ^ Whether to be silent
+ -> Score -- ^ Score at which to stop
+ -> Bool -- ^ Enable evacuation mode
+ -> IO (Cluster.Table, [MoveJob]) -- ^ The resulting table
+ -- and commands
+iterateDepth ini_tbl max_rounds disk_moves nmlen imlen
+ cmd_strs oneline min_score evac_mode =
+ let Cluster.Table ini_nl ini_il _ _ = ini_tbl
+ allowed_next = Cluster.doNextBalance ini_tbl max_rounds min_score
+ m_fin_tbl = if allowed_next
+ then Cluster.tryBalance ini_tbl disk_moves evac_mode
+ else Nothing
in
- do
- let
- (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
- nmlen imlen (head fin_plc) fin_plc_len
- upd_cmd_strs = cmds:cmd_strs
- unless (oneline || fin_plc_len == ini_plc_len) $ do
- putStrLn sol_line
- hFlush stdout
- (if fin_cv < ini_cv then -- this round made success, try deeper
- if allowed_next && fin_cv > min_score
- then iterateDepth fin_tbl max_rounds
- nmlen imlen upd_cmd_strs oneline min_score
- -- don't go deeper, but return the better solution
- else return (fin_tbl, upd_cmd_strs)
- else
- return (ini_tbl, cmd_strs))
+ case m_fin_tbl of
+ Just fin_tbl ->
+ do
+ let
+ (Cluster.Table _ _ _ fin_plc) = fin_tbl
+ fin_plc_len = length fin_plc
+ cur_plc@(idx, _, _, move, _) = head fin_plc
+ (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
+ nmlen imlen cur_plc fin_plc_len
+ afn = Cluster.involvedNodes ini_il cur_plc
+ upd_cmd_strs = (afn, idx, move, cmds):cmd_strs
+ unless oneline $ do
+ putStrLn sol_line
+ hFlush stdout
+ iterateDepth fin_tbl max_rounds disk_moves
+ nmlen imlen upd_cmd_strs oneline min_score
+ evac_mode
+ Nothing -> return (ini_tbl, cmd_strs)
-- | Formats the solution for the oneline display
formatOneline :: Double -> Int -> Double -> String
formatOneline ini_cv plc_len fin_cv =
printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
- (if fin_cv == 0 then 1 else (ini_cv / fin_cv))
+ (if fin_cv == 0 then 1 else ini_cv / fin_cv)
+
+-- | Polls a set of jobs at a fixed interval until all are finished
+-- one way or another
+waitForJobs :: L.Client -> [String] -> IO (Result [JobStatus])
+waitForJobs client jids = do
+ sts <- L.queryJobsStatus client jids
+ case sts of
+ Bad x -> return $ Bad x
+ Ok s -> if any (<= JOB_STATUS_RUNNING) s
+ then do
+ -- TODO: replace hardcoded value with a better thing
+ threadDelay (1000000 * 15)
+ waitForJobs client jids
+ else return $ Ok s
+
+-- | Check that a set of job statuses is all success
+checkJobsStatus :: [JobStatus] -> Bool
+checkJobsStatus = all (== JOB_STATUS_SUCCESS)
+
+-- | Execute an entire jobset
+execJobSet :: String -> Node.List
+ -> Instance.List -> [JobSet] -> IO ()
+execJobSet _ _ _ [] = return ()
+execJobSet master nl il (js:jss) = do
+ -- map from jobset (htools list of positions) to [[opcodes]]
+ let jobs = map (\(_, idx, move, _) ->
+ Cluster.iMoveToJob nl il idx move) js
+ let descr = map (\(_, idx, _, _) -> Container.nameOf il idx) js
+ putStrLn $ "Executing jobset for instances " ++ commaJoin descr
+ jrs <- bracket (L.getClient master) L.closeClient
+ (\client -> do
+ jids <- L.submitManyJobs client jobs
+ case jids of
+ Bad x -> return $ Bad x
+ Ok x -> do
+ putStrLn $ "Got job IDs " ++ commaJoin x
+ waitForJobs client x
+ )
+ (case jrs of
+ Bad x -> do
+ hPutStrLn stderr $ "Cannot compute job status, aborting: " ++ show x
+ return ()
+ Ok x -> if checkJobsStatus x
+ then execJobSet master nl il jss
+ else do
+ hPutStrLn stderr $ "Not all jobs completed successfully: " ++
+ show x
+ hPutStrLn stderr "Aborting.")
-- | Main function.
main :: IO ()
main = do
cmd_args <- System.getArgs
- (opts, args) <- CLI.parseOpts cmd_args "hbal" options defaultOptions
+ (opts, args) <- parseOpts cmd_args "hbal" options
unless (null args) $ do
hPutStrLn stderr "Error: this program doesn't take any arguments."
let oneline = optOneline opts
verbose = optVerbose opts
+ shownodes = optShowNodes opts
- (fixed_nl, il, csf) <- CLI.loadExternalData opts
+ (fixed_nl, il, ctags) <- loadExternalData opts
let offline_names = optOffline opts
all_nodes = Container.elems fixed_nl
- all_names = map Node.name all_nodes
- offline_wrong = filter (\n -> not $ elem n all_names) offline_names
+ all_names = concatMap allNames all_nodes
+ offline_wrong = filter (`notElem` all_names) offline_names
offline_indices = map Node.idx $
- filter (\n -> elem (Node.name n) offline_names)
+ filter (\n ->
+ Node.name n `elem` offline_names ||
+ Node.alias n `elem` offline_names)
all_nodes
m_cpu = optMcpu opts
m_dsk = optMdsk opts
+ csf = commonSuffix fixed_nl il
when (length offline_wrong > 0) $ do
- printf "Wrong node name(s) set as offline: %s\n"
- (commaJoin offline_wrong)
+ hPrintf stderr "Wrong node name(s) set as offline: %s\n"
+ (commaJoin offline_wrong) :: IO ()
exitWith $ ExitFailure 1
- let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
+ let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
then Node.setOffline n True
else n) fixed_nl
nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
nm
+ when (not oneline && verbose > 1) $
+ putStrLn $ "Loaded cluster tags: " ++ intercalate "," ctags
+
when (Container.size il == 0) $ do
(if oneline then putStrLn $ formatOneline 0 0 0
else printf "Cluster is empty, exiting.\n")
(Container.size nl)
(Container.size il)
- when (length csf > 0 && not oneline && verbose > 1) $ do
- printf "Note: Stripping common suffix of '%s' from names\n" csf
+ when (length csf > 0 && not oneline && verbose > 1) $
+ printf "Note: Stripping common suffix of '%s' from names\n" csf
let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
unless (oneline || verbose == 0) $ printf
"Initial check done: %d bad nodes, %d bad instances.\n"
(length bad_nodes) (length bad_instances)
- when (length bad_nodes > 0) $ do
+ when (length bad_nodes > 0) $
putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
\that the cluster will end N+1 happy."
- when (optShowNodes opts) $
+ when (optShowInsts opts) $ do
+ putStrLn ""
+ putStrLn "Initial instance map:"
+ putStrLn $ Cluster.printInsts nl il
+
+ when (isJust shownodes) $
do
putStrLn "Initial cluster status:"
- putStrLn $ Cluster.printNodes nl
+ putStrLn $ Cluster.printNodes nl (fromJust shownodes)
let ini_cv = Cluster.compCV nl
ini_tbl = Cluster.Table nl il ini_cv []
printf "Initial score: %.8f\n" ini_cv)
unless oneline $ putStrLn "Trying to minimize the CV..."
- let imlen = Container.maxNameLen il
- nmlen = Container.maxNameLen nl
+ let imlen = maximum . map (length . Instance.alias) $ Container.elems il
+ nmlen = maximum . map (length . Node.alias) $ Container.elems nl
(fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
- nmlen imlen [] oneline min_cv
- let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
+ (optDiskMoves opts)
+ nmlen imlen [] oneline min_cv (optEvacMode opts)
+ let (Cluster.Table fin_nl fin_il fin_cv fin_plc) = fin_tbl
ord_plc = reverse fin_plc
- sol_msg = (if null fin_plc
- then printf "No solution found\n"
- else (if verbose > 2
- then printf "Final coefficients: overall %.8f, %s\n"
- fin_cv (Cluster.printStats fin_nl)
- else printf "Cluster score improved from %.8f to %.8f\n"
- ini_cv fin_cv
- ))::String
+ sol_msg = if null fin_plc
+ then printf "No solution found\n"
+ else if verbose > 2
+ then printf "Final coefficients: overall %.8f, %s\n"
+ fin_cv (Cluster.printStats fin_nl)
+ else printf "Cluster score improved from %.8f to %.8f\n"
+ ini_cv fin_cv
+ ::String
unless oneline $ putStr sol_msg
unless (oneline || verbose == 0) $
printf "Solution length=%d\n" (length ord_plc)
- let cmd_data = Cluster.formatCmds . reverse $ cmd_strs
+ let cmd_jobs = Cluster.splitJobs cmd_strs
+ cmd_data = Cluster.formatCmds cmd_jobs
when (isJust $ optShowCmds opts) $
do
(if out_path == "-" then
printf "Commands to run to reach the above solution:\n%s"
(unlines . map (" " ++) .
- filter (/= "check") .
+ filter (/= " check") .
lines $ cmd_data)
else do
- writeFile out_path (CLI.shTemplate ++ cmd_data)
+ writeFile out_path (shTemplate ++ cmd_data)
printf "The commands have been written to file '%s'\n" out_path)
- when (optShowNodes opts) $
+ when (optExecJobs opts && not (null ord_plc))
+ (case optLuxi opts of
+ Nothing -> do
+ hPutStrLn stderr "Execution of commands possible only on LUXI"
+ exitWith $ ExitFailure 1
+ Just master -> execJobSet master fin_nl il cmd_jobs)
+
+ when (optShowInsts opts) $ do
+ putStrLn ""
+ putStrLn "Final instance map:"
+ putStr $ Cluster.printInsts fin_nl fin_il
+
+ when (isJust shownodes) $
do
- let (orig_mem, orig_disk) = Cluster.totalResources nl
- (final_mem, final_disk) = Cluster.totalResources fin_nl
+ let ini_cs = Cluster.totalResources nl
+ fin_cs = Cluster.totalResources fin_nl
putStrLn ""
putStrLn "Final cluster status:"
- putStrLn $ Cluster.printNodes fin_nl
+ putStrLn $ Cluster.printNodes fin_nl (fromJust shownodes)
when (verbose > 3) $
do
- printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
- printf "Final: mem=%d disk=%d\n" final_mem final_disk
+ printf "Original: mem=%d disk=%d\n"
+ (Cluster.csFmem ini_cs) (Cluster.csFdsk ini_cs) :: IO ()
+ printf "Final: mem=%d disk=%d\n"
+ (Cluster.csFmem fin_cs) (Cluster.csFdsk fin_cs)
when oneline $
putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv