X-Git-Url: https://code.grnet.gr/git/ganeti-local/blobdiff_plain/44763b519ffda6e7ddb35b9d7c0b823fd7f9ccec..1a3cc8ad2d275fa59c4f0fffb372be0d156a7889:/hspace.hs?ds=sidebyside diff --git a/hspace.hs b/hspace.hs index a6d4581..8410f46 100644 --- a/hspace.hs +++ b/hspace.hs @@ -4,7 +4,7 @@ {- -Copyright (C) 2009 Google Inc. +Copyright (C) 2009, 2010 Google Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,288 +25,331 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA module Main (main) where +import Data.Char (toUpper, isAlphaNum) import Data.List import Data.Function +import Data.Maybe (isJust, fromJust) +import Data.Ord (comparing) import Monad -import System +import System (exitWith, ExitCode(..)) +import System.FilePath import System.IO -import System.Console.GetOpt import qualified System -import Text.Printf (printf) +import Text.Printf (printf, hPrintf) import qualified Ganeti.HTools.Container as Container import qualified Ganeti.HTools.Cluster as Cluster import qualified Ganeti.HTools.Node as Node import qualified Ganeti.HTools.Instance as Instance -import qualified Ganeti.HTools.CLI as CLI import Ganeti.HTools.Utils import Ganeti.HTools.Types - --- | Command line options structure. -data Options = Options - { optShowNodes :: Bool -- ^ Whether to show node status - , optNodef :: FilePath -- ^ Path to the nodes file - , optNodeSet :: Bool -- ^ The nodes have been set by options - , optInstf :: FilePath -- ^ Path to the instances file - , optInstSet :: Bool -- ^ The insts have been set by options - , optMaster :: String -- ^ Collect data from RAPI - , optVerbose :: Int -- ^ Verbosity level - , optOffline :: [String] -- ^ Names of offline nodes - , optIMem :: Int -- ^ Instance memory - , optIDsk :: Int -- ^ Instance disk - , optIVCPUs :: Int -- ^ Instance VCPUs - , optINodes :: Int -- ^ Nodes required for an instance - , optMcpu :: Double -- ^ Max cpu ratio for nodes - , optMdsk :: Double -- ^ Max disk usage ratio for nodes - , optShowVer :: Bool -- ^ Just show the program version - , optShowHelp :: Bool -- ^ Just show the help - } deriving Show - -instance CLI.CLIOptions Options where - showVersion = optShowVer - showHelp = optShowHelp - -instance CLI.EToolOptions Options where - nodeFile = optNodef - nodeSet = optNodeSet - instFile = optInstf - instSet = optInstSet - masterName = optMaster - silent a = (optVerbose a) == 0 - --- | Default values for the command line options. -defaultOptions :: Options -defaultOptions = Options - { optShowNodes = False - , optNodef = "nodes" - , optNodeSet = False - , optInstf = "instances" - , optInstSet = False - , optMaster = "" - , optVerbose = 1 - , optOffline = [] - , optIMem = 4096 - , optIDsk = 102400 - , optIVCPUs = 1 - , optINodes = 2 - , optMcpu = -1 - , optMdsk = -1 - , optShowVer = False - , optShowHelp = False - } +import Ganeti.HTools.CLI +import Ganeti.HTools.ExtLoader +import Ganeti.HTools.Text (serializeCluster) -- | Options list and functions -options :: [OptDescr (Options -> Options)] +options :: [OptType] options = - [ Option ['p'] ["print-nodes"] - (NoArg (\ opts -> opts { optShowNodes = True })) - "print the final node list" - , Option ['n'] ["nodes"] - (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE") - "the node list FILE" - , Option ['i'] ["instances"] - (ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE") - "the instance list FILE" - , Option ['m'] ["master"] - (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS") - "collect data via RAPI at the given ADDRESS" - , Option ['v'] ["verbose"] - (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 })) - "increase the verbosity level" - , Option ['q'] ["quiet"] - (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) - 1 })) - "decrease the verbosity level" - , Option ['O'] ["offline"] - (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE") - "set node as offline" - , Option [] ["memory"] - (ReqArg (\ m opts -> opts { optIMem = read m }) "MEMORY") - "memory size for instances" - , Option [] ["disk"] - (ReqArg (\ d opts -> opts { optIDsk = read d }) "DISK") - "disk size for instances" - , Option [] ["vcpus"] - (ReqArg (\ p opts -> opts { optIVCPUs = read p }) "NUM") - "number of virtual cpus for instances" - , Option [] ["req-nodes"] - (ReqArg (\ n opts -> opts { optINodes = read n }) "NODES") - "number of nodes for the new instances (1=plain, 2=mirrored)" - , Option [] ["max-cpu"] - (ReqArg (\ n opts -> opts { optMcpu = read n }) "RATIO") - "maximum virtual-to-physical cpu ratio for nodes" - , Option [] ["min-disk"] - (ReqArg (\ n opts -> opts { optMdsk = read n }) "RATIO") - "minimum free disk space for nodes (between 0 and 1)" - , Option ['V'] ["version"] - (NoArg (\ opts -> opts { optShowVer = True})) - "show the version of the program" - , Option ['h'] ["help"] - (NoArg (\ opts -> opts { optShowHelp = True})) - "show help" + [ oPrintNodes + , oDataFile + , oNodeSim + , oRapiMaster + , oLuxiSocket + , oVerbose + , oQuiet + , oOfflineNode + , oIMem + , oIDisk + , oIVcpus + , oINodes + , oMaxCpu + , oMinDisk + , oTieredSpec + , oSaveCluster + , oShowVer + , oShowHelp + ] + +-- | The allocation phase we're in (initial, after tiered allocs, or +-- after regular allocation). +data Phase = PInitial + | PFinal + | PTiered + +statsData :: [(String, Cluster.CStats -> String)] +statsData = [ ("SCORE", printf "%.8f" . Cluster.csScore) + , ("INST_CNT", printf "%d" . Cluster.csNinst) + , ("MEM_FREE", printf "%d" . Cluster.csFmem) + , ("MEM_AVAIL", printf "%d" . Cluster.csAmem) + , ("MEM_RESVD", + \cs -> printf "%d" (Cluster.csFmem cs - Cluster.csAmem cs)) + , ("MEM_INST", printf "%d" . Cluster.csImem) + , ("MEM_OVERHEAD", + \cs -> printf "%d" (Cluster.csXmem cs + Cluster.csNmem cs)) + , ("MEM_EFF", + \cs -> printf "%.8f" (fromIntegral (Cluster.csImem cs) / + Cluster.csTmem cs)) + , ("DSK_FREE", printf "%d" . Cluster.csFdsk) + , ("DSK_AVAIL", printf "%d". Cluster.csAdsk) + , ("DSK_RESVD", + \cs -> printf "%d" (Cluster.csFdsk cs - Cluster.csAdsk cs)) + , ("DSK_INST", printf "%d" . Cluster.csIdsk) + , ("DSK_EFF", + \cs -> printf "%.8f" (fromIntegral (Cluster.csIdsk cs) / + Cluster.csTdsk cs)) + , ("CPU_INST", printf "%d" . Cluster.csIcpu) + , ("CPU_EFF", + \cs -> printf "%.8f" (fromIntegral (Cluster.csIcpu cs) / + Cluster.csTcpu cs)) + , ("MNODE_MEM_AVAIL", printf "%d" . Cluster.csMmem) + , ("MNODE_DSK_AVAIL", printf "%d" . Cluster.csMdsk) + ] + +specData :: [(String, RSpec -> String)] +specData = [ ("MEM", printf "%d" . rspecMem) + , ("DSK", printf "%d" . rspecDsk) + , ("CPU", printf "%d" . rspecCpu) + ] + +clusterData :: [(String, Cluster.CStats -> String)] +clusterData = [ ("MEM", printf "%.0f" . Cluster.csTmem) + , ("DSK", printf "%.0f" . Cluster.csTdsk) + , ("CPU", printf "%.0f" . Cluster.csTcpu) + , ("VCPU", printf "%d" . Cluster.csVcpu) + ] + +-- | Function to print stats for a given phase +printStats :: Phase -> Cluster.CStats -> [(String, String)] +printStats ph cs = + map (\(s, fn) -> (printf "%s_%s" kind s, fn cs)) statsData + where kind = case ph of + PInitial -> "INI" + PFinal -> "FIN" + PTiered -> "TRL" + +-- | Print final stats and related metrics +printResults :: Node.List -> Int -> Int -> [(FailMode, Int)] -> IO () +printResults fin_nl num_instances allocs sreason = do + let fin_stats = Cluster.totalResources fin_nl + fin_instances = num_instances + allocs + + when (num_instances + allocs /= Cluster.csNinst fin_stats) $ + do + hPrintf stderr "ERROR: internal inconsistency, allocated (%d)\ + \ != counted (%d)\n" (num_instances + allocs) + (Cluster.csNinst fin_stats) :: IO () + exitWith $ ExitFailure 1 + + printKeys $ printStats PFinal fin_stats + printKeys [ ("ALLOC_USAGE", printf "%.8f" + ((fromIntegral num_instances::Double) / + fromIntegral fin_instances)) + , ("ALLOC_INSTANCES", printf "%d" allocs) + , ("ALLOC_FAIL_REASON", map toUpper . show . fst $ head sreason) + ] + printKeys $ map (\(x, y) -> (printf "ALLOC_%s_CNT" (show x), + printf "%d" y)) sreason + -- this should be the final entry + printKeys [("OK", "1")] + +formatRSpec :: Double -> String -> RSpec -> [(String, String)] +formatRSpec m_cpu s r = + [ ("KM_" ++ s ++ "_CPU", show $ rspecCpu r) + , ("KM_" ++ s ++ "_NPU", show $ fromIntegral (rspecCpu r) / m_cpu) + , ("KM_" ++ s ++ "_MEM", show $ rspecMem r) + , ("KM_" ++ s ++ "_DSK", show $ rspecDsk r) ] -concatFailure :: [(FailMode, Int)] -> FailMode -> [(FailMode, Int)] -concatFailure flst reason = - let cval = lookup reason flst - in case cval of - Nothing -> (reason, 1):flst - Just val -> let plain = filter (\(x, _) -> x /= reason) flst - in (reason, val+1):plain - -filterFails :: Cluster.AllocSolution - -> ([(FailMode, Int)], - [(Node.List, Instance.Instance, [Node.Node])]) -filterFails sols = - let (alst, blst) = unzip . map (\ (onl, i, nn) -> - case onl of - OpFail reason -> ([reason], []) - OpGood gnl -> ([], [(gnl, i, nn)]) - ) $ sols - aval = concat alst - bval = concat blst - in (foldl' concatFailure [] aval, bval) - -processResults :: [(Node.List, Instance.Instance, [Node.Node])] - -> (Node.List, Instance.Instance, [Node.Node]) -processResults sols = - let sols' = map (\e@(nl', _, _) -> (Cluster.compCV nl', e)) sols - sols'' = sortBy (compare `on` fst) sols' - in snd $ head sols'' - -iterateDepth :: Node.List - -> Instance.List - -> Instance.Instance - -> Int - -> [Instance.Instance] - -> ([(FailMode, Int)], Node.List, [Instance.Instance]) -iterateDepth nl il newinst nreq ixes = - let depth = length ixes - newname = (printf "new-%d" depth)::String - newidx = (length $ Container.elems il) + depth - newi2 = Instance.setIdx (Instance.setName newinst newname) newidx - sols = (Cluster.tryAlloc nl il newi2 nreq):: - OpResult Cluster.AllocSolution - in case sols of - OpFail _ -> ([], nl, ixes) - OpGood sols' -> - let (errs, sols3) = filterFails sols' - in if null sols3 - then (errs, nl, ixes) - else let (xnl, xi, _) = processResults sols3 - in iterateDepth xnl il newinst nreq (xi:ixes) - -printStats :: String -> Cluster.CStats -> IO () -printStats kind cs = do - printf "%s free RAM: %d\n" kind (Cluster.cs_fmem cs) - printf "%s allocatable RAM: %d\n" kind (Cluster.cs_amem cs) - printf "%s reserved RAM: %d\n" kind ((Cluster.cs_fmem cs) - - (Cluster.cs_amem cs)) - printf "%s free disk: %d\n" kind (Cluster.cs_fdsk cs) - printf "%s allocatable disk: %d\n" kind (Cluster.cs_adsk cs) - printf "%s reserved disk: %d\n" kind ((Cluster.cs_fdsk cs) - - (Cluster.cs_adsk cs)) - printf "%s max node allocatable RAM: %d\n" kind (Cluster.cs_mmem cs) - printf "%s max node allocatable disk: %d\n" kind (Cluster.cs_mdsk cs) +printAllocationStats :: Double -> Node.List -> Node.List -> IO () +printAllocationStats m_cpu ini_nl fin_nl = do + let ini_stats = Cluster.totalResources ini_nl + fin_stats = Cluster.totalResources fin_nl + (rini, ralo, runa) = Cluster.computeAllocationDelta ini_stats fin_stats + printKeys $ formatRSpec m_cpu "USED" rini + printKeys $ formatRSpec m_cpu "POOL"ralo + printKeys $ formatRSpec m_cpu "UNAV" runa + +-- | Ensure a value is quoted if needed +ensureQuoted :: String -> String +ensureQuoted v = if not (all (\c -> (isAlphaNum c || c == '.')) v) + then '\'':v ++ "'" + else v + +-- | Format a list of key\/values as a shell fragment +printKeys :: [(String, String)] -> IO () +printKeys = mapM_ (\(k, v) -> + printf "HTS_%s=%s\n" (map toUpper k) (ensureQuoted v)) + +printInstance :: Node.List -> Instance.Instance -> [String] +printInstance nl i = [ Instance.name i + , Container.nameOf nl $ Instance.pNode i + , let sdx = Instance.sNode i + in if sdx == Node.noSecondary then "" + else Container.nameOf nl sdx + , show (Instance.mem i) + , show (Instance.dsk i) + , show (Instance.vcpus i) + ] -- | Main function. main :: IO () main = do cmd_args <- System.getArgs - (opts, args) <- CLI.parseOpts cmd_args "hspace" options defaultOptions + (opts, args) <- parseOpts cmd_args "hspace" options unless (null args) $ do hPutStrLn stderr "Error: this program doesn't take any arguments." exitWith $ ExitFailure 1 let verbose = optVerbose opts + ispec = optISpec opts + shownodes = optShowNodes opts + + (gl, fixed_nl, il, ctags) <- loadExternalData opts + + printKeys $ map (\(a, fn) -> ("SPEC_" ++ a, fn ispec)) specData + printKeys [ ("SPEC_RQN", printf "%d" (optINodes opts)) ] - (fixed_nl, il, csf) <- CLI.loadExternalData opts let num_instances = length $ Container.elems il let offline_names = optOffline opts all_nodes = Container.elems fixed_nl all_names = map Node.name all_nodes - offline_wrong = filter (\n -> not $ elem n all_names) offline_names + offline_wrong = filter (`notElem` all_names) offline_names offline_indices = map Node.idx $ - filter (\n -> elem (Node.name n) offline_names) + filter (\n -> + Node.name n `elem` offline_names || + Node.alias n `elem` offline_names) all_nodes req_nodes = optINodes opts m_cpu = optMcpu opts m_dsk = optMdsk opts when (length offline_wrong > 0) $ do - printf "Error: Wrong node name(s) set as offline: %s\n" - (commaJoin offline_wrong) + hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n" + (commaJoin offline_wrong) :: IO () exitWith $ ExitFailure 1 when (req_nodes /= 1 && req_nodes /= 2) $ do - printf "Error: Invalid required nodes (%d)\n" req_nodes + hPrintf stderr "Error: Invalid required nodes (%d)\n" + req_nodes :: IO () exitWith $ ExitFailure 1 - let nm = Container.map (\n -> if elem (Node.idx n) offline_indices + let nm = Container.map (\n -> if Node.idx n `elem` offline_indices then Node.setOffline n True else n) fixed_nl nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu) nm + csf = commonSuffix fixed_nl il - when (length csf > 0 && verbose > 1) $ do - printf "Note: Stripping common suffix of '%s' from names\n" csf + when (length csf > 0 && verbose > 1) $ + hPrintf stderr "Note: Stripping common suffix of '%s' from names\n" csf - let bad_nodes = fst $ Cluster.computeBadItems nl il - when (length bad_nodes > 0) $ do - putStrLn "Error: Cluster not N+1, no space to allocate." - exitWith $ ExitFailure 1 - - when (optShowNodes opts) $ + when (isJust shownodes) $ do - putStrLn "Initial cluster status:" - putStrLn $ Cluster.printNodes nl + hPutStrLn stderr "Initial cluster status:" + hPutStrLn stderr $ Cluster.printNodes nl (fromJust shownodes) let ini_cv = Cluster.compCV nl ini_stats = Cluster.totalResources nl - (if verbose > 2 then - printf "Initial coefficients: overall %.8f, %s\n" - ini_cv (Cluster.printStats nl) - else - printf "Initial score: %.8f\n" ini_cv) - printf "Initial instances: %d\n" num_instances - printStats "Initial" ini_stats + when (verbose > 2) $ + hPrintf stderr "Initial coefficients: overall %.8f, %s\n" + ini_cv (Cluster.printStats nl) - let nmlen = Container.maxNameLen nl - newinst = Instance.create "new" (optIMem opts) (optIDsk opts) - (optIVCPUs opts) "ADMIN_down" (-1) (-1) + printKeys $ map (\(a, fn) -> ("CLUSTER_" ++ a, fn ini_stats)) clusterData + printKeys [("CLUSTER_NODES", printf "%d" (length all_nodes))] + printKeys $ printStats PInitial ini_stats - let (ereason, fin_nl, ixes) = iterateDepth nl il newinst req_nodes [] - allocs = length ixes - fin_instances = num_instances + allocs + let bad_nodes = fst $ Cluster.computeBadItems nl il + stop_allocation = length bad_nodes > 0 + result_noalloc = ([(FailN1, 1)]::FailStats, nl, il, []) + + -- utility functions + let iofspec spx = Instance.create "new" (rspecMem spx) (rspecDsk spx) + (rspecCpu spx) "running" [] (-1) (-1) + exitifbad val = (case val of + Bad s -> do + hPrintf stderr "Failure: %s\n" s :: IO () + exitWith $ ExitFailure 1 + Ok x -> return x) + + + let reqinst = iofspec ispec + + -- Run the tiered allocation, if enabled + + (case optTieredSpec opts of + Nothing -> return () + Just tspec -> do + (_, trl_nl, trl_il, trl_ixes) <- + if stop_allocation + then return result_noalloc + else exitifbad (Cluster.tieredAlloc nl il (iofspec tspec) + req_nodes []) + let fin_trl_ixes = reverse trl_ixes + ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes + spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs)) + ix_byspec::[(RSpec, Int)] + spec_map' = map (\(spec, cnt) -> + printf "%d,%d,%d=%d" (rspecMem spec) + (rspecDsk spec) (rspecCpu spec) cnt) + spec_map::[String] + + when (verbose > 1) $ do + hPutStrLn stderr "Tiered allocation map" + hPutStr stderr . unlines . map ((:) ' ' . intercalate " ") $ + formatTable (map (printInstance trl_nl) fin_trl_ixes) + [False, False, False, True, True, True] + + when (isJust shownodes) $ do + hPutStrLn stderr "" + hPutStrLn stderr "Tiered allocation status:" + hPutStrLn stderr $ Cluster.printNodes trl_nl (fromJust shownodes) + + when (isJust $ optSaveCluster opts) $ + do + let out_path = (fromJust $ optSaveCluster opts) <.> "tiered" + adata = serializeCluster gl trl_nl trl_il ctags + writeFile out_path adata + hPrintf stderr "The cluster state after tiered allocation\ + \ has been written to file '%s'\n" + out_path + printKeys $ printStats PTiered (Cluster.totalResources trl_nl) + printKeys [("TSPEC", intercalate " " spec_map')] + printAllocationStats m_cpu nl trl_nl) + + -- Run the standard (avg-mode) allocation + + (ereason, fin_nl, fin_il, ixes) <- + if stop_allocation + then return result_noalloc + else exitifbad (Cluster.iterateAlloc nl il reqinst req_nodes []) + + let allocs = length ixes fin_ixes = reverse ixes - ix_namelen = maximum . map (length . Instance.name) $ fin_ixes - fin_stats = Cluster.totalResources fin_nl - sreason = reverse $ sortBy (compare `on` snd) ereason - - printf "Final score: %.8f\n" (Cluster.compCV fin_nl) - printf "Final instances: %d\n" (num_instances + allocs) - printStats "Final" fin_stats - printf "Usage: %.5f\n" (((fromIntegral num_instances)::Double) / - (fromIntegral fin_instances)) - printf "Allocations: %d\n" allocs - putStr (unlines . map (\(x, y) -> printf "%s: %d" (show x) y) $ sreason) - printf "Most likely fail reason: %s\n" (show . fst . head $ sreason) + sreason = reverse $ sortBy (comparing snd) ereason when (verbose > 1) $ do - putStr . unlines . map (\i -> printf "Inst: %*s %-*s %-*s" - ix_namelen (Instance.name i) - nmlen (Container.nameOf fin_nl $ Instance.pnode i) - nmlen (let sdx = Instance.snode i - in if sdx == Node.noSecondary then "" - else Container.nameOf fin_nl sdx)) - $ fin_ixes - - when (optShowNodes opts) $ + hPutStrLn stderr "Instance map" + hPutStr stderr . unlines . map ((:) ' ' . intercalate " ") $ + formatTable (map (printInstance fin_nl) fin_ixes) + [False, False, False, True, True, True] + when (isJust shownodes) $ + do + hPutStrLn stderr "" + hPutStrLn stderr "Final cluster status:" + hPutStrLn stderr $ Cluster.printNodes fin_nl (fromJust shownodes) + + when (isJust $ optSaveCluster opts) $ do - putStrLn "" - putStrLn "Final cluster status:" - putStrLn $ Cluster.printNodes fin_nl + let out_path = (fromJust $ optSaveCluster opts) <.> "alloc" + adata = serializeCluster gl fin_nl fin_il ctags + writeFile out_path adata + hPrintf stderr "The cluster state after standard allocation\ + \ has been written to file '%s'\n" + out_path + + printResults fin_nl num_instances allocs sreason