{-
-Copyright (C) 2009 Google Inc.
+Copyright (C) 2009, 2010 Google Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
module Main (main) where
+import Data.Char (toUpper, isAlphaNum)
import Data.List
import Data.Function
+import Data.Maybe (isJust, fromJust)
+import Data.Ord (comparing)
import Monad
-import System
+import System (exitWith, ExitCode(..))
+import System.FilePath
import System.IO
-import System.Console.GetOpt
import qualified System
-import Text.Printf (printf)
+import Text.Printf (printf, hPrintf)
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Cluster as Cluster
import qualified Ganeti.HTools.Node as Node
import qualified Ganeti.HTools.Instance as Instance
-import qualified Ganeti.HTools.CLI as CLI
import Ganeti.HTools.Utils
import Ganeti.HTools.Types
-
--- | Command line options structure.
-data Options = Options
- { optShowNodes :: Bool -- ^ Whether to show node status
- , optNodef :: FilePath -- ^ Path to the nodes file
- , optNodeSet :: Bool -- ^ The nodes have been set by options
- , optInstf :: FilePath -- ^ Path to the instances file
- , optInstSet :: Bool -- ^ The insts have been set by options
- , optMaster :: String -- ^ Collect data from RAPI
- , optVerbose :: Int -- ^ Verbosity level
- , optOffline :: [String] -- ^ Names of offline nodes
- , optIMem :: Int -- ^ Instance memory
- , optIDsk :: Int -- ^ Instance disk
- , optIVCPUs :: Int -- ^ Instance VCPUs
- , optINodes :: Int -- ^ Nodes required for an instance
- , optMcpu :: Double -- ^ Max cpu ratio for nodes
- , optMdsk :: Double -- ^ Max disk usage ratio for nodes
- , optShowVer :: Bool -- ^ Just show the program version
- , optShowHelp :: Bool -- ^ Just show the help
- } deriving Show
-
-instance CLI.CLIOptions Options where
- showVersion = optShowVer
- showHelp = optShowHelp
-
-instance CLI.EToolOptions Options where
- nodeFile = optNodef
- nodeSet = optNodeSet
- instFile = optInstf
- instSet = optInstSet
- masterName = optMaster
- silent a = optVerbose a == 0
-
--- | Default values for the command line options.
-defaultOptions :: Options
-defaultOptions = Options
- { optShowNodes = False
- , optNodef = "nodes"
- , optNodeSet = False
- , optInstf = "instances"
- , optInstSet = False
- , optMaster = ""
- , optVerbose = 1
- , optOffline = []
- , optIMem = 4096
- , optIDsk = 102400
- , optIVCPUs = 1
- , optINodes = 2
- , optMcpu = -1
- , optMdsk = -1
- , optShowVer = False
- , optShowHelp = False
- }
+import Ganeti.HTools.CLI
+import Ganeti.HTools.ExtLoader
+import Ganeti.HTools.Text (serializeCluster)
-- | Options list and functions
-options :: [OptDescr (Options -> Options)]
+options :: [OptType]
options =
- [ Option ['p'] ["print-nodes"]
- (NoArg (\ opts -> opts { optShowNodes = True }))
- "print the final node list"
- , Option ['n'] ["nodes"]
- (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
- "the node list FILE"
- , Option ['i'] ["instances"]
- (ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE")
- "the instance list FILE"
- , Option ['m'] ["master"]
- (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
- "collect data via RAPI at the given ADDRESS"
- , Option ['v'] ["verbose"]
- (NoArg (\ opts -> opts { optVerbose = optVerbose opts + 1 }))
- "increase the verbosity level"
- , Option ['q'] ["quiet"]
- (NoArg (\ opts -> opts { optVerbose = optVerbose opts - 1 }))
- "decrease the verbosity level"
- , Option ['O'] ["offline"]
- (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
- "set node as offline"
- , Option [] ["memory"]
- (ReqArg (\ m opts -> opts { optIMem = read m }) "MEMORY")
- "memory size for instances"
- , Option [] ["disk"]
- (ReqArg (\ d opts -> opts { optIDsk = read d }) "DISK")
- "disk size for instances"
- , Option [] ["vcpus"]
- (ReqArg (\ p opts -> opts { optIVCPUs = read p }) "NUM")
- "number of virtual cpus for instances"
- , Option [] ["req-nodes"]
- (ReqArg (\ n opts -> opts { optINodes = read n }) "NODES")
- "number of nodes for the new instances (1=plain, 2=mirrored)"
- , Option [] ["max-cpu"]
- (ReqArg (\ n opts -> opts { optMcpu = read n }) "RATIO")
- "maximum virtual-to-physical cpu ratio for nodes"
- , Option [] ["min-disk"]
- (ReqArg (\ n opts -> opts { optMdsk = read n }) "RATIO")
- "minimum free disk space for nodes (between 0 and 1)"
- , Option ['V'] ["version"]
- (NoArg (\ opts -> opts { optShowVer = True}))
- "show the version of the program"
- , Option ['h'] ["help"]
- (NoArg (\ opts -> opts { optShowHelp = True}))
- "show help"
+ [ oPrintNodes
+ , oDataFile
+ , oNodeSim
+ , oRapiMaster
+ , oLuxiSocket
+ , oVerbose
+ , oQuiet
+ , oOfflineNode
+ , oIMem
+ , oIDisk
+ , oIVcpus
+ , oINodes
+ , oMaxCpu
+ , oMinDisk
+ , oTieredSpec
+ , oSaveCluster
+ , oShowVer
+ , oShowHelp
]
--- | Build failure stats out of a list of failure reasons
-concatFailure :: [(FailMode, Int)] -> FailMode -> [(FailMode, Int)]
-concatFailure flst reason =
- let cval = lookup reason flst
- in case cval of
- Nothing -> (reason, 1):flst
- Just val -> let plain = filter (\(x, _) -> x /= reason) flst
- in (reason, val+1):plain
-
--- | Build list of failures and placements out of an list of possible
--- | allocations
-filterFails :: Cluster.AllocSolution
- -> ([(FailMode, Int)],
- [(Node.List, Instance.Instance, [Node.Node])])
-filterFails sols =
- let (alst, blst) = unzip . map (\ (onl, i, nn) ->
- case onl of
- OpFail reason -> ([reason], [])
- OpGood gnl -> ([], [(gnl, i, nn)])
- ) $ sols
- aval = concat alst
- bval = concat blst
- in (foldl' concatFailure [] aval, bval)
-
--- | Get the placement with best score out of a list of possible placements
-processResults :: [(Node.List, Instance.Instance, [Node.Node])]
- -> (Node.List, Instance.Instance, [Node.Node])
-processResults sols =
- let sols' = map (\e@(nl', _, _) -> (Cluster.compCV nl', e)) sols
- sols'' = sortBy (compare `on` fst) sols'
- in snd $ head sols''
-
--- | Recursively place instances on the cluster until we're out of space
-iterateDepth :: Node.List
- -> Instance.List
- -> Instance.Instance
- -> Int
- -> [Instance.Instance]
- -> ([(FailMode, Int)], Node.List, [Instance.Instance])
-iterateDepth nl il newinst nreq ixes =
- let depth = length ixes
- newname = printf "new-%d" depth::String
- newidx = length (Container.elems il) + depth
- newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
- sols = Cluster.tryAlloc nl il newi2 nreq::
- OpResult Cluster.AllocSolution
- in case sols of
- OpFail _ -> ([], nl, ixes)
- OpGood sols' ->
- let (errs, sols3) = filterFails sols'
- in if null sols3
- then (errs, nl, ixes)
- else let (xnl, xi, _) = processResults sols3
- in iterateDepth xnl il newinst nreq (xi:ixes)
+-- | The allocation phase we're in (initial, after tiered allocs, or
+-- after regular allocation).
+data Phase = PInitial
+ | PFinal
+ | PTiered
+
+statsData :: [(String, Cluster.CStats -> String)]
+statsData = [ ("SCORE", printf "%.8f" . Cluster.csScore)
+ , ("INST_CNT", printf "%d" . Cluster.csNinst)
+ , ("MEM_FREE", printf "%d" . Cluster.csFmem)
+ , ("MEM_AVAIL", printf "%d" . Cluster.csAmem)
+ , ("MEM_RESVD",
+ \cs -> printf "%d" (Cluster.csFmem cs - Cluster.csAmem cs))
+ , ("MEM_INST", printf "%d" . Cluster.csImem)
+ , ("MEM_OVERHEAD",
+ \cs -> printf "%d" (Cluster.csXmem cs + Cluster.csNmem cs))
+ , ("MEM_EFF",
+ \cs -> printf "%.8f" (fromIntegral (Cluster.csImem cs) /
+ Cluster.csTmem cs))
+ , ("DSK_FREE", printf "%d" . Cluster.csFdsk)
+ , ("DSK_AVAIL", printf "%d". Cluster.csAdsk)
+ , ("DSK_RESVD",
+ \cs -> printf "%d" (Cluster.csFdsk cs - Cluster.csAdsk cs))
+ , ("DSK_INST", printf "%d" . Cluster.csIdsk)
+ , ("DSK_EFF",
+ \cs -> printf "%.8f" (fromIntegral (Cluster.csIdsk cs) /
+ Cluster.csTdsk cs))
+ , ("CPU_INST", printf "%d" . Cluster.csIcpu)
+ , ("CPU_EFF",
+ \cs -> printf "%.8f" (fromIntegral (Cluster.csIcpu cs) /
+ Cluster.csTcpu cs))
+ , ("MNODE_MEM_AVAIL", printf "%d" . Cluster.csMmem)
+ , ("MNODE_DSK_AVAIL", printf "%d" . Cluster.csMdsk)
+ ]
+
+specData :: [(String, RSpec -> String)]
+specData = [ ("MEM", printf "%d" . rspecMem)
+ , ("DSK", printf "%d" . rspecDsk)
+ , ("CPU", printf "%d" . rspecCpu)
+ ]
+
+clusterData :: [(String, Cluster.CStats -> String)]
+clusterData = [ ("MEM", printf "%.0f" . Cluster.csTmem)
+ , ("DSK", printf "%.0f" . Cluster.csTdsk)
+ , ("CPU", printf "%.0f" . Cluster.csTcpu)
+ , ("VCPU", printf "%d" . Cluster.csVcpu)
+ ]
-- | Function to print stats for a given phase
-printStats :: String -> Cluster.CStats -> IO ()
-printStats kind cs = do
- printf "%s free RAM: %d\n" kind (Cluster.cs_fmem cs)
- printf "%s allocatable RAM: %d\n" kind (Cluster.cs_amem cs)
- printf "%s reserved RAM: %d\n" kind (Cluster.cs_fmem cs -
- Cluster.cs_amem cs)
- printf "%s free disk: %d\n" kind (Cluster.cs_fdsk cs)
- printf "%s allocatable disk: %d\n" kind (Cluster.cs_adsk cs)
- printf "%s reserved disk: %d\n" kind (Cluster.cs_fdsk cs -
- Cluster.cs_adsk cs)
- printf "%s max node allocatable RAM: %d\n" kind (Cluster.cs_mmem cs)
- printf "%s max node allocatable disk: %d\n" kind (Cluster.cs_mdsk cs)
+printStats :: Phase -> Cluster.CStats -> [(String, String)]
+printStats ph cs =
+ map (\(s, fn) -> (printf "%s_%s" kind s, fn cs)) statsData
+ where kind = case ph of
+ PInitial -> "INI"
+ PFinal -> "FIN"
+ PTiered -> "TRL"
-- | Print final stats and related metrics
printResults :: Node.List -> Int -> Int -> [(FailMode, Int)] -> IO ()
let fin_stats = Cluster.totalResources fin_nl
fin_instances = num_instances + allocs
- printf "Final score: %.8f\n" (Cluster.compCV fin_nl)
- printf "Final instances: %d\n" (num_instances + allocs)
- printStats "Final" fin_stats
- printf "Usage: %.5f\n" ((fromIntegral num_instances::Double) /
- fromIntegral fin_instances)
- printf "Allocations: %d\n" allocs
- putStr (unlines . map (\(x, y) -> printf "%s: %d" (show x) y) $ sreason)
- printf "Most likely fail reason: %s\n" (show . fst . head $ sreason)
+ when (num_instances + allocs /= Cluster.csNinst fin_stats) $
+ do
+ hPrintf stderr "ERROR: internal inconsistency, allocated (%d)\
+ \ != counted (%d)\n" (num_instances + allocs)
+ (Cluster.csNinst fin_stats) :: IO ()
+ exitWith $ ExitFailure 1
+
+ printKeys $ printStats PFinal fin_stats
+ printKeys [ ("ALLOC_USAGE", printf "%.8f"
+ ((fromIntegral num_instances::Double) /
+ fromIntegral fin_instances))
+ , ("ALLOC_INSTANCES", printf "%d" allocs)
+ , ("ALLOC_FAIL_REASON", map toUpper . show . fst $ head sreason)
+ ]
+ printKeys $ map (\(x, y) -> (printf "ALLOC_%s_CNT" (show x),
+ printf "%d" y)) sreason
+ -- this should be the final entry
+ printKeys [("OK", "1")]
+
+formatRSpec :: Double -> String -> RSpec -> [(String, String)]
+formatRSpec m_cpu s r =
+ [ ("KM_" ++ s ++ "_CPU", show $ rspecCpu r)
+ , ("KM_" ++ s ++ "_NPU", show $ fromIntegral (rspecCpu r) / m_cpu)
+ , ("KM_" ++ s ++ "_MEM", show $ rspecMem r)
+ , ("KM_" ++ s ++ "_DSK", show $ rspecDsk r)
+ ]
+
+printAllocationStats :: Double -> Node.List -> Node.List -> IO ()
+printAllocationStats m_cpu ini_nl fin_nl = do
+ let ini_stats = Cluster.totalResources ini_nl
+ fin_stats = Cluster.totalResources fin_nl
+ (rini, ralo, runa) = Cluster.computeAllocationDelta ini_stats fin_stats
+ printKeys $ formatRSpec m_cpu "USED" rini
+ printKeys $ formatRSpec m_cpu "POOL"ralo
+ printKeys $ formatRSpec m_cpu "UNAV" runa
+
+-- | Ensure a value is quoted if needed
+ensureQuoted :: String -> String
+ensureQuoted v = if not (all (\c -> (isAlphaNum c || c == '.')) v)
+ then '\'':v ++ "'"
+ else v
+
+-- | Format a list of key\/values as a shell fragment
+printKeys :: [(String, String)] -> IO ()
+printKeys = mapM_ (\(k, v) ->
+ printf "HTS_%s=%s\n" (map toUpper k) (ensureQuoted v))
+
+printInstance :: Node.List -> Instance.Instance -> [String]
+printInstance nl i = [ Instance.name i
+ , Container.nameOf nl $ Instance.pNode i
+ , let sdx = Instance.sNode i
+ in if sdx == Node.noSecondary then ""
+ else Container.nameOf nl sdx
+ , show (Instance.mem i)
+ , show (Instance.dsk i)
+ , show (Instance.vcpus i)
+ ]
-- | Main function.
main :: IO ()
main = do
cmd_args <- System.getArgs
- (opts, args) <- CLI.parseOpts cmd_args "hspace" options defaultOptions
+ (opts, args) <- parseOpts cmd_args "hspace" options
unless (null args) $ do
hPutStrLn stderr "Error: this program doesn't take any arguments."
exitWith $ ExitFailure 1
- printf "Spec RAM: %d\n" (optIMem opts)
- printf "Spec disk: %d\n" (optIDsk opts)
- printf "Spec CPUs: %d\n" (optIVCPUs opts)
- printf "Spec nodes: %d\n" (optINodes opts)
-
let verbose = optVerbose opts
+ ispec = optISpec opts
+ shownodes = optShowNodes opts
+
+ (fixed_nl, il, _) <- loadExternalData opts
+
+ printKeys $ map (\(a, fn) -> ("SPEC_" ++ a, fn ispec)) specData
+ printKeys [ ("SPEC_RQN", printf "%d" (optINodes opts)) ]
- (fixed_nl, il, csf) <- CLI.loadExternalData opts
let num_instances = length $ Container.elems il
let offline_names = optOffline opts
all_nodes = Container.elems fixed_nl
all_names = map Node.name all_nodes
- offline_wrong = filter (flip notElem all_names) offline_names
+ offline_wrong = filter (`notElem` all_names) offline_names
offline_indices = map Node.idx $
- filter (\n -> elem (Node.name n) offline_names)
+ filter (\n ->
+ Node.name n `elem` offline_names ||
+ Node.alias n `elem` offline_names)
all_nodes
req_nodes = optINodes opts
m_cpu = optMcpu opts
m_dsk = optMdsk opts
when (length offline_wrong > 0) $ do
- printf "Error: Wrong node name(s) set as offline: %s\n"
- (commaJoin offline_wrong)
+ hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
+ (commaJoin offline_wrong) :: IO ()
exitWith $ ExitFailure 1
when (req_nodes /= 1 && req_nodes /= 2) $ do
- printf "Error: Invalid required nodes (%d)\n" req_nodes
+ hPrintf stderr "Error: Invalid required nodes (%d)\n"
+ req_nodes :: IO ()
exitWith $ ExitFailure 1
- let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
+ let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
then Node.setOffline n True
else n) fixed_nl
nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
nm
+ csf = commonSuffix fixed_nl il
when (length csf > 0 && verbose > 1) $
- printf "Note: Stripping common suffix of '%s' from names\n" csf
+ hPrintf stderr "Note: Stripping common suffix of '%s' from names\n" csf
- when (optShowNodes opts) $
+ when (isJust shownodes) $
do
- putStrLn "Initial cluster status:"
- putStrLn $ Cluster.printNodes nl
+ hPutStrLn stderr "Initial cluster status:"
+ hPutStrLn stderr $ Cluster.printNodes nl (fromJust shownodes)
let ini_cv = Cluster.compCV nl
ini_stats = Cluster.totalResources nl
- (if verbose > 2 then
- printf "Initial coefficients: overall %.8f, %s\n"
- ini_cv (Cluster.printStats nl)
- else
- printf "Initial score: %.8f\n" ini_cv)
- printf "Initial instances: %d\n" num_instances
- printStats "Initial" ini_stats
+ when (verbose > 2) $
+ hPrintf stderr "Initial coefficients: overall %.8f, %s\n"
+ ini_cv (Cluster.printStats nl)
+
+ printKeys $ map (\(a, fn) -> ("CLUSTER_" ++ a, fn ini_stats)) clusterData
+ printKeys [("CLUSTER_NODES", printf "%d" (length all_nodes))]
+ printKeys $ printStats PInitial ini_stats
let bad_nodes = fst $ Cluster.computeBadItems nl il
- when (length bad_nodes > 0) $ do
- -- This is failn1 case, so we print the same final stats and
- -- exit early
- printResults nl num_instances 0 [(FailN1, 1)]
- exitWith ExitSuccess
-
- let nmlen = Container.maxNameLen nl
- newinst = Instance.create "new" (optIMem opts) (optIDsk opts)
- (optIVCPUs opts) "ADMIN_down" (-1) (-1)
-
- let (ereason, fin_nl, ixes) = iterateDepth nl il newinst req_nodes []
- allocs = length ixes
+ stop_allocation = length bad_nodes > 0
+ result_noalloc = ([(FailN1, 1)]::FailStats, nl, il, [])
+
+ -- utility functions
+ let iofspec spx = Instance.create "new" (rspecMem spx) (rspecDsk spx)
+ (rspecCpu spx) "running" [] (-1) (-1)
+ exitifbad val = (case val of
+ Bad s -> do
+ hPrintf stderr "Failure: %s\n" s :: IO ()
+ exitWith $ ExitFailure 1
+ Ok x -> return x)
+
+
+ let reqinst = iofspec ispec
+
+ -- Run the tiered allocation, if enabled
+
+ (case optTieredSpec opts of
+ Nothing -> return ()
+ Just tspec -> do
+ (_, trl_nl, trl_il, trl_ixes) <-
+ if stop_allocation
+ then return result_noalloc
+ else exitifbad (Cluster.tieredAlloc nl il (iofspec tspec)
+ req_nodes [])
+ let fin_trl_ixes = reverse trl_ixes
+ ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
+ spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
+ ix_byspec::[(RSpec, Int)]
+ spec_map' = map (\(spec, cnt) ->
+ printf "%d,%d,%d=%d" (rspecMem spec)
+ (rspecDsk spec) (rspecCpu spec) cnt)
+ spec_map::[String]
+
+ when (verbose > 1) $ do
+ hPutStrLn stderr "Tiered allocation map"
+ hPutStr stderr . unlines . map ((:) ' ' . intercalate " ") $
+ formatTable (map (printInstance trl_nl) fin_trl_ixes)
+ [False, False, False, True, True, True]
+
+ when (isJust shownodes) $ do
+ hPutStrLn stderr ""
+ hPutStrLn stderr "Tiered allocation status:"
+ hPutStrLn stderr $ Cluster.printNodes trl_nl (fromJust shownodes)
+
+ when (isJust $ optSaveCluster opts) $
+ do
+ let out_path = (fromJust $ optSaveCluster opts) <.> "tiered"
+ adata = serializeCluster trl_nl trl_il
+ writeFile out_path adata
+ hPrintf stderr "The cluster state after tiered allocation\
+ \ has been written to file '%s'\n"
+ out_path
+ printKeys $ printStats PTiered (Cluster.totalResources trl_nl)
+ printKeys [("TSPEC", intercalate " " spec_map')]
+ printAllocationStats m_cpu nl trl_nl)
+
+ -- Run the standard (avg-mode) allocation
+
+ (ereason, fin_nl, fin_il, ixes) <-
+ if stop_allocation
+ then return result_noalloc
+ else exitifbad (Cluster.iterateAlloc nl il reqinst req_nodes [])
+
+ let allocs = length ixes
fin_ixes = reverse ixes
- ix_namelen = maximum . map (length . Instance.name) $ fin_ixes
- sreason = reverse $ sortBy (compare `on` snd) ereason
-
- printResults fin_nl num_instances allocs sreason
-
- when (verbose > 1) $
- putStr . unlines . map (\i -> printf "Inst: %*s %-*s %-*s"
- ix_namelen (Instance.name i)
- nmlen (Container.nameOf fin_nl $ Instance.pnode i)
- nmlen (let sdx = Instance.snode i
- in if sdx == Node.noSecondary then ""
- else Container.nameOf fin_nl sdx))
- $ fin_ixes
+ sreason = reverse $ sortBy (comparing snd) ereason
+
+ when (verbose > 1) $ do
+ hPutStrLn stderr "Instance map"
+ hPutStr stderr . unlines . map ((:) ' ' . intercalate " ") $
+ formatTable (map (printInstance fin_nl) fin_ixes)
+ [False, False, False, True, True, True]
+ when (isJust shownodes) $
+ do
+ hPutStrLn stderr ""
+ hPutStrLn stderr "Final cluster status:"
+ hPutStrLn stderr $ Cluster.printNodes fin_nl (fromJust shownodes)
- when (optShowNodes opts) $
+ when (isJust $ optSaveCluster opts) $
do
- putStrLn ""
- putStrLn "Final cluster status:"
- putStrLn $ Cluster.printNodes fin_nl
+ let out_path = (fromJust $ optSaveCluster opts) <.> "alloc"
+ adata = serializeCluster fin_nl fin_il
+ writeFile out_path adata
+ hPrintf stderr "The cluster state after standard allocation\
+ \ has been written to file '%s'\n"
+ out_path
+
+ printResults fin_nl num_instances allocs sreason