1 {-| Cluster space sizing
7 Copyright (C) 2009 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Main (main) where
33 import System.Console.GetOpt
34 import qualified System
36 import Text.Printf (printf, hPrintf)
38 import qualified Ganeti.HTools.Container as Container
39 import qualified Ganeti.HTools.Cluster as Cluster
40 import qualified Ganeti.HTools.Node as Node
41 import qualified Ganeti.HTools.Instance as Instance
42 import qualified Ganeti.HTools.CLI as CLI
44 import Ganeti.HTools.Utils
45 import Ganeti.HTools.Types
47 -- | Command line options structure.
48 data Options = Options
49 { optShowNodes :: Bool -- ^ Whether to show node status
50 , optNodef :: FilePath -- ^ Path to the nodes file
51 , optNodeSet :: Bool -- ^ The nodes have been set by options
52 , optInstf :: FilePath -- ^ Path to the instances file
53 , optInstSet :: Bool -- ^ The insts have been set by options
54 , optMaster :: String -- ^ Collect data from RAPI
55 , optVerbose :: Int -- ^ Verbosity level
56 , optOffline :: [String] -- ^ Names of offline nodes
57 , optIMem :: Int -- ^ Instance memory
58 , optIDsk :: Int -- ^ Instance disk
59 , optIVCPUs :: Int -- ^ Instance VCPUs
60 , optINodes :: Int -- ^ Nodes required for an instance
61 , optMcpu :: Double -- ^ Max cpu ratio for nodes
62 , optMdsk :: Double -- ^ Max disk usage ratio for nodes
63 , optShowVer :: Bool -- ^ Just show the program version
64 , optShowHelp :: Bool -- ^ Just show the help
67 instance CLI.CLIOptions Options where
68 showVersion = optShowVer
69 showHelp = optShowHelp
71 instance CLI.EToolOptions Options where
76 masterName = optMaster
77 silent a = optVerbose a == 0
79 -- | Default values for the command line options.
80 defaultOptions :: Options
81 defaultOptions = Options
82 { optShowNodes = False
85 , optInstf = "instances"
100 -- | Options list and functions
101 options :: [OptDescr (Options -> Options)]
103 [ Option ['p'] ["print-nodes"]
104 (NoArg (\ opts -> opts { optShowNodes = True }))
105 "print the final node list"
106 , Option ['n'] ["nodes"]
107 (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
109 , Option ['i'] ["instances"]
110 (ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE")
111 "the instance list FILE"
112 , Option ['m'] ["master"]
113 (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
114 "collect data via RAPI at the given ADDRESS"
115 , Option ['v'] ["verbose"]
116 (NoArg (\ opts -> opts { optVerbose = optVerbose opts + 1 }))
117 "increase the verbosity level"
118 , Option ['q'] ["quiet"]
119 (NoArg (\ opts -> opts { optVerbose = optVerbose opts - 1 }))
120 "decrease the verbosity level"
121 , Option ['O'] ["offline"]
122 (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
123 "set node as offline"
124 , Option [] ["memory"]
125 (ReqArg (\ m opts -> opts { optIMem = read m }) "MEMORY")
126 "memory size for instances"
128 (ReqArg (\ d opts -> opts { optIDsk = read d }) "DISK")
129 "disk size for instances"
130 , Option [] ["vcpus"]
131 (ReqArg (\ p opts -> opts { optIVCPUs = read p }) "NUM")
132 "number of virtual cpus for instances"
133 , Option [] ["req-nodes"]
134 (ReqArg (\ n opts -> opts { optINodes = read n }) "NODES")
135 "number of nodes for the new instances (1=plain, 2=mirrored)"
136 , Option [] ["max-cpu"]
137 (ReqArg (\ n opts -> opts { optMcpu = read n }) "RATIO")
138 "maximum virtual-to-physical cpu ratio for nodes"
139 , Option [] ["min-disk"]
140 (ReqArg (\ n opts -> opts { optMdsk = read n }) "RATIO")
141 "minimum free disk space for nodes (between 0 and 1)"
142 , Option ['V'] ["version"]
143 (NoArg (\ opts -> opts { optShowVer = True}))
144 "show the version of the program"
145 , Option ['h'] ["help"]
146 (NoArg (\ opts -> opts { optShowHelp = True}))
150 -- | Build failure stats out of a list of failure reasons
151 concatFailure :: [(FailMode, Int)] -> FailMode -> [(FailMode, Int)]
152 concatFailure flst reason =
153 let cval = lookup reason flst
155 Nothing -> (reason, 1):flst
156 Just val -> let plain = filter (\(x, _) -> x /= reason) flst
157 in (reason, val+1):plain
159 -- | Build list of failures and placements out of an list of possible
161 filterFails :: Cluster.AllocSolution
162 -> ([(FailMode, Int)],
163 [(Node.List, Instance.Instance, [Node.Node])])
165 let (alst, blst) = unzip . map (\ (onl, i, nn) ->
167 OpFail reason -> ([reason], [])
168 OpGood gnl -> ([], [(gnl, i, nn)])
172 in (foldl' concatFailure [] aval, bval)
174 -- | Get the placement with best score out of a list of possible placements
175 processResults :: [(Node.List, Instance.Instance, [Node.Node])]
176 -> (Node.List, Instance.Instance, [Node.Node])
177 processResults sols =
178 let sols' = map (\e@(nl', _, _) -> (Cluster.compCV nl', e)) sols
179 sols'' = sortBy (compare `on` fst) sols'
182 -- | Recursively place instances on the cluster until we're out of space
183 iterateDepth :: Node.List
187 -> [Instance.Instance]
188 -> ([(FailMode, Int)], Node.List, [Instance.Instance])
189 iterateDepth nl il newinst nreq ixes =
190 let depth = length ixes
191 newname = printf "new-%d" depth::String
192 newidx = length (Container.elems il) + depth
193 newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
194 sols = Cluster.tryAlloc nl il newi2 nreq::
195 OpResult Cluster.AllocSolution
197 OpFail _ -> ([], nl, ixes)
199 let (errs, sols3) = filterFails sols'
201 then (errs, nl, ixes)
202 else let (xnl, xi, _) = processResults sols3
203 in iterateDepth xnl il newinst nreq (xi:ixes)
205 -- | Function to print stats for a given phase
206 printStats :: String -> Cluster.CStats -> IO ()
207 printStats kind cs = do
208 printf "%s score: %.8f\n" kind (Cluster.cs_score cs)
209 printf "%s instances: %d\n" kind (Cluster.cs_ninst cs)
210 printf "%s free RAM: %d\n" kind (Cluster.cs_fmem cs)
211 printf "%s allocatable RAM: %d\n" kind (Cluster.cs_amem cs)
212 printf "%s reserved RAM: %d\n" kind (Cluster.cs_fmem cs -
214 printf "%s instance RAM: %d\n" kind (Cluster.cs_imem cs)
215 printf "%s overhead RAM: %d\n" kind (Cluster.cs_xmem cs + Cluster.cs_nmem cs)
216 printf "%s RAM usage efficiency: %.8f\n"
217 kind (fromIntegral (Cluster.cs_imem cs) / Cluster.cs_tmem cs)
218 printf "%s free disk: %d\n" kind (Cluster.cs_fdsk cs)
219 printf "%s allocatable disk: %d\n" kind (Cluster.cs_adsk cs)
220 printf "%s reserved disk: %d\n" kind (Cluster.cs_fdsk cs -
222 printf "%s instance disk: %d\n" kind (Cluster.cs_idsk cs)
223 printf "%s disk usage efficiency: %.8f\n"
224 kind (fromIntegral (Cluster.cs_idsk cs) / Cluster.cs_tdsk cs)
225 printf "%s instance cpus: %d\n" kind (Cluster.cs_icpu cs)
226 printf "%s cpu usage efficiency: %.8f\n"
227 kind (fromIntegral (Cluster.cs_icpu cs) / Cluster.cs_tcpu cs)
228 printf "%s max node allocatable RAM: %d\n" kind (Cluster.cs_mmem cs)
229 printf "%s max node allocatable disk: %d\n" kind (Cluster.cs_mdsk cs)
231 -- | Print final stats and related metrics
232 printResults :: Node.List -> Int -> Int -> [(FailMode, Int)] -> IO ()
233 printResults fin_nl num_instances allocs sreason = do
234 let fin_stats = Cluster.totalResources fin_nl
235 fin_instances = num_instances + allocs
237 when (num_instances + allocs /= Cluster.cs_ninst fin_stats) $
239 hPrintf stderr "ERROR: internal inconsistency, allocated (%d)\
240 \ != counted (%d)\n" (num_instances + allocs)
241 (Cluster.cs_ninst fin_stats)
242 exitWith $ ExitFailure 1
244 printStats "Final" fin_stats
245 printf "Usage: %.5f\n" ((fromIntegral num_instances::Double) /
246 fromIntegral fin_instances)
247 printf "Allocations: %d\n" allocs
248 putStr (unlines . map (\(x, y) -> printf "%s: %d" (show x) y) $ sreason)
249 printf "Most likely fail reason: %s\n" (show . fst . head $ sreason)
254 cmd_args <- System.getArgs
255 (opts, args) <- CLI.parseOpts cmd_args "hspace" options defaultOptions
257 unless (null args) $ do
258 hPutStrLn stderr "Error: this program doesn't take any arguments."
259 exitWith $ ExitFailure 1
261 let verbose = optVerbose opts
263 (fixed_nl, il, csf) <- CLI.loadExternalData opts
265 printf "Spec RAM: %d\n" (optIMem opts)
266 printf "Spec disk: %d\n" (optIDsk opts)
267 printf "Spec CPUs: %d\n" (optIVCPUs opts)
268 printf "Spec nodes: %d\n" (optINodes opts)
270 let num_instances = length $ Container.elems il
272 let offline_names = optOffline opts
273 all_nodes = Container.elems fixed_nl
274 all_names = map Node.name all_nodes
275 offline_wrong = filter (flip notElem all_names) offline_names
276 offline_indices = map Node.idx $
277 filter (\n -> elem (Node.name n) offline_names)
279 req_nodes = optINodes opts
283 when (length offline_wrong > 0) $ do
284 hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
285 (commaJoin offline_wrong)
286 exitWith $ ExitFailure 1
288 when (req_nodes /= 1 && req_nodes /= 2) $ do
289 hPrintf stderr "Error: Invalid required nodes (%d)\n" req_nodes
290 exitWith $ ExitFailure 1
292 let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
293 then Node.setOffline n True
295 nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
298 when (length csf > 0 && verbose > 1) $
299 printf "Note: Stripping common suffix of '%s' from names\n" csf
301 when (optShowNodes opts) $
303 putStrLn "Initial cluster status:"
304 putStrLn $ Cluster.printNodes nl
306 let ini_cv = Cluster.compCV nl
307 ini_stats = Cluster.totalResources nl
309 when (verbose > 2) $ do
310 printf "Initial coefficients: overall %.8f, %s\n"
311 ini_cv (Cluster.printStats nl)
313 printf "Cluster RAM: %.0f\n" (Cluster.cs_tmem ini_stats)
314 printf "Cluster disk: %.0f\n" (Cluster.cs_tdsk ini_stats)
315 printf "Cluster cpus: %.0f\n" (Cluster.cs_tcpu ini_stats)
316 printStats "Initial" ini_stats
318 let bad_nodes = fst $ Cluster.computeBadItems nl il
319 when (length bad_nodes > 0) $ do
320 -- This is failn1 case, so we print the same final stats and
322 printResults nl num_instances 0 [(FailN1, 1)]
325 let nmlen = Container.maxNameLen nl
326 newinst = Instance.create "new" (optIMem opts) (optIDsk opts)
327 (optIVCPUs opts) "ADMIN_down" (-1) (-1)
329 let (ereason, fin_nl, ixes) = iterateDepth nl il newinst req_nodes []
331 fin_ixes = reverse ixes
332 ix_namelen = maximum . map (length . Instance.name) $ fin_ixes
333 sreason = reverse $ sortBy (compare `on` snd) ereason
335 printResults fin_nl num_instances allocs sreason
338 putStr . unlines . map (\i -> printf "Inst: %*s %-*s %-*s"
339 ix_namelen (Instance.name i)
340 nmlen (Container.nameOf fin_nl $ Instance.pnode i)
341 nmlen (let sdx = Instance.snode i
342 in if sdx == Node.noSecondary then ""
343 else Container.nameOf fin_nl sdx))
346 when (optShowNodes opts) $
349 putStrLn "Final cluster status:"
350 putStrLn $ Cluster.printNodes fin_nl