1 {-| Cluster space sizing
7 Copyright (C) 2009 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Main (main) where
33 import System.Console.GetOpt
34 import qualified System
36 import Text.Printf (printf, hPrintf)
38 import qualified Ganeti.HTools.Container as Container
39 import qualified Ganeti.HTools.Cluster as Cluster
40 import qualified Ganeti.HTools.Node as Node
41 import qualified Ganeti.HTools.Instance as Instance
42 import qualified Ganeti.HTools.CLI as CLI
44 import Ganeti.HTools.Utils
45 import Ganeti.HTools.Types
47 -- | Command line options structure.
48 data Options = Options
49 { optShowNodes :: Bool -- ^ Whether to show node status
50 , optNodef :: FilePath -- ^ Path to the nodes file
51 , optNodeSet :: Bool -- ^ The nodes have been set by options
52 , optInstf :: FilePath -- ^ Path to the instances file
53 , optInstSet :: Bool -- ^ The insts have been set by options
54 , optMaster :: String -- ^ Collect data from RAPI
55 , optVerbose :: Int -- ^ Verbosity level
56 , optOffline :: [String] -- ^ Names of offline nodes
57 , optIMem :: Int -- ^ Instance memory
58 , optIDsk :: Int -- ^ Instance disk
59 , optIVCPUs :: Int -- ^ Instance VCPUs
60 , optINodes :: Int -- ^ Nodes required for an instance
61 , optMcpu :: Double -- ^ Max cpu ratio for nodes
62 , optMdsk :: Double -- ^ Max disk usage ratio for nodes
63 , optShowVer :: Bool -- ^ Just show the program version
64 , optShowHelp :: Bool -- ^ Just show the help
-- | Lets the shared CLI code query the version and help flags stored
-- in 'Options'.
instance CLI.CLIOptions Options where
    showVersion opts = optShowVer opts
    showHelp opts    = optShowHelp opts
71 instance CLI.EToolOptions Options where
76 masterName = optMaster
77 silent a = optVerbose a == 0
79 -- | Default values for the command line options.
80 defaultOptions :: Options
81 defaultOptions = Options
82 { optShowNodes = False
85 , optInstf = "instances"
100 -- | Options list and functions
101 options :: [OptDescr (Options -> Options)]
103 [ Option ['p'] ["print-nodes"]
104 (NoArg (\ opts -> opts { optShowNodes = True }))
105 "print the final node list"
106 , Option ['n'] ["nodes"]
107 (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
109 , Option ['i'] ["instances"]
110 (ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE")
111 "the instance list FILE"
112 , Option ['m'] ["master"]
113 (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
114 "collect data via RAPI at the given ADDRESS"
115 , Option ['v'] ["verbose"]
116 (NoArg (\ opts -> opts { optVerbose = optVerbose opts + 1 }))
117 "increase the verbosity level"
118 , Option ['q'] ["quiet"]
119 (NoArg (\ opts -> opts { optVerbose = optVerbose opts - 1 }))
120 "decrease the verbosity level"
121 , Option ['O'] ["offline"]
122 (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
123 "set node as offline"
124 , Option [] ["memory"]
125 (ReqArg (\ m opts -> opts { optIMem = read m }) "MEMORY")
126 "memory size for instances"
128 (ReqArg (\ d opts -> opts { optIDsk = read d }) "DISK")
129 "disk size for instances"
130 , Option [] ["vcpus"]
131 (ReqArg (\ p opts -> opts { optIVCPUs = read p }) "NUM")
132 "number of virtual cpus for instances"
133 , Option [] ["req-nodes"]
134 (ReqArg (\ n opts -> opts { optINodes = read n }) "NODES")
135 "number of nodes for the new instances (1=plain, 2=mirrored)"
136 , Option [] ["max-cpu"]
137 (ReqArg (\ n opts -> opts { optMcpu = read n }) "RATIO")
138 "maximum virtual-to-physical cpu ratio for nodes"
139 , Option [] ["min-disk"]
140 (ReqArg (\ n opts -> opts { optMdsk = read n }) "RATIO")
141 "minimum free disk space for nodes (between 0 and 1)"
142 , Option ['V'] ["version"]
143 (NoArg (\ opts -> opts { optShowVer = True}))
144 "show the version of the program"
145 , Option ['h'] ["help"]
146 (NoArg (\ opts -> opts { optShowHelp = True}))
-- | Add one occurrence of a failure reason to a running tally.
--
-- A reason not yet present is prepended with a count of 1. A reason
-- already present has its entries removed from the list and is
-- prepended again with its previous count incremented by one.
concatFailure :: [(FailMode, Int)] -> FailMode -> [(FailMode, Int)]
concatFailure flst reason =
    case lookup reason flst of
      Nothing    -> (reason, 1) : flst
      Just count -> (reason, count + 1) : others
    where
      -- Every entry of the tally that belongs to a different reason.
      others = filter ((/= reason) . fst) flst
159 -- | Build list of failures and placements out of a list of possible
161 filterFails :: Cluster.AllocSolution
162 -> ([(FailMode, Int)],
163 [(Node.List, Instance.Instance, [Node.Node])])
165 let (alst, blst) = unzip . map (\ (onl, i, nn) ->
167 OpFail reason -> ([reason], [])
168 OpGood gnl -> ([], [(gnl, i, nn)])
172 in (foldl' concatFailure [] aval, bval)
174 -- | Get the placement with best score out of a list of possible placements
175 processResults :: [(Node.List, Instance.Instance, [Node.Node])]
176 -> (Node.List, Instance.Instance, [Node.Node])
177 processResults sols =
178 let sols' = map (\e@(nl', _, _) -> (Cluster.compCV nl', e)) sols
179 sols'' = sortBy (compare `on` fst) sols'
182 -- | Recursively place instances on the cluster until we're out of space
183 iterateDepth :: Node.List
187 -> [Instance.Instance]
188 -> ([(FailMode, Int)], Node.List, [Instance.Instance])
189 iterateDepth nl il newinst nreq ixes =
190 let depth = length ixes
191 newname = printf "new-%d" depth::String
192 newidx = length (Container.elems il) + depth
193 newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
194 sols = Cluster.tryAlloc nl il newi2 nreq::
195 OpResult Cluster.AllocSolution
197 OpFail _ -> ([], nl, ixes)
199 let (errs, sols3) = filterFails sols'
201 then (errs, nl, ixes)
202 else let (xnl, xi, _) = processResults sols3
203 in iterateDepth xnl il newinst nreq (xi:ixes)
205 -- | Function to print stats for a given phase
206 printStats :: String -> Cluster.CStats -> IO ()
207 printStats kind cs = do
208 printf "%s free RAM: %d\n" kind (Cluster.cs_fmem cs)
209 printf "%s allocatable RAM: %d\n" kind (Cluster.cs_amem cs)
210 printf "%s reserved RAM: %d\n" kind (Cluster.cs_fmem cs -
212 printf "%s instance RAM: %d\n" kind (Cluster.cs_imem cs)
213 printf "%s overhead RAM: %d\n" kind (Cluster.cs_xmem cs + Cluster.cs_nmem cs)
214 printf "%s RAM usage efficiency: %.8f\n"
215 kind (fromIntegral (Cluster.cs_imem cs) / Cluster.cs_tmem cs)
216 printf "%s free disk: %d\n" kind (Cluster.cs_fdsk cs)
217 printf "%s allocatable disk: %d\n" kind (Cluster.cs_adsk cs)
218 printf "%s reserved disk: %d\n" kind (Cluster.cs_fdsk cs -
220 printf "%s instance disk: %d\n" kind (Cluster.cs_idsk cs)
221 printf "%s disk usage efficiency: %.8f\n"
222 kind (fromIntegral (Cluster.cs_idsk cs) / Cluster.cs_tdsk cs)
223 printf "%s instance cpus: %d\n" kind (Cluster.cs_icpu cs)
224 printf "%s cpu usage efficiency: %.8f\n"
225 kind (fromIntegral (Cluster.cs_icpu cs) / Cluster.cs_tcpu cs)
226 printf "%s max node allocatable RAM: %d\n" kind (Cluster.cs_mmem cs)
227 printf "%s max node allocatable disk: %d\n" kind (Cluster.cs_mdsk cs)
-- | Print final stats and related metrics.
--
-- Arguments, in order:
--
-- * the final node list, used for the score and the \"Final\" stats;
--
-- * the number of instances that existed before allocation;
--
-- * the number of allocations performed (added to the previous
--   argument to obtain the \"Final instances\" figure);
--
-- * the failure reasons with their counts; the first entry is the one
--   reported as the most likely failure reason.
--
-- The failure list may be empty (the allocation loop returns an empty
-- list when the allocator itself fails), so the \"Most likely fail
-- reason\" line is only printed when at least one reason is present,
-- instead of calling 'head' on a possibly empty list.
printResults :: Node.List -> Int -> Int -> [(FailMode, Int)] -> IO ()
printResults fin_nl num_instances allocs sreason = do
  let fin_stats = Cluster.totalResources fin_nl
      fin_instances = num_instances + allocs

  printf "Final score: %.8f\n" (Cluster.compCV fin_nl)
  printf "Final instances: %d\n" fin_instances
  printStats "Final" fin_stats
  printf "Usage: %.5f\n" ((fromIntegral num_instances::Double) /
                          fromIntegral fin_instances)
  printf "Allocations: %d\n" allocs
  putStr (unlines . map (\(x, y) -> printf "%s: %d" (show x) y) $ sreason)
  -- Pattern match rather than 'head': with no recorded failure there
  -- is nothing meaningful to report, and 'head []' would abort the
  -- program after all the other statistics were already printed.
  case sreason of
    [] -> return ()
    (reason, _):_ -> printf "Most likely fail reason: %s\n" (show reason)
247 cmd_args <- System.getArgs
248 (opts, args) <- CLI.parseOpts cmd_args "hspace" options defaultOptions
250 unless (null args) $ do
251 hPutStrLn stderr "Error: this program doesn't take any arguments."
252 exitWith $ ExitFailure 1
254 let verbose = optVerbose opts
256 (fixed_nl, il, csf) <- CLI.loadExternalData opts
258 printf "Spec RAM: %d\n" (optIMem opts)
259 printf "Spec disk: %d\n" (optIDsk opts)
260 printf "Spec CPUs: %d\n" (optIVCPUs opts)
261 printf "Spec nodes: %d\n" (optINodes opts)
263 let num_instances = length $ Container.elems il
265 let offline_names = optOffline opts
266 all_nodes = Container.elems fixed_nl
267 all_names = map Node.name all_nodes
268 offline_wrong = filter (flip notElem all_names) offline_names
269 offline_indices = map Node.idx $
270 filter (\n -> elem (Node.name n) offline_names)
272 req_nodes = optINodes opts
276 when (length offline_wrong > 0) $ do
277 hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
278 (commaJoin offline_wrong)
279 exitWith $ ExitFailure 1
281 when (req_nodes /= 1 && req_nodes /= 2) $ do
282 hPrintf stderr "Error: Invalid required nodes (%d)\n" req_nodes
283 exitWith $ ExitFailure 1
285 let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
286 then Node.setOffline n True
288 nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
291 when (length csf > 0 && verbose > 1) $
292 printf "Note: Stripping common suffix of '%s' from names\n" csf
294 when (optShowNodes opts) $
296 putStrLn "Initial cluster status:"
297 putStrLn $ Cluster.printNodes nl
299 let ini_cv = Cluster.compCV nl
300 ini_stats = Cluster.totalResources nl
303 printf "Initial coefficients: overall %.8f, %s\n"
304 ini_cv (Cluster.printStats nl)
306 printf "Initial score: %.8f\n" ini_cv)
307 printf "Cluster RAM: %.0f\n" (Cluster.cs_tmem ini_stats)
308 printf "Cluster disk: %.0f\n" (Cluster.cs_tdsk ini_stats)
309 printf "Cluster cpus: %.0f\n" (Cluster.cs_tcpu ini_stats)
310 printf "Initial instances: %d\n" num_instances
311 printStats "Initial" ini_stats
313 let bad_nodes = fst $ Cluster.computeBadItems nl il
314 when (length bad_nodes > 0) $ do
315 -- This is failn1 case, so we print the same final stats and
317 printResults nl num_instances 0 [(FailN1, 1)]
320 let nmlen = Container.maxNameLen nl
321 newinst = Instance.create "new" (optIMem opts) (optIDsk opts)
322 (optIVCPUs opts) "ADMIN_down" (-1) (-1)
324 let (ereason, fin_nl, ixes) = iterateDepth nl il newinst req_nodes []
326 fin_ixes = reverse ixes
327 ix_namelen = maximum . map (length . Instance.name) $ fin_ixes
328 sreason = reverse $ sortBy (compare `on` snd) ereason
330 printResults fin_nl num_instances allocs sreason
333 putStr . unlines . map (\i -> printf "Inst: %*s %-*s %-*s"
334 ix_namelen (Instance.name i)
335 nmlen (Container.nameOf fin_nl $ Instance.pnode i)
336 nmlen (let sdx = Instance.snode i
337 in if sdx == Node.noSecondary then ""
338 else Container.nameOf fin_nl sdx))
341 when (optShowNodes opts) $
344 putStrLn "Final cluster status:"
345 putStrLn $ Cluster.printNodes fin_nl