1 {-| Cluster space sizing
7 Copyright (C) 2009 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Main (main) where
33 import System.Console.GetOpt
34 import qualified System
36 import Text.Printf (printf)
38 import qualified Ganeti.HTools.Container as Container
39 import qualified Ganeti.HTools.Cluster as Cluster
40 import qualified Ganeti.HTools.Node as Node
41 import qualified Ganeti.HTools.Instance as Instance
42 import qualified Ganeti.HTools.CLI as CLI
44 import Ganeti.HTools.Utils
45 import Ganeti.HTools.Types
47 -- | Command line options structure.
48 data Options = Options
49 { optShowNodes :: Bool -- ^ Whether to show node status
50 , optNodef :: FilePath -- ^ Path to the nodes file
51 , optNodeSet :: Bool -- ^ The nodes have been set by options
52 , optInstf :: FilePath -- ^ Path to the instances file
53 , optInstSet :: Bool -- ^ The insts have been set by options
54 , optMaster :: String -- ^ Collect data from RAPI
55 , optVerbose :: Int -- ^ Verbosity level
56 , optOffline :: [String] -- ^ Names of offline nodes
57 , optIMem :: Int -- ^ Instance memory
58 , optIDsk :: Int -- ^ Instance disk
59 , optIVCPUs :: Int -- ^ Instance VCPUs
60 , optINodes :: Int -- ^ Nodes required for an instance
61 , optMcpu :: Double -- ^ Max cpu ratio for nodes
62 , optMdsk :: Double -- ^ Max disk usage ratio for nodes
63 , optShowVer :: Bool -- ^ Just show the program version
64 , optShowHelp :: Bool -- ^ Just show the help
67 instance CLI.CLIOptions Options where
68 showVersion = optShowVer
69 showHelp = optShowHelp
71 instance CLI.EToolOptions Options where
76 masterName = optMaster
77 silent a = optVerbose a == 0
79 -- | Default values for the command line options.
80 defaultOptions :: Options
81 defaultOptions = Options
82 { optShowNodes = False
85 , optInstf = "instances"
100 -- | Options list and functions
101 options :: [OptDescr (Options -> Options)]
103 [ Option ['p'] ["print-nodes"]
104 (NoArg (\ opts -> opts { optShowNodes = True }))
105 "print the final node list"
106 , Option ['n'] ["nodes"]
107 (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
109 , Option ['i'] ["instances"]
110 (ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE")
111 "the instance list FILE"
112 , Option ['m'] ["master"]
113 (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
114 "collect data via RAPI at the given ADDRESS"
115 , Option ['v'] ["verbose"]
116 (NoArg (\ opts -> opts { optVerbose = optVerbose opts + 1 }))
117 "increase the verbosity level"
118 , Option ['q'] ["quiet"]
119 (NoArg (\ opts -> opts { optVerbose = optVerbose opts - 1 }))
120 "decrease the verbosity level"
121 , Option ['O'] ["offline"]
122 (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
123 "set node as offline"
124 , Option [] ["memory"]
125 (ReqArg (\ m opts -> opts { optIMem = read m }) "MEMORY")
126 "memory size for instances"
128 (ReqArg (\ d opts -> opts { optIDsk = read d }) "DISK")
129 "disk size for instances"
130 , Option [] ["vcpus"]
131 (ReqArg (\ p opts -> opts { optIVCPUs = read p }) "NUM")
132 "number of virtual cpus for instances"
133 , Option [] ["req-nodes"]
134 (ReqArg (\ n opts -> opts { optINodes = read n }) "NODES")
135 "number of nodes for the new instances (1=plain, 2=mirrored)"
136 , Option [] ["max-cpu"]
137 (ReqArg (\ n opts -> opts { optMcpu = read n }) "RATIO")
138 "maximum virtual-to-physical cpu ratio for nodes"
139 , Option [] ["min-disk"]
140 (ReqArg (\ n opts -> opts { optMdsk = read n }) "RATIO")
141 "minimum free disk space for nodes (between 0 and 1)"
142 , Option ['V'] ["version"]
143 (NoArg (\ opts -> opts { optShowVer = True}))
144 "show the version of the program"
145 , Option ['h'] ["help"]
146 (NoArg (\ opts -> opts { optShowHelp = True}))
150 -- | Build failure stats out of a list of failure reasons
151 concatFailure :: [(FailMode, Int)] -> FailMode -> [(FailMode, Int)]
152 concatFailure flst reason =
153 let cval = lookup reason flst
155 Nothing -> (reason, 1):flst
156 Just val -> let plain = filter (\(x, _) -> x /= reason) flst
157 in (reason, val+1):plain
159 -- | Build list of failures and placements out of an list of possible
161 filterFails :: Cluster.AllocSolution
162 -> ([(FailMode, Int)],
163 [(Node.List, Instance.Instance, [Node.Node])])
165 let (alst, blst) = unzip . map (\ (onl, i, nn) ->
167 OpFail reason -> ([reason], [])
168 OpGood gnl -> ([], [(gnl, i, nn)])
172 in (foldl' concatFailure [] aval, bval)
174 -- | Get the placement with best score out of a list of possible placements
175 processResults :: [(Node.List, Instance.Instance, [Node.Node])]
176 -> (Node.List, Instance.Instance, [Node.Node])
177 processResults sols =
178 let sols' = map (\e@(nl', _, _) -> (Cluster.compCV nl', e)) sols
179 sols'' = sortBy (compare `on` fst) sols'
182 -- | Recursively place instances on the cluster until we're out of space
183 iterateDepth :: Node.List
187 -> [Instance.Instance]
188 -> ([(FailMode, Int)], Node.List, [Instance.Instance])
189 iterateDepth nl il newinst nreq ixes =
190 let depth = length ixes
191 newname = printf "new-%d" depth::String
192 newidx = length (Container.elems il) + depth
193 newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
194 sols = Cluster.tryAlloc nl il newi2 nreq::
195 OpResult Cluster.AllocSolution
197 OpFail _ -> ([], nl, ixes)
199 let (errs, sols3) = filterFails sols'
201 then (errs, nl, ixes)
202 else let (xnl, xi, _) = processResults sols3
203 in iterateDepth xnl il newinst nreq (xi:ixes)
205 -- | Function to print stats for a given phase
206 printStats :: String -> Cluster.CStats -> IO ()
207 printStats kind cs = do
208 printf "%s free RAM: %d\n" kind (Cluster.cs_fmem cs)
209 printf "%s allocatable RAM: %d\n" kind (Cluster.cs_amem cs)
210 printf "%s reserved RAM: %d\n" kind (Cluster.cs_fmem cs -
212 printf "%s free disk: %d\n" kind (Cluster.cs_fdsk cs)
213 printf "%s allocatable disk: %d\n" kind (Cluster.cs_adsk cs)
214 printf "%s reserved disk: %d\n" kind (Cluster.cs_fdsk cs -
216 printf "%s max node allocatable RAM: %d\n" kind (Cluster.cs_mmem cs)
217 printf "%s max node allocatable disk: %d\n" kind (Cluster.cs_mdsk cs)
222 cmd_args <- System.getArgs
223 (opts, args) <- CLI.parseOpts cmd_args "hspace" options defaultOptions
225 unless (null args) $ do
226 hPutStrLn stderr "Error: this program doesn't take any arguments."
227 exitWith $ ExitFailure 1
229 printf "Spec RAM: %d\n" (optIMem opts)
230 printf "Spec disk: %d\n" (optIDsk opts)
231 printf "Spec CPUs: %d\n" (optIVCPUs opts)
232 printf "Spec nodes: %d\n" (optINodes opts)
234 let verbose = optVerbose opts
236 (fixed_nl, il, csf) <- CLI.loadExternalData opts
237 let num_instances = length $ Container.elems il
239 let offline_names = optOffline opts
240 all_nodes = Container.elems fixed_nl
241 all_names = map Node.name all_nodes
242 offline_wrong = filter (flip notElem all_names) offline_names
243 offline_indices = map Node.idx $
244 filter (\n -> elem (Node.name n) offline_names)
246 req_nodes = optINodes opts
250 when (length offline_wrong > 0) $ do
251 printf "Error: Wrong node name(s) set as offline: %s\n"
252 (commaJoin offline_wrong)
253 exitWith $ ExitFailure 1
255 when (req_nodes /= 1 && req_nodes /= 2) $ do
256 printf "Error: Invalid required nodes (%d)\n" req_nodes
257 exitWith $ ExitFailure 1
259 let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
260 then Node.setOffline n True
262 nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
265 when (length csf > 0 && verbose > 1) $
266 printf "Note: Stripping common suffix of '%s' from names\n" csf
268 let bad_nodes = fst $ Cluster.computeBadItems nl il
269 when (length bad_nodes > 0) $ do
270 putStrLn "Error: Cluster not N+1, no space to allocate."
271 exitWith $ ExitFailure 1
273 when (optShowNodes opts) $
275 putStrLn "Initial cluster status:"
276 putStrLn $ Cluster.printNodes nl
278 let ini_cv = Cluster.compCV nl
279 ini_stats = Cluster.totalResources nl
282 printf "Initial coefficients: overall %.8f, %s\n"
283 ini_cv (Cluster.printStats nl)
285 printf "Initial score: %.8f\n" ini_cv)
286 printf "Initial instances: %d\n" num_instances
287 printStats "Initial" ini_stats
289 let nmlen = Container.maxNameLen nl
290 newinst = Instance.create "new" (optIMem opts) (optIDsk opts)
291 (optIVCPUs opts) "ADMIN_down" (-1) (-1)
293 let (ereason, fin_nl, ixes) = iterateDepth nl il newinst req_nodes []
295 fin_instances = num_instances + allocs
296 fin_ixes = reverse ixes
297 ix_namelen = maximum . map (length . Instance.name) $ fin_ixes
298 fin_stats = Cluster.totalResources fin_nl
299 sreason = reverse $ sortBy (compare `on` snd) ereason
301 printf "Final score: %.8f\n" (Cluster.compCV fin_nl)
302 printf "Final instances: %d\n" (num_instances + allocs)
303 printStats "Final" fin_stats
304 printf "Usage: %.5f\n" ((fromIntegral num_instances::Double) /
305 fromIntegral fin_instances)
306 printf "Allocations: %d\n" allocs
307 putStr (unlines . map (\(x, y) -> printf "%s: %d" (show x) y) $ sreason)
308 printf "Most likely fail reason: %s\n" (show . fst . head $ sreason)
311 putStr . unlines . map (\i -> printf "Inst: %*s %-*s %-*s"
312 ix_namelen (Instance.name i)
313 nmlen (Container.nameOf fin_nl $ Instance.pnode i)
314 nmlen (let sdx = Instance.snode i
315 in if sdx == Node.noSecondary then ""
316 else Container.nameOf fin_nl sdx))
319 when (optShowNodes opts) $
322 putStrLn "Final cluster status:"
323 putStrLn $ Cluster.printNodes fin_nl