1 {-| Cluster space sizing
7 Copyright (C) 2009 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Main (main) where
28 import Data.Char (toUpper)
31 import Data.Maybe (fromMaybe)
35 import System.Console.GetOpt
36 import qualified System
38 import Text.Printf (printf, hPrintf)
40 import qualified Ganeti.HTools.Container as Container
41 import qualified Ganeti.HTools.Cluster as Cluster
42 import qualified Ganeti.HTools.Node as Node
43 import qualified Ganeti.HTools.Instance as Instance
44 import qualified Ganeti.HTools.CLI as CLI
46 import Ganeti.HTools.Utils
47 import Ganeti.HTools.Types
49 -- | Command line options structure.
50 data Options = Options
51 { optShowNodes :: Bool -- ^ Whether to show node status
52 , optNodef :: FilePath -- ^ Path to the nodes file
53 , optNodeSet :: Bool -- ^ The nodes have been set by options
54 , optInstf :: FilePath -- ^ Path to the instances file
55 , optInstSet :: Bool -- ^ The insts have been set by options
56 , optMaster :: String -- ^ Collect data from RAPI
57 , optLuxi :: Maybe FilePath -- ^ Collect data from Luxi
58 , optVerbose :: Int -- ^ Verbosity level
59 , optOffline :: [String] -- ^ Names of offline nodes
60 , optIMem :: Int -- ^ Instance memory
61 , optIDsk :: Int -- ^ Instance disk
62 , optIVCPUs :: Int -- ^ Instance VCPUs
63 , optINodes :: Int -- ^ Nodes required for an instance
64 , optMcpu :: Double -- ^ Max cpu ratio for nodes
65 , optMdsk :: Double -- ^ Max disk usage ratio for nodes
66 , optShowVer :: Bool -- ^ Just show the program version
67 , optShowHelp :: Bool -- ^ Just show the help
70 instance CLI.CLIOptions Options where
71 showVersion = optShowVer
72 showHelp = optShowHelp
74 instance CLI.EToolOptions Options where
79 masterName = optMaster
81 silent a = optVerbose a == 0
83 -- | Default values for the command line options.
84 defaultOptions :: Options
85 defaultOptions = Options
86 { optShowNodes = False
89 , optInstf = "instances"
102 , optShowHelp = False
105 -- | Options list and functions
106 options :: [OptDescr (Options -> Options)]
108 [ Option ['p'] ["print-nodes"]
109 (NoArg (\ opts -> opts { optShowNodes = True }))
110 "print the final node list"
111 , Option ['n'] ["nodes"]
112 (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
114 , Option ['i'] ["instances"]
115 (ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE")
116 "the instance list FILE"
117 , Option ['m'] ["master"]
118 (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
119 "collect data via RAPI at the given ADDRESS"
120 , Option ['L'] ["luxi"]
121 (OptArg ((\ f opts -> opts { optLuxi = Just f }) .
122 fromMaybe CLI.defaultLuxiSocket) "SOCKET")
123 "collect data via Luxi, optionally using the given SOCKET path"
124 , Option ['v'] ["verbose"]
125 (NoArg (\ opts -> opts { optVerbose = optVerbose opts + 1 }))
126 "increase the verbosity level"
127 , Option ['q'] ["quiet"]
128 (NoArg (\ opts -> opts { optVerbose = optVerbose opts - 1 }))
129 "decrease the verbosity level"
130 , Option ['O'] ["offline"]
131 (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
132 "set node as offline"
133 , Option [] ["memory"]
134 (ReqArg (\ m opts -> opts { optIMem = read m }) "MEMORY")
135 "memory size for instances"
137 (ReqArg (\ d opts -> opts { optIDsk = read d }) "DISK")
138 "disk size for instances"
139 , Option [] ["vcpus"]
140 (ReqArg (\ p opts -> opts { optIVCPUs = read p }) "NUM")
141 "number of virtual cpus for instances"
142 , Option [] ["req-nodes"]
143 (ReqArg (\ n opts -> opts { optINodes = read n }) "NODES")
144 "number of nodes for the new instances (1=plain, 2=mirrored)"
145 , Option [] ["max-cpu"]
146 (ReqArg (\ n opts -> opts { optMcpu = read n }) "RATIO")
147 "maximum virtual-to-physical cpu ratio for nodes"
148 , Option [] ["min-disk"]
149 (ReqArg (\ n opts -> opts { optMdsk = read n }) "RATIO")
150 "minimum free disk space for nodes (between 0 and 1)"
151 , Option ['V'] ["version"]
152 (NoArg (\ opts -> opts { optShowVer = True}))
153 "show the version of the program"
154 , Option ['h'] ["help"]
155 (NoArg (\ opts -> opts { optShowHelp = True}))
159 data Phase = PInitial | PFinal
161 statsData :: [(String, Cluster.CStats -> String)]
162 statsData = [ ("SCORE", printf "%.8f" . Cluster.cs_score)
163 , ("INST_CNT", printf "%d" . Cluster.cs_ninst)
164 , ("MEM_FREE", printf "%d" . Cluster.cs_fmem)
165 , ("MEM_AVAIL", printf "%d" . Cluster.cs_amem)
167 \cs -> printf "%d" (Cluster.cs_fmem cs - Cluster.cs_amem cs))
168 , ("MEM_INST", printf "%d" . Cluster.cs_imem)
170 \cs -> printf "%d" (Cluster.cs_xmem cs + Cluster.cs_nmem cs))
172 \cs -> printf "%.8f" (fromIntegral (Cluster.cs_imem cs) /
174 , ("DSK_FREE", printf "%d" . Cluster.cs_fdsk)
175 , ("DSK_AVAIL", printf "%d ". Cluster.cs_adsk)
177 \cs -> printf "%d" (Cluster.cs_fdsk cs - Cluster.cs_adsk cs))
178 , ("DSK_INST", printf "%d" . Cluster.cs_idsk)
180 \cs -> printf "%.8f" (fromIntegral (Cluster.cs_idsk cs) /
182 , ("CPU_INST", printf "%d" . Cluster.cs_icpu)
184 \cs -> printf "%.8f" (fromIntegral (Cluster.cs_icpu cs) /
186 , ("MNODE_MEM_AVAIL", printf "%d" . Cluster.cs_mmem)
187 , ("MNODE_DSK_AVAIL", printf "%d" . Cluster.cs_mdsk)
190 specData :: [(String, Options -> String)]
191 specData = [ ("MEM", printf "%d" . optIMem)
192 , ("DSK", printf "%d" . optIDsk)
193 , ("CPU", printf "%d" . optIVCPUs)
194 , ("RQN", printf "%d" . optINodes)
197 clusterData :: [(String, Cluster.CStats -> String)]
198 clusterData = [ ("MEM", printf "%.0f" . Cluster.cs_tmem)
199 , ("DSK", printf "%.0f" . Cluster.cs_tdsk)
200 , ("CPU", printf "%.0f" . Cluster.cs_tcpu)
203 -- | Recursively place instances on the cluster until we're out of space
204 iterateDepth :: Node.List
208 -> [Instance.Instance]
209 -> Result (FailStats, Node.List, [Instance.Instance])
210 iterateDepth nl il newinst nreq ixes =
211 let depth = length ixes
212 newname = printf "new-%d" depth::String
213 newidx = length (Container.elems il) + depth
214 newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
215 in case Cluster.tryAlloc nl il newi2 nreq of
217 Ok (errs, _, sols3) ->
219 Nothing -> Ok (Cluster.collapseFailures errs, nl, ixes)
220 Just (_, (xnl, xi, _)) ->
221 iterateDepth xnl il newinst nreq $! (xi:ixes)
223 -- | Function to print stats for a given phase
224 printStats :: Phase -> Cluster.CStats -> [(String, String)]
226 map (\(s, fn) -> (printf "%s_%s" kind s, fn cs)) statsData
227 where kind = case ph of
231 -- | Print final stats and related metrics
232 printResults :: Node.List -> Int -> Int -> [(FailMode, Int)] -> IO ()
233 printResults fin_nl num_instances allocs sreason = do
234 let fin_stats = Cluster.totalResources fin_nl
235 fin_instances = num_instances + allocs
237 when (num_instances + allocs /= Cluster.cs_ninst fin_stats) $
239 hPrintf stderr "ERROR: internal inconsistency, allocated (%d)\
240 \ != counted (%d)\n" (num_instances + allocs)
241 (Cluster.cs_ninst fin_stats)
242 exitWith $ ExitFailure 1
244 printKeys $ printStats PFinal fin_stats
245 printKeys [ ("ALLOC_USAGE", printf "%.8f"
246 ((fromIntegral num_instances::Double) /
247 fromIntegral fin_instances))
248 , ("ALLOC_INSTANCES", printf "%d" allocs)
249 , ("ALLOC_FAIL_REASON", map toUpper . show . fst $ head sreason)
251 printKeys $ map (\(x, y) -> (printf "ALLOC_%s_CNT" (show x),
252 printf "%d" y)) sreason
253 -- this should be the final entry
254 printKeys [("OK", "1")]
256 -- | Format a list of key/values as a shell fragment
257 printKeys :: [(String, String)] -> IO ()
258 printKeys = mapM_ (\(k, v) -> printf "HTS_%s=%s\n" (map toUpper k) v)
263 cmd_args <- System.getArgs
264 (opts, args) <- CLI.parseOpts cmd_args "hspace" options defaultOptions
266 unless (null args) $ do
267 hPutStrLn stderr "Error: this program doesn't take any arguments."
268 exitWith $ ExitFailure 1
270 let verbose = optVerbose opts
272 (fixed_nl, il, csf) <- CLI.loadExternalData opts
274 printKeys $ map (\(a, fn) -> ("SPEC_" ++ a, fn opts)) specData
276 let num_instances = length $ Container.elems il
278 let offline_names = optOffline opts
279 all_nodes = Container.elems fixed_nl
280 all_names = map Node.name all_nodes
281 offline_wrong = filter (flip notElem all_names) offline_names
282 offline_indices = map Node.idx $
283 filter (\n -> elem (Node.name n) offline_names)
285 req_nodes = optINodes opts
289 when (length offline_wrong > 0) $ do
290 hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
291 (commaJoin offline_wrong)
292 exitWith $ ExitFailure 1
294 when (req_nodes /= 1 && req_nodes /= 2) $ do
295 hPrintf stderr "Error: Invalid required nodes (%d)\n" req_nodes
296 exitWith $ ExitFailure 1
298 let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
299 then Node.setOffline n True
301 nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
304 when (length csf > 0 && verbose > 1) $
305 hPrintf stderr "Note: Stripping common suffix of '%s' from names\n" csf
307 when (optShowNodes opts) $
309 hPutStrLn stderr "Initial cluster status:"
310 hPutStrLn stderr $ Cluster.printNodes nl
312 let ini_cv = Cluster.compCV nl
313 ini_stats = Cluster.totalResources nl
316 hPrintf stderr "Initial coefficients: overall %.8f, %s\n"
317 ini_cv (Cluster.printStats nl)
319 printKeys $ map (\(a, fn) -> ("CLUSTER_" ++ a, fn ini_stats)) clusterData
320 printKeys [("CLUSTER_NODES", printf "%d" (length all_nodes))]
321 printKeys $ printStats PInitial ini_stats
323 let bad_nodes = fst $ Cluster.computeBadItems nl il
324 when (length bad_nodes > 0) $ do
325 -- This is failn1 case, so we print the same final stats and
327 printResults nl num_instances 0 [(FailN1, 1)]
330 let nmlen = Container.maxNameLen nl
331 newinst = Instance.create "new" (optIMem opts) (optIDsk opts)
332 (optIVCPUs opts) "ADMIN_down" (-1) (-1)
334 let result = iterateDepth nl il newinst req_nodes []
335 (ereason, fin_nl, ixes) <- (case result of
337 hPrintf stderr "Failure: %s\n" s
338 exitWith $ ExitFailure 1
340 let allocs = length ixes
341 fin_ixes = reverse ixes
342 ix_namelen = maximum . map (length . Instance.name) $ fin_ixes
343 sreason = reverse $ sortBy (compare `on` snd) ereason
346 hPutStr stderr . unlines $
347 map (\i -> printf "Inst: %*s %-*s %-*s"
348 ix_namelen (Instance.name i)
349 nmlen (Container.nameOf fin_nl $ Instance.pnode i)
350 nmlen (let sdx = Instance.snode i
351 in if sdx == Node.noSecondary then ""
352 else Container.nameOf fin_nl sdx)
355 when (optShowNodes opts) $
358 hPutStrLn stderr "Final cluster status:"
359 hPutStrLn stderr $ Cluster.printNodes fin_nl
361 printResults fin_nl num_instances allocs sreason