1 {-| Cluster space sizing
7 Copyright (C) 2009, 2010, 2011 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Ganeti.HTools.Program.Hspace (main) where
29 import Data.Char (toUpper, isAlphaNum, toLower)
30 import Data.Function (on)
32 import Data.Ord (comparing)
33 import System (exitWith, ExitCode(..))
35 import qualified System
37 import Text.Printf (printf, hPrintf)
39 import qualified Ganeti.HTools.Container as Container
40 import qualified Ganeti.HTools.Cluster as Cluster
41 import qualified Ganeti.HTools.Node as Node
42 import qualified Ganeti.HTools.Instance as Instance
44 import Ganeti.HTools.Utils
45 import Ganeti.HTools.Types
46 import Ganeti.HTools.CLI
47 import Ganeti.HTools.ExtLoader
48 import Ganeti.HTools.Loader
50 -- | Options list and functions.
75 -- | The allocation phase we're in (initial, after tiered allocs, or
76 -- after regular allocation).
81 -- | The kind of instance spec we print.
82 data SpecType = SpecNormal
85 -- | What we prefix a spec with.
86 specPrefix :: SpecType -> String
87 specPrefix SpecNormal = "SPEC"
88 specPrefix SpecTiered = "TSPEC_INI"
90 -- | The description of a spec.
91 specDescription :: SpecType -> String
92 specDescription SpecNormal = "Standard (fixed-size)"
93 specDescription SpecTiered = "Tiered (initial size)"
95 -- | Efficiency generic function.
96 effFn :: (Cluster.CStats -> Integer)
97 -> (Cluster.CStats -> Double)
98 -> Cluster.CStats -> Double
99 effFn fi ft cs = fromIntegral (fi cs) / ft cs
101 -- | Memory efficiency.
102 memEff :: Cluster.CStats -> Double
103 memEff = effFn Cluster.csImem Cluster.csTmem
105 -- | Disk efficiency.
106 dskEff :: Cluster.CStats -> Double
107 dskEff = effFn Cluster.csIdsk Cluster.csTdsk
110 cpuEff :: Cluster.CStats -> Double
111 cpuEff = effFn Cluster.csIcpu (fromIntegral . Cluster.csVcpu)
113 -- | Holds data for converting a 'Cluster.CStats' structure into
114 -- detailed statictics.
115 statsData :: [(String, Cluster.CStats -> String)]
116 statsData = [ ("SCORE", printf "%.8f" . Cluster.csScore)
117 , ("INST_CNT", printf "%d" . Cluster.csNinst)
118 , ("MEM_FREE", printf "%d" . Cluster.csFmem)
119 , ("MEM_AVAIL", printf "%d" . Cluster.csAmem)
121 \cs -> printf "%d" (Cluster.csFmem cs - Cluster.csAmem cs))
122 , ("MEM_INST", printf "%d" . Cluster.csImem)
124 \cs -> printf "%d" (Cluster.csXmem cs + Cluster.csNmem cs))
125 , ("MEM_EFF", printf "%.8f" . memEff)
126 , ("DSK_FREE", printf "%d" . Cluster.csFdsk)
127 , ("DSK_AVAIL", printf "%d". Cluster.csAdsk)
129 \cs -> printf "%d" (Cluster.csFdsk cs - Cluster.csAdsk cs))
130 , ("DSK_INST", printf "%d" . Cluster.csIdsk)
131 , ("DSK_EFF", printf "%.8f" . dskEff)
132 , ("CPU_INST", printf "%d" . Cluster.csIcpu)
133 , ("CPU_EFF", printf "%.8f" . cpuEff)
134 , ("MNODE_MEM_AVAIL", printf "%d" . Cluster.csMmem)
135 , ("MNODE_DSK_AVAIL", printf "%d" . Cluster.csMdsk)
138 -- | List holding 'RSpec' formatting information.
139 specData :: [(String, RSpec -> String)]
140 specData = [ ("MEM", printf "%d" . rspecMem)
141 , ("DSK", printf "%d" . rspecDsk)
142 , ("CPU", printf "%d" . rspecCpu)
145 -- | List holding 'Cluster.CStats' formatting information.
146 clusterData :: [(String, Cluster.CStats -> String)]
147 clusterData = [ ("MEM", printf "%.0f" . Cluster.csTmem)
148 , ("DSK", printf "%.0f" . Cluster.csTdsk)
149 , ("CPU", printf "%.0f" . Cluster.csTcpu)
150 , ("VCPU", printf "%d" . Cluster.csVcpu)
153 -- | Function to print stats for a given phase.
154 printStats :: Phase -> Cluster.CStats -> [(String, String)]
156 map (\(s, fn) -> (printf "%s_%s" kind s, fn cs)) statsData
157 where kind = case ph of
162 -- | Print final stats and related metrics.
163 printResults :: Bool -> Node.List -> Node.List -> Int -> Int
164 -> [(FailMode, Int)] -> IO ()
165 printResults True _ fin_nl num_instances allocs sreason = do
166 let fin_stats = Cluster.totalResources fin_nl
167 fin_instances = num_instances + allocs
169 when (num_instances + allocs /= Cluster.csNinst fin_stats) $
171 hPrintf stderr "ERROR: internal inconsistency, allocated (%d)\
172 \ != counted (%d)\n" (num_instances + allocs)
173 (Cluster.csNinst fin_stats) :: IO ()
174 exitWith $ ExitFailure 1
176 printKeys $ printStats PFinal fin_stats
177 printKeys [ ("ALLOC_USAGE", printf "%.8f"
178 ((fromIntegral num_instances::Double) /
179 fromIntegral fin_instances))
180 , ("ALLOC_INSTANCES", printf "%d" allocs)
181 , ("ALLOC_FAIL_REASON", map toUpper . show . fst $ head sreason)
183 printKeys $ map (\(x, y) -> (printf "ALLOC_%s_CNT" (show x),
184 printf "%d" y)) sreason
186 printResults False ini_nl fin_nl _ allocs sreason = do
187 putStrLn "Normal (fixed-size) allocation results:"
188 printf " - %3d instances allocated\n" allocs :: IO ()
189 printf " - most likely failure reason: %s\n" $ failureReason sreason::IO ()
190 printClusterScores ini_nl fin_nl
191 printClusterEff (Cluster.totalResources fin_nl)
193 -- | Prints the final @OK@ marker in machine readable output.
194 printFinal :: Bool -> IO ()
196 -- this should be the final entry
197 printKeys [("OK", "1")]
199 printFinal False = return ()
201 -- | Compute the tiered spec counts from a list of allocated
203 tieredSpecMap :: [Instance.Instance]
205 tieredSpecMap trl_ixes =
206 let fin_trl_ixes = reverse trl_ixes
207 ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
208 spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
212 -- | Formats a spec map to strings.
213 formatSpecMap :: [(RSpec, Int)] -> [String]
215 map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
216 (rspecDsk spec) (rspecCpu spec) cnt)
218 -- | Formats \"key-metrics\" values.
219 formatRSpec :: Double -> String -> RSpec -> [(String, String)]
220 formatRSpec m_cpu s r =
221 [ ("KM_" ++ s ++ "_CPU", show $ rspecCpu r)
222 , ("KM_" ++ s ++ "_NPU", show $ fromIntegral (rspecCpu r) / m_cpu)
223 , ("KM_" ++ s ++ "_MEM", show $ rspecMem r)
224 , ("KM_" ++ s ++ "_DSK", show $ rspecDsk r)
227 -- | Shows allocations stats.
228 printAllocationStats :: Double -> Node.List -> Node.List -> IO ()
229 printAllocationStats m_cpu ini_nl fin_nl = do
230 let ini_stats = Cluster.totalResources ini_nl
231 fin_stats = Cluster.totalResources fin_nl
232 (rini, ralo, runa) = Cluster.computeAllocationDelta ini_stats fin_stats
233 printKeys $ formatRSpec m_cpu "USED" rini
234 printKeys $ formatRSpec m_cpu "POOL"ralo
235 printKeys $ formatRSpec m_cpu "UNAV" runa
237 -- | Ensure a value is quoted if needed.
238 ensureQuoted :: String -> String
239 ensureQuoted v = if not (all (\c -> isAlphaNum c || c == '.') v)
243 -- | Format a list of key\/values as a shell fragment.
244 printKeys :: [(String, String)] -> IO ()
245 printKeys = mapM_ (\(k, v) ->
246 printf "HTS_%s=%s\n" (map toUpper k) (ensureQuoted v))
248 -- | Converts instance data to a list of strings.
249 printInstance :: Node.List -> Instance.Instance -> [String]
250 printInstance nl i = [ Instance.name i
251 , Container.nameOf nl $ Instance.pNode i
252 , let sdx = Instance.sNode i
253 in if sdx == Node.noSecondary then ""
254 else Container.nameOf nl sdx
255 , show (Instance.mem i)
256 , show (Instance.dsk i)
257 , show (Instance.vcpus i)
260 -- | Optionally print the allocation map.
261 printAllocationMap :: Int -> String
262 -> Node.List -> [Instance.Instance] -> IO ()
263 printAllocationMap verbose msg nl ixes =
264 when (verbose > 1) $ do
265 hPutStrLn stderr (msg ++ " map")
266 hPutStr stderr . unlines . map ((:) ' ' . intercalate " ") $
267 formatTable (map (printInstance nl) (reverse ixes))
268 -- This is the numberic-or-not field
269 -- specification; the first three fields are
270 -- strings, whereas the rest are numeric
271 [False, False, False, True, True, True]
273 -- | Formats nicely a list of resources.
274 formatResources :: a -> [(String, a->String)] -> String
275 formatResources res =
276 intercalate ", " . map (\(a, fn) -> a ++ " " ++ fn res)
278 -- | Print the cluster resources.
279 printCluster :: Bool -> Cluster.CStats -> Int -> IO ()
280 printCluster True ini_stats node_count = do
281 printKeys $ map (\(a, fn) -> ("CLUSTER_" ++ a, fn ini_stats)) clusterData
282 printKeys [("CLUSTER_NODES", printf "%d" node_count)]
283 printKeys $ printStats PInitial ini_stats
285 printCluster False ini_stats node_count = do
286 printf "The cluster has %d nodes and the following resources:\n %s.\n"
287 node_count (formatResources ini_stats clusterData)::IO ()
288 printf "There are %s initial instances on the cluster.\n"
289 (if inst_count > 0 then show inst_count else "no" )
290 where inst_count = Cluster.csNinst ini_stats
292 -- | Prints the normal instance spec.
293 printISpec :: Bool -> RSpec -> SpecType -> DiskTemplate -> IO ()
294 printISpec True ispec spec disk_template = do
295 printKeys $ map (\(a, fn) -> (prefix ++ "_" ++ a, fn ispec)) specData
296 printKeys [ (prefix ++ "_RQN", printf "%d" req_nodes) ]
297 printKeys [ (prefix ++ "_DISK_TEMPLATE",
298 diskTemplateToRaw disk_template) ]
299 where req_nodes = Instance.requiredNodes disk_template
300 prefix = specPrefix spec
302 printISpec False ispec spec disk_template =
303 printf "%s instance spec is:\n %s, using disk\
305 (specDescription spec)
306 (formatResources ispec specData) (diskTemplateToRaw disk_template)
308 -- | Prints the tiered results.
309 printTiered :: Bool -> [(RSpec, Int)] -> Double
310 -> Node.List -> Node.List -> [(FailMode, Int)] -> IO ()
311 printTiered True spec_map m_cpu nl trl_nl _ = do
312 printKeys $ printStats PTiered (Cluster.totalResources trl_nl)
313 printKeys [("TSPEC", intercalate " " (formatSpecMap spec_map))]
314 printAllocationStats m_cpu nl trl_nl
316 printTiered False spec_map _ ini_nl fin_nl sreason = do
317 _ <- printf "Tiered allocation results:\n"
318 mapM_ (\(ispec, cnt) ->
319 printf " - %3d instances of spec %s\n" cnt
320 (formatResources ispec specData)) spec_map
321 printf " - most likely failure reason: %s\n" $ failureReason sreason::IO ()
322 printClusterScores ini_nl fin_nl
323 printClusterEff (Cluster.totalResources fin_nl)
325 -- | Displays the initial/final cluster scores.
326 printClusterScores :: Node.List -> Node.List -> IO ()
327 printClusterScores ini_nl fin_nl = do
328 printf " - initial cluster score: %.8f\n" $ Cluster.compCV ini_nl::IO ()
329 printf " - final cluster score: %.8f\n" $ Cluster.compCV fin_nl
331 -- | Displays the cluster efficiency.
332 printClusterEff :: Cluster.CStats -> IO ()
335 printf " - %s usage efficiency: %5.2f%%\n" s (fn cs * 100))
340 -- | Computes the most likely failure reason.
341 failureReason :: [(FailMode, Int)] -> String
342 failureReason = show . fst . head
344 -- | Sorts the failure reasons.
345 sortReasons :: [(FailMode, Int)] -> [(FailMode, Int)]
346 sortReasons = reverse . sortBy (comparing snd)
348 -- | Aborts the program if we get a bad value.
349 exitIfBad :: Result a -> IO a
351 hPrintf stderr "Failure: %s\n" s >> exitWith (ExitFailure 1)
352 exitIfBad (Ok v) = return v
354 -- | Runs an allocation algorithm and saves cluster state.
355 runAllocation :: ClusterData -- ^ Cluster data
356 -> Maybe Cluster.AllocResult -- ^ Optional stop-allocation
357 -> Result Cluster.AllocResult -- ^ Allocation result
358 -> RSpec -- ^ Requested instance spec
359 -> SpecType -- ^ Allocation type
360 -> Options -- ^ CLI options
361 -> IO (FailStats, Node.List, Int, [(RSpec, Int)])
362 runAllocation cdata stop_allocation actual_result spec mode opts = do
363 (reasons, new_nl, new_il, new_ixes, _) <-
364 case stop_allocation of
365 Just result_noalloc -> return result_noalloc
366 Nothing -> exitIfBad actual_result
368 let name = head . words . specDescription $ mode
369 descr = name ++ " allocation"
370 ldescr = "after " ++ map toLower descr
372 printISpec (optMachineReadable opts) spec mode (optDiskTemplate opts)
374 printAllocationMap (optVerbose opts) descr new_nl new_ixes
376 maybePrintNodes (optShowNodes opts) descr (Cluster.printNodes new_nl)
378 maybeSaveData (optSaveCluster opts) (map toLower name) ldescr
379 (cdata { cdNodes = new_nl, cdInstances = new_il})
381 return (sortReasons reasons, new_nl, length new_ixes, tieredSpecMap new_ixes)
386 cmd_args <- System.getArgs
387 (opts, args) <- parseOpts cmd_args "hspace" options
389 unless (null args) $ do
390 hPutStrLn stderr "Error: this program doesn't take any arguments."
391 exitWith $ ExitFailure 1
393 let verbose = optVerbose opts
394 ispec = optISpec opts
395 disk_template = optDiskTemplate opts
396 req_nodes = Instance.requiredNodes disk_template
397 machine_r = optMachineReadable opts
399 (ClusterData gl fixed_nl il ctags) <- loadExternalData opts
400 nl <- setNodeStatus opts fixed_nl
402 let num_instances = Container.size il
403 all_nodes = Container.elems fixed_nl
404 cdata = ClusterData gl nl il ctags
405 csf = commonSuffix fixed_nl il
407 when (not (null csf) && verbose > 1) $
408 hPrintf stderr "Note: Stripping common suffix of '%s' from names\n" csf
410 maybePrintNodes (optShowNodes opts) "Initial cluster" (Cluster.printNodes nl)
413 hPrintf stderr "Initial coefficients: overall %.8f, %s\n"
414 (Cluster.compCV nl) (Cluster.printStats nl)
416 printCluster machine_r (Cluster.totalResources nl) (length all_nodes)
418 let stop_allocation = case Cluster.computeBadItems nl il of
420 _ -> Just ([(FailN1, 1)]::FailStats, nl, il, [], [])
421 alloclimit = if optMaxLength opts == -1
423 else Just (optMaxLength opts)
426 let iofspec spx = Instance.create "new" (rspecMem spx) (rspecDsk spx)
427 (rspecCpu spx) "running" [] True (-1) (-1) disk_template
429 allocnodes <- exitIfBad $ Cluster.genAllocNodes gl nl req_nodes True
431 -- Run the tiered allocation, if enabled
433 (case optTieredSpec opts of
436 (treason, trl_nl, _, spec_map) <-
437 runAllocation cdata stop_allocation
438 (Cluster.tieredAlloc nl il alloclimit (iofspec tspec)
439 allocnodes [] []) tspec SpecTiered opts
441 printTiered machine_r spec_map (optMcpu opts) nl trl_nl treason
444 -- Run the standard (avg-mode) allocation
446 (sreason, fin_nl, allocs, _) <-
447 runAllocation cdata stop_allocation
448 (Cluster.iterateAlloc nl il alloclimit (iofspec ispec)
449 allocnodes [] []) ispec SpecNormal opts
451 printResults machine_r nl fin_nl num_instances allocs sreason