1 {-| Cluster space sizing
7 Copyright (C) 2009, 2010, 2011 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Ganeti.HTools.Program.Hspace (main) where
29 import Data.Char (toUpper, isAlphaNum)
30 import Data.Function (on)
32 import Data.Maybe (isJust, fromJust)
33 import Data.Ord (comparing)
34 import System (exitWith, ExitCode(..))
36 import qualified System
38 import Text.Printf (printf, hPrintf)
40 import qualified Ganeti.HTools.Container as Container
41 import qualified Ganeti.HTools.Cluster as Cluster
42 import qualified Ganeti.HTools.Node as Node
43 import qualified Ganeti.HTools.Instance as Instance
45 import Ganeti.HTools.Utils
46 import Ganeti.HTools.Types
47 import Ganeti.HTools.CLI
48 import Ganeti.HTools.ExtLoader
49 import Ganeti.HTools.Loader
51 -- | Options list and functions.
75 -- | The allocation phase we're in (initial, after tiered allocs, or
76 -- after regular allocation).
81 -- | The kind of instance spec we print.
82 data SpecType = SpecNormal
85 -- | What we prefix a spec with.
86 specPrefix :: SpecType -> String
87 specPrefix SpecNormal = "SPEC"
88 specPrefix SpecTiered = "TSPEC_INI"
90 -- | The description of a spec.
91 specDescription :: SpecType -> String
92 specDescription SpecNormal = "Normal (fixed-size)"
93 specDescription SpecTiered = "Tiered (initial size)"
95 -- | Efficiency generic function.
96 effFn :: (Cluster.CStats -> Integer)
97 -> (Cluster.CStats -> Double)
98 -> Cluster.CStats -> Double
99 effFn fi ft cs = fromIntegral (fi cs) / ft cs
101 -- | Memory efficiency.
102 memEff :: Cluster.CStats -> Double
103 memEff = effFn Cluster.csImem Cluster.csTmem
105 -- | Disk efficiency.
106 dskEff :: Cluster.CStats -> Double
107 dskEff = effFn Cluster.csIdsk Cluster.csTdsk
110 cpuEff :: Cluster.CStats -> Double
111 cpuEff = effFn Cluster.csIcpu (fromIntegral . Cluster.csVcpu)
113 -- | Holds data for converting a 'Cluster.CStats' structure into
114 -- detailed statictics.
115 statsData :: [(String, Cluster.CStats -> String)]
116 statsData = [ ("SCORE", printf "%.8f" . Cluster.csScore)
117 , ("INST_CNT", printf "%d" . Cluster.csNinst)
118 , ("MEM_FREE", printf "%d" . Cluster.csFmem)
119 , ("MEM_AVAIL", printf "%d" . Cluster.csAmem)
121 \cs -> printf "%d" (Cluster.csFmem cs - Cluster.csAmem cs))
122 , ("MEM_INST", printf "%d" . Cluster.csImem)
124 \cs -> printf "%d" (Cluster.csXmem cs + Cluster.csNmem cs))
125 , ("MEM_EFF", printf "%.8f" . memEff)
126 , ("DSK_FREE", printf "%d" . Cluster.csFdsk)
127 , ("DSK_AVAIL", printf "%d". Cluster.csAdsk)
129 \cs -> printf "%d" (Cluster.csFdsk cs - Cluster.csAdsk cs))
130 , ("DSK_INST", printf "%d" . Cluster.csIdsk)
131 , ("DSK_EFF", printf "%.8f" . dskEff)
132 , ("CPU_INST", printf "%d" . Cluster.csIcpu)
133 , ("CPU_EFF", printf "%.8f" . cpuEff)
134 , ("MNODE_MEM_AVAIL", printf "%d" . Cluster.csMmem)
135 , ("MNODE_DSK_AVAIL", printf "%d" . Cluster.csMdsk)
138 -- | List holding 'RSpec' formatting information.
139 specData :: [(String, RSpec -> String)]
140 specData = [ ("MEM", printf "%d" . rspecMem)
141 , ("DSK", printf "%d" . rspecDsk)
142 , ("CPU", printf "%d" . rspecCpu)
145 -- | List holding 'Cluster.CStats' formatting information.
146 clusterData :: [(String, Cluster.CStats -> String)]
147 clusterData = [ ("MEM", printf "%.0f" . Cluster.csTmem)
148 , ("DSK", printf "%.0f" . Cluster.csTdsk)
149 , ("CPU", printf "%.0f" . Cluster.csTcpu)
150 , ("VCPU", printf "%d" . Cluster.csVcpu)
153 -- | Function to print stats for a given phase.
154 printStats :: Phase -> Cluster.CStats -> [(String, String)]
156 map (\(s, fn) -> (printf "%s_%s" kind s, fn cs)) statsData
157 where kind = case ph of
162 -- | Print final stats and related metrics.
163 printResults :: Bool -> Node.List -> Node.List -> Int -> Int
164 -> [(FailMode, Int)] -> IO ()
165 printResults True _ fin_nl num_instances allocs sreason = do
166 let fin_stats = Cluster.totalResources fin_nl
167 fin_instances = num_instances + allocs
169 when (num_instances + allocs /= Cluster.csNinst fin_stats) $
171 hPrintf stderr "ERROR: internal inconsistency, allocated (%d)\
172 \ != counted (%d)\n" (num_instances + allocs)
173 (Cluster.csNinst fin_stats) :: IO ()
174 exitWith $ ExitFailure 1
176 printKeys $ printStats PFinal fin_stats
177 printKeys [ ("ALLOC_USAGE", printf "%.8f"
178 ((fromIntegral num_instances::Double) /
179 fromIntegral fin_instances))
180 , ("ALLOC_INSTANCES", printf "%d" allocs)
181 , ("ALLOC_FAIL_REASON", map toUpper . show . fst $ head sreason)
183 printKeys $ map (\(x, y) -> (printf "ALLOC_%s_CNT" (show x),
184 printf "%d" y)) sreason
186 printResults False ini_nl fin_nl _ allocs sreason = do
187 putStrLn "Normal (fixed-size) allocation results:"
188 printf " - %3d instances allocated\n" allocs :: IO ()
189 printf " - most likely failure reason: %s\n" $ failureReason sreason::IO ()
190 printClusterScores ini_nl fin_nl
191 printClusterEff (Cluster.totalResources fin_nl)
193 -- | Prints the final @OK@ marker in machine readable output.
194 printFinal :: Bool -> IO ()
196 -- this should be the final entry
197 printKeys [("OK", "1")]
199 printFinal False = return ()
201 -- | Compute the tiered spec counts from a list of allocated
203 tieredSpecMap :: [Instance.Instance]
205 tieredSpecMap trl_ixes =
206 let fin_trl_ixes = reverse trl_ixes
207 ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
208 spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
212 -- | Formats a spec map to strings.
213 formatSpecMap :: [(RSpec, Int)] -> [String]
215 map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
216 (rspecDsk spec) (rspecCpu spec) cnt)
218 -- | Formats \"key-metrics\" values.
219 formatRSpec :: Double -> String -> RSpec -> [(String, String)]
220 formatRSpec m_cpu s r =
221 [ ("KM_" ++ s ++ "_CPU", show $ rspecCpu r)
222 , ("KM_" ++ s ++ "_NPU", show $ fromIntegral (rspecCpu r) / m_cpu)
223 , ("KM_" ++ s ++ "_MEM", show $ rspecMem r)
224 , ("KM_" ++ s ++ "_DSK", show $ rspecDsk r)
227 -- | Shows allocations stats.
228 printAllocationStats :: Double -> Node.List -> Node.List -> IO ()
229 printAllocationStats m_cpu ini_nl fin_nl = do
230 let ini_stats = Cluster.totalResources ini_nl
231 fin_stats = Cluster.totalResources fin_nl
232 (rini, ralo, runa) = Cluster.computeAllocationDelta ini_stats fin_stats
233 printKeys $ formatRSpec m_cpu "USED" rini
234 printKeys $ formatRSpec m_cpu "POOL"ralo
235 printKeys $ formatRSpec m_cpu "UNAV" runa
237 -- | Ensure a value is quoted if needed.
238 ensureQuoted :: String -> String
239 ensureQuoted v = if not (all (\c -> isAlphaNum c || c == '.') v)
243 -- | Format a list of key\/values as a shell fragment.
244 printKeys :: [(String, String)] -> IO ()
245 printKeys = mapM_ (\(k, v) ->
246 printf "HTS_%s=%s\n" (map toUpper k) (ensureQuoted v))
248 -- | Converts instance data to a list of strings.
249 printInstance :: Node.List -> Instance.Instance -> [String]
250 printInstance nl i = [ Instance.name i
251 , Container.nameOf nl $ Instance.pNode i
252 , let sdx = Instance.sNode i
253 in if sdx == Node.noSecondary then ""
254 else Container.nameOf nl sdx
255 , show (Instance.mem i)
256 , show (Instance.dsk i)
257 , show (Instance.vcpus i)
260 -- | Optionally print the allocation map.
261 printAllocationMap :: Int -> String
262 -> Node.List -> [Instance.Instance] -> IO ()
263 printAllocationMap verbose msg nl ixes =
264 when (verbose > 1) $ do
266 hPutStr stderr . unlines . map ((:) ' ' . intercalate " ") $
267 formatTable (map (printInstance nl) (reverse ixes))
268 -- This is the numberic-or-not field
269 -- specification; the first three fields are
270 -- strings, whereas the rest are numeric
271 [False, False, False, True, True, True]
273 -- | Formats nicely a list of resources.
274 formatResources :: a -> [(String, a->String)] -> String
275 formatResources res =
276 intercalate ", " . map (\(a, fn) -> a ++ " " ++ fn res)
278 -- | Print the cluster resources.
279 printCluster :: Bool -> Cluster.CStats -> Int -> IO ()
280 printCluster True ini_stats node_count = do
281 printKeys $ map (\(a, fn) -> ("CLUSTER_" ++ a, fn ini_stats)) clusterData
282 printKeys [("CLUSTER_NODES", printf "%d" node_count)]
283 printKeys $ printStats PInitial ini_stats
285 printCluster False ini_stats node_count = do
286 printf "The cluster has %d nodes and the following resources:\n %s.\n"
287 node_count (formatResources ini_stats clusterData)::IO ()
288 printf "There are %s initial instances on the cluster.\n"
289 (if inst_count > 0 then show inst_count else "no" )
290 where inst_count = Cluster.csNinst ini_stats
292 -- | Prints the normal instance spec.
293 printISpec :: Bool -> RSpec -> SpecType -> DiskTemplate -> IO ()
294 printISpec True ispec spec disk_template = do
295 printKeys $ map (\(a, fn) -> (prefix ++ "_" ++ a, fn ispec)) specData
296 printKeys [ (prefix ++ "_RQN", printf "%d" req_nodes) ]
297 printKeys [ (prefix ++ "_DISK_TEMPLATE", dtToString disk_template) ]
298 where req_nodes = Instance.requiredNodes disk_template
299 prefix = specPrefix spec
301 printISpec False ispec spec disk_template =
302 printf "%s instance spec is:\n %s, using disk\
304 (specDescription spec)
305 (formatResources ispec specData) (dtToString disk_template)
307 -- | Prints the tiered results.
308 printTiered :: Bool -> [(RSpec, Int)] -> Double
309 -> Node.List -> Node.List -> [(FailMode, Int)] -> IO ()
310 printTiered True spec_map m_cpu nl trl_nl _ = do
311 printKeys $ printStats PTiered (Cluster.totalResources trl_nl)
312 printKeys [("TSPEC", intercalate " " (formatSpecMap spec_map))]
313 printAllocationStats m_cpu nl trl_nl
315 printTiered False spec_map _ ini_nl fin_nl sreason = do
316 _ <- printf "Tiered allocation results:\n"
317 mapM_ (\(ispec, cnt) ->
318 printf " - %3d instances of spec %s\n" cnt
319 (formatResources ispec specData)) spec_map
320 printf " - most likely failure reason: %s\n" $ failureReason sreason::IO ()
321 printClusterScores ini_nl fin_nl
322 printClusterEff (Cluster.totalResources fin_nl)
324 -- | Displays the initial/final cluster scores.
325 printClusterScores :: Node.List -> Node.List -> IO ()
326 printClusterScores ini_nl fin_nl = do
327 printf " - initial cluster score: %.8f\n" $ Cluster.compCV ini_nl::IO ()
328 printf " - final cluster score: %.8f\n" $ Cluster.compCV fin_nl
330 -- | Displays the cluster efficiency.
331 printClusterEff :: Cluster.CStats -> IO ()
334 printf " - %s usage efficiency: %5.2f%%\n" s (fn cs * 100))
339 -- | Computes the most likely failure reason.
340 failureReason :: [(FailMode, Int)] -> String
341 failureReason = show . fst . head
343 -- | Sorts the failure reasons.
344 sortReasons :: [(FailMode, Int)] -> [(FailMode, Int)]
345 sortReasons = reverse . sortBy (comparing snd)
350 cmd_args <- System.getArgs
351 (opts, args) <- parseOpts cmd_args "hspace" options
353 unless (null args) $ do
354 hPutStrLn stderr "Error: this program doesn't take any arguments."
355 exitWith $ ExitFailure 1
357 let verbose = optVerbose opts
358 ispec = optISpec opts
359 shownodes = optShowNodes opts
360 disk_template = optDiskTemplate opts
361 req_nodes = Instance.requiredNodes disk_template
362 machine_r = optMachineReadable opts
364 (ClusterData gl fixed_nl il ctags) <- loadExternalData opts
366 let num_instances = length $ Container.elems il
368 let offline_passed = optOffline opts
369 all_nodes = Container.elems fixed_nl
370 offline_lkp = map (lookupName (map Node.name all_nodes)) offline_passed
371 offline_wrong = filter (not . goodLookupResult) offline_lkp
372 offline_names = map lrContent offline_lkp
373 offline_indices = map Node.idx $
374 filter (\n -> Node.name n `elem` offline_names)
379 when (not (null offline_wrong)) $ do
380 hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
381 (commaJoin (map lrContent offline_wrong)) :: IO ()
382 exitWith $ ExitFailure 1
384 when (req_nodes /= 1 && req_nodes /= 2) $ do
385 hPrintf stderr "Error: Invalid required nodes (%d)\n"
387 exitWith $ ExitFailure 1
389 let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
390 then Node.setOffline n True
392 nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
394 csf = commonSuffix fixed_nl il
396 when (length csf > 0 && verbose > 1) $
397 hPrintf stderr "Note: Stripping common suffix of '%s' from names\n" csf
399 when (isJust shownodes) $
401 hPutStrLn stderr "Initial cluster status:"
402 hPutStrLn stderr $ Cluster.printNodes nl (fromJust shownodes)
404 let ini_cv = Cluster.compCV nl
405 ini_stats = Cluster.totalResources nl
408 hPrintf stderr "Initial coefficients: overall %.8f, %s\n"
409 ini_cv (Cluster.printStats nl)
411 printCluster machine_r ini_stats (length all_nodes)
413 printISpec machine_r ispec SpecNormal disk_template
415 let bad_nodes = fst $ Cluster.computeBadItems nl il
416 stop_allocation = length bad_nodes > 0
417 result_noalloc = ([(FailN1, 1)]::FailStats, nl, il, [], [])
420 let iofspec spx = Instance.create "new" (rspecMem spx) (rspecDsk spx)
421 (rspecCpu spx) "running" [] True (-1) (-1) disk_template
422 exitifbad val = (case val of
424 hPrintf stderr "Failure: %s\n" s :: IO ()
425 exitWith $ ExitFailure 1
429 let reqinst = iofspec ispec
431 allocnodes <- exitifbad $ Cluster.genAllocNodes gl nl req_nodes True
433 -- Run the tiered allocation, if enabled
435 (case optTieredSpec opts of
438 (treason, trl_nl, trl_il, trl_ixes, _) <-
440 then return result_noalloc
441 else exitifbad (Cluster.tieredAlloc nl il Nothing (iofspec tspec)
443 let spec_map' = tieredSpecMap trl_ixes
444 treason' = sortReasons treason
446 printAllocationMap verbose "Tiered allocation map" trl_nl trl_ixes
448 maybePrintNodes shownodes "Tiered allocation"
449 (Cluster.printNodes trl_nl)
451 maybeSaveData (optSaveCluster opts) "tiered" "after tiered allocation"
452 (ClusterData gl trl_nl trl_il ctags)
454 printISpec machine_r tspec SpecTiered disk_template
456 printTiered machine_r spec_map' m_cpu nl trl_nl treason'
459 -- Run the standard (avg-mode) allocation
461 (ereason, fin_nl, fin_il, ixes, _) <-
463 then return result_noalloc
464 else exitifbad (Cluster.iterateAlloc nl il Nothing
465 reqinst allocnodes [] [])
467 let allocs = length ixes
468 sreason = sortReasons ereason
470 printAllocationMap verbose "Standard allocation map" fin_nl ixes
472 maybePrintNodes shownodes "Standard allocation" (Cluster.printNodes fin_nl)
474 maybeSaveData (optSaveCluster opts) "alloc" "after standard allocation"
475 (ClusterData gl fin_nl fin_il ctags)
477 printResults machine_r nl fin_nl num_instances allocs sreason