1 {-| Cluster space sizing
7 Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2 of the License, or
12 (at your option) any later version.
14 This program is distributed in the hope that it will be useful, but
15 WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 module Ganeti.HTools.Program.Hspace (main) where
29 import Data.Char (toUpper, isAlphaNum, toLower)
30 import Data.Function (on)
32 import Data.Maybe (fromMaybe)
33 import Data.Ord (comparing)
36 import System.Environment (getArgs)
38 import Text.Printf (printf, hPrintf)
40 import qualified Ganeti.HTools.Container as Container
41 import qualified Ganeti.HTools.Cluster as Cluster
42 import qualified Ganeti.HTools.Node as Node
43 import qualified Ganeti.HTools.Instance as Instance
45 import Ganeti.HTools.Utils
46 import Ganeti.HTools.Types
47 import Ganeti.HTools.CLI
48 import Ganeti.HTools.ExtLoader
49 import Ganeti.HTools.Loader
51 -- | Options list and functions.
74 -- | The allocation phase we're in (initial, after tiered allocs, or
75 -- after regular allocation).
80 -- | The kind of instance spec we print.
81 data SpecType = SpecNormal
84 -- | What we prefix a spec with.
85 specPrefix :: SpecType -> String
86 specPrefix SpecNormal = "SPEC"
87 specPrefix SpecTiered = "TSPEC_INI"
89 -- | The description of a spec.
90 specDescription :: SpecType -> String
91 specDescription SpecNormal = "Standard (fixed-size)"
92 specDescription SpecTiered = "Tiered (initial size)"
94 -- | Efficiency generic function.
95 effFn :: (Cluster.CStats -> Integer)
96 -> (Cluster.CStats -> Double)
97 -> Cluster.CStats -> Double
98 effFn fi ft cs = fromIntegral (fi cs) / ft cs
100 -- | Memory efficiency.
101 memEff :: Cluster.CStats -> Double
102 memEff = effFn Cluster.csImem Cluster.csTmem
104 -- | Disk efficiency.
105 dskEff :: Cluster.CStats -> Double
106 dskEff = effFn Cluster.csIdsk Cluster.csTdsk
109 cpuEff :: Cluster.CStats -> Double
110 cpuEff = effFn Cluster.csIcpu (fromIntegral . Cluster.csVcpu)
112 -- | Holds data for converting a 'Cluster.CStats' structure into
113 -- detailed statictics.
114 statsData :: [(String, Cluster.CStats -> String)]
115 statsData = [ ("SCORE", printf "%.8f" . Cluster.csScore)
116 , ("INST_CNT", printf "%d" . Cluster.csNinst)
117 , ("MEM_FREE", printf "%d" . Cluster.csFmem)
118 , ("MEM_AVAIL", printf "%d" . Cluster.csAmem)
120 \cs -> printf "%d" (Cluster.csFmem cs - Cluster.csAmem cs))
121 , ("MEM_INST", printf "%d" . Cluster.csImem)
123 \cs -> printf "%d" (Cluster.csXmem cs + Cluster.csNmem cs))
124 , ("MEM_EFF", printf "%.8f" . memEff)
125 , ("DSK_FREE", printf "%d" . Cluster.csFdsk)
126 , ("DSK_AVAIL", printf "%d". Cluster.csAdsk)
128 \cs -> printf "%d" (Cluster.csFdsk cs - Cluster.csAdsk cs))
129 , ("DSK_INST", printf "%d" . Cluster.csIdsk)
130 , ("DSK_EFF", printf "%.8f" . dskEff)
131 , ("CPU_INST", printf "%d" . Cluster.csIcpu)
132 , ("CPU_EFF", printf "%.8f" . cpuEff)
133 , ("MNODE_MEM_AVAIL", printf "%d" . Cluster.csMmem)
134 , ("MNODE_DSK_AVAIL", printf "%d" . Cluster.csMdsk)
137 -- | List holding 'RSpec' formatting information.
138 specData :: [(String, RSpec -> String)]
139 specData = [ ("MEM", printf "%d" . rspecMem)
140 , ("DSK", printf "%d" . rspecDsk)
141 , ("CPU", printf "%d" . rspecCpu)
144 -- | List holding 'Cluster.CStats' formatting information.
145 clusterData :: [(String, Cluster.CStats -> String)]
146 clusterData = [ ("MEM", printf "%.0f" . Cluster.csTmem)
147 , ("DSK", printf "%.0f" . Cluster.csTdsk)
148 , ("CPU", printf "%.0f" . Cluster.csTcpu)
149 , ("VCPU", printf "%d" . Cluster.csVcpu)
152 -- | Function to print stats for a given phase.
153 printStats :: Phase -> Cluster.CStats -> [(String, String)]
155 map (\(s, fn) -> (printf "%s_%s" kind s, fn cs)) statsData
156 where kind = case ph of
161 -- | Print failure reason and scores
162 printFRScores :: Node.List -> Node.List -> [(FailMode, Int)] -> IO ()
163 printFRScores ini_nl fin_nl sreason = do
164 printf " - most likely failure reason: %s\n" $ failureReason sreason::IO ()
165 printClusterScores ini_nl fin_nl
166 printClusterEff (Cluster.totalResources fin_nl)
168 -- | Print final stats and related metrics.
169 printResults :: Bool -> Node.List -> Node.List -> Int -> Int
170 -> [(FailMode, Int)] -> IO ()
171 printResults True _ fin_nl num_instances allocs sreason = do
172 let fin_stats = Cluster.totalResources fin_nl
173 fin_instances = num_instances + allocs
175 when (num_instances + allocs /= Cluster.csNinst fin_stats) $
177 hPrintf stderr "ERROR: internal inconsistency, allocated (%d)\
178 \ != counted (%d)\n" (num_instances + allocs)
179 (Cluster.csNinst fin_stats) :: IO ()
180 exitWith $ ExitFailure 1
182 printKeys $ printStats PFinal fin_stats
183 printKeys [ ("ALLOC_USAGE", printf "%.8f"
184 ((fromIntegral num_instances::Double) /
185 fromIntegral fin_instances))
186 , ("ALLOC_INSTANCES", printf "%d" allocs)
187 , ("ALLOC_FAIL_REASON", map toUpper . show . fst $ head sreason)
189 printKeys $ map (\(x, y) -> (printf "ALLOC_%s_CNT" (show x),
190 printf "%d" y)) sreason
192 printResults False ini_nl fin_nl _ allocs sreason = do
193 putStrLn "Normal (fixed-size) allocation results:"
194 printf " - %3d instances allocated\n" allocs :: IO ()
195 printFRScores ini_nl fin_nl sreason
197 -- | Prints the final @OK@ marker in machine readable output.
198 printFinal :: Bool -> IO ()
200 -- this should be the final entry
201 printKeys [("OK", "1")]
203 printFinal False = return ()
205 -- | Compute the tiered spec counts from a list of allocated
207 tieredSpecMap :: [Instance.Instance]
209 tieredSpecMap trl_ixes =
210 let fin_trl_ixes = reverse trl_ixes
211 ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
212 spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
216 -- | Formats a spec map to strings.
217 formatSpecMap :: [(RSpec, Int)] -> [String]
219 map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
220 (rspecDsk spec) (rspecCpu spec) cnt)
222 -- | Formats \"key-metrics\" values.
223 formatRSpec :: Double -> String -> RSpec -> [(String, String)]
224 formatRSpec m_cpu s r =
225 [ ("KM_" ++ s ++ "_CPU", show $ rspecCpu r)
226 , ("KM_" ++ s ++ "_NPU", show $ fromIntegral (rspecCpu r) / m_cpu)
227 , ("KM_" ++ s ++ "_MEM", show $ rspecMem r)
228 , ("KM_" ++ s ++ "_DSK", show $ rspecDsk r)
231 -- | Shows allocations stats.
232 printAllocationStats :: Double -> Node.List -> Node.List -> IO ()
233 printAllocationStats m_cpu ini_nl fin_nl = do
234 let ini_stats = Cluster.totalResources ini_nl
235 fin_stats = Cluster.totalResources fin_nl
236 (rini, ralo, runa) = Cluster.computeAllocationDelta ini_stats fin_stats
237 printKeys $ formatRSpec m_cpu "USED" rini
238 printKeys $ formatRSpec m_cpu "POOL"ralo
239 printKeys $ formatRSpec m_cpu "UNAV" runa
241 -- | Ensure a value is quoted if needed.
242 ensureQuoted :: String -> String
243 ensureQuoted v = if not (all (\c -> isAlphaNum c || c == '.') v)
247 -- | Format a list of key\/values as a shell fragment.
248 printKeys :: [(String, String)] -> IO ()
249 printKeys = mapM_ (\(k, v) ->
250 printf "HTS_%s=%s\n" (map toUpper k) (ensureQuoted v))
252 -- | Converts instance data to a list of strings.
253 printInstance :: Node.List -> Instance.Instance -> [String]
254 printInstance nl i = [ Instance.name i
255 , Container.nameOf nl $ Instance.pNode i
256 , let sdx = Instance.sNode i
257 in if sdx == Node.noSecondary then ""
258 else Container.nameOf nl sdx
259 , show (Instance.mem i)
260 , show (Instance.dsk i)
261 , show (Instance.vcpus i)
264 -- | Optionally print the allocation map.
265 printAllocationMap :: Int -> String
266 -> Node.List -> [Instance.Instance] -> IO ()
267 printAllocationMap verbose msg nl ixes =
268 when (verbose > 1) $ do
269 hPutStrLn stderr (msg ++ " map")
270 hPutStr stderr . unlines . map ((:) ' ' . unwords) $
271 formatTable (map (printInstance nl) (reverse ixes))
272 -- This is the numberic-or-not field
273 -- specification; the first three fields are
274 -- strings, whereas the rest are numeric
275 [False, False, False, True, True, True]
277 -- | Formats nicely a list of resources.
278 formatResources :: a -> [(String, a->String)] -> String
279 formatResources res =
280 intercalate ", " . map (\(a, fn) -> a ++ " " ++ fn res)
282 -- | Print the cluster resources.
283 printCluster :: Bool -> Cluster.CStats -> Int -> IO ()
284 printCluster True ini_stats node_count = do
285 printKeys $ map (\(a, fn) -> ("CLUSTER_" ++ a, fn ini_stats)) clusterData
286 printKeys [("CLUSTER_NODES", printf "%d" node_count)]
287 printKeys $ printStats PInitial ini_stats
289 printCluster False ini_stats node_count = do
290 printf "The cluster has %d nodes and the following resources:\n %s.\n"
291 node_count (formatResources ini_stats clusterData)::IO ()
292 printf "There are %s initial instances on the cluster.\n"
293 (if inst_count > 0 then show inst_count else "no" )
294 where inst_count = Cluster.csNinst ini_stats
296 -- | Prints the normal instance spec.
297 printISpec :: Bool -> RSpec -> SpecType -> DiskTemplate -> IO ()
298 printISpec True ispec spec disk_template = do
299 printKeys $ map (\(a, fn) -> (prefix ++ "_" ++ a, fn ispec)) specData
300 printKeys [ (prefix ++ "_RQN", printf "%d" req_nodes) ]
301 printKeys [ (prefix ++ "_DISK_TEMPLATE",
302 diskTemplateToRaw disk_template) ]
303 where req_nodes = Instance.requiredNodes disk_template
304 prefix = specPrefix spec
306 printISpec False ispec spec disk_template =
307 printf "%s instance spec is:\n %s, using disk\
309 (specDescription spec)
310 (formatResources ispec specData) (diskTemplateToRaw disk_template)
312 -- | Prints the tiered results.
313 printTiered :: Bool -> [(RSpec, Int)] -> Double
314 -> Node.List -> Node.List -> [(FailMode, Int)] -> IO ()
315 printTiered True spec_map m_cpu nl trl_nl _ = do
316 printKeys $ printStats PTiered (Cluster.totalResources trl_nl)
317 printKeys [("TSPEC", unwords (formatSpecMap spec_map))]
318 printAllocationStats m_cpu nl trl_nl
320 printTiered False spec_map _ ini_nl fin_nl sreason = do
321 _ <- printf "Tiered allocation results:\n"
323 then putStrLn " - no instances allocated"
324 else mapM_ (\(ispec, cnt) ->
325 printf " - %3d instances of spec %s\n" cnt
326 (formatResources ispec specData)) spec_map
327 printFRScores ini_nl fin_nl sreason
329 -- | Displays the initial/final cluster scores.
330 printClusterScores :: Node.List -> Node.List -> IO ()
331 printClusterScores ini_nl fin_nl = do
332 printf " - initial cluster score: %.8f\n" $ Cluster.compCV ini_nl::IO ()
333 printf " - final cluster score: %.8f\n" $ Cluster.compCV fin_nl
335 -- | Displays the cluster efficiency.
336 printClusterEff :: Cluster.CStats -> IO ()
339 printf " - %s usage efficiency: %5.2f%%\n" s (fn cs * 100))
344 -- | Computes the most likely failure reason.
345 failureReason :: [(FailMode, Int)] -> String
346 failureReason = show . fst . head
348 -- | Sorts the failure reasons.
349 sortReasons :: [(FailMode, Int)] -> [(FailMode, Int)]
350 sortReasons = reverse . sortBy (comparing snd)
352 -- | Aborts the program if we get a bad value.
353 exitIfBad :: Result a -> IO a
355 hPrintf stderr "Failure: %s\n" s >> exitWith (ExitFailure 1)
356 exitIfBad (Ok v) = return v
358 -- | Runs an allocation algorithm and saves cluster state.
359 runAllocation :: ClusterData -- ^ Cluster data
360 -> Maybe Cluster.AllocResult -- ^ Optional stop-allocation
361 -> Result Cluster.AllocResult -- ^ Allocation result
362 -> RSpec -- ^ Requested instance spec
363 -> DiskTemplate -- ^ Requested disk template
364 -> SpecType -- ^ Allocation type
365 -> Options -- ^ CLI options
366 -> IO (FailStats, Node.List, Int, [(RSpec, Int)])
367 runAllocation cdata stop_allocation actual_result spec dt mode opts = do
368 (reasons, new_nl, new_il, new_ixes, _) <-
369 case stop_allocation of
370 Just result_noalloc -> return result_noalloc
371 Nothing -> exitIfBad actual_result
373 let name = head . words . specDescription $ mode
374 descr = name ++ " allocation"
375 ldescr = "after " ++ map toLower descr
377 printISpec (optMachineReadable opts) spec mode dt
379 printAllocationMap (optVerbose opts) descr new_nl new_ixes
381 maybePrintNodes (optShowNodes opts) descr (Cluster.printNodes new_nl)
383 maybeSaveData (optSaveCluster opts) (map toLower name) ldescr
384 (cdata { cdNodes = new_nl, cdInstances = new_il})
386 return (sortReasons reasons, new_nl, length new_ixes, tieredSpecMap new_ixes)
388 -- | Create an instance from a given spec.
389 instFromSpec :: RSpec -> DiskTemplate -> Instance.Instance
390 instFromSpec spx disk_template =
391 Instance.create "new" (rspecMem spx) (rspecDsk spx)
392 (rspecCpu spx) Running [] True (-1) (-1) disk_template
398 (opts, args) <- parseOpts cmd_args "hspace" options
400 unless (null args) $ do
401 hPutStrLn stderr "Error: this program doesn't take any arguments."
402 exitWith $ ExitFailure 1
404 let verbose = optVerbose opts
405 machine_r = optMachineReadable opts
407 orig_cdata@(ClusterData gl fixed_nl il _ ipol) <- loadExternalData opts
408 nl <- setNodeStatus opts fixed_nl
410 cluster_disk_template <-
411 case iPolicyDiskTemplates ipol of
412 first_templ:_ -> return first_templ
414 _ <- hPutStrLn stderr $ "Error: null list of disk templates\
415 \ received from cluster!"
416 exitWith $ ExitFailure 1
418 let num_instances = Container.size il
419 all_nodes = Container.elems fixed_nl
420 cdata = orig_cdata { cdNodes = fixed_nl }
421 disk_template = fromMaybe cluster_disk_template (optDiskTemplate opts)
422 req_nodes = Instance.requiredNodes disk_template
423 csf = commonSuffix fixed_nl il
425 when (not (null csf) && verbose > 1) $
426 hPrintf stderr "Note: Stripping common suffix of '%s' from names\n" csf
428 maybePrintNodes (optShowNodes opts) "Initial cluster" (Cluster.printNodes nl)
431 hPrintf stderr "Initial coefficients: overall %.8f, %s\n"
432 (Cluster.compCV nl) (Cluster.printStats nl)
434 printCluster machine_r (Cluster.totalResources nl) (length all_nodes)
436 let stop_allocation = case Cluster.computeBadItems nl il of
438 _ -> Just ([(FailN1, 1)]::FailStats, nl, il, [], [])
439 alloclimit = if optMaxLength opts == -1
441 else Just (optMaxLength opts)
443 allocnodes <- exitIfBad $ Cluster.genAllocNodes gl nl req_nodes True
445 -- Run the tiered allocation
447 let tspec = fromMaybe (rspecFromISpec (iPolicyMaxSpec ipol))
450 (treason, trl_nl, _, spec_map) <-
451 runAllocation cdata stop_allocation
452 (Cluster.tieredAlloc nl il alloclimit
453 (instFromSpec tspec disk_template) allocnodes [] [])
454 tspec disk_template SpecTiered opts
456 printTiered machine_r spec_map (optMcpu opts) nl trl_nl treason
458 -- Run the standard (avg-mode) allocation
460 let ispec = fromMaybe (rspecFromISpec (iPolicyStdSpec ipol))
463 (sreason, fin_nl, allocs, _) <-
464 runAllocation cdata stop_allocation
465 (Cluster.iterateAlloc nl il alloclimit
466 (instFromSpec ispec disk_template) allocnodes [] [])
467 ispec disk_template SpecNormal opts
469 printResults machine_r nl fin_nl num_instances allocs sreason
471 -- Print final result