Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hspace.hs @ 2c9336a4

History | View | Annotate | Download (17.3 kB)

1
{-| Cluster space sizing
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hspace (main) where
27

    
28
import Control.Monad
29
import Data.Char (toUpper, isAlphaNum)
30
import Data.Function (on)
31
import Data.List
32
import Data.Maybe (isJust, fromJust)
33
import Data.Ord (comparing)
34
import System (exitWith, ExitCode(..))
35
import System.IO
36
import qualified System
37

    
38
import Text.Printf (printf, hPrintf)
39

    
40
import qualified Ganeti.HTools.Container as Container
41
import qualified Ganeti.HTools.Cluster as Cluster
42
import qualified Ganeti.HTools.Node as Node
43
import qualified Ganeti.HTools.Instance as Instance
44

    
45
import Ganeti.HTools.Utils
46
import Ganeti.HTools.Types
47
import Ganeti.HTools.CLI
48
import Ganeti.HTools.ExtLoader
49
import Ganeti.HTools.Loader
50

    
51
-- | Options list and functions.
52
options :: [OptType]
53
options =
54
    [ oPrintNodes
55
    , oDataFile
56
    , oDiskTemplate
57
    , oNodeSim
58
    , oRapiMaster
59
    , oLuxiSocket
60
    , oVerbose
61
    , oQuiet
62
    , oOfflineNode
63
    , oIMem
64
    , oIDisk
65
    , oIVcpus
66
    , oMachineReadable
67
    , oMaxCpu
68
    , oMaxSolLength
69
    , oMinDisk
70
    , oTieredSpec
71
    , oSaveCluster
72
    , oShowVer
73
    , oShowHelp
74
    ]
75

    
76
-- | The allocation phase we're in (initial, after tiered allocs, or
77
-- after regular allocation).
78
data Phase = PInitial
79
           | PFinal
80
           | PTiered
81

    
82
-- | The kind of instance spec we print.
83
data SpecType = SpecNormal
84
              | SpecTiered
85

    
86
-- | What we prefix a spec with.
87
specPrefix :: SpecType -> String
88
specPrefix SpecNormal = "SPEC"
89
specPrefix SpecTiered = "TSPEC_INI"
90

    
91
-- | The description of a spec.
92
specDescription :: SpecType -> String
93
specDescription SpecNormal = "Normal (fixed-size)"
94
specDescription SpecTiered = "Tiered (initial size)"
95

    
96
-- | Efficiency generic function.
97
effFn :: (Cluster.CStats -> Integer)
98
      -> (Cluster.CStats -> Double)
99
      -> Cluster.CStats -> Double
100
effFn fi ft cs = fromIntegral (fi cs) / ft cs
101

    
102
-- | Memory efficiency.
103
memEff :: Cluster.CStats -> Double
104
memEff = effFn Cluster.csImem Cluster.csTmem
105

    
106
-- | Disk efficiency.
107
dskEff :: Cluster.CStats -> Double
108
dskEff = effFn Cluster.csIdsk Cluster.csTdsk
109

    
110
-- | Cpu efficiency.
111
cpuEff :: Cluster.CStats -> Double
112
cpuEff = effFn Cluster.csIcpu (fromIntegral . Cluster.csVcpu)
113

    
114
-- | Holds data for converting a 'Cluster.CStats' structure into
115
-- detailed statictics.
116
statsData :: [(String, Cluster.CStats -> String)]
117
statsData = [ ("SCORE", printf "%.8f" . Cluster.csScore)
118
            , ("INST_CNT", printf "%d" . Cluster.csNinst)
119
            , ("MEM_FREE", printf "%d" . Cluster.csFmem)
120
            , ("MEM_AVAIL", printf "%d" . Cluster.csAmem)
121
            , ("MEM_RESVD",
122
               \cs -> printf "%d" (Cluster.csFmem cs - Cluster.csAmem cs))
123
            , ("MEM_INST", printf "%d" . Cluster.csImem)
124
            , ("MEM_OVERHEAD",
125
               \cs -> printf "%d" (Cluster.csXmem cs + Cluster.csNmem cs))
126
            , ("MEM_EFF", printf "%.8f" . memEff)
127
            , ("DSK_FREE", printf "%d" . Cluster.csFdsk)
128
            , ("DSK_AVAIL", printf "%d". Cluster.csAdsk)
129
            , ("DSK_RESVD",
130
               \cs -> printf "%d" (Cluster.csFdsk cs - Cluster.csAdsk cs))
131
            , ("DSK_INST", printf "%d" . Cluster.csIdsk)
132
            , ("DSK_EFF", printf "%.8f" . dskEff)
133
            , ("CPU_INST", printf "%d" . Cluster.csIcpu)
134
            , ("CPU_EFF", printf "%.8f" . cpuEff)
135
            , ("MNODE_MEM_AVAIL", printf "%d" . Cluster.csMmem)
136
            , ("MNODE_DSK_AVAIL", printf "%d" . Cluster.csMdsk)
137
            ]
138

    
139
-- | List holding 'RSpec' formatting information.
140
specData :: [(String, RSpec -> String)]
141
specData = [ ("MEM", printf "%d" . rspecMem)
142
           , ("DSK", printf "%d" . rspecDsk)
143
           , ("CPU", printf "%d" . rspecCpu)
144
           ]
145

    
146
-- | List holding 'Cluster.CStats' formatting information.
147
clusterData :: [(String, Cluster.CStats -> String)]
148
clusterData = [ ("MEM", printf "%.0f" . Cluster.csTmem)
149
              , ("DSK", printf "%.0f" . Cluster.csTdsk)
150
              , ("CPU", printf "%.0f" . Cluster.csTcpu)
151
              , ("VCPU", printf "%d" . Cluster.csVcpu)
152
              ]
153

    
154
-- | Function to print stats for a given phase.
155
printStats :: Phase -> Cluster.CStats -> [(String, String)]
156
printStats ph cs =
157
  map (\(s, fn) -> (printf "%s_%s" kind s, fn cs)) statsData
158
  where kind = case ph of
159
                 PInitial -> "INI"
160
                 PFinal -> "FIN"
161
                 PTiered -> "TRL"
162

    
163
-- | Print final stats and related metrics.
164
printResults :: Bool -> Node.List -> Node.List -> Int -> Int
165
             -> [(FailMode, Int)] -> IO ()
166
printResults True _ fin_nl num_instances allocs sreason = do
167
  let fin_stats = Cluster.totalResources fin_nl
168
      fin_instances = num_instances + allocs
169

    
170
  when (num_instances + allocs /= Cluster.csNinst fin_stats) $
171
       do
172
         hPrintf stderr "ERROR: internal inconsistency, allocated (%d)\
173
                        \ != counted (%d)\n" (num_instances + allocs)
174
                                 (Cluster.csNinst fin_stats) :: IO ()
175
         exitWith $ ExitFailure 1
176

    
177
  printKeys $ printStats PFinal fin_stats
178
  printKeys [ ("ALLOC_USAGE", printf "%.8f"
179
                                ((fromIntegral num_instances::Double) /
180
                                 fromIntegral fin_instances))
181
            , ("ALLOC_INSTANCES", printf "%d" allocs)
182
            , ("ALLOC_FAIL_REASON", map toUpper . show . fst $ head sreason)
183
            ]
184
  printKeys $ map (\(x, y) -> (printf "ALLOC_%s_CNT" (show x),
185
                               printf "%d" y)) sreason
186

    
187
printResults False ini_nl fin_nl _ allocs sreason = do
188
  putStrLn "Normal (fixed-size) allocation results:"
189
  printf "  - %3d instances allocated\n" allocs :: IO ()
190
  printf "  - most likely failure reason: %s\n" $ failureReason sreason::IO ()
191
  printClusterScores ini_nl fin_nl
192
  printClusterEff (Cluster.totalResources fin_nl)
193

    
194
-- | Prints the final @OK@ marker in machine readable output.
195
printFinal :: Bool -> IO ()
196
printFinal True =
197
  -- this should be the final entry
198
  printKeys [("OK", "1")]
199

    
200
printFinal False = return ()
201

    
202
-- | Compute the tiered spec counts from a list of allocated
203
-- instances.
204
tieredSpecMap :: [Instance.Instance]
205
              -> [(RSpec, Int)]
206
tieredSpecMap trl_ixes =
207
    let fin_trl_ixes = reverse trl_ixes
208
        ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
209
        spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
210
                   ix_byspec
211
    in spec_map
212

    
213
-- | Formats a spec map to strings.
214
formatSpecMap :: [(RSpec, Int)] -> [String]
215
formatSpecMap =
216
    map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
217
                         (rspecDsk spec) (rspecCpu spec) cnt)
218

    
219
-- | Formats \"key-metrics\" values.
220
formatRSpec :: Double -> String -> RSpec -> [(String, String)]
221
formatRSpec m_cpu s r =
222
    [ ("KM_" ++ s ++ "_CPU", show $ rspecCpu r)
223
    , ("KM_" ++ s ++ "_NPU", show $ fromIntegral (rspecCpu r) / m_cpu)
224
    , ("KM_" ++ s ++ "_MEM", show $ rspecMem r)
225
    , ("KM_" ++ s ++ "_DSK", show $ rspecDsk r)
226
    ]
227

    
228
-- | Shows allocations stats.
229
printAllocationStats :: Double -> Node.List -> Node.List -> IO ()
230
printAllocationStats m_cpu ini_nl fin_nl = do
231
  let ini_stats = Cluster.totalResources ini_nl
232
      fin_stats = Cluster.totalResources fin_nl
233
      (rini, ralo, runa) = Cluster.computeAllocationDelta ini_stats fin_stats
234
  printKeys $ formatRSpec m_cpu  "USED" rini
235
  printKeys $ formatRSpec m_cpu "POOL"ralo
236
  printKeys $ formatRSpec m_cpu "UNAV" runa
237

    
238
-- | Ensure a value is quoted if needed.
239
ensureQuoted :: String -> String
240
ensureQuoted v = if not (all (\c -> isAlphaNum c || c == '.') v)
241
                 then '\'':v ++ "'"
242
                 else v
243

    
244
-- | Format a list of key\/values as a shell fragment.
245
printKeys :: [(String, String)] -> IO ()
246
printKeys = mapM_ (\(k, v) ->
247
                   printf "HTS_%s=%s\n" (map toUpper k) (ensureQuoted v))
248

    
249
-- | Converts instance data to a list of strings.
250
printInstance :: Node.List -> Instance.Instance -> [String]
251
printInstance nl i = [ Instance.name i
252
                     , Container.nameOf nl $ Instance.pNode i
253
                     , let sdx = Instance.sNode i
254
                       in if sdx == Node.noSecondary then ""
255
                          else Container.nameOf nl sdx
256
                     , show (Instance.mem i)
257
                     , show (Instance.dsk i)
258
                     , show (Instance.vcpus i)
259
                     ]
260

    
261
-- | Optionally print the allocation map.
262
printAllocationMap :: Int -> String
263
                   -> Node.List -> [Instance.Instance] -> IO ()
264
printAllocationMap verbose msg nl ixes =
265
  when (verbose > 1) $ do
266
    hPutStrLn stderr msg
267
    hPutStr stderr . unlines . map ((:) ' ' .  intercalate " ") $
268
            formatTable (map (printInstance nl) (reverse ixes))
269
                        -- This is the numberic-or-not field
270
                        -- specification; the first three fields are
271
                        -- strings, whereas the rest are numeric
272
                       [False, False, False, True, True, True]
273

    
274
-- | Formats nicely a list of resources.
275
formatResources :: a -> [(String, a->String)] -> String
276
formatResources res =
277
    intercalate ", " . map (\(a, fn) -> a ++ " " ++ fn res)
278

    
279
-- | Print the cluster resources.
280
printCluster :: Bool -> Cluster.CStats -> Int -> IO ()
281
printCluster True ini_stats node_count = do
282
  printKeys $ map (\(a, fn) -> ("CLUSTER_" ++ a, fn ini_stats)) clusterData
283
  printKeys [("CLUSTER_NODES", printf "%d" node_count)]
284
  printKeys $ printStats PInitial ini_stats
285

    
286
printCluster False ini_stats node_count = do
287
  printf "The cluster has %d nodes and the following resources:\n  %s.\n"
288
         node_count (formatResources ini_stats clusterData)::IO ()
289
  printf "There are %s initial instances on the cluster.\n"
290
             (if inst_count > 0 then show inst_count else "no" )
291
      where inst_count = Cluster.csNinst ini_stats
292

    
293
-- | Prints the normal instance spec.
294
printISpec :: Bool -> RSpec -> SpecType -> DiskTemplate -> IO ()
295
printISpec True ispec spec disk_template = do
296
  printKeys $ map (\(a, fn) -> (prefix ++ "_" ++ a, fn ispec)) specData
297
  printKeys [ (prefix ++ "_RQN", printf "%d" req_nodes) ]
298
  printKeys [ (prefix ++ "_DISK_TEMPLATE",
299
               diskTemplateToString disk_template) ]
300
      where req_nodes = Instance.requiredNodes disk_template
301
            prefix = specPrefix spec
302

    
303
printISpec False ispec spec disk_template =
304
  printf "%s instance spec is:\n  %s, using disk\
305
         \ template '%s'.\n"
306
         (specDescription spec)
307
         (formatResources ispec specData) (diskTemplateToString disk_template)
308

    
309
-- | Prints the tiered results.
310
printTiered :: Bool -> [(RSpec, Int)] -> Double
311
            -> Node.List -> Node.List -> [(FailMode, Int)] -> IO ()
312
printTiered True spec_map m_cpu nl trl_nl _ = do
313
  printKeys $ printStats PTiered (Cluster.totalResources trl_nl)
314
  printKeys [("TSPEC", intercalate " " (formatSpecMap spec_map))]
315
  printAllocationStats m_cpu nl trl_nl
316

    
317
printTiered False spec_map _ ini_nl fin_nl sreason = do
318
  _ <- printf "Tiered allocation results:\n"
319
  mapM_ (\(ispec, cnt) ->
320
             printf "  - %3d instances of spec %s\n" cnt
321
                        (formatResources ispec specData)) spec_map
322
  printf "  - most likely failure reason: %s\n" $ failureReason sreason::IO ()
323
  printClusterScores ini_nl fin_nl
324
  printClusterEff (Cluster.totalResources fin_nl)
325

    
326
-- | Displays the initial/final cluster scores.
327
printClusterScores :: Node.List -> Node.List -> IO ()
328
printClusterScores ini_nl fin_nl = do
329
  printf "  - initial cluster score: %.8f\n" $ Cluster.compCV ini_nl::IO ()
330
  printf "  -   final cluster score: %.8f\n" $ Cluster.compCV fin_nl
331

    
332
-- | Displays the cluster efficiency.
333
printClusterEff :: Cluster.CStats -> IO ()
334
printClusterEff cs =
335
    mapM_ (\(s, fn) ->
336
               printf "  - %s usage efficiency: %5.2f%%\n" s (fn cs * 100))
337
          [("memory", memEff),
338
           ("  disk", dskEff),
339
           ("  vcpu", cpuEff)]
340

    
341
-- | Computes the most likely failure reason.
342
failureReason :: [(FailMode, Int)] -> String
343
failureReason = show . fst . head
344

    
345
-- | Sorts the failure reasons.
346
sortReasons :: [(FailMode, Int)] -> [(FailMode, Int)]
347
sortReasons = reverse . sortBy (comparing snd)
348

    
349
-- | Main function.
350
main :: IO ()
351
main = do
352
  cmd_args <- System.getArgs
353
  (opts, args) <- parseOpts cmd_args "hspace" options
354

    
355
  unless (null args) $ do
356
         hPutStrLn stderr "Error: this program doesn't take any arguments."
357
         exitWith $ ExitFailure 1
358

    
359
  let verbose = optVerbose opts
360
      ispec = optISpec opts
361
      shownodes = optShowNodes opts
362
      disk_template = optDiskTemplate opts
363
      req_nodes = Instance.requiredNodes disk_template
364
      machine_r = optMachineReadable opts
365

    
366
  (ClusterData gl fixed_nl il ctags) <- loadExternalData opts
367

    
368
  let num_instances = length $ Container.elems il
369

    
370
  let offline_passed = optOffline opts
371
      all_nodes = Container.elems fixed_nl
372
      offline_lkp = map (lookupName (map Node.name all_nodes)) offline_passed
373
      offline_wrong = filter (not . goodLookupResult) offline_lkp
374
      offline_names = map lrContent offline_lkp
375
      offline_indices = map Node.idx $
376
                        filter (\n -> Node.name n `elem` offline_names)
377
                               all_nodes
378
      m_cpu = optMcpu opts
379
      m_dsk = optMdsk opts
380

    
381
  when (not (null offline_wrong)) $ do
382
         hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
383
                     (commaJoin (map lrContent offline_wrong)) :: IO ()
384
         exitWith $ ExitFailure 1
385

    
386
  when (req_nodes /= 1 && req_nodes /= 2) $ do
387
         hPrintf stderr "Error: Invalid required nodes (%d)\n"
388
                                            req_nodes :: IO ()
389
         exitWith $ ExitFailure 1
390

    
391
  let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
392
                                then Node.setOffline n True
393
                                else n) fixed_nl
394
      nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
395
           nm
396
      csf = commonSuffix fixed_nl il
397

    
398
  when (length csf > 0 && verbose > 1) $
399
       hPrintf stderr "Note: Stripping common suffix of '%s' from names\n" csf
400

    
401
  when (isJust shownodes) $
402
       do
403
         hPutStrLn stderr "Initial cluster status:"
404
         hPutStrLn stderr $ Cluster.printNodes nl (fromJust shownodes)
405

    
406
  let ini_cv = Cluster.compCV nl
407
      ini_stats = Cluster.totalResources nl
408

    
409
  when (verbose > 2) $
410
         hPrintf stderr "Initial coefficients: overall %.8f, %s\n"
411
                 ini_cv (Cluster.printStats nl)
412

    
413
  printCluster machine_r ini_stats (length all_nodes)
414

    
415
  printISpec machine_r ispec SpecNormal disk_template
416

    
417
  let bad_nodes = fst $ Cluster.computeBadItems nl il
418
      stop_allocation = length bad_nodes > 0
419
      result_noalloc = ([(FailN1, 1)]::FailStats, nl, il, [], [])
420

    
421
  -- utility functions
422
  let iofspec spx = Instance.create "new" (rspecMem spx) (rspecDsk spx)
423
                    (rspecCpu spx) "running" [] True (-1) (-1) disk_template
424
      exitifbad val = (case val of
425
                         Bad s -> do
426
                           hPrintf stderr "Failure: %s\n" s :: IO ()
427
                           exitWith $ ExitFailure 1
428
                         Ok x -> return x)
429

    
430

    
431
  let reqinst = iofspec ispec
432
      alloclimit = if optMaxLength opts == -1
433
                   then Nothing
434
                   else Just (optMaxLength opts)
435

    
436
  allocnodes <- exitifbad $ Cluster.genAllocNodes gl nl req_nodes True
437

    
438
  -- Run the tiered allocation, if enabled
439

    
440
  (case optTieredSpec opts of
441
     Nothing -> return ()
442
     Just tspec -> do
443
       (treason, trl_nl, trl_il, trl_ixes, _) <-
444
           if stop_allocation
445
           then return result_noalloc
446
           else exitifbad (Cluster.tieredAlloc nl il alloclimit (iofspec tspec)
447
                                  allocnodes [] [])
448
       let spec_map' = tieredSpecMap trl_ixes
449
           treason' = sortReasons treason
450

    
451
       printAllocationMap verbose "Tiered allocation map" trl_nl trl_ixes
452

    
453
       maybePrintNodes shownodes "Tiered allocation"
454
                           (Cluster.printNodes trl_nl)
455

    
456
       maybeSaveData (optSaveCluster opts) "tiered" "after tiered allocation"
457
                     (ClusterData gl trl_nl trl_il ctags)
458

    
459
       printISpec machine_r tspec SpecTiered disk_template
460

    
461
       printTiered machine_r spec_map' m_cpu nl trl_nl treason'
462
       )
463

    
464
  -- Run the standard (avg-mode) allocation
465

    
466
  (ereason, fin_nl, fin_il, ixes, _) <-
467
      if stop_allocation
468
      then return result_noalloc
469
      else exitifbad (Cluster.iterateAlloc nl il alloclimit
470
                      reqinst allocnodes [] [])
471

    
472
  let allocs = length ixes
473
      sreason = sortReasons ereason
474

    
475
  printAllocationMap verbose "Standard allocation map" fin_nl ixes
476

    
477
  maybePrintNodes shownodes "Standard allocation" (Cluster.printNodes fin_nl)
478

    
479
  maybeSaveData (optSaveCluster opts) "alloc" "after standard allocation"
480
       (ClusterData gl fin_nl fin_il ctags)
481

    
482
  printResults machine_r nl fin_nl num_instances allocs sreason
483

    
484
  printFinal machine_r