Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Program / Hspace.hs @ 375969eb

History | View | Annotate | Download (16.7 kB)

1
{-| Cluster space sizing
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009, 2010, 2011 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hspace (main) where
27

    
28
import Control.Monad
29
import Data.Char (toUpper, isAlphaNum)
30
import Data.Function (on)
31
import Data.List
32
import Data.Maybe (isJust, fromJust)
33
import Data.Ord (comparing)
34
import System (exitWith, ExitCode(..))
35
import System.IO
36
import qualified System
37

    
38
import Text.Printf (printf, hPrintf)
39

    
40
import qualified Ganeti.HTools.Container as Container
41
import qualified Ganeti.HTools.Cluster as Cluster
42
import qualified Ganeti.HTools.Node as Node
43
import qualified Ganeti.HTools.Instance as Instance
44

    
45
import Ganeti.HTools.Utils
46
import Ganeti.HTools.Types
47
import Ganeti.HTools.CLI
48
import Ganeti.HTools.ExtLoader
49
import Ganeti.HTools.Loader
50

    
51
-- | Options list and functions
52
options :: [OptType]
53
options =
54
    [ oPrintNodes
55
    , oDataFile
56
    , oDiskTemplate
57
    , oNodeSim
58
    , oRapiMaster
59
    , oLuxiSocket
60
    , oVerbose
61
    , oQuiet
62
    , oOfflineNode
63
    , oIMem
64
    , oIDisk
65
    , oIVcpus
66
    , oMachineReadable
67
    , oMaxCpu
68
    , oMinDisk
69
    , oTieredSpec
70
    , oSaveCluster
71
    , oShowVer
72
    , oShowHelp
73
    ]
74

    
75
-- | The allocation phase we're in (initial, after tiered allocs, or
76
-- after regular allocation).
77
data Phase = PInitial
78
           | PFinal
79
           | PTiered
80

    
81
-- | The kind of instance spec we print.
82
data SpecType = SpecNormal
83
              | SpecTiered
84

    
85
-- | What we prefix a spec with.
86
specPrefix :: SpecType -> String
87
specPrefix SpecNormal = "SPEC"
88
specPrefix SpecTiered = "TSPEC_INI"
89

    
90
-- | The description of a spec.
91
specDescription :: SpecType -> String
92
specDescription SpecNormal = "Normal (fixed-size)"
93
specDescription SpecTiered = "Tiered (initial size)"
94

    
95
-- | Efficiency generic function.
96
effFn :: (Cluster.CStats -> Integer)
97
      -> (Cluster.CStats -> Double)
98
      -> (Cluster.CStats -> Double)
99
effFn fi ft cs = fromIntegral (fi cs) / ft cs
100

    
101
-- | Memory efficiency.
102
memEff :: Cluster.CStats -> Double
103
memEff = effFn Cluster.csImem Cluster.csTmem
104

    
105
-- | Disk efficiency.
106
dskEff :: Cluster.CStats -> Double
107
dskEff = effFn Cluster.csIdsk Cluster.csTdsk
108

    
109
-- | Cpu efficiency.
110
cpuEff :: Cluster.CStats -> Double
111
cpuEff = effFn Cluster.csIcpu (fromIntegral . Cluster.csVcpu)
112

    
113
statsData :: [(String, Cluster.CStats -> String)]
114
statsData = [ ("SCORE", printf "%.8f" . Cluster.csScore)
115
            , ("INST_CNT", printf "%d" . Cluster.csNinst)
116
            , ("MEM_FREE", printf "%d" . Cluster.csFmem)
117
            , ("MEM_AVAIL", printf "%d" . Cluster.csAmem)
118
            , ("MEM_RESVD",
119
               \cs -> printf "%d" (Cluster.csFmem cs - Cluster.csAmem cs))
120
            , ("MEM_INST", printf "%d" . Cluster.csImem)
121
            , ("MEM_OVERHEAD",
122
               \cs -> printf "%d" (Cluster.csXmem cs + Cluster.csNmem cs))
123
            , ("MEM_EFF", printf "%.8f" . memEff)
124
            , ("DSK_FREE", printf "%d" . Cluster.csFdsk)
125
            , ("DSK_AVAIL", printf "%d". Cluster.csAdsk)
126
            , ("DSK_RESVD",
127
               \cs -> printf "%d" (Cluster.csFdsk cs - Cluster.csAdsk cs))
128
            , ("DSK_INST", printf "%d" . Cluster.csIdsk)
129
            , ("DSK_EFF", printf "%.8f" . dskEff)
130
            , ("CPU_INST", printf "%d" . Cluster.csIcpu)
131
            , ("CPU_EFF", printf "%.8f" . cpuEff)
132
            , ("MNODE_MEM_AVAIL", printf "%d" . Cluster.csMmem)
133
            , ("MNODE_DSK_AVAIL", printf "%d" . Cluster.csMdsk)
134
            ]
135

    
136
specData :: [(String, RSpec -> String)]
137
specData = [ ("MEM", printf "%d" . rspecMem)
138
           , ("DSK", printf "%d" . rspecDsk)
139
           , ("CPU", printf "%d" . rspecCpu)
140
           ]
141

    
142
clusterData :: [(String, Cluster.CStats -> String)]
143
clusterData = [ ("MEM", printf "%.0f" . Cluster.csTmem)
144
              , ("DSK", printf "%.0f" . Cluster.csTdsk)
145
              , ("CPU", printf "%.0f" . Cluster.csTcpu)
146
              , ("VCPU", printf "%d" . Cluster.csVcpu)
147
              ]
148

    
149
-- | Function to print stats for a given phase
150
printStats :: Phase -> Cluster.CStats -> [(String, String)]
151
printStats ph cs =
152
  map (\(s, fn) -> (printf "%s_%s" kind s, fn cs)) statsData
153
  where kind = case ph of
154
                 PInitial -> "INI"
155
                 PFinal -> "FIN"
156
                 PTiered -> "TRL"
157

    
158
-- | Print final stats and related metrics.
159
printResults :: Bool -> Node.List -> Node.List -> Int -> Int
160
             -> [(FailMode, Int)] -> IO ()
161
printResults True _ fin_nl num_instances allocs sreason = do
162
  let fin_stats = Cluster.totalResources fin_nl
163
      fin_instances = num_instances + allocs
164

    
165
  when (num_instances + allocs /= Cluster.csNinst fin_stats) $
166
       do
167
         hPrintf stderr "ERROR: internal inconsistency, allocated (%d)\
168
                        \ != counted (%d)\n" (num_instances + allocs)
169
                                 (Cluster.csNinst fin_stats) :: IO ()
170
         exitWith $ ExitFailure 1
171

    
172
  printKeys $ printStats PFinal fin_stats
173
  printKeys [ ("ALLOC_USAGE", printf "%.8f"
174
                                ((fromIntegral num_instances::Double) /
175
                                 fromIntegral fin_instances))
176
            , ("ALLOC_INSTANCES", printf "%d" allocs)
177
            , ("ALLOC_FAIL_REASON", map toUpper . show . fst $ head sreason)
178
            ]
179
  printKeys $ map (\(x, y) -> (printf "ALLOC_%s_CNT" (show x),
180
                               printf "%d" y)) sreason
181

    
182
printResults False ini_nl fin_nl _ allocs sreason = do
183
  putStrLn "Normal (fixed-size) allocation results:"
184
  printf "  - %3d instances allocated\n" allocs :: IO ()
185
  printf "  - most likely failure reason: %s\n" $ failureReason sreason::IO ()
186
  printClusterScores ini_nl fin_nl
187
  printClusterEff (Cluster.totalResources fin_nl)
188

    
189
-- | Prints the final @OK@ marker in machine readable output.
190
printFinal :: Bool -> IO ()
191
printFinal True =
192
  -- this should be the final entry
193
  printKeys [("OK", "1")]
194

    
195
printFinal False = return ()
196

    
197
-- | Compute the tiered spec counts from a list of allocated
198
-- instances.
199
tieredSpecMap :: [Instance.Instance]
200
              -> [(RSpec, Int)]
201
tieredSpecMap trl_ixes =
202
    let fin_trl_ixes = reverse trl_ixes
203
        ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
204
        spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
205
                   ix_byspec
206
    in spec_map
207

    
208
-- | Formats a spec map to strings.
209
formatSpecMap :: [(RSpec, Int)] -> [String]
210
formatSpecMap =
211
    map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
212
                         (rspecDsk spec) (rspecCpu spec) cnt)
213

    
214
formatRSpec :: Double -> String -> RSpec -> [(String, String)]
215
formatRSpec m_cpu s r =
216
    [ ("KM_" ++ s ++ "_CPU", show $ rspecCpu r)
217
    , ("KM_" ++ s ++ "_NPU", show $ fromIntegral (rspecCpu r) / m_cpu)
218
    , ("KM_" ++ s ++ "_MEM", show $ rspecMem r)
219
    , ("KM_" ++ s ++ "_DSK", show $ rspecDsk r)
220
    ]
221

    
222
printAllocationStats :: Double -> Node.List -> Node.List -> IO ()
223
printAllocationStats m_cpu ini_nl fin_nl = do
224
  let ini_stats = Cluster.totalResources ini_nl
225
      fin_stats = Cluster.totalResources fin_nl
226
      (rini, ralo, runa) = Cluster.computeAllocationDelta ini_stats fin_stats
227
  printKeys $ formatRSpec m_cpu  "USED" rini
228
  printKeys $ formatRSpec m_cpu "POOL"ralo
229
  printKeys $ formatRSpec m_cpu "UNAV" runa
230

    
231
-- | Ensure a value is quoted if needed
232
ensureQuoted :: String -> String
233
ensureQuoted v = if not (all (\c -> isAlphaNum c || c == '.') v)
234
                 then '\'':v ++ "'"
235
                 else v
236

    
237
-- | Format a list of key\/values as a shell fragment
238
printKeys :: [(String, String)] -> IO ()
239
printKeys = mapM_ (\(k, v) ->
240
                   printf "HTS_%s=%s\n" (map toUpper k) (ensureQuoted v))
241

    
242
printInstance :: Node.List -> Instance.Instance -> [String]
243
printInstance nl i = [ Instance.name i
244
                     , Container.nameOf nl $ Instance.pNode i
245
                     , let sdx = Instance.sNode i
246
                       in if sdx == Node.noSecondary then ""
247
                          else Container.nameOf nl sdx
248
                     , show (Instance.mem i)
249
                     , show (Instance.dsk i)
250
                     , show (Instance.vcpus i)
251
                     ]
252

    
253
-- | Optionally print the allocation map
254
printAllocationMap :: Int -> String
255
                   -> Node.List -> [Instance.Instance] -> IO ()
256
printAllocationMap verbose msg nl ixes =
257
  when (verbose > 1) $ do
258
    hPutStrLn stderr msg
259
    hPutStr stderr . unlines . map ((:) ' ' .  intercalate " ") $
260
            formatTable (map (printInstance nl) (reverse ixes))
261
                        -- This is the numberic-or-not field
262
                        -- specification; the first three fields are
263
                        -- strings, whereas the rest are numeric
264
                       [False, False, False, True, True, True]
265

    
266
-- | Formats nicely a list of resources.
267
formatResources :: a -> [(String, (a->String))] -> String
268
formatResources res =
269
    intercalate ", " . map (\(a, fn) -> a ++ " " ++ fn res)
270

    
271
-- | Print the cluster resources.
272
printCluster :: Bool -> Cluster.CStats -> Int -> IO ()
273
printCluster True ini_stats node_count = do
274
  printKeys $ map (\(a, fn) -> ("CLUSTER_" ++ a, fn ini_stats)) clusterData
275
  printKeys [("CLUSTER_NODES", printf "%d" node_count)]
276
  printKeys $ printStats PInitial ini_stats
277

    
278
printCluster False ini_stats node_count = do
279
  printf "The cluster has %d nodes and the following resources:\n  %s.\n"
280
         node_count (formatResources ini_stats clusterData)::IO ()
281
  printf "There are %s initial instances on the cluster.\n"
282
             (if inst_count > 0 then show inst_count else "no" )
283
      where inst_count = Cluster.csNinst ini_stats
284

    
285
-- | Prints the normal instance spec.
286
printISpec :: Bool -> RSpec -> SpecType -> DiskTemplate -> IO ()
287
printISpec True ispec spec disk_template = do
288
  printKeys $ map (\(a, fn) -> (prefix ++ "_" ++ a, fn ispec)) specData
289
  printKeys [ (prefix ++ "_RQN", printf "%d" req_nodes) ]
290
  printKeys [ (prefix ++ "_DISK_TEMPLATE", dtToString disk_template) ]
291
      where req_nodes = Instance.requiredNodes disk_template
292
            prefix = specPrefix spec
293

    
294
printISpec False ispec spec disk_template = do
295
  printf "%s instance spec is:\n  %s, using disk\
296
         \ template '%s'.\n"
297
         (specDescription spec)
298
         (formatResources ispec specData) (dtToString disk_template)
299

    
300
-- | Prints the tiered results.
301
printTiered :: Bool -> [(RSpec, Int)] -> Double
302
            -> Node.List -> Node.List -> [(FailMode, Int)] -> IO ()
303
printTiered True spec_map m_cpu nl trl_nl _ = do
304
  printKeys $ printStats PTiered (Cluster.totalResources trl_nl)
305
  printKeys [("TSPEC", intercalate " " (formatSpecMap spec_map))]
306
  printAllocationStats m_cpu nl trl_nl
307

    
308
printTiered False spec_map _ ini_nl fin_nl sreason = do
309
  _ <- printf "Tiered allocation results:\n"
310
  mapM_ (\(ispec, cnt) ->
311
             printf "  - %3d instances of spec %s\n" cnt
312
                        (formatResources ispec specData)) spec_map
313
  printf "  - most likely failure reason: %s\n" $ failureReason sreason::IO ()
314
  printClusterScores ini_nl fin_nl
315
  printClusterEff (Cluster.totalResources fin_nl)
316

    
317
printClusterScores :: Node.List -> Node.List -> IO ()
318
printClusterScores ini_nl fin_nl = do
319
  printf "  - initial cluster score: %.8f\n" $ Cluster.compCV ini_nl::IO ()
320
  printf "  -   final cluster score: %.8f\n" $ Cluster.compCV fin_nl
321

    
322
printClusterEff :: Cluster.CStats -> IO ()
323
printClusterEff cs =
324
    mapM_ (\(s, fn) ->
325
               printf "  - %s usage efficiency: %5.2f%%\n" s (fn cs * 100))
326
          [("memory", memEff),
327
           ("  disk", dskEff),
328
           ("  vcpu", cpuEff)]
329

    
330
-- | Computes the most likely failure reason.
331
failureReason :: [(FailMode, Int)] -> String
332
failureReason = show . fst . head
333

    
334
-- | Sorts the failure reasons.
335
sortReasons :: [(FailMode, Int)] -> [(FailMode, Int)]
336
sortReasons = reverse . sortBy (comparing snd)
337

    
338
-- | Main function.
339
main :: IO ()
340
main = do
341
  cmd_args <- System.getArgs
342
  (opts, args) <- parseOpts cmd_args "hspace" options
343

    
344
  unless (null args) $ do
345
         hPutStrLn stderr "Error: this program doesn't take any arguments."
346
         exitWith $ ExitFailure 1
347

    
348
  let verbose = optVerbose opts
349
      ispec = optISpec opts
350
      shownodes = optShowNodes opts
351
      disk_template = optDiskTemplate opts
352
      req_nodes = Instance.requiredNodes disk_template
353
      machine_r = optMachineReadable opts
354

    
355
  (ClusterData gl fixed_nl il ctags) <- loadExternalData opts
356

    
357
  let num_instances = length $ Container.elems il
358

    
359
  let offline_passed = optOffline opts
360
      all_nodes = Container.elems fixed_nl
361
      offline_lkp = map (lookupName (map Node.name all_nodes)) offline_passed
362
      offline_wrong = filter (not . goodLookupResult) offline_lkp
363
      offline_names = map lrContent offline_lkp
364
      offline_indices = map Node.idx $
365
                        filter (\n -> Node.name n `elem` offline_names)
366
                               all_nodes
367
      m_cpu = optMcpu opts
368
      m_dsk = optMdsk opts
369

    
370
  when (not (null offline_wrong)) $ do
371
         hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
372
                     (commaJoin (map lrContent offline_wrong)) :: IO ()
373
         exitWith $ ExitFailure 1
374

    
375
  when (req_nodes /= 1 && req_nodes /= 2) $ do
376
         hPrintf stderr "Error: Invalid required nodes (%d)\n"
377
                                            req_nodes :: IO ()
378
         exitWith $ ExitFailure 1
379

    
380
  let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
381
                                then Node.setOffline n True
382
                                else n) fixed_nl
383
      nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
384
           nm
385
      csf = commonSuffix fixed_nl il
386

    
387
  when (length csf > 0 && verbose > 1) $
388
       hPrintf stderr "Note: Stripping common suffix of '%s' from names\n" csf
389

    
390
  when (isJust shownodes) $
391
       do
392
         hPutStrLn stderr "Initial cluster status:"
393
         hPutStrLn stderr $ Cluster.printNodes nl (fromJust shownodes)
394

    
395
  let ini_cv = Cluster.compCV nl
396
      ini_stats = Cluster.totalResources nl
397

    
398
  when (verbose > 2) $
399
         hPrintf stderr "Initial coefficients: overall %.8f, %s\n"
400
                 ini_cv (Cluster.printStats nl)
401

    
402
  printCluster machine_r ini_stats (length all_nodes)
403

    
404
  printISpec machine_r ispec SpecNormal disk_template
405

    
406
  let bad_nodes = fst $ Cluster.computeBadItems nl il
407
      stop_allocation = length bad_nodes > 0
408
      result_noalloc = ([(FailN1, 1)]::FailStats, nl, il, [], [])
409

    
410
  -- utility functions
411
  let iofspec spx = Instance.create "new" (rspecMem spx) (rspecDsk spx)
412
                    (rspecCpu spx) "running" [] True (-1) (-1) disk_template
413
      exitifbad val = (case val of
414
                         Bad s -> do
415
                           hPrintf stderr "Failure: %s\n" s :: IO ()
416
                           exitWith $ ExitFailure 1
417
                         Ok x -> return x)
418

    
419

    
420
  let reqinst = iofspec ispec
421

    
422
  allocnodes <- exitifbad $ Cluster.genAllocNodes gl nl req_nodes True
423

    
424
  -- Run the tiered allocation, if enabled
425

    
426
  (case optTieredSpec opts of
427
     Nothing -> return ()
428
     Just tspec -> do
429
       (treason, trl_nl, trl_il, trl_ixes, _) <-
430
           if stop_allocation
431
           then return result_noalloc
432
           else exitifbad (Cluster.tieredAlloc nl il Nothing (iofspec tspec)
433
                                  allocnodes [] [])
434
       let spec_map' = tieredSpecMap trl_ixes
435
           treason' = sortReasons treason
436

    
437
       printAllocationMap verbose "Tiered allocation map" trl_nl trl_ixes
438

    
439
       maybePrintNodes shownodes "Tiered allocation"
440
                           (Cluster.printNodes trl_nl)
441

    
442
       maybeSaveData (optSaveCluster opts) "tiered" "after tiered allocation"
443
                     (ClusterData gl trl_nl trl_il ctags)
444

    
445
       printISpec machine_r tspec SpecTiered disk_template
446

    
447
       printTiered machine_r spec_map' m_cpu nl trl_nl treason'
448
       )
449

    
450
  -- Run the standard (avg-mode) allocation
451

    
452
  (ereason, fin_nl, fin_il, ixes, _) <-
453
      if stop_allocation
454
      then return result_noalloc
455
      else exitifbad (Cluster.iterateAlloc nl il Nothing
456
                      reqinst allocnodes [] [])
457

    
458
  let allocs = length ixes
459
      sreason = sortReasons ereason
460

    
461
  printAllocationMap verbose "Standard allocation map" fin_nl ixes
462

    
463
  maybePrintNodes shownodes "Standard allocation" (Cluster.printNodes fin_nl)
464

    
465
  maybeSaveData (optSaveCluster opts) "alloc" "after standard allocation"
466
       (ClusterData gl fin_nl fin_il ctags)
467

    
468
  printResults machine_r nl fin_nl num_instances allocs sreason
469

    
470
  printFinal machine_r