Statistics
| Branch: | Tag: | Revision:

root / hspace.hs @ 8880d889

History | View | Annotate | Download (14.6 kB)

1
{-| Cluster space sizing
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Main (main) where
27

    
28
import Data.Char (toUpper)
29
import Data.List
30
import Data.Function
31
import Monad
32
import System
33
import System.IO
34
import System.Console.GetOpt
35
import qualified System
36

    
37
import Text.Printf (printf, hPrintf)
38

    
39
import qualified Ganeti.HTools.Container as Container
40
import qualified Ganeti.HTools.Cluster as Cluster
41
import qualified Ganeti.HTools.Node as Node
42
import qualified Ganeti.HTools.Instance as Instance
43
import qualified Ganeti.HTools.CLI as CLI
44

    
45
import Ganeti.HTools.Utils
46
import Ganeti.HTools.Types
47

    
48
-- | Command line options structure.
49
data Options = Options
50
    { optShowNodes :: Bool           -- ^ Whether to show node status
51
    , optNodef     :: FilePath       -- ^ Path to the nodes file
52
    , optNodeSet   :: Bool           -- ^ The nodes have been set by options
53
    , optInstf     :: FilePath       -- ^ Path to the instances file
54
    , optInstSet   :: Bool           -- ^ The insts have been set by options
55
    , optMaster    :: String         -- ^ Collect data from RAPI
56
    , optVerbose   :: Int            -- ^ Verbosity level
57
    , optOffline   :: [String]       -- ^ Names of offline nodes
58
    , optIMem      :: Int            -- ^ Instance memory
59
    , optIDsk      :: Int            -- ^ Instance disk
60
    , optIVCPUs    :: Int            -- ^ Instance VCPUs
61
    , optINodes    :: Int            -- ^ Nodes required for an instance
62
    , optMcpu      :: Double         -- ^ Max cpu ratio for nodes
63
    , optMdsk      :: Double         -- ^ Max disk usage ratio for nodes
64
    , optShowVer   :: Bool           -- ^ Just show the program version
65
    , optShowHelp  :: Bool           -- ^ Just show the help
66
    } deriving Show
67

    
68
instance CLI.CLIOptions Options where
69
    showVersion = optShowVer
70
    showHelp    = optShowHelp
71

    
72
instance CLI.EToolOptions Options where
73
    nodeFile   = optNodef
74
    nodeSet    = optNodeSet
75
    instFile   = optInstf
76
    instSet    = optInstSet
77
    masterName = optMaster
78
    silent a   = optVerbose a == 0
79

    
80
-- | Default values for the command line options.
81
defaultOptions :: Options
82
defaultOptions  = Options
83
 { optShowNodes = False
84
 , optNodef     = "nodes"
85
 , optNodeSet   = False
86
 , optInstf     = "instances"
87
 , optInstSet   = False
88
 , optMaster    = ""
89
 , optVerbose   = 1
90
 , optOffline   = []
91
 , optIMem      = 4096
92
 , optIDsk      = 102400
93
 , optIVCPUs    = 1
94
 , optINodes    = 2
95
 , optMcpu      = -1
96
 , optMdsk      = -1
97
 , optShowVer   = False
98
 , optShowHelp  = False
99
 }
100

    
101
-- | Options list and functions
102
options :: [OptDescr (Options -> Options)]
103
options =
104
    [ Option ['p']     ["print-nodes"]
105
      (NoArg (\ opts -> opts { optShowNodes = True }))
106
      "print the final node list"
107
    , Option ['n']     ["nodes"]
108
      (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
109
      "the node list FILE"
110
    , Option ['i']     ["instances"]
111
      (ReqArg (\ f opts -> opts { optInstf =  f, optInstSet = True }) "FILE")
112
      "the instance list FILE"
113
    , Option ['m']     ["master"]
114
      (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
115
      "collect data via RAPI at the given ADDRESS"
116
    , Option ['v']     ["verbose"]
117
      (NoArg (\ opts -> opts { optVerbose = optVerbose opts + 1 }))
118
      "increase the verbosity level"
119
    , Option ['q']     ["quiet"]
120
      (NoArg (\ opts -> opts { optVerbose = optVerbose opts - 1 }))
121
      "decrease the verbosity level"
122
    , Option ['O']     ["offline"]
123
      (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
124
      "set node as offline"
125
    , Option []        ["memory"]
126
      (ReqArg (\ m opts -> opts { optIMem = read m }) "MEMORY")
127
      "memory size for instances"
128
    , Option []        ["disk"]
129
      (ReqArg (\ d opts -> opts { optIDsk = read d }) "DISK")
130
      "disk size for instances"
131
    , Option []        ["vcpus"]
132
      (ReqArg (\ p opts -> opts { optIVCPUs = read p }) "NUM")
133
      "number of virtual cpus for instances"
134
    , Option []        ["req-nodes"]
135
      (ReqArg (\ n opts -> opts { optINodes = read n }) "NODES")
136
      "number of nodes for the new instances (1=plain, 2=mirrored)"
137
    , Option []        ["max-cpu"]
138
      (ReqArg (\ n opts -> opts { optMcpu = read n }) "RATIO")
139
      "maximum virtual-to-physical cpu ratio for nodes"
140
    , Option []        ["min-disk"]
141
      (ReqArg (\ n opts -> opts { optMdsk = read n }) "RATIO")
142
      "minimum free disk space for nodes (between 0 and 1)"
143
    , Option ['V']     ["version"]
144
      (NoArg (\ opts -> opts { optShowVer = True}))
145
      "show the version of the program"
146
    , Option ['h']     ["help"]
147
      (NoArg (\ opts -> opts { optShowHelp = True}))
148
      "show help"
149
    ]
150

    
151
data Phase = PInitial | PFinal
152

    
153
statsData :: [(String, Cluster.CStats -> String)]
154
statsData = [ ("SCORE", printf "%.8f" . Cluster.cs_score)
155
            , ("INST_CNT", printf "%d" . Cluster.cs_ninst)
156
            , ("MEM_FREE", printf "%d" . Cluster.cs_fmem)
157
            , ("MEM_AVAIL", printf "%d" . Cluster.cs_amem)
158
            , ("MEM_RESVD",
159
               \cs -> printf "%d" (Cluster.cs_fmem cs - Cluster.cs_amem cs))
160
            , ("MEM_INST", printf "%d" . Cluster.cs_imem)
161
            , ("MEM_OVERHEAD",
162
               \cs -> printf "%d" (Cluster.cs_xmem cs + Cluster.cs_nmem cs))
163
            , ("MEM_EFF",
164
               \cs -> printf "%.8f" (fromIntegral (Cluster.cs_imem cs) /
165
                                     Cluster.cs_tmem cs))
166
            , ("DSK_FREE", printf "%d" . Cluster.cs_fdsk)
167
            , ("DSK_AVAIL", printf "%d ". Cluster.cs_adsk)
168
            , ("DSK_RESVD",
169
               \cs -> printf "%d" (Cluster.cs_fdsk cs - Cluster.cs_adsk cs))
170
            , ("DSK_INST", printf "%d" . Cluster.cs_idsk)
171
            , ("DSK_EFF",
172
               \cs -> printf "%.8f" (fromIntegral (Cluster.cs_idsk cs) /
173
                                    Cluster.cs_tdsk cs))
174
            , ("CPU_INST", printf "%d" . Cluster.cs_icpu)
175
            , ("CPU_EFF",
176
               \cs -> printf "%.8f" (fromIntegral (Cluster.cs_icpu cs) /
177
                                     Cluster.cs_tcpu cs))
178
            , ("MNODE_MEM_AVAIL", printf "%d" . Cluster.cs_mmem)
179
            , ("MNODE_DSK_AVAIL", printf "%d" . Cluster.cs_mdsk)
180
            ]
181

    
182
specData :: [(String, Options -> String)]
183
specData = [ ("MEM", printf "%d" . optIMem)
184
           , ("DSK", printf "%d" . optIDsk)
185
           , ("CPU", printf "%d" . optIVCPUs)
186
           , ("RQN", printf "%d" . optINodes)
187
           ]
188

    
189
clusterData :: [(String, Cluster.CStats -> String)]
190
clusterData = [ ("MEM", printf "%.0f" . Cluster.cs_tmem)
191
              , ("DSK", printf "%.0f" . Cluster.cs_tdsk)
192
              , ("CPU", printf "%.0f" . Cluster.cs_tcpu)
193
              ]
194

    
195
-- | Build failure stats out of a list of failure reasons
196
concatFailure :: [(FailMode, Int)] -> FailMode -> [(FailMode, Int)]
197
concatFailure flst reason =
198
    let cval = lookup reason flst
199
    in case cval of
200
         Nothing -> (reason, 1):flst
201
         Just val -> let plain = filter (\(x, _) -> x /= reason) flst
202
                     in (reason, val+1):plain
203

    
204
-- | Build list of failures and placements out of an list of possible
205
-- | allocations
206
filterFails :: Cluster.AllocSolution
207
            -> ([(FailMode, Int)],
208
                [(Node.List, Instance.Instance, [Node.Node])])
209
filterFails sols =
210
    let (alst, blst) = unzip . map (\ e  ->
211
                                        case e of
212
                                          OpFail reason -> ([reason], [])
213
                                          OpGood (gnl, i, nn) ->
214
                                              ([], [(gnl, i, nn)])
215
                                   ) $ sols
216
        aval = concat alst
217
        bval = concat blst
218
    in (foldl' concatFailure [(x, 0) | x <- [minBound..maxBound]] aval, bval)
219

    
220
-- | Get the placement with best score out of a list of possible placements
221
processResults :: [(Node.List, Instance.Instance, [Node.Node])]
222
               -> (Node.List, Instance.Instance, [Node.Node])
223
processResults sols =
224
    let sols' = map (\e@(nl', _, _) -> (Cluster.compCV  nl', e)) sols
225
        sols'' = sortBy (compare `on` fst) sols'
226
    in snd $ head sols''
227

    
228
-- | Recursively place instances on the cluster until we're out of space
229
iterateDepth :: Node.List
230
             -> Instance.List
231
             -> Instance.Instance
232
             -> Int
233
             -> [Instance.Instance]
234
             -> ([(FailMode, Int)], Node.List, [Instance.Instance])
235
iterateDepth nl il newinst nreq ixes =
236
      let depth = length ixes
237
          newname = printf "new-%d" depth::String
238
          newidx = length (Container.elems il) + depth
239
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
240
          sols = Cluster.tryAlloc nl il newi2 nreq::
241
                 OpResult Cluster.AllocSolution
242
      in case sols of
243
           OpFail _ -> ([], nl, ixes)
244
           OpGood sols' ->
245
               let (errs, sols3) = filterFails sols'
246
               in if null sols3
247
                  then (errs, nl, ixes)
248
                  else let (xnl, xi, _) = processResults sols3
249
                       in iterateDepth xnl il newinst nreq (xi:ixes)
250

    
251
-- | Function to print stats for a given phase
252
printStats :: Phase -> Cluster.CStats -> [(String, String)]
253
printStats ph cs =
254
  map (\(s, fn) -> (printf "%s_%s" kind s, fn cs)) statsData
255
  where kind = case ph of
256
                 PInitial -> "INI"
257
                 PFinal -> "FIN"
258

    
259
-- | Print final stats and related metrics
260
printResults :: Node.List -> Int -> Int -> [(FailMode, Int)] -> IO ()
261
printResults fin_nl num_instances allocs sreason = do
262
  let fin_stats = Cluster.totalResources fin_nl
263
      fin_instances = num_instances + allocs
264

    
265
  when (num_instances + allocs /= Cluster.cs_ninst fin_stats) $
266
       do
267
         hPrintf stderr "ERROR: internal inconsistency, allocated (%d)\
268
                        \ != counted (%d)\n" (num_instances + allocs)
269
                                 (Cluster.cs_ninst fin_stats)
270
         exitWith $ ExitFailure 1
271

    
272
  printKeys $ printStats PFinal fin_stats
273
  printKeys [ ("ALLOC_USAGE", printf "%.8f"
274
                                ((fromIntegral num_instances::Double) /
275
                                 fromIntegral fin_instances))
276
            , ("ALLOC_COUNT", printf "%d" allocs)
277
            , ("ALLOC_FAIL_REASON", map toUpper . show . fst $ head sreason)
278
            ]
279
  printKeys $ map (\(x, y) -> (printf "ALLOC_%s_CNT" (show x),
280
                               printf "%d" y)) sreason
281
  -- this should be the final entry
282
  printKeys [("OK", "1")]
283

    
284
-- | Format a list of key/values as a shell fragment
285
printKeys :: [(String, String)] -> IO ()
286
printKeys = mapM_ (\(k, v) -> printf "HTS_%s=%s\n" (map toUpper k) v)
287

    
288
-- | Main function.
289
main :: IO ()
290
main = do
291
  cmd_args <- System.getArgs
292
  (opts, args) <- CLI.parseOpts cmd_args "hspace" options defaultOptions
293

    
294
  unless (null args) $ do
295
         hPutStrLn stderr "Error: this program doesn't take any arguments."
296
         exitWith $ ExitFailure 1
297

    
298
  let verbose = optVerbose opts
299

    
300
  (fixed_nl, il, csf) <- CLI.loadExternalData opts
301

    
302
  printKeys $ map (\(a, fn) -> ("SPEC_" ++ a, fn opts)) specData
303

    
304
  let num_instances = length $ Container.elems il
305

    
306
  let offline_names = optOffline opts
307
      all_nodes = Container.elems fixed_nl
308
      all_names = map Node.name all_nodes
309
      offline_wrong = filter (flip notElem all_names) offline_names
310
      offline_indices = map Node.idx $
311
                        filter (\n -> elem (Node.name n) offline_names)
312
                               all_nodes
313
      req_nodes = optINodes opts
314
      m_cpu = optMcpu opts
315
      m_dsk = optMdsk opts
316

    
317
  when (length offline_wrong > 0) $ do
318
         hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
319
                     (commaJoin offline_wrong)
320
         exitWith $ ExitFailure 1
321

    
322
  when (req_nodes /= 1 && req_nodes /= 2) $ do
323
         hPrintf stderr "Error: Invalid required nodes (%d)\n" req_nodes
324
         exitWith $ ExitFailure 1
325

    
326
  let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
327
                                then Node.setOffline n True
328
                                else n) fixed_nl
329
      nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
330
           nm
331

    
332
  when (length csf > 0 && verbose > 1) $
333
       hPrintf stderr "Note: Stripping common suffix of '%s' from names\n" csf
334

    
335
  when (optShowNodes opts) $
336
       do
337
         hPutStrLn stderr "Initial cluster status:"
338
         hPutStrLn stderr $ Cluster.printNodes nl
339

    
340
  let ini_cv = Cluster.compCV nl
341
      ini_stats = Cluster.totalResources nl
342

    
343
  when (verbose > 2) $ do
344
         hPrintf stderr "Initial coefficients: overall %.8f, %s\n"
345
                 ini_cv (Cluster.printStats nl)
346

    
347
  printKeys $ map (\(a, fn) -> ("CLUSTER_" ++ a, fn ini_stats)) clusterData
348
  printKeys [("CLUSTER_NODES", printf "%d" (length all_nodes))]
349
  printKeys $ printStats PInitial ini_stats
350

    
351
  let bad_nodes = fst $ Cluster.computeBadItems nl il
352
  when (length bad_nodes > 0) $ do
353
         -- This is failn1 case, so we print the same final stats and
354
         -- exit early
355
         printResults nl num_instances 0 [(FailN1, 1)]
356
         exitWith ExitSuccess
357

    
358
  let nmlen = Container.maxNameLen nl
359
      newinst = Instance.create "new" (optIMem opts) (optIDsk opts)
360
                (optIVCPUs opts) "ADMIN_down" (-1) (-1)
361

    
362
  let (ereason, fin_nl, ixes) = iterateDepth nl il newinst req_nodes []
363
      allocs = length ixes
364
      fin_ixes = reverse ixes
365
      ix_namelen = maximum . map (length . Instance.name) $ fin_ixes
366
      sreason = reverse $ sortBy (compare `on` snd) ereason
367

    
368
  when (verbose > 1) $
369
         hPutStr stderr . unlines $
370
         map (\i -> printf "Inst: %*s %-*s %-*s"
371
                    ix_namelen (Instance.name i)
372
                    nmlen (Container.nameOf fin_nl $ Instance.pnode i)
373
                    nmlen (let sdx = Instance.snode i
374
                           in if sdx == Node.noSecondary then ""
375
                              else Container.nameOf fin_nl sdx)
376
             ) fin_ixes
377

    
378
  when (optShowNodes opts) $
379
       do
380
         hPutStrLn stderr ""
381
         hPutStrLn stderr "Final cluster status:"
382
         hPutStrLn stderr $ Cluster.printNodes fin_nl
383

    
384
  printResults fin_nl num_instances allocs sreason