Statistics
| Branch: | Tag: | Revision:

root / hspace.hs @ 8c4c6a8a

History | View | Annotate | Download (13.1 kB)

1
{-| Cluster space sizing
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Main (main) where
27

    
28
import Data.List
29
import Data.Function
30
import Monad
31
import System
32
import System.IO
33
import System.Console.GetOpt
34
import qualified System
35

    
36
import Text.Printf (printf, hPrintf)
37

    
38
import qualified Ganeti.HTools.Container as Container
39
import qualified Ganeti.HTools.Cluster as Cluster
40
import qualified Ganeti.HTools.Node as Node
41
import qualified Ganeti.HTools.Instance as Instance
42
import qualified Ganeti.HTools.CLI as CLI
43

    
44
import Ganeti.HTools.Utils
45
import Ganeti.HTools.Types
46

    
47
-- | Command line options structure.
48
data Options = Options
49
    { optShowNodes :: Bool           -- ^ Whether to show node status
50
    , optNodef     :: FilePath       -- ^ Path to the nodes file
51
    , optNodeSet   :: Bool           -- ^ The nodes have been set by options
52
    , optInstf     :: FilePath       -- ^ Path to the instances file
53
    , optInstSet   :: Bool           -- ^ The insts have been set by options
54
    , optMaster    :: String         -- ^ Collect data from RAPI
55
    , optVerbose   :: Int            -- ^ Verbosity level
56
    , optOffline   :: [String]       -- ^ Names of offline nodes
57
    , optIMem      :: Int            -- ^ Instance memory
58
    , optIDsk      :: Int            -- ^ Instance disk
59
    , optIVCPUs    :: Int            -- ^ Instance VCPUs
60
    , optINodes    :: Int            -- ^ Nodes required for an instance
61
    , optMcpu      :: Double         -- ^ Max cpu ratio for nodes
62
    , optMdsk      :: Double         -- ^ Max disk usage ratio for nodes
63
    , optShowVer   :: Bool           -- ^ Just show the program version
64
    , optShowHelp  :: Bool           -- ^ Just show the help
65
    } deriving Show
66

    
67
instance CLI.CLIOptions Options where
68
    showVersion = optShowVer
69
    showHelp    = optShowHelp
70

    
71
instance CLI.EToolOptions Options where
72
    nodeFile   = optNodef
73
    nodeSet    = optNodeSet
74
    instFile   = optInstf
75
    instSet    = optInstSet
76
    masterName = optMaster
77
    silent a   = optVerbose a == 0
78

    
79
-- | Default values for the command line options.
80
defaultOptions :: Options
81
defaultOptions  = Options
82
 { optShowNodes = False
83
 , optNodef     = "nodes"
84
 , optNodeSet   = False
85
 , optInstf     = "instances"
86
 , optInstSet   = False
87
 , optMaster    = ""
88
 , optVerbose   = 1
89
 , optOffline   = []
90
 , optIMem      = 4096
91
 , optIDsk      = 102400
92
 , optIVCPUs    = 1
93
 , optINodes    = 2
94
 , optMcpu      = -1
95
 , optMdsk      = -1
96
 , optShowVer   = False
97
 , optShowHelp  = False
98
 }
99

    
100
-- | Options list and functions
101
options :: [OptDescr (Options -> Options)]
102
options =
103
    [ Option ['p']     ["print-nodes"]
104
      (NoArg (\ opts -> opts { optShowNodes = True }))
105
      "print the final node list"
106
    , Option ['n']     ["nodes"]
107
      (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
108
      "the node list FILE"
109
    , Option ['i']     ["instances"]
110
      (ReqArg (\ f opts -> opts { optInstf =  f, optInstSet = True }) "FILE")
111
      "the instance list FILE"
112
    , Option ['m']     ["master"]
113
      (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
114
      "collect data via RAPI at the given ADDRESS"
115
    , Option ['v']     ["verbose"]
116
      (NoArg (\ opts -> opts { optVerbose = optVerbose opts + 1 }))
117
      "increase the verbosity level"
118
    , Option ['q']     ["quiet"]
119
      (NoArg (\ opts -> opts { optVerbose = optVerbose opts - 1 }))
120
      "decrease the verbosity level"
121
    , Option ['O']     ["offline"]
122
      (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
123
      "set node as offline"
124
    , Option []        ["memory"]
125
      (ReqArg (\ m opts -> opts { optIMem = read m }) "MEMORY")
126
      "memory size for instances"
127
    , Option []        ["disk"]
128
      (ReqArg (\ d opts -> opts { optIDsk = read d }) "DISK")
129
      "disk size for instances"
130
    , Option []        ["vcpus"]
131
      (ReqArg (\ p opts -> opts { optIVCPUs = read p }) "NUM")
132
      "number of virtual cpus for instances"
133
    , Option []        ["req-nodes"]
134
      (ReqArg (\ n opts -> opts { optINodes = read n }) "NODES")
135
      "number of nodes for the new instances (1=plain, 2=mirrored)"
136
    , Option []        ["max-cpu"]
137
      (ReqArg (\ n opts -> opts { optMcpu = read n }) "RATIO")
138
      "maximum virtual-to-physical cpu ratio for nodes"
139
    , Option []        ["min-disk"]
140
      (ReqArg (\ n opts -> opts { optMdsk = read n }) "RATIO")
141
      "minimum free disk space for nodes (between 0 and 1)"
142
    , Option ['V']     ["version"]
143
      (NoArg (\ opts -> opts { optShowVer = True}))
144
      "show the version of the program"
145
    , Option ['h']     ["help"]
146
      (NoArg (\ opts -> opts { optShowHelp = True}))
147
      "show help"
148
    ]
149

    
150
-- | Build failure stats out of a list of failure reasons
151
concatFailure :: [(FailMode, Int)] -> FailMode -> [(FailMode, Int)]
152
concatFailure flst reason =
153
    let cval = lookup reason flst
154
    in case cval of
155
         Nothing -> (reason, 1):flst
156
         Just val -> let plain = filter (\(x, _) -> x /= reason) flst
157
                     in (reason, val+1):plain
158

    
159
-- | Build list of failures and placements out of an list of possible
160
-- | allocations
161
filterFails :: Cluster.AllocSolution
162
            -> ([(FailMode, Int)],
163
                [(Node.List, Instance.Instance, [Node.Node])])
164
filterFails sols =
165
    let (alst, blst) = unzip . map (\ (onl, i, nn) ->
166
                                        case onl of
167
                                          OpFail reason -> ([reason], [])
168
                                          OpGood gnl -> ([], [(gnl, i, nn)])
169
                                   ) $ sols
170
        aval = concat alst
171
        bval = concat blst
172
    in (foldl' concatFailure [] aval, bval)
173

    
174
-- | Get the placement with best score out of a list of possible placements
175
processResults :: [(Node.List, Instance.Instance, [Node.Node])]
176
               -> (Node.List, Instance.Instance, [Node.Node])
177
processResults sols =
178
    let sols' = map (\e@(nl', _, _) -> (Cluster.compCV  nl', e)) sols
179
        sols'' = sortBy (compare `on` fst) sols'
180
    in snd $ head sols''
181

    
182
-- | Recursively place instances on the cluster until we're out of space
183
iterateDepth :: Node.List
184
             -> Instance.List
185
             -> Instance.Instance
186
             -> Int
187
             -> [Instance.Instance]
188
             -> ([(FailMode, Int)], Node.List, [Instance.Instance])
189
iterateDepth nl il newinst nreq ixes =
190
      let depth = length ixes
191
          newname = printf "new-%d" depth::String
192
          newidx = length (Container.elems il) + depth
193
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
194
          sols = Cluster.tryAlloc nl il newi2 nreq::
195
                 OpResult Cluster.AllocSolution
196
      in case sols of
197
           OpFail _ -> ([], nl, ixes)
198
           OpGood sols' ->
199
               let (errs, sols3) = filterFails sols'
200
               in if null sols3
201
                  then (errs, nl, ixes)
202
                  else let (xnl, xi, _) = processResults sols3
203
                       in iterateDepth xnl il newinst nreq (xi:ixes)
204

    
205
-- | Function to print stats for a given phase
206
printStats :: String -> Cluster.CStats -> IO ()
207
printStats kind cs = do
208
  printf "%s free RAM: %d\n" kind (Cluster.cs_fmem cs)
209
  printf "%s allocatable RAM: %d\n" kind (Cluster.cs_amem cs)
210
  printf "%s reserved RAM: %d\n" kind (Cluster.cs_fmem cs -
211
                                       Cluster.cs_amem cs)
212
  printf "%s instance RAM: %d\n" kind (Cluster.cs_imem cs)
213
  printf "%s overhead RAM: %d\n" kind (Cluster.cs_xmem cs + Cluster.cs_nmem cs)
214
  printf "%s RAM usage efficiency: %.8f\n"
215
         kind (fromIntegral (Cluster.cs_imem cs) / Cluster.cs_tmem cs)
216
  printf "%s free disk: %d\n" kind (Cluster.cs_fdsk cs)
217
  printf "%s allocatable disk: %d\n" kind (Cluster.cs_adsk cs)
218
  printf "%s reserved disk: %d\n" kind (Cluster.cs_fdsk cs -
219
                                        Cluster.cs_adsk cs)
220
  printf "%s instance disk: %d\n" kind (Cluster.cs_idsk cs)
221
  printf "%s disk usage efficiency: %.8f\n"
222
         kind (fromIntegral (Cluster.cs_idsk cs) / Cluster.cs_tdsk cs)
223
  printf "%s instance cpus: %d\n" kind (Cluster.cs_icpu cs)
224
  printf "%s cpu usage efficiency: %.8f\n"
225
         kind (fromIntegral (Cluster.cs_icpu cs) / Cluster.cs_tcpu cs)
226
  printf "%s max node allocatable RAM: %d\n" kind (Cluster.cs_mmem cs)
227
  printf "%s max node allocatable disk: %d\n" kind (Cluster.cs_mdsk cs)
228

    
229
-- | Print final stats and related metrics
230
printResults :: Node.List -> Int -> Int -> [(FailMode, Int)] -> IO ()
231
printResults fin_nl num_instances allocs sreason = do
232
  let fin_stats = Cluster.totalResources fin_nl
233
      fin_instances = num_instances + allocs
234

    
235
  printf "Final score: %.8f\n" (Cluster.compCV fin_nl)
236
  printf "Final instances: %d\n" (num_instances + allocs)
237
  printStats "Final" fin_stats
238
  printf "Usage: %.5f\n" ((fromIntegral num_instances::Double) /
239
                          fromIntegral fin_instances)
240
  printf "Allocations: %d\n" allocs
241
  putStr (unlines . map (\(x, y) -> printf "%s: %d" (show x) y) $ sreason)
242
  printf "Most likely fail reason: %s\n" (show . fst . head $ sreason)
243

    
244
-- | Main function.
245
main :: IO ()
246
main = do
247
  cmd_args <- System.getArgs
248
  (opts, args) <- CLI.parseOpts cmd_args "hspace" options defaultOptions
249

    
250
  unless (null args) $ do
251
         hPutStrLn stderr "Error: this program doesn't take any arguments."
252
         exitWith $ ExitFailure 1
253

    
254
  let verbose = optVerbose opts
255

    
256
  (fixed_nl, il, csf) <- CLI.loadExternalData opts
257

    
258
  printf "Spec RAM: %d\n" (optIMem opts)
259
  printf "Spec disk: %d\n" (optIDsk opts)
260
  printf "Spec CPUs: %d\n" (optIVCPUs opts)
261
  printf "Spec nodes: %d\n" (optINodes opts)
262

    
263
  let num_instances = length $ Container.elems il
264

    
265
  let offline_names = optOffline opts
266
      all_nodes = Container.elems fixed_nl
267
      all_names = map Node.name all_nodes
268
      offline_wrong = filter (flip notElem all_names) offline_names
269
      offline_indices = map Node.idx $
270
                        filter (\n -> elem (Node.name n) offline_names)
271
                               all_nodes
272
      req_nodes = optINodes opts
273
      m_cpu = optMcpu opts
274
      m_dsk = optMdsk opts
275

    
276
  when (length offline_wrong > 0) $ do
277
         hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
278
                     (commaJoin offline_wrong)
279
         exitWith $ ExitFailure 1
280

    
281
  when (req_nodes /= 1 && req_nodes /= 2) $ do
282
         hPrintf stderr "Error: Invalid required nodes (%d)\n" req_nodes
283
         exitWith $ ExitFailure 1
284

    
285
  let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
286
                                then Node.setOffline n True
287
                                else n) fixed_nl
288
      nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
289
           nm
290

    
291
  when (length csf > 0 && verbose > 1) $
292
       printf "Note: Stripping common suffix of '%s' from names\n" csf
293

    
294
  when (optShowNodes opts) $
295
       do
296
         putStrLn "Initial cluster status:"
297
         putStrLn $ Cluster.printNodes nl
298

    
299
  let ini_cv = Cluster.compCV nl
300
      ini_stats = Cluster.totalResources nl
301

    
302
  (if verbose > 2 then
303
       printf "Initial coefficients: overall %.8f, %s\n"
304
       ini_cv (Cluster.printStats nl)
305
   else
306
       printf "Initial score: %.8f\n" ini_cv)
307
  printf "Cluster RAM: %.0f\n" (Cluster.cs_tmem ini_stats)
308
  printf "Cluster disk: %.0f\n" (Cluster.cs_tdsk ini_stats)
309
  printf "Cluster cpus: %.0f\n" (Cluster.cs_tcpu ini_stats)
310
  printf "Initial instances: %d\n" num_instances
311
  printStats "Initial" ini_stats
312

    
313
  let bad_nodes = fst $ Cluster.computeBadItems nl il
314
  when (length bad_nodes > 0) $ do
315
         -- This is failn1 case, so we print the same final stats and
316
         -- exit early
317
         printResults nl num_instances 0 [(FailN1, 1)]
318
         exitWith ExitSuccess
319

    
320
  let nmlen = Container.maxNameLen nl
321
      newinst = Instance.create "new" (optIMem opts) (optIDsk opts)
322
                (optIVCPUs opts) "ADMIN_down" (-1) (-1)
323

    
324
  let (ereason, fin_nl, ixes) = iterateDepth nl il newinst req_nodes []
325
      allocs = length ixes
326
      fin_ixes = reverse ixes
327
      ix_namelen = maximum . map (length . Instance.name) $ fin_ixes
328
      sreason = reverse $ sortBy (compare `on` snd) ereason
329

    
330
  printResults fin_nl num_instances allocs sreason
331

    
332
  when (verbose > 1) $
333
         putStr . unlines . map (\i -> printf "Inst: %*s %-*s %-*s"
334
                     ix_namelen (Instance.name i)
335
                     nmlen (Container.nameOf fin_nl $ Instance.pnode i)
336
                     nmlen (let sdx = Instance.snode i
337
                            in if sdx == Node.noSecondary then ""
338
                               else Container.nameOf fin_nl sdx))
339
         $ fin_ixes
340

    
341
  when (optShowNodes opts) $
342
       do
343
         putStrLn ""
344
         putStrLn "Final cluster status:"
345
         putStrLn $ Cluster.printNodes fin_nl