Statistics
| Branch: | Tag: | Revision:

root / hspace.hs @ 31e7ac17

History | View | Annotate | Download (13.3 kB)

1
{-| Cluster space sizing
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Main (main) where
27

    
28
import Data.Char (toUpper)
29
import Data.List
30
import Data.Function
31
import Monad
32
import System
33
import System.IO
34
import System.Console.GetOpt
35
import qualified System
36

    
37
import Text.Printf (printf, hPrintf)
38

    
39
import qualified Ganeti.HTools.Container as Container
40
import qualified Ganeti.HTools.Cluster as Cluster
41
import qualified Ganeti.HTools.Node as Node
42
import qualified Ganeti.HTools.Instance as Instance
43
import qualified Ganeti.HTools.CLI as CLI
44

    
45
import Ganeti.HTools.Utils
46
import Ganeti.HTools.Types
47

    
48
-- | Command line options structure.
49
data Options = Options
50
    { optShowNodes :: Bool           -- ^ Whether to show node status
51
    , optNodef     :: FilePath       -- ^ Path to the nodes file
52
    , optNodeSet   :: Bool           -- ^ The nodes have been set by options
53
    , optInstf     :: FilePath       -- ^ Path to the instances file
54
    , optInstSet   :: Bool           -- ^ The insts have been set by options
55
    , optMaster    :: String         -- ^ Collect data from RAPI
56
    , optVerbose   :: Int            -- ^ Verbosity level
57
    , optOffline   :: [String]       -- ^ Names of offline nodes
58
    , optIMem      :: Int            -- ^ Instance memory
59
    , optIDsk      :: Int            -- ^ Instance disk
60
    , optIVCPUs    :: Int            -- ^ Instance VCPUs
61
    , optINodes    :: Int            -- ^ Nodes required for an instance
62
    , optMcpu      :: Double         -- ^ Max cpu ratio for nodes
63
    , optMdsk      :: Double         -- ^ Max disk usage ratio for nodes
64
    , optShowVer   :: Bool           -- ^ Just show the program version
65
    , optShowHelp  :: Bool           -- ^ Just show the help
66
    } deriving Show
67

    
68
instance CLI.CLIOptions Options where
69
    showVersion = optShowVer
70
    showHelp    = optShowHelp
71

    
72
instance CLI.EToolOptions Options where
73
    nodeFile   = optNodef
74
    nodeSet    = optNodeSet
75
    instFile   = optInstf
76
    instSet    = optInstSet
77
    masterName = optMaster
78
    silent a   = optVerbose a == 0
79

    
80
-- | Default values for the command line options.
81
defaultOptions :: Options
82
defaultOptions  = Options
83
 { optShowNodes = False
84
 , optNodef     = "nodes"
85
 , optNodeSet   = False
86
 , optInstf     = "instances"
87
 , optInstSet   = False
88
 , optMaster    = ""
89
 , optVerbose   = 1
90
 , optOffline   = []
91
 , optIMem      = 4096
92
 , optIDsk      = 102400
93
 , optIVCPUs    = 1
94
 , optINodes    = 2
95
 , optMcpu      = -1
96
 , optMdsk      = -1
97
 , optShowVer   = False
98
 , optShowHelp  = False
99
 }
100

    
101
-- | Options list and functions
102
options :: [OptDescr (Options -> Options)]
103
options =
104
    [ Option ['p']     ["print-nodes"]
105
      (NoArg (\ opts -> opts { optShowNodes = True }))
106
      "print the final node list"
107
    , Option ['n']     ["nodes"]
108
      (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
109
      "the node list FILE"
110
    , Option ['i']     ["instances"]
111
      (ReqArg (\ f opts -> opts { optInstf =  f, optInstSet = True }) "FILE")
112
      "the instance list FILE"
113
    , Option ['m']     ["master"]
114
      (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
115
      "collect data via RAPI at the given ADDRESS"
116
    , Option ['v']     ["verbose"]
117
      (NoArg (\ opts -> opts { optVerbose = optVerbose opts + 1 }))
118
      "increase the verbosity level"
119
    , Option ['q']     ["quiet"]
120
      (NoArg (\ opts -> opts { optVerbose = optVerbose opts - 1 }))
121
      "decrease the verbosity level"
122
    , Option ['O']     ["offline"]
123
      (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
124
      "set node as offline"
125
    , Option []        ["memory"]
126
      (ReqArg (\ m opts -> opts { optIMem = read m }) "MEMORY")
127
      "memory size for instances"
128
    , Option []        ["disk"]
129
      (ReqArg (\ d opts -> opts { optIDsk = read d }) "DISK")
130
      "disk size for instances"
131
    , Option []        ["vcpus"]
132
      (ReqArg (\ p opts -> opts { optIVCPUs = read p }) "NUM")
133
      "number of virtual cpus for instances"
134
    , Option []        ["req-nodes"]
135
      (ReqArg (\ n opts -> opts { optINodes = read n }) "NODES")
136
      "number of nodes for the new instances (1=plain, 2=mirrored)"
137
    , Option []        ["max-cpu"]
138
      (ReqArg (\ n opts -> opts { optMcpu = read n }) "RATIO")
139
      "maximum virtual-to-physical cpu ratio for nodes"
140
    , Option []        ["min-disk"]
141
      (ReqArg (\ n opts -> opts { optMdsk = read n }) "RATIO")
142
      "minimum free disk space for nodes (between 0 and 1)"
143
    , Option ['V']     ["version"]
144
      (NoArg (\ opts -> opts { optShowVer = True}))
145
      "show the version of the program"
146
    , Option ['h']     ["help"]
147
      (NoArg (\ opts -> opts { optShowHelp = True}))
148
      "show help"
149
    ]
150

    
151
data Phase = PInitial | PFinal
152

    
153
statsData :: [(String, Cluster.CStats -> String)]
154
statsData = [ ("SCORE", printf "%.8f" . Cluster.cs_score)
155
            , ("INST_CNT", printf "%d" . Cluster.cs_ninst)
156
            , ("MEM_FREE", printf "%d" . Cluster.cs_fmem)
157
            , ("MEM_AVAIL", printf "%d" . Cluster.cs_amem)
158
            , ("MEM_RESVD",
159
               \cs -> printf "%d" (Cluster.cs_fmem cs - Cluster.cs_amem cs))
160
            , ("MEM_INST", printf "%d" . Cluster.cs_imem)
161
            , ("MEM_OVERHEAD",
162
               \cs -> printf "%d" (Cluster.cs_xmem cs + Cluster.cs_nmem cs))
163
            , ("MEM_EFF",
164
               \cs -> printf "%.8f" (fromIntegral (Cluster.cs_imem cs) /
165
                                     Cluster.cs_tmem cs))
166
            , ("DSK_FREE", printf "%d" . Cluster.cs_fdsk)
167
            , ("DSK_AVAIL", printf "%d ". Cluster.cs_adsk)
168
            , ("DSK_RESVD",
169
               \cs -> printf "%d" (Cluster.cs_fdsk cs - Cluster.cs_adsk cs))
170
            , ("DSK_INST", printf "%d" . Cluster.cs_idsk)
171
            , ("DSK_EFF",
172
               \cs -> printf "%.8f" (fromIntegral (Cluster.cs_idsk cs) /
173
                                    Cluster.cs_tdsk cs))
174
            , ("CPU_INST", printf "%d" . Cluster.cs_icpu)
175
            , ("CPU_EFF",
176
               \cs -> printf "%.8f" (fromIntegral (Cluster.cs_icpu cs) /
177
                                     Cluster.cs_tcpu cs))
178
            , ("MNODE_MEM_AVAIL", printf "%d" . Cluster.cs_mmem)
179
            , ("MNODE_DSK_AVAIL", printf "%d" . Cluster.cs_mdsk)
180
            ]
181

    
182
specData :: [(String, Options -> String)]
183
specData = [ ("MEM", printf "%d" . optIMem)
184
           , ("DSK", printf "%d" . optIDsk)
185
           , ("CPU", printf "%d" . optIVCPUs)
186
           , ("RQN", printf "%d" . optINodes)
187
           ]
188

    
189
clusterData :: [(String, Cluster.CStats -> String)]
190
clusterData = [ ("MEM", printf "%.0f" . Cluster.cs_tmem)
191
              , ("DSK", printf "%.0f" . Cluster.cs_tdsk)
192
              , ("CPU", printf "%.0f" . Cluster.cs_tcpu)
193
              ]
194

    
195
-- | Recursively place instances on the cluster until we're out of space
196
iterateDepth :: Node.List
197
             -> Instance.List
198
             -> Instance.Instance
199
             -> Int
200
             -> [Instance.Instance]
201
             -> Result (FailStats, Node.List, [Instance.Instance])
202
iterateDepth nl il newinst nreq ixes =
203
      let depth = length ixes
204
          newname = printf "new-%d" depth::String
205
          newidx = length (Container.elems il) + depth
206
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
207
      in case Cluster.tryAlloc nl il newi2 nreq of
208
           Bad s -> Bad s
209
           Ok (errs, _, sols3) ->
210
               case sols3 of
211
                 Nothing -> Ok (Cluster.collapseFailures errs, nl, ixes)
212
                 Just (_, (xnl, xi, _)) ->
213
                     iterateDepth xnl il newinst nreq $! (xi:ixes)
214

    
215
-- | Function to print stats for a given phase
216
printStats :: Phase -> Cluster.CStats -> [(String, String)]
217
printStats ph cs =
218
  map (\(s, fn) -> (printf "%s_%s" kind s, fn cs)) statsData
219
  where kind = case ph of
220
                 PInitial -> "INI"
221
                 PFinal -> "FIN"
222

    
223
-- | Print final stats and related metrics
224
printResults :: Node.List -> Int -> Int -> [(FailMode, Int)] -> IO ()
225
printResults fin_nl num_instances allocs sreason = do
226
  let fin_stats = Cluster.totalResources fin_nl
227
      fin_instances = num_instances + allocs
228

    
229
  when (num_instances + allocs /= Cluster.cs_ninst fin_stats) $
230
       do
231
         hPrintf stderr "ERROR: internal inconsistency, allocated (%d)\
232
                        \ != counted (%d)\n" (num_instances + allocs)
233
                                 (Cluster.cs_ninst fin_stats)
234
         exitWith $ ExitFailure 1
235

    
236
  printKeys $ printStats PFinal fin_stats
237
  printKeys [ ("ALLOC_USAGE", printf "%.8f"
238
                                ((fromIntegral num_instances::Double) /
239
                                 fromIntegral fin_instances))
240
            , ("ALLOC_INSTANCES", printf "%d" allocs)
241
            , ("ALLOC_FAIL_REASON", map toUpper . show . fst $ head sreason)
242
            ]
243
  printKeys $ map (\(x, y) -> (printf "ALLOC_%s_CNT" (show x),
244
                               printf "%d" y)) sreason
245
  -- this should be the final entry
246
  printKeys [("OK", "1")]
247

    
248
-- | Format a list of key/values as a shell fragment
249
printKeys :: [(String, String)] -> IO ()
250
printKeys = mapM_ (\(k, v) -> printf "HTS_%s=%s\n" (map toUpper k) v)
251

    
252
-- | Main function.
253
main :: IO ()
254
main = do
255
  cmd_args <- System.getArgs
256
  (opts, args) <- CLI.parseOpts cmd_args "hspace" options defaultOptions
257

    
258
  unless (null args) $ do
259
         hPutStrLn stderr "Error: this program doesn't take any arguments."
260
         exitWith $ ExitFailure 1
261

    
262
  let verbose = optVerbose opts
263

    
264
  (fixed_nl, il, csf) <- CLI.loadExternalData opts
265

    
266
  printKeys $ map (\(a, fn) -> ("SPEC_" ++ a, fn opts)) specData
267

    
268
  let num_instances = length $ Container.elems il
269

    
270
  let offline_names = optOffline opts
271
      all_nodes = Container.elems fixed_nl
272
      all_names = map Node.name all_nodes
273
      offline_wrong = filter (flip notElem all_names) offline_names
274
      offline_indices = map Node.idx $
275
                        filter (\n -> elem (Node.name n) offline_names)
276
                               all_nodes
277
      req_nodes = optINodes opts
278
      m_cpu = optMcpu opts
279
      m_dsk = optMdsk opts
280

    
281
  when (length offline_wrong > 0) $ do
282
         hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
283
                     (commaJoin offline_wrong)
284
         exitWith $ ExitFailure 1
285

    
286
  when (req_nodes /= 1 && req_nodes /= 2) $ do
287
         hPrintf stderr "Error: Invalid required nodes (%d)\n" req_nodes
288
         exitWith $ ExitFailure 1
289

    
290
  let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
291
                                then Node.setOffline n True
292
                                else n) fixed_nl
293
      nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
294
           nm
295

    
296
  when (length csf > 0 && verbose > 1) $
297
       hPrintf stderr "Note: Stripping common suffix of '%s' from names\n" csf
298

    
299
  when (optShowNodes opts) $
300
       do
301
         hPutStrLn stderr "Initial cluster status:"
302
         hPutStrLn stderr $ Cluster.printNodes nl
303

    
304
  let ini_cv = Cluster.compCV nl
305
      ini_stats = Cluster.totalResources nl
306

    
307
  when (verbose > 2) $ do
308
         hPrintf stderr "Initial coefficients: overall %.8f, %s\n"
309
                 ini_cv (Cluster.printStats nl)
310

    
311
  printKeys $ map (\(a, fn) -> ("CLUSTER_" ++ a, fn ini_stats)) clusterData
312
  printKeys [("CLUSTER_NODES", printf "%d" (length all_nodes))]
313
  printKeys $ printStats PInitial ini_stats
314

    
315
  let bad_nodes = fst $ Cluster.computeBadItems nl il
316
  when (length bad_nodes > 0) $ do
317
         -- This is failn1 case, so we print the same final stats and
318
         -- exit early
319
         printResults nl num_instances 0 [(FailN1, 1)]
320
         exitWith ExitSuccess
321

    
322
  let nmlen = Container.maxNameLen nl
323
      newinst = Instance.create "new" (optIMem opts) (optIDsk opts)
324
                (optIVCPUs opts) "ADMIN_down" (-1) (-1)
325

    
326
  let result = iterateDepth nl il newinst req_nodes []
327
  (ereason, fin_nl, ixes) <- (case result of
328
                                Bad s -> do
329
                                  hPrintf stderr "Failure: %s\n" s
330
                                  exitWith $ ExitFailure 1
331
                                Ok x -> return x)
332
  let allocs = length ixes
333
      fin_ixes = reverse ixes
334
      ix_namelen = maximum . map (length . Instance.name) $ fin_ixes
335
      sreason = reverse $ sortBy (compare `on` snd) ereason
336

    
337
  when (verbose > 1) $
338
         hPutStr stderr . unlines $
339
         map (\i -> printf "Inst: %*s %-*s %-*s"
340
                    ix_namelen (Instance.name i)
341
                    nmlen (Container.nameOf fin_nl $ Instance.pnode i)
342
                    nmlen (let sdx = Instance.snode i
343
                           in if sdx == Node.noSecondary then ""
344
                              else Container.nameOf fin_nl sdx)
345
             ) fin_ixes
346

    
347
  when (optShowNodes opts) $
348
       do
349
         hPutStrLn stderr ""
350
         hPutStrLn stderr "Final cluster status:"
351
         hPutStrLn stderr $ Cluster.printNodes fin_nl
352

    
353
  printResults fin_nl num_instances allocs sreason