Statistics
| Branch: | Tag: | Revision:

root / hspace.hs @ 94e05c32

History | View | Annotate | Download (12.1 kB)

1
{-| Cluster space sizing
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Main (main) where
27

    
28
import Data.Char (toUpper)
29
import Data.List
30
import Data.Function
31
import Data.Maybe (isJust, fromJust)
32
import Monad
33
import System
34
import System.IO
35
import qualified System
36

    
37
import Text.Printf (printf, hPrintf)
38

    
39
import qualified Ganeti.HTools.Container as Container
40
import qualified Ganeti.HTools.Cluster as Cluster
41
import qualified Ganeti.HTools.Node as Node
42
import qualified Ganeti.HTools.Instance as Instance
43

    
44
import Ganeti.HTools.Utils
45
import Ganeti.HTools.Types
46
import Ganeti.HTools.CLI
47
import Ganeti.HTools.ExtLoader
48

    
49
-- | Options list and functions
50
options :: [OptType]
51
options =
52
    [ oPrintNodes
53
    , oNodeFile
54
    , oInstFile
55
    , oNodeSim
56
    , oRapiMaster
57
    , oLuxiSocket
58
    , oVerbose
59
    , oQuiet
60
    , oOfflineNode
61
    , oIMem
62
    , oIDisk
63
    , oIVcpus
64
    , oINodes
65
    , oMaxCpu
66
    , oMinDisk
67
    , oTieredSpec
68
    , oShowVer
69
    , oShowHelp
70
    ]
71

    
72
-- | The allocation phase we're in (initial, after tiered allocs, or
73
-- after regular allocation).
74
data Phase = PInitial
75
           | PFinal
76
           | PTiered
77

    
78
statsData :: [(String, Cluster.CStats -> String)]
79
statsData = [ ("SCORE", printf "%.8f" . Cluster.csScore)
80
            , ("INST_CNT", printf "%d" . Cluster.csNinst)
81
            , ("MEM_FREE", printf "%d" . Cluster.csFmem)
82
            , ("MEM_AVAIL", printf "%d" . Cluster.csAmem)
83
            , ("MEM_RESVD",
84
               \cs -> printf "%d" (Cluster.csFmem cs - Cluster.csAmem cs))
85
            , ("MEM_INST", printf "%d" . Cluster.csImem)
86
            , ("MEM_OVERHEAD",
87
               \cs -> printf "%d" (Cluster.csXmem cs + Cluster.csNmem cs))
88
            , ("MEM_EFF",
89
               \cs -> printf "%.8f" (fromIntegral (Cluster.csImem cs) /
90
                                     Cluster.csTmem cs))
91
            , ("DSK_FREE", printf "%d" . Cluster.csFdsk)
92
            , ("DSK_AVAIL", printf "%d ". Cluster.csAdsk)
93
            , ("DSK_RESVD",
94
               \cs -> printf "%d" (Cluster.csFdsk cs - Cluster.csAdsk cs))
95
            , ("DSK_INST", printf "%d" . Cluster.csIdsk)
96
            , ("DSK_EFF",
97
               \cs -> printf "%.8f" (fromIntegral (Cluster.csIdsk cs) /
98
                                    Cluster.csTdsk cs))
99
            , ("CPU_INST", printf "%d" . Cluster.csIcpu)
100
            , ("CPU_EFF",
101
               \cs -> printf "%.8f" (fromIntegral (Cluster.csIcpu cs) /
102
                                     Cluster.csTcpu cs))
103
            , ("MNODE_MEM_AVAIL", printf "%d" . Cluster.csMmem)
104
            , ("MNODE_DSK_AVAIL", printf "%d" . Cluster.csMdsk)
105
            ]
106

    
107
specData :: [(String, RSpec -> String)]
108
specData = [ ("MEM", printf "%d" . rspecMem)
109
           , ("DSK", printf "%d" . rspecDsk)
110
           , ("CPU", printf "%d" . rspecCpu)
111
           ]
112

    
113
clusterData :: [(String, Cluster.CStats -> String)]
114
clusterData = [ ("MEM", printf "%.0f" . Cluster.csTmem)
115
              , ("DSK", printf "%.0f" . Cluster.csTdsk)
116
              , ("CPU", printf "%.0f" . Cluster.csTcpu)
117
              ]
118

    
119
-- | Recursively place instances on the cluster until we're out of space
120
iterateDepth :: Node.List
121
             -> Instance.List
122
             -> Instance.Instance
123
             -> Int
124
             -> [Instance.Instance]
125
             -> Result (FailStats, Node.List, [Instance.Instance])
126
iterateDepth nl il newinst nreq ixes =
127
      let depth = length ixes
128
          newname = printf "new-%d" depth::String
129
          newidx = length (Container.elems il) + depth
130
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
131
      in case Cluster.tryAlloc nl il newi2 nreq of
132
           Bad s -> Bad s
133
           Ok (errs, _, sols3) ->
134
               case sols3 of
135
                 Nothing -> Ok (Cluster.collapseFailures errs, nl, ixes)
136
                 Just (_, (xnl, xi, _)) ->
137
                     iterateDepth xnl il newinst nreq $! (xi:ixes)
138

    
139
tieredAlloc :: Node.List
140
            -> Instance.List
141
            -> Instance.Instance
142
            -> Int
143
            -> [Instance.Instance]
144
            -> Result (FailStats, Node.List, [Instance.Instance])
145
tieredAlloc nl il newinst nreq ixes =
146
    case iterateDepth nl il newinst nreq ixes of
147
      Bad s -> Bad s
148
      Ok (errs, nl', ixes') ->
149
          case Instance.shrinkByType newinst . fst . last $
150
               sortBy (compare `on` snd) errs of
151
            Bad _ -> Ok (errs, nl', ixes')
152
            Ok newinst' ->
153
                tieredAlloc nl' il newinst' nreq ixes'
154

    
155

    
156
-- | Function to print stats for a given phase
157
printStats :: Phase -> Cluster.CStats -> [(String, String)]
158
printStats ph cs =
159
  map (\(s, fn) -> (printf "%s_%s" kind s, fn cs)) statsData
160
  where kind = case ph of
161
                 PInitial -> "INI"
162
                 PFinal -> "FIN"
163
                 PTiered -> "TRL"
164

    
165
-- | Print final stats and related metrics
166
printResults :: Node.List -> Int -> Int -> [(FailMode, Int)] -> IO ()
167
printResults fin_nl num_instances allocs sreason = do
168
  let fin_stats = Cluster.totalResources fin_nl
169
      fin_instances = num_instances + allocs
170

    
171
  when (num_instances + allocs /= Cluster.csNinst fin_stats) $
172
       do
173
         hPrintf stderr "ERROR: internal inconsistency, allocated (%d)\
174
                        \ != counted (%d)\n" (num_instances + allocs)
175
                                 (Cluster.csNinst fin_stats)
176
         exitWith $ ExitFailure 1
177

    
178
  printKeys $ printStats PFinal fin_stats
179
  printKeys [ ("ALLOC_USAGE", printf "%.8f"
180
                                ((fromIntegral num_instances::Double) /
181
                                 fromIntegral fin_instances))
182
            , ("ALLOC_INSTANCES", printf "%d" allocs)
183
            , ("ALLOC_FAIL_REASON", map toUpper . show . fst $ head sreason)
184
            ]
185
  printKeys $ map (\(x, y) -> (printf "ALLOC_%s_CNT" (show x),
186
                               printf "%d" y)) sreason
187
  -- this should be the final entry
188
  printKeys [("OK", "1")]
189

    
190
-- | Format a list of key/values as a shell fragment
191
printKeys :: [(String, String)] -> IO ()
192
printKeys = mapM_ (\(k, v) -> printf "HTS_%s=%s\n" (map toUpper k) v)
193

    
194
printInstance :: Node.List -> Instance.Instance -> [String]
195
printInstance nl i = [ Instance.name i
196
                     , (Container.nameOf nl $ Instance.pNode i)
197
                     , (let sdx = Instance.sNode i
198
                        in if sdx == Node.noSecondary then ""
199
                           else Container.nameOf nl sdx)
200
                     , show (Instance.mem i)
201
                     , show (Instance.dsk i)
202
                     , show (Instance.vcpus i)
203
                     ]
204

    
205
-- | Main function.
206
main :: IO ()
207
main = do
208
  cmd_args <- System.getArgs
209
  (opts, args) <- parseOpts cmd_args "hspace" options
210

    
211
  unless (null args) $ do
212
         hPutStrLn stderr "Error: this program doesn't take any arguments."
213
         exitWith $ ExitFailure 1
214

    
215
  let verbose = optVerbose opts
216
      ispec = optISpec opts
217
      shownodes = optShowNodes opts
218

    
219
  (fixed_nl, il, _, csf) <- loadExternalData opts
220

    
221
  printKeys $ map (\(a, fn) -> ("SPEC_" ++ a, fn ispec)) specData
222
  printKeys [ ("SPEC_RQN", printf "%d" (optINodes opts)) ]
223

    
224
  let num_instances = length $ Container.elems il
225

    
226
  let offline_names = optOffline opts
227
      all_nodes = Container.elems fixed_nl
228
      all_names = map Node.name all_nodes
229
      offline_wrong = filter (flip notElem all_names) offline_names
230
      offline_indices = map Node.idx $
231
                        filter (\n -> elem (Node.name n) offline_names)
232
                               all_nodes
233
      req_nodes = optINodes opts
234
      m_cpu = optMcpu opts
235
      m_dsk = optMdsk opts
236

    
237
  when (length offline_wrong > 0) $ do
238
         hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
239
                     (commaJoin offline_wrong)
240
         exitWith $ ExitFailure 1
241

    
242
  when (req_nodes /= 1 && req_nodes /= 2) $ do
243
         hPrintf stderr "Error: Invalid required nodes (%d)\n" req_nodes
244
         exitWith $ ExitFailure 1
245

    
246
  let nm = Container.map (\n -> if elem (Node.idx n) offline_indices
247
                                then Node.setOffline n True
248
                                else n) fixed_nl
249
      nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
250
           nm
251

    
252
  when (length csf > 0 && verbose > 1) $
253
       hPrintf stderr "Note: Stripping common suffix of '%s' from names\n" csf
254

    
255
  when (isJust shownodes) $
256
       do
257
         hPutStrLn stderr "Initial cluster status:"
258
         hPutStrLn stderr $ Cluster.printNodes nl (fromJust shownodes)
259

    
260
  let ini_cv = Cluster.compCV nl
261
      ini_stats = Cluster.totalResources nl
262

    
263
  when (verbose > 2) $
264
         hPrintf stderr "Initial coefficients: overall %.8f, %s\n"
265
                 ini_cv (Cluster.printStats nl)
266

    
267
  printKeys $ map (\(a, fn) -> ("CLUSTER_" ++ a, fn ini_stats)) clusterData
268
  printKeys [("CLUSTER_NODES", printf "%d" (length all_nodes))]
269
  printKeys $ printStats PInitial ini_stats
270

    
271
  let bad_nodes = fst $ Cluster.computeBadItems nl il
272
  when (length bad_nodes > 0) $ do
273
         -- This is failn1 case, so we print the same final stats and
274
         -- exit early
275
         printResults nl num_instances 0 [(FailN1, 1)]
276
         exitWith ExitSuccess
277

    
278
  -- utility functions
279
  let iofspec spx = Instance.create "new" (rspecMem spx) (rspecDsk spx)
280
                    (rspecCpu spx) "ADMIN_down" [] (-1) (-1)
281
      exitifbad val = (case val of
282
                         Bad s -> do
283
                           hPrintf stderr "Failure: %s\n" s
284
                           exitWith $ ExitFailure 1
285
                         Ok x -> return x)
286

    
287

    
288
  let reqinst = iofspec ispec
289

    
290
  -- Run the tiered allocation, if enabled
291

    
292
  (case optTieredSpec opts of
293
     Nothing -> return ()
294
     Just tspec -> do
295
       let tresu = tieredAlloc nl il (iofspec tspec) req_nodes []
296
       (_, trl_nl, trl_ixes) <- exitifbad tresu
297
       let fin_trl_ixes = reverse trl_ixes
298
           ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
299
           spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
300
                      ix_byspec::[(RSpec, Int)]
301
           spec_map' = map (\(spec, cnt) ->
302
                                printf "%d,%d,%d=%d" (rspecMem spec)
303
                                       (rspecDsk spec) (rspecCpu spec) cnt)
304
                       spec_map::[String]
305

    
306
       when (verbose > 1) $ do
307
         hPutStrLn stderr "Tiered allocation map"
308
         hPutStr stderr . unlines . map ((:) ' ' .  intercalate " ") $
309
                 formatTable (map (printInstance trl_nl) fin_trl_ixes)
310
                                 [False, False, False, True, True, True]
311

    
312
       when (isJust shownodes) $ do
313
         hPutStrLn stderr ""
314
         hPutStrLn stderr "Tiered allocation status:"
315
         hPutStrLn stderr $ Cluster.printNodes trl_nl (fromJust shownodes)
316

    
317
       printKeys $ printStats PTiered (Cluster.totalResources trl_nl)
318
       printKeys [("TSPEC", intercalate " " spec_map')])
319

    
320
  -- Run the standard (avg-mode) allocation
321

    
322
  let result = iterateDepth nl il reqinst req_nodes []
323
  (ereason, fin_nl, ixes) <- exitifbad result
324

    
325
  let allocs = length ixes
326
      fin_ixes = reverse ixes
327
      sreason = reverse $ sortBy (compare `on` snd) ereason
328

    
329
  when (verbose > 1) $ do
330
         hPutStrLn stderr "Instance map"
331
         hPutStr stderr . unlines . map ((:) ' ' .  intercalate " ") $
332
                 formatTable (map (printInstance fin_nl) fin_ixes)
333
                                 [False, False, False, True, True, True]
334
  when (isJust shownodes) $
335
       do
336
         hPutStrLn stderr ""
337
         hPutStrLn stderr "Final cluster status:"
338
         hPutStrLn stderr $ Cluster.printNodes fin_nl (fromJust shownodes)
339

    
340
  printResults fin_nl num_instances allocs sreason