Statistics
| Branch: | Tag: | Revision:

root / hspace.hs @ 5182e970

History | View | Annotate | Download (12.5 kB)

1
{-| Cluster space sizing
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2009 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Main (main) where
27

    
28
import Data.Char (toUpper, isAlphaNum)
29
import Data.List
30
import Data.Function
31
import Data.Maybe (isJust, fromJust)
32
import Data.Ord (comparing)
33
import Monad
34
import System (exitWith, ExitCode(..))
35
import System.IO
36
import qualified System
37

    
38
import Text.Printf (printf, hPrintf)
39

    
40
import qualified Ganeti.HTools.Container as Container
41
import qualified Ganeti.HTools.Cluster as Cluster
42
import qualified Ganeti.HTools.Node as Node
43
import qualified Ganeti.HTools.Instance as Instance
44

    
45
import Ganeti.HTools.Utils
46
import Ganeti.HTools.Types
47
import Ganeti.HTools.CLI
48
import Ganeti.HTools.ExtLoader
49

    
50
-- | Options list and functions
51
options :: [OptType]
52
options =
53
    [ oPrintNodes
54
    , oDataFile
55
    , oNodeSim
56
    , oRapiMaster
57
    , oLuxiSocket
58
    , oVerbose
59
    , oQuiet
60
    , oOfflineNode
61
    , oIMem
62
    , oIDisk
63
    , oIVcpus
64
    , oINodes
65
    , oMaxCpu
66
    , oMinDisk
67
    , oTieredSpec
68
    , oShowVer
69
    , oShowHelp
70
    ]
71

    
72
-- | The allocation phase we're in (initial, after tiered allocs, or
73
-- after regular allocation).
74
data Phase = PInitial
75
           | PFinal
76
           | PTiered
77

    
78
statsData :: [(String, Cluster.CStats -> String)]
79
statsData = [ ("SCORE", printf "%.8f" . Cluster.csScore)
80
            , ("INST_CNT", printf "%d" . Cluster.csNinst)
81
            , ("MEM_FREE", printf "%d" . Cluster.csFmem)
82
            , ("MEM_AVAIL", printf "%d" . Cluster.csAmem)
83
            , ("MEM_RESVD",
84
               \cs -> printf "%d" (Cluster.csFmem cs - Cluster.csAmem cs))
85
            , ("MEM_INST", printf "%d" . Cluster.csImem)
86
            , ("MEM_OVERHEAD",
87
               \cs -> printf "%d" (Cluster.csXmem cs + Cluster.csNmem cs))
88
            , ("MEM_EFF",
89
               \cs -> printf "%.8f" (fromIntegral (Cluster.csImem cs) /
90
                                     Cluster.csTmem cs))
91
            , ("DSK_FREE", printf "%d" . Cluster.csFdsk)
92
            , ("DSK_AVAIL", printf "%d". Cluster.csAdsk)
93
            , ("DSK_RESVD",
94
               \cs -> printf "%d" (Cluster.csFdsk cs - Cluster.csAdsk cs))
95
            , ("DSK_INST", printf "%d" . Cluster.csIdsk)
96
            , ("DSK_EFF",
97
               \cs -> printf "%.8f" (fromIntegral (Cluster.csIdsk cs) /
98
                                    Cluster.csTdsk cs))
99
            , ("CPU_INST", printf "%d" . Cluster.csIcpu)
100
            , ("CPU_EFF",
101
               \cs -> printf "%.8f" (fromIntegral (Cluster.csIcpu cs) /
102
                                     Cluster.csTcpu cs))
103
            , ("MNODE_MEM_AVAIL", printf "%d" . Cluster.csMmem)
104
            , ("MNODE_DSK_AVAIL", printf "%d" . Cluster.csMdsk)
105
            ]
106

    
107
specData :: [(String, RSpec -> String)]
108
specData = [ ("MEM", printf "%d" . rspecMem)
109
           , ("DSK", printf "%d" . rspecDsk)
110
           , ("CPU", printf "%d" . rspecCpu)
111
           ]
112

    
113
clusterData :: [(String, Cluster.CStats -> String)]
114
clusterData = [ ("MEM", printf "%.0f" . Cluster.csTmem)
115
              , ("DSK", printf "%.0f" . Cluster.csTdsk)
116
              , ("CPU", printf "%.0f" . Cluster.csTcpu)
117
              ]
118

    
119
-- | Recursively place instances on the cluster until we're out of space
120
iterateDepth :: Node.List
121
             -> Instance.List
122
             -> Instance.Instance
123
             -> Int
124
             -> [Instance.Instance]
125
             -> Result (FailStats, Node.List, [Instance.Instance])
126
iterateDepth nl il newinst nreq ixes =
127
      let depth = length ixes
128
          newname = printf "new-%d" depth::String
129
          newidx = length (Container.elems il) + depth
130
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
131
      in case Cluster.tryAlloc nl il newi2 nreq of
132
           Bad s -> Bad s
133
           Ok (errs, _, sols3) ->
134
               case sols3 of
135
                 [] -> Ok (Cluster.collapseFailures errs, nl, ixes)
136
                 (_, (xnl, xi, _)):[] ->
137
                     iterateDepth xnl il newinst nreq $! (xi:ixes)
138
                 _ -> Bad "Internal error: multiple solutions for single\
139
                          \ allocation"
140

    
141
tieredAlloc :: Node.List
142
            -> Instance.List
143
            -> Instance.Instance
144
            -> Int
145
            -> [Instance.Instance]
146
            -> Result (FailStats, Node.List, [Instance.Instance])
147
tieredAlloc nl il newinst nreq ixes =
148
    case iterateDepth nl il newinst nreq ixes of
149
      Bad s -> Bad s
150
      Ok (errs, nl', ixes') ->
151
          case Instance.shrinkByType newinst . fst . last $
152
               sortBy (comparing snd) errs of
153
            Bad _ -> Ok (errs, nl', ixes')
154
            Ok newinst' ->
155
                tieredAlloc nl' il newinst' nreq ixes'
156

    
157

    
158
-- | Function to print stats for a given phase
159
printStats :: Phase -> Cluster.CStats -> [(String, String)]
160
printStats ph cs =
161
  map (\(s, fn) -> (printf "%s_%s" kind s, fn cs)) statsData
162
  where kind = case ph of
163
                 PInitial -> "INI"
164
                 PFinal -> "FIN"
165
                 PTiered -> "TRL"
166

    
167
-- | Print final stats and related metrics
168
printResults :: Node.List -> Int -> Int -> [(FailMode, Int)] -> IO ()
169
printResults fin_nl num_instances allocs sreason = do
170
  let fin_stats = Cluster.totalResources fin_nl
171
      fin_instances = num_instances + allocs
172

    
173
  when (num_instances + allocs /= Cluster.csNinst fin_stats) $
174
       do
175
         hPrintf stderr "ERROR: internal inconsistency, allocated (%d)\
176
                        \ != counted (%d)\n" (num_instances + allocs)
177
                                 (Cluster.csNinst fin_stats) :: IO ()
178
         exitWith $ ExitFailure 1
179

    
180
  printKeys $ printStats PFinal fin_stats
181
  printKeys [ ("ALLOC_USAGE", printf "%.8f"
182
                                ((fromIntegral num_instances::Double) /
183
                                 fromIntegral fin_instances))
184
            , ("ALLOC_INSTANCES", printf "%d" allocs)
185
            , ("ALLOC_FAIL_REASON", map toUpper . show . fst $ head sreason)
186
            ]
187
  printKeys $ map (\(x, y) -> (printf "ALLOC_%s_CNT" (show x),
188
                               printf "%d" y)) sreason
189
  -- this should be the final entry
190
  printKeys [("OK", "1")]
191

    
192
-- | Ensure a value is quoted if needed
193
ensureQuoted :: String -> String
194
ensureQuoted v = if not (all (\c -> (isAlphaNum c || c == '.')) v)
195
                 then '\'':v ++ "'"
196
                 else v
197

    
198
-- | Format a list of key/values as a shell fragment
199
printKeys :: [(String, String)] -> IO ()
200
printKeys = mapM_ (\(k, v) ->
201
                   printf "HTS_%s=%s\n" (map toUpper k) (ensureQuoted v))
202

    
203
printInstance :: Node.List -> Instance.Instance -> [String]
204
printInstance nl i = [ Instance.name i
205
                     , Container.nameOf nl $ Instance.pNode i
206
                     , let sdx = Instance.sNode i
207
                       in if sdx == Node.noSecondary then ""
208
                          else Container.nameOf nl sdx
209
                     , show (Instance.mem i)
210
                     , show (Instance.dsk i)
211
                     , show (Instance.vcpus i)
212
                     ]
213

    
214
-- | Main function.
215
main :: IO ()
216
main = do
217
  cmd_args <- System.getArgs
218
  (opts, args) <- parseOpts cmd_args "hspace" options
219

    
220
  unless (null args) $ do
221
         hPutStrLn stderr "Error: this program doesn't take any arguments."
222
         exitWith $ ExitFailure 1
223

    
224
  let verbose = optVerbose opts
225
      ispec = optISpec opts
226
      shownodes = optShowNodes opts
227

    
228
  (fixed_nl, il, _, csf) <- loadExternalData opts
229

    
230
  printKeys $ map (\(a, fn) -> ("SPEC_" ++ a, fn ispec)) specData
231
  printKeys [ ("SPEC_RQN", printf "%d" (optINodes opts)) ]
232

    
233
  let num_instances = length $ Container.elems il
234

    
235
  let offline_names = optOffline opts
236
      all_nodes = Container.elems fixed_nl
237
      all_names = map Node.name all_nodes
238
      offline_wrong = filter (`notElem` all_names) offline_names
239
      offline_indices = map Node.idx $
240
                        filter (\n -> Node.name n `elem` offline_names)
241
                               all_nodes
242
      req_nodes = optINodes opts
243
      m_cpu = optMcpu opts
244
      m_dsk = optMdsk opts
245

    
246
  when (length offline_wrong > 0) $ do
247
         hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n"
248
                     (commaJoin offline_wrong) :: IO ()
249
         exitWith $ ExitFailure 1
250

    
251
  when (req_nodes /= 1 && req_nodes /= 2) $ do
252
         hPrintf stderr "Error: Invalid required nodes (%d)\n"
253
                                            req_nodes :: IO ()
254
         exitWith $ ExitFailure 1
255

    
256
  let nm = Container.map (\n -> if Node.idx n `elem` offline_indices
257
                                then Node.setOffline n True
258
                                else n) fixed_nl
259
      nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu)
260
           nm
261

    
262
  when (length csf > 0 && verbose > 1) $
263
       hPrintf stderr "Note: Stripping common suffix of '%s' from names\n" csf
264

    
265
  when (isJust shownodes) $
266
       do
267
         hPutStrLn stderr "Initial cluster status:"
268
         hPutStrLn stderr $ Cluster.printNodes nl (fromJust shownodes)
269

    
270
  let ini_cv = Cluster.compCV nl
271
      ini_stats = Cluster.totalResources nl
272

    
273
  when (verbose > 2) $
274
         hPrintf stderr "Initial coefficients: overall %.8f, %s\n"
275
                 ini_cv (Cluster.printStats nl)
276

    
277
  printKeys $ map (\(a, fn) -> ("CLUSTER_" ++ a, fn ini_stats)) clusterData
278
  printKeys [("CLUSTER_NODES", printf "%d" (length all_nodes))]
279
  printKeys $ printStats PInitial ini_stats
280

    
281
  let bad_nodes = fst $ Cluster.computeBadItems nl il
282
  when (length bad_nodes > 0) $ do
283
         -- This is failn1 case, so we print the same final stats and
284
         -- exit early
285
         printResults nl num_instances 0 [(FailN1, 1)]
286
         exitWith ExitSuccess
287

    
288
  -- utility functions
289
  let iofspec spx = Instance.create "new" (rspecMem spx) (rspecDsk spx)
290
                    (rspecCpu spx) "ADMIN_down" [] (-1) (-1)
291
      exitifbad val = (case val of
292
                         Bad s -> do
293
                           hPrintf stderr "Failure: %s\n" s :: IO ()
294
                           exitWith $ ExitFailure 1
295
                         Ok x -> return x)
296

    
297

    
298
  let reqinst = iofspec ispec
299

    
300
  -- Run the tiered allocation, if enabled
301

    
302
  (case optTieredSpec opts of
303
     Nothing -> return ()
304
     Just tspec -> do
305
       let tresu = tieredAlloc nl il (iofspec tspec) req_nodes []
306
       (_, trl_nl, trl_ixes) <- exitifbad tresu
307
       let fin_trl_ixes = reverse trl_ixes
308
           ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
309
           spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
310
                      ix_byspec::[(RSpec, Int)]
311
           spec_map' = map (\(spec, cnt) ->
312
                                printf "%d,%d,%d=%d" (rspecMem spec)
313
                                       (rspecDsk spec) (rspecCpu spec) cnt)
314
                       spec_map::[String]
315

    
316
       when (verbose > 1) $ do
317
         hPutStrLn stderr "Tiered allocation map"
318
         hPutStr stderr . unlines . map ((:) ' ' .  intercalate " ") $
319
                 formatTable (map (printInstance trl_nl) fin_trl_ixes)
320
                                 [False, False, False, True, True, True]
321

    
322
       when (isJust shownodes) $ do
323
         hPutStrLn stderr ""
324
         hPutStrLn stderr "Tiered allocation status:"
325
         hPutStrLn stderr $ Cluster.printNodes trl_nl (fromJust shownodes)
326

    
327
       printKeys $ printStats PTiered (Cluster.totalResources trl_nl)
328
       printKeys [("TSPEC", intercalate " " spec_map')])
329

    
330
  -- Run the standard (avg-mode) allocation
331

    
332
  let result = iterateDepth nl il reqinst req_nodes []
333
  (ereason, fin_nl, ixes) <- exitifbad result
334

    
335
  let allocs = length ixes
336
      fin_ixes = reverse ixes
337
      sreason = reverse $ sortBy (comparing snd) ereason
338

    
339
  when (verbose > 1) $ do
340
         hPutStrLn stderr "Instance map"
341
         hPutStr stderr . unlines . map ((:) ' ' .  intercalate " ") $
342
                 formatTable (map (printInstance fin_nl) fin_ixes)
343
                                 [False, False, False, True, True, True]
344
  when (isJust shownodes) $
345
       do
346
         hPutStrLn stderr ""
347
         hPutStrLn stderr "Final cluster status:"
348
         hPutStrLn stderr $ Cluster.printNodes fin_nl (fromJust shownodes)
349

    
350
  printResults fin_nl num_instances allocs sreason