Statistics
| Branch: | Tag: | Revision:

root / hbal.hs @ 75d1edf8

History | View | Annotate | Download (11.9 kB)

1
{-| Solver for N+1 cluster errors
2

    
3
-}
4

    
5
module Main (main) where
6

    
7
import Data.List
8
import Data.Function
9
import Data.Maybe (isJust, fromJust, fromMaybe)
10
import Monad
11
import System
12
import System.IO
13
import System.Console.GetOpt
14
import qualified System
15

    
16
import Text.Printf (printf)
17

    
18
import qualified Ganeti.HTools.Container as Container
19
import qualified Ganeti.HTools.Cluster as Cluster
20
import qualified Ganeti.HTools.Node as Node
21
import qualified Ganeti.HTools.CLI as CLI
22
import qualified Ganeti.HTools.Rapi as Rapi
23
import qualified Ganeti.HTools.Text as Text
24
import qualified Ganeti.HTools.Loader as Loader
25

    
26
import Ganeti.HTools.Utils
27
import Ganeti.HTools.Types
28

    
29
-- | Command line options structure.
30
data Options = Options
31
    { optShowNodes :: Bool           -- ^ Whether to show node status
32
    , optShowCmds  :: Maybe FilePath -- ^ Whether to show the command list
33
    , optOneline   :: Bool           -- ^ Switch output to a single line
34
    , optNodef     :: FilePath       -- ^ Path to the nodes file
35
    , optNodeSet   :: Bool           -- ^ The nodes have been set by options
36
    , optInstf     :: FilePath       -- ^ Path to the instances file
37
    , optInstSet   :: Bool           -- ^ The insts have been set by options
38
    , optMaxLength :: Int            -- ^ Stop after this many steps
39
    , optMaster    :: String         -- ^ Collect data from RAPI
40
    , optVerbose   :: Int            -- ^ Verbosity level
41
    , optOffline   :: [String]       -- ^ Names of offline nodes
42
    , optMinScore  :: Cluster.Score  -- ^ The minimum score we aim for
43
    , optShowVer   :: Bool           -- ^ Just show the program version
44
    , optShowHelp  :: Bool           -- ^ Just show the help
45
    } deriving Show
46

    
47
instance CLI.CLIOptions Options where
48
    showVersion = optShowVer
49
    showHelp    = optShowHelp
50

    
51
-- | Default values for the command line options.
52
defaultOptions :: Options
53
defaultOptions  = Options
54
 { optShowNodes = False
55
 , optShowCmds  = Nothing
56
 , optOneline   = False
57
 , optNodef     = "nodes"
58
 , optNodeSet   = False
59
 , optInstf     = "instances"
60
 , optInstSet   = False
61
 , optMaxLength = -1
62
 , optMaster    = ""
63
 , optVerbose   = 1
64
 , optOffline   = []
65
 , optMinScore  = 1e-9
66
 , optShowVer   = False
67
 , optShowHelp  = False
68
 }
69

    
70
-- | Options list and functions
71
options :: [OptDescr (Options -> Options)]
72
options =
73
    [ Option ['p']     ["print-nodes"]
74
      (NoArg (\ opts -> opts { optShowNodes = True }))
75
      "print the final node list"
76
    , Option ['C']     ["print-commands"]
77
      (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-")
78
                  "FILE")
79
      "print the ganeti command list for reaching the solution,\
80
      \if an argument is passed then write the commands to a file named\
81
      \ as such"
82
    , Option ['o']     ["oneline"]
83
      (NoArg (\ opts -> opts { optOneline = True }))
84
      "print the ganeti command list for reaching the solution"
85
    , Option ['n']     ["nodes"]
86
      (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
87
      "the node list FILE"
88
    , Option ['i']     ["instances"]
89
      (ReqArg (\ f opts -> opts { optInstf =  f, optInstSet = True }) "FILE")
90
      "the instance list FILE"
91
    , Option ['m']     ["master"]
92
      (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
93
      "collect data via RAPI at the given ADDRESS"
94
    , Option ['l']     ["max-length"]
95
      (ReqArg (\ i opts -> opts { optMaxLength =  (read i)::Int }) "N")
96
      "cap the solution at this many moves (useful for very unbalanced \
97
      \clusters)"
98
    , Option ['v']     ["verbose"]
99
      (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
100
      "increase the verbosity level"
101
    , Option ['q']     ["quiet"]
102
      (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) - 1 }))
103
      "decrease the verbosity level"
104
    , Option ['O']     ["offline"]
105
      (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
106
      " set node as offline"
107
    , Option ['e']     ["min-score"]
108
      (ReqArg (\ e opts -> opts { optMinScore = read e }) "EPSILON")
109
      " mininum score to aim for"
110
    , Option ['V']     ["version"]
111
      (NoArg (\ opts -> opts { optShowVer = True}))
112
      "show the version of the program"
113
    , Option ['h']     ["help"]
114
      (NoArg (\ opts -> opts { optShowHelp = True}))
115
      "show help"
116
    ]
117

    
118
{- | Start computing the solution at the given depth and recurse until
119
we find a valid solution or we exceed the maximum depth.
120

    
121
-}
122
iterateDepth :: Cluster.Table    -- ^ The starting table
123
             -> Int              -- ^ Remaining length
124
             -> Cluster.NameList -- ^ Node idx to name list
125
             -> Cluster.NameList -- ^ Inst idx to name list
126
             -> Int              -- ^ Max node name len
127
             -> Int              -- ^ Max instance name len
128
             -> [[String]]       -- ^ Current command list
129
             -> Bool             -- ^ Wheter to be silent
130
             -> Cluster.Score    -- ^ Score at which to stop
131
             -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
132
                                               -- commands
133
iterateDepth ini_tbl max_rounds ktn kti nmlen imlen
134
             cmd_strs oneline min_score =
135
    let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
136
        all_inst = Container.elems ini_il
137
        node_idx = map Node.idx . filter (not . Node.offline) $
138
                   Container.elems ini_nl
139
        fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
140
        (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
141
        ini_plc_len = length ini_plc
142
        fin_plc_len = length fin_plc
143
        allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
144
    in
145
      do
146
        let
147
            (sol_line, cmds) = Cluster.printSolutionLine ini_il ktn kti
148
                               nmlen imlen (head fin_plc) fin_plc_len
149
            upd_cmd_strs = cmds:cmd_strs
150
        unless (oneline || fin_plc_len == ini_plc_len) $ do
151
          putStrLn sol_line
152
          hFlush stdout
153
        (if fin_cv < ini_cv then -- this round made success, try deeper
154
             if allowed_next && fin_cv > min_score
155
             then iterateDepth fin_tbl max_rounds ktn kti
156
                  nmlen imlen upd_cmd_strs oneline min_score
157
             -- don't go deeper, but return the better solution
158
             else return (fin_tbl, upd_cmd_strs)
159
         else
160
             return (ini_tbl, cmd_strs))
161

    
162
-- | Formats the solution for the oneline display
163
formatOneline :: Double -> Int -> Double -> String
164
formatOneline ini_cv plc_len fin_cv =
165
    printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
166
               (if fin_cv == 0 then 1 else (ini_cv / fin_cv))
167

    
168
-- | Main function.
169
main :: IO ()
170
main = do
171
  cmd_args <- System.getArgs
172
  (opts, args) <- CLI.parseOpts cmd_args "hbal" options defaultOptions
173

    
174
  unless (null args) $ do
175
         hPutStrLn stderr "Error: this program doesn't take any arguments."
176
         exitWith $ ExitFailure 1
177

    
178
  (env_node, env_inst) <- CLI.parseEnv ()
179
  let nodef = if optNodeSet opts then optNodef opts
180
              else env_node
181
      instf = if optInstSet opts then optInstf opts
182
              else env_inst
183
      oneline = optOneline opts
184
      verbose = optVerbose opts
185
  input_data <-
186
      case optMaster opts of
187
        "" -> Text.loadData nodef instf
188
        host -> Rapi.loadData host
189

    
190
  let ldresult = input_data >>= Loader.mergeData
191

    
192
  (loaded_nl, il, csf, ktn, kti) <-
193
      (case ldresult of
194
         Ok x -> return x
195
         Bad s -> do
196
           printf "Error: failed to load data. Details:\n%s\n" s
197
           exitWith $ ExitFailure 1
198
      )
199
  let (fix_msgs, fixed_nl) = Cluster.checkData loaded_nl il ktn kti
200

    
201
  unless (null fix_msgs || verbose == 0) $ do
202
         putStrLn "Warning: cluster has inconsistent data:"
203
         putStrLn . unlines . map (\s -> printf "  - %s" s) $ fix_msgs
204

    
205
  let offline_names = optOffline opts
206
      all_names = snd . unzip $ ktn
207
      offline_wrong = filter (\n -> not $ elem n all_names) offline_names
208
      offline_indices = fst . unzip .
209
                        filter (\(_, n) -> elem n offline_names) $ ktn
210

    
211
  when (length offline_wrong > 0) $ do
212
         printf "Wrong node name(s) set as offline: %s\n"
213
                (commaJoin offline_wrong)
214
         exitWith $ ExitFailure 1
215

    
216
  let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
217
                                then Node.setOffline n True
218
                                else n) fixed_nl
219

    
220
  when (Container.size il == 0) $ do
221
         (if oneline then
222
              putStrLn $ formatOneline 0 0 0
223
          else
224
              printf "Cluster is empty, exiting.\n")
225
         exitWith ExitSuccess
226

    
227

    
228
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
229
             (Container.size nl)
230
             (Container.size il)
231

    
232
  when (length csf > 0 && not oneline && verbose > 1) $ do
233
         printf "Note: Stripping common suffix of '%s' from names\n" csf
234

    
235
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
236
  unless (oneline || verbose == 0) $ printf
237
             "Initial check done: %d bad nodes, %d bad instances.\n"
238
             (length bad_nodes) (length bad_instances)
239

    
240
  when (length bad_nodes > 0) $ do
241
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
242
                  \that the cluster will end N+1 happy."
243

    
244
  when (optShowNodes opts) $
245
       do
246
         putStrLn "Initial cluster status:"
247
         putStrLn $ Cluster.printNodes ktn nl
248

    
249
  let ini_cv = Cluster.compCV nl
250
      ini_tbl = Cluster.Table nl il ini_cv []
251
      min_cv = optMinScore opts
252

    
253
  when (ini_cv < min_cv) $ do
254
         (if oneline then
255
              putStrLn $ formatOneline ini_cv 0 ini_cv
256
          else printf "Cluster is already well balanced (initial score %.6g,\n\
257
                      \minimum score %.6g).\nNothing to do, exiting\n"
258
                      ini_cv min_cv)
259
         exitWith ExitSuccess
260

    
261
  unless oneline (if verbose > 2 then
262
                      printf "Initial coefficients: overall %.8f, %s\n"
263
                      ini_cv (Cluster.printStats nl)
264
                  else
265
                      printf "Initial score: %.8f\n" ini_cv)
266

    
267
  unless oneline $ putStrLn "Trying to minimize the CV..."
268
  let mlen_fn = maximum . (map length) . snd . unzip
269
      imlen = mlen_fn kti
270
      nmlen = mlen_fn ktn
271

    
272
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
273
                         ktn kti nmlen imlen [] oneline min_cv
274
  let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
275
      ord_plc = reverse fin_plc
276
      sol_msg = if null fin_plc
277
                then printf "No solution found\n"
278
                else (if verbose > 2
279
                      then printf "Final coefficients:   overall %.8f, %s\n"
280
                           fin_cv (Cluster.printStats fin_nl)
281
                      else printf "Cluster score improved from %.8f to %.8f\n"
282
                           ini_cv fin_cv
283
                     )
284

    
285
  unless oneline $ putStr sol_msg
286

    
287
  unless (oneline || verbose == 0) $
288
         printf "Solution length=%d\n" (length ord_plc)
289

    
290
  let cmd_data = Cluster.formatCmds . reverse $ cmd_strs
291

    
292
  when (isJust $ optShowCmds opts) $
293
       do
294
         let out_path = fromJust $ optShowCmds opts
295
         putStrLn ""
296
         (if out_path == "-" then
297
              printf "Commands to run to reach the above solution:\n%s"
298
                     (unlines . map ("  " ++) .
299
                      filter (/= "check") .
300
                      lines $ cmd_data)
301
          else do
302
            writeFile out_path (CLI.shTemplate ++ cmd_data)
303
            printf "The commands have been written to file '%s'\n" out_path)
304

    
305
  when (optShowNodes opts) $
306
       do
307
         let (orig_mem, orig_disk) = Cluster.totalResources nl
308
             (final_mem, final_disk) = Cluster.totalResources fin_nl
309
         putStrLn ""
310
         putStrLn "Final cluster status:"
311
         putStrLn $ Cluster.printNodes ktn fin_nl
312
         when (verbose > 3) $
313
              do
314
                printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
315
                printf "Final:    mem=%d disk=%d\n" final_mem final_disk
316
  when oneline $
317
         putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv