Statistics
| Branch: | Tag: | Revision:

root / hbal.hs @ 8472a321

History | View | Annotate | Download (11.1 kB)

1
{-| Solver for N+1 cluster errors
2

    
3
-}
4

    
5
module Main (main) where
6

    
7
import Data.List
8
import Data.Function
9
import Data.Maybe (isJust, fromJust, fromMaybe)
10
import Monad
11
import System
12
import System.IO
13
import System.Console.GetOpt
14
import qualified System
15

    
16
import Text.Printf (printf)
17

    
18
import qualified Ganeti.HTools.Container as Container
19
import qualified Ganeti.HTools.Cluster as Cluster
20
import qualified Ganeti.HTools.Node as Node
21
import qualified Ganeti.HTools.CLI as CLI
22

    
23
import Ganeti.HTools.Utils
24
import Ganeti.HTools.Types
25

    
26
-- | Command line options structure.
27
data Options = Options
28
    { optShowNodes :: Bool           -- ^ Whether to show node status
29
    , optShowCmds  :: Maybe FilePath -- ^ Whether to show the command list
30
    , optOneline   :: Bool           -- ^ Switch output to a single line
31
    , optNodef     :: FilePath       -- ^ Path to the nodes file
32
    , optNodeSet   :: Bool           -- ^ The nodes have been set by options
33
    , optInstf     :: FilePath       -- ^ Path to the instances file
34
    , optInstSet   :: Bool           -- ^ The insts have been set by options
35
    , optMaxLength :: Int            -- ^ Stop after this many steps
36
    , optMaster    :: String         -- ^ Collect data from RAPI
37
    , optVerbose   :: Int            -- ^ Verbosity level
38
    , optOffline   :: [String]       -- ^ Names of offline nodes
39
    , optMinScore  :: Cluster.Score  -- ^ The minimum score we aim for
40
    , optShowVer   :: Bool           -- ^ Just show the program version
41
    , optShowHelp  :: Bool           -- ^ Just show the help
42
    } deriving Show
43

    
44
instance CLI.CLIOptions Options where
45
    showVersion = optShowVer
46
    showHelp    = optShowHelp
47

    
48
instance CLI.EToolOptions Options where
49
    nodeFile   = optNodef
50
    nodeSet    = optNodeSet
51
    instFile   = optInstf
52
    instSet    = optInstSet
53
    masterName = optMaster
54
    silent a   = (optVerbose a) == 0
55

    
56
-- | Default values for the command line options.
57
defaultOptions :: Options
58
defaultOptions  = Options
59
 { optShowNodes = False
60
 , optShowCmds  = Nothing
61
 , optOneline   = False
62
 , optNodef     = "nodes"
63
 , optNodeSet   = False
64
 , optInstf     = "instances"
65
 , optInstSet   = False
66
 , optMaxLength = -1
67
 , optMaster    = ""
68
 , optVerbose   = 1
69
 , optOffline   = []
70
 , optMinScore  = 1e-9
71
 , optShowVer   = False
72
 , optShowHelp  = False
73
 }
74

    
75
-- | Options list and functions
76
options :: [OptDescr (Options -> Options)]
77
options =
78
    [ Option ['p']     ["print-nodes"]
79
      (NoArg (\ opts -> opts { optShowNodes = True }))
80
      "print the final node list"
81
    , Option ['C']     ["print-commands"]
82
      (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-")
83
                  "FILE")
84
      "print the ganeti command list for reaching the solution,\
85
      \if an argument is passed then write the commands to a file named\
86
      \ as such"
87
    , Option ['o']     ["oneline"]
88
      (NoArg (\ opts -> opts { optOneline = True }))
89
      "print the ganeti command list for reaching the solution"
90
    , Option ['n']     ["nodes"]
91
      (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
92
      "the node list FILE"
93
    , Option ['i']     ["instances"]
94
      (ReqArg (\ f opts -> opts { optInstf =  f, optInstSet = True }) "FILE")
95
      "the instance list FILE"
96
    , Option ['m']     ["master"]
97
      (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
98
      "collect data via RAPI at the given ADDRESS"
99
    , Option ['l']     ["max-length"]
100
      (ReqArg (\ i opts -> opts { optMaxLength =  (read i)::Int }) "N")
101
      "cap the solution at this many moves (useful for very unbalanced \
102
      \clusters)"
103
    , Option ['v']     ["verbose"]
104
      (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
105
      "increase the verbosity level"
106
    , Option ['q']     ["quiet"]
107
      (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) - 1 }))
108
      "decrease the verbosity level"
109
    , Option ['O']     ["offline"]
110
      (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
111
      " set node as offline"
112
    , Option ['e']     ["min-score"]
113
      (ReqArg (\ e opts -> opts { optMinScore = read e }) "EPSILON")
114
      " mininum score to aim for"
115
    , Option ['V']     ["version"]
116
      (NoArg (\ opts -> opts { optShowVer = True}))
117
      "show the version of the program"
118
    , Option ['h']     ["help"]
119
      (NoArg (\ opts -> opts { optShowHelp = True}))
120
      "show help"
121
    ]
122

    
123
{- | Start computing the solution at the given depth and recurse until
124
we find a valid solution or we exceed the maximum depth.
125

    
126
-}
127
iterateDepth :: Cluster.Table    -- ^ The starting table
128
             -> Int              -- ^ Remaining length
129
             -> Int              -- ^ Max node name len
130
             -> Int              -- ^ Max instance name len
131
             -> [[String]]       -- ^ Current command list
132
             -> Bool             -- ^ Wheter to be silent
133
             -> Cluster.Score    -- ^ Score at which to stop
134
             -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
135
                                               -- commands
136
iterateDepth ini_tbl max_rounds nmlen imlen
137
             cmd_strs oneline min_score =
138
    let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
139
        all_inst = Container.elems ini_il
140
        node_idx = map Node.idx . filter (not . Node.offline) $
141
                   Container.elems ini_nl
142
        fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
143
        (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
144
        ini_plc_len = length ini_plc
145
        fin_plc_len = length fin_plc
146
        allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
147
    in
148
      do
149
        let
150
            (sol_line, cmds) = Cluster.printSolutionLine ini_nl ini_il
151
                               nmlen imlen (head fin_plc) fin_plc_len
152
            upd_cmd_strs = cmds:cmd_strs
153
        unless (oneline || fin_plc_len == ini_plc_len) $ do
154
          putStrLn sol_line
155
          hFlush stdout
156
        (if fin_cv < ini_cv then -- this round made success, try deeper
157
             if allowed_next && fin_cv > min_score
158
             then iterateDepth fin_tbl max_rounds
159
                  nmlen imlen upd_cmd_strs oneline min_score
160
             -- don't go deeper, but return the better solution
161
             else return (fin_tbl, upd_cmd_strs)
162
         else
163
             return (ini_tbl, cmd_strs))
164

    
165
-- | Formats the solution for the oneline display
166
formatOneline :: Double -> Int -> Double -> String
167
formatOneline ini_cv plc_len fin_cv =
168
    printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
169
               (if fin_cv == 0 then 1 else (ini_cv / fin_cv))
170

    
171
-- | Main function.
172
main :: IO ()
173
main = do
174
  cmd_args <- System.getArgs
175
  (opts, args) <- CLI.parseOpts cmd_args "hbal" options defaultOptions
176

    
177
  unless (null args) $ do
178
         hPutStrLn stderr "Error: this program doesn't take any arguments."
179
         exitWith $ ExitFailure 1
180

    
181
  let oneline = optOneline opts
182
      verbose = optVerbose opts
183

    
184
  (fixed_nl, il, csf) <- CLI.loadExternalData opts
185

    
186
  let offline_names = optOffline opts
187
      all_nodes = Container.elems fixed_nl
188
      all_names = map Node.name all_nodes
189
      offline_wrong = filter (\n -> not $ elem n all_names) offline_names
190
      offline_indices = map Node.idx $
191
                        filter (\n -> elem (Node.name n) offline_names)
192
                               all_nodes
193

    
194
  when (length offline_wrong > 0) $ do
195
         printf "Wrong node name(s) set as offline: %s\n"
196
                (commaJoin offline_wrong)
197
         exitWith $ ExitFailure 1
198

    
199
  let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
200
                                then Node.setOffline n True
201
                                else n) fixed_nl
202

    
203
  when (Container.size il == 0) $ do
204
         (if oneline then putStrLn $ formatOneline 0 0 0
205
          else printf "Cluster is empty, exiting.\n")
206
         exitWith ExitSuccess
207

    
208
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
209
             (Container.size nl)
210
             (Container.size il)
211

    
212
  when (length csf > 0 && not oneline && verbose > 1) $ do
213
         printf "Note: Stripping common suffix of '%s' from names\n" csf
214

    
215
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
216
  unless (oneline || verbose == 0) $ printf
217
             "Initial check done: %d bad nodes, %d bad instances.\n"
218
             (length bad_nodes) (length bad_instances)
219

    
220
  when (length bad_nodes > 0) $ do
221
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
222
                  \that the cluster will end N+1 happy."
223

    
224
  when (optShowNodes opts) $
225
       do
226
         putStrLn "Initial cluster status:"
227
         putStrLn $ Cluster.printNodes nl
228

    
229
  let ini_cv = Cluster.compCV nl
230
      ini_tbl = Cluster.Table nl il ini_cv []
231
      min_cv = optMinScore opts
232

    
233
  when (ini_cv < min_cv) $ do
234
         (if oneline then
235
              putStrLn $ formatOneline ini_cv 0 ini_cv
236
          else printf "Cluster is already well balanced (initial score %.6g,\n\
237
                      \minimum score %.6g).\nNothing to do, exiting\n"
238
                      ini_cv min_cv)
239
         exitWith ExitSuccess
240

    
241
  unless oneline (if verbose > 2 then
242
                      printf "Initial coefficients: overall %.8f, %s\n"
243
                      ini_cv (Cluster.printStats nl)
244
                  else
245
                      printf "Initial score: %.8f\n" ini_cv)
246

    
247
  unless oneline $ putStrLn "Trying to minimize the CV..."
248
  let imlen = cMaxNamelen il
249
      nmlen = cMaxNamelen nl
250

    
251
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
252
                         nmlen imlen [] oneline min_cv
253
  let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
254
      ord_plc = reverse fin_plc
255
      sol_msg = if null fin_plc
256
                then printf "No solution found\n"
257
                else (if verbose > 2
258
                      then printf "Final coefficients:   overall %.8f, %s\n"
259
                           fin_cv (Cluster.printStats fin_nl)
260
                      else printf "Cluster score improved from %.8f to %.8f\n"
261
                           ini_cv fin_cv
262
                     )
263

    
264
  unless oneline $ putStr sol_msg
265

    
266
  unless (oneline || verbose == 0) $
267
         printf "Solution length=%d\n" (length ord_plc)
268

    
269
  let cmd_data = Cluster.formatCmds . reverse $ cmd_strs
270

    
271
  when (isJust $ optShowCmds opts) $
272
       do
273
         let out_path = fromJust $ optShowCmds opts
274
         putStrLn ""
275
         (if out_path == "-" then
276
              printf "Commands to run to reach the above solution:\n%s"
277
                     (unlines . map ("  " ++) .
278
                      filter (/= "check") .
279
                      lines $ cmd_data)
280
          else do
281
            writeFile out_path (CLI.shTemplate ++ cmd_data)
282
            printf "The commands have been written to file '%s'\n" out_path)
283

    
284
  when (optShowNodes opts) $
285
       do
286
         let (orig_mem, orig_disk) = Cluster.totalResources nl
287
             (final_mem, final_disk) = Cluster.totalResources fin_nl
288
         putStrLn ""
289
         putStrLn "Final cluster status:"
290
         putStrLn $ Cluster.printNodes fin_nl
291
         when (verbose > 3) $
292
              do
293
                printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
294
                printf "Final:    mem=%d disk=%d\n" final_mem final_disk
295
  when oneline $
296
         putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv