Statistics
| Branch: | Tag: | Revision:

root / hbal.hs @ d09b6ed3

History | View | Annotate | Download (11.7 kB)

1
{-| Solver for N+1 cluster errors
2

    
3
-}
4

    
5
module Main (main) where
6

    
7
import Data.List
8
import Data.Function
9
import Data.Maybe (isJust, fromJust, fromMaybe)
10
import Monad
11
import System
12
import System.IO
13
import System.Console.GetOpt
14
import qualified System
15

    
16
import Text.Printf (printf)
17

    
18
import qualified Ganeti.HTools.Container as Container
19
import qualified Ganeti.HTools.Cluster as Cluster
20
import qualified Ganeti.HTools.Node as Node
21
import qualified Ganeti.HTools.CLI as CLI
22
import Ganeti.HTools.Rapi
23
import Ganeti.HTools.Utils
24

    
25
-- | Command line options structure.
26
data Options = Options
27
    { optShowNodes :: Bool           -- ^ Whether to show node status
28
    , optShowCmds  :: Maybe FilePath -- ^ Whether to show the command list
29
    , optOneline   :: Bool           -- ^ Switch output to a single line
30
    , optNodef     :: FilePath       -- ^ Path to the nodes file
31
    , optNodeSet   :: Bool           -- ^ The nodes have been set by options
32
    , optInstf     :: FilePath       -- ^ Path to the instances file
33
    , optInstSet   :: Bool           -- ^ The insts have been set by options
34
    , optMaxLength :: Int            -- ^ Stop after this many steps
35
    , optMaster    :: String         -- ^ Collect data from RAPI
36
    , optVerbose   :: Int            -- ^ Verbosity level
37
    , optOffline   :: [String]       -- ^ Names of offline nodes
38
    , optMinScore  :: Cluster.Score  -- ^ The minimum score we aim for
39
    , optShowVer   :: Bool           -- ^ Just show the program version
40
    , optShowHelp  :: Bool           -- ^ Just show the help
41
    } deriving Show
42

    
43
-- | Default values for the command line options.
44
defaultOptions :: Options
45
defaultOptions  = Options
46
 { optShowNodes = False
47
 , optShowCmds  = Nothing
48
 , optOneline   = False
49
 , optNodef     = "nodes"
50
 , optNodeSet   = False
51
 , optInstf     = "instances"
52
 , optInstSet   = False
53
 , optMaxLength = -1
54
 , optMaster    = ""
55
 , optVerbose   = 1
56
 , optOffline   = []
57
 , optMinScore  = 1e-9
58
 , optShowVer   = False
59
 , optShowHelp  = False
60
 }
61

    
62
-- | Options list and functions
63
options :: [OptDescr (Options -> Options)]
64
options =
65
    [ Option ['p']     ["print-nodes"]
66
      (NoArg (\ opts -> opts { optShowNodes = True }))
67
      "print the final node list"
68
    , Option ['C']     ["print-commands"]
69
      (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-")
70
                  "FILE")
71
      "print the ganeti command list for reaching the solution,\
72
      \if an argument is passed then write the commands to a file named\
73
      \ as such"
74
    , Option ['o']     ["oneline"]
75
      (NoArg (\ opts -> opts { optOneline = True }))
76
      "print the ganeti command list for reaching the solution"
77
    , Option ['n']     ["nodes"]
78
      (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
79
      "the node list FILE"
80
    , Option ['i']     ["instances"]
81
      (ReqArg (\ f opts -> opts { optInstf =  f, optInstSet = True }) "FILE")
82
      "the instance list FILE"
83
    , Option ['m']     ["master"]
84
      (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
85
      "collect data via RAPI at the given ADDRESS"
86
    , Option ['l']     ["max-length"]
87
      (ReqArg (\ i opts -> opts { optMaxLength =  (read i)::Int }) "N")
88
      "cap the solution at this many moves (useful for very unbalanced \
89
      \clusters)"
90
    , Option ['v']     ["verbose"]
91
      (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
92
      "increase the verbosity level"
93
    , Option ['q']     ["quiet"]
94
      (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) - 1 }))
95
      "decrease the verbosity level"
96
    , Option ['O']     ["offline"]
97
      (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
98
      " set node as offline"
99
    , Option ['e']     ["min-score"]
100
      (ReqArg (\ e opts -> opts { optMinScore = read e }) "EPSILON")
101
      " mininum score to aim for"
102
    , Option ['V']     ["version"]
103
      (NoArg (\ opts -> opts { optShowVer = True}))
104
      "show the version of the program"
105
    , Option ['h']     ["help"]
106
      (NoArg (\ opts -> opts { optShowHelp = True}))
107
      "show help"
108
    ]
109

    
110
{- | Start computing the solution at the given depth and recurse until
111
we find a valid solution or we exceed the maximum depth.
112

    
113
-}
114
iterateDepth :: Cluster.Table    -- ^ The starting table
115
             -> Int              -- ^ Remaining length
116
             -> Cluster.NameList -- ^ Node idx to name list
117
             -> Cluster.NameList -- ^ Inst idx to name list
118
             -> Int              -- ^ Max node name len
119
             -> Int              -- ^ Max instance name len
120
             -> [[String]]       -- ^ Current command list
121
             -> Bool             -- ^ Wheter to be silent
122
             -> Cluster.Score    -- ^ Score at which to stop
123
             -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
124
                                               -- commands
125
iterateDepth ini_tbl max_rounds ktn kti nmlen imlen
126
             cmd_strs oneline min_score =
127
    let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
128
        all_inst = Container.elems ini_il
129
        node_idx = map Node.idx . filter (not . Node.offline) $
130
                   Container.elems ini_nl
131
        fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
132
        (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
133
        ini_plc_len = length ini_plc
134
        fin_plc_len = length fin_plc
135
        allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
136
    in
137
      do
138
        let
139
            (sol_line, cmds) = Cluster.printSolutionLine ini_il ktn kti
140
                               nmlen imlen (head fin_plc) fin_plc_len
141
            upd_cmd_strs = cmds:cmd_strs
142
        unless (oneline || fin_plc_len == ini_plc_len) $ do
143
          putStrLn sol_line
144
          hFlush stdout
145
        (if fin_cv < ini_cv then -- this round made success, try deeper
146
             if allowed_next && fin_cv > min_score
147
             then iterateDepth fin_tbl max_rounds ktn kti
148
                  nmlen imlen upd_cmd_strs oneline min_score
149
             -- don't go deeper, but return the better solution
150
             else return (fin_tbl, upd_cmd_strs)
151
         else
152
             return (ini_tbl, cmd_strs))
153

    
154
-- | Formats the solution for the oneline display
155
formatOneline :: Double -> Int -> Double -> String
156
formatOneline ini_cv plc_len fin_cv =
157
    printf "%.8f %d %.8f %8.3f" ini_cv plc_len fin_cv
158
               (if fin_cv == 0 then 1 else (ini_cv / fin_cv))
159

    
160
-- | Main function.
161
main :: IO ()
162
main = do
163
  cmd_args <- System.getArgs
164
  (opts, args) <- CLI.parseOpts cmd_args "hbal" options
165
                  defaultOptions optShowHelp
166

    
167
  unless (null args) $ do
168
         hPutStrLn stderr "Error: this program doesn't take any arguments."
169
         exitWith $ ExitFailure 1
170

    
171
  when (optShowVer opts) $ do
172
         putStr $ CLI.showVersion "hbal"
173
         exitWith ExitSuccess
174

    
175
  (env_node, env_inst) <- CLI.parseEnv ()
176
  let nodef = if optNodeSet opts then optNodef opts
177
              else env_node
178
      instf = if optInstSet opts then optInstf opts
179
              else env_inst
180
      oneline = optOneline opts
181
      verbose = optVerbose opts
182
      (node_data, inst_data) =
183
          case optMaster opts of
184
            "" -> (readFile nodef,
185
                   readFile instf)
186
            host -> (readData getNodes host,
187
                     readData getInstances host)
188

    
189
  (loaded_nl, il, csf, ktn, kti) <- liftM2 Cluster.loadData node_data inst_data
190
  let (fix_msgs, fixed_nl) = Cluster.checkData loaded_nl il ktn kti
191

    
192
  unless (null fix_msgs || verbose == 0) $ do
193
         putStrLn "Warning: cluster has inconsistent data:"
194
         putStrLn . unlines . map (\s -> printf "  - %s" s) $ fix_msgs
195

    
196
  let offline_names = optOffline opts
197
      all_names = snd . unzip $ ktn
198
      offline_wrong = filter (\n -> not $ elem n all_names) offline_names
199
      offline_indices = fst . unzip .
200
                        filter (\(_, n) -> elem n offline_names) $ ktn
201

    
202
  when (length offline_wrong > 0) $ do
203
         printf "Wrong node name(s) set as offline: %s\n"
204
                (commaJoin offline_wrong)
205
         exitWith $ ExitFailure 1
206

    
207
  let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
208
                                then Node.setOffline n True
209
                                else n) fixed_nl
210

    
211
  when (Container.size il == 0) $ do
212
         (if oneline then
213
              putStrLn $ formatOneline 0 0 0
214
          else
215
              printf "Cluster is empty, exiting.\n")
216
         exitWith ExitSuccess
217

    
218

    
219
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
220
             (Container.size nl)
221
             (Container.size il)
222

    
223
  when (length csf > 0 && not oneline && verbose > 1) $ do
224
         printf "Note: Stripping common suffix of '%s' from names\n" csf
225

    
226
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
227
  unless (oneline || verbose == 0) $ printf
228
             "Initial check done: %d bad nodes, %d bad instances.\n"
229
             (length bad_nodes) (length bad_instances)
230

    
231
  when (length bad_nodes > 0) $ do
232
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
233
                  \that the cluster will end N+1 happy."
234

    
235
  when (optShowNodes opts) $
236
       do
237
         putStrLn "Initial cluster status:"
238
         putStrLn $ Cluster.printNodes ktn nl
239

    
240
  let ini_cv = Cluster.compCV nl
241
      ini_tbl = Cluster.Table nl il ini_cv []
242
      min_cv = optMinScore opts
243

    
244
  when (ini_cv < min_cv) $ do
245
         (if oneline then
246
              putStrLn $ formatOneline ini_cv 0 ini_cv
247
          else printf "Cluster is already well balanced (initial score %.6g,\n\
248
                      \minimum score %.6g).\nNothing to do, exiting\n"
249
                      ini_cv min_cv)
250
         exitWith ExitSuccess
251

    
252
  unless oneline (if verbose > 2 then
253
                      printf "Initial coefficients: overall %.8f, %s\n"
254
                      ini_cv (Cluster.printStats nl)
255
                  else
256
                      printf "Initial score: %.8f\n" ini_cv)
257

    
258
  unless oneline $ putStrLn "Trying to minimize the CV..."
259
  let mlen_fn = maximum . (map length) . snd . unzip
260
      imlen = mlen_fn kti
261
      nmlen = mlen_fn ktn
262

    
263
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
264
                         ktn kti nmlen imlen [] oneline min_cv
265
  let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
266
      ord_plc = reverse fin_plc
267
      sol_msg = if null fin_plc
268
                then printf "No solution found\n"
269
                else (if verbose > 2
270
                      then printf "Final coefficients:   overall %.8f, %s\n"
271
                           fin_cv (Cluster.printStats fin_nl)
272
                      else printf "Cluster score improved from %.8f to %.8f\n"
273
                           ini_cv fin_cv
274
                     )
275

    
276
  unless oneline $ putStr sol_msg
277

    
278
  unless (oneline || verbose == 0) $
279
         printf "Solution length=%d\n" (length ord_plc)
280

    
281
  let cmd_data = Cluster.formatCmds . reverse $ cmd_strs
282

    
283
  when (isJust $ optShowCmds opts) $
284
       do
285
         let out_path = fromJust $ optShowCmds opts
286
         putStrLn ""
287
         (if out_path == "-" then
288
              printf "Commands to run to reach the above solution:\n%s"
289
                     (unlines . map ("  " ++) .
290
                      filter (/= "check") .
291
                      lines $ cmd_data)
292
          else do
293
            writeFile out_path (CLI.shTemplate ++ cmd_data)
294
            printf "The commands have been written to file '%s'\n" out_path)
295

    
296
  when (optShowNodes opts) $
297
       do
298
         let (orig_mem, orig_disk) = Cluster.totalResources nl
299
             (final_mem, final_disk) = Cluster.totalResources fin_nl
300
         putStrLn ""
301
         putStrLn "Final cluster status:"
302
         putStrLn $ Cluster.printNodes ktn fin_nl
303
         when (verbose > 3) $
304
              do
305
                printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
306
                printf "Final:    mem=%d disk=%d\n" final_mem final_disk
307
  when oneline $
308
         putStrLn $ formatOneline ini_cv (length ord_plc) fin_cv