Statistics
| Branch: | Tag: | Revision:

root / hbal.hs @ dcbcdb58

History | View | Annotate | Download (11.5 kB)

1
{-| Solver for N+1 cluster errors
2

    
3
-}
4

    
5
module Main (main) where
6

    
7
import Data.List
8
import Data.Function
9
import Data.Maybe (isJust, fromJust, fromMaybe)
10
import Monad
11
import System
12
import System.IO
13
import System.Console.GetOpt
14
import qualified System
15

    
16
import Text.Printf (printf)
17

    
18
import qualified Ganeti.HTools.Container as Container
19
import qualified Ganeti.HTools.Cluster as Cluster
20
import qualified Ganeti.HTools.Node as Node
21
import qualified Ganeti.HTools.CLI as CLI
22
import Ganeti.HTools.Rapi
23
import Ganeti.HTools.Utils
24

    
25
-- | Command line options structure.
26
data Options = Options
27
    { optShowNodes :: Bool           -- ^ Whether to show node status
28
    , optShowCmds  :: Maybe FilePath -- ^ Whether to show the command list
29
    , optOneline   :: Bool           -- ^ Switch output to a single line
30
    , optNodef     :: FilePath       -- ^ Path to the nodes file
31
    , optNodeSet   :: Bool           -- ^ The nodes have been set by options
32
    , optInstf     :: FilePath       -- ^ Path to the instances file
33
    , optInstSet   :: Bool           -- ^ The insts have been set by options
34
    , optMaxLength :: Int            -- ^ Stop after this many steps
35
    , optMaster    :: String         -- ^ Collect data from RAPI
36
    , optVerbose   :: Int            -- ^ Verbosity level
37
    , optOffline   :: [String]       -- ^ Names of offline nodes
38
    , optMinScore  :: Cluster.Score  -- ^ The minimum score we aim for
39
    , optShowVer   :: Bool           -- ^ Just show the program version
40
    , optShowHelp  :: Bool           -- ^ Just show the help
41
    } deriving Show
42

    
43
-- | Default values for the command line options.
44
defaultOptions :: Options
45
defaultOptions  = Options
46
 { optShowNodes = False
47
 , optShowCmds  = Nothing
48
 , optOneline   = False
49
 , optNodef     = "nodes"
50
 , optNodeSet   = False
51
 , optInstf     = "instances"
52
 , optInstSet   = False
53
 , optMaxLength = -1
54
 , optMaster    = ""
55
 , optVerbose   = 0
56
 , optOffline   = []
57
 , optMinScore  = 1e-9
58
 , optShowVer   = False
59
 , optShowHelp  = False
60
 }
61

    
62
-- | Options list and functions
63
options :: [OptDescr (Options -> Options)]
64
options =
65
    [ Option ['p']     ["print-nodes"]
66
      (NoArg (\ opts -> opts { optShowNodes = True }))
67
      "print the final node list"
68
    , Option ['C']     ["print-commands"]
69
      (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-")
70
                  "FILE")
71
      "print the ganeti command list for reaching the solution,\
72
      \if an argument is passed then write the commands to a file named\
73
      \ as such"
74
    , Option ['o']     ["oneline"]
75
      (NoArg (\ opts -> opts { optOneline = True }))
76
      "print the ganeti command list for reaching the solution"
77
    , Option ['n']     ["nodes"]
78
      (ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE")
79
      "the node list FILE"
80
    , Option ['i']     ["instances"]
81
      (ReqArg (\ f opts -> opts { optInstf =  f, optInstSet = True }) "FILE")
82
      "the instance list FILE"
83
    , Option ['m']     ["master"]
84
      (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
85
      "collect data via RAPI at the given ADDRESS"
86
    , Option ['l']     ["max-length"]
87
      (ReqArg (\ i opts -> opts { optMaxLength =  (read i)::Int }) "N")
88
      "cap the solution at this many moves (useful for very unbalanced \
89
      \clusters)"
90
    , Option ['v']     ["verbose"]
91
      (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
92
      "increase the verbosity level"
93
    , Option ['O']     ["offline"]
94
      (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
95
      " set node as offline"
96
    , Option ['e']     ["min-score"]
97
      (ReqArg (\ e opts -> opts { optMinScore = read e }) "EPSILON")
98
      " mininum score to aim for"
99
    , Option ['V']     ["version"]
100
      (NoArg (\ opts -> opts { optShowVer = True}))
101
      "show the version of the program"
102
    , Option ['h']     ["help"]
103
      (NoArg (\ opts -> opts { optShowHelp = True}))
104
      "show help"
105
    ]
106

    
107
{- | Start computing the solution at the given depth and recurse until
108
we find a valid solution or we exceed the maximum depth.
109

    
110
-}
111
iterateDepth :: Cluster.Table    -- ^ The starting table
112
             -> Int              -- ^ Remaining length
113
             -> Cluster.NameList -- ^ Node idx to name list
114
             -> Cluster.NameList -- ^ Inst idx to name list
115
             -> Int              -- ^ Max node name len
116
             -> Int              -- ^ Max instance name len
117
             -> [[String]]       -- ^ Current command list
118
             -> Bool             -- ^ Wheter to be silent
119
             -> Cluster.Score    -- ^ Score at which to stop
120
             -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
121
                                               -- commands
122
iterateDepth ini_tbl max_rounds ktn kti nmlen imlen
123
             cmd_strs oneline min_score =
124
    let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
125
        all_inst = Container.elems ini_il
126
        node_idx = map Node.idx . filter (not . Node.offline) $
127
                   Container.elems ini_nl
128
        fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
129
        (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
130
        ini_plc_len = length ini_plc
131
        fin_plc_len = length fin_plc
132
        allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
133
    in
134
      do
135
        let
136
            (sol_line, cmds) = Cluster.printSolutionLine ini_il ktn kti
137
                               nmlen imlen (head fin_plc) fin_plc_len
138
            upd_cmd_strs = cmds:cmd_strs
139
        unless (oneline || fin_plc_len == ini_plc_len) $ do
140
          putStrLn sol_line
141
          hFlush stdout
142
        (if fin_cv < ini_cv then -- this round made success, try deeper
143
             if allowed_next && fin_cv > min_score
144
             then iterateDepth fin_tbl max_rounds ktn kti
145
                  nmlen imlen upd_cmd_strs oneline min_score
146
             -- don't go deeper, but return the better solution
147
             else return (fin_tbl, upd_cmd_strs)
148
         else
149
             return (ini_tbl, cmd_strs))
150

    
151
-- | Main function.
152
main :: IO ()
153
main = do
154
  cmd_args <- System.getArgs
155
  (opts, args) <- CLI.parseOpts cmd_args "hbal" options
156
                  defaultOptions optShowHelp
157

    
158
  unless (null args) $ do
159
         hPutStrLn stderr "Error: this program doesn't take any arguments."
160
         exitWith $ ExitFailure 1
161

    
162
  when (optShowVer opts) $ do
163
         putStr $ CLI.showVersion "hbal"
164
         exitWith ExitSuccess
165

    
166
  (env_node, env_inst) <- CLI.parseEnv ()
167
  let nodef = if optNodeSet opts then optNodef opts
168
              else env_node
169
      instf = if optInstSet opts then optInstf opts
170
              else env_inst
171
      oneline = optOneline opts
172
      verbose = optVerbose opts
173
      (node_data, inst_data) =
174
          case optMaster opts of
175
            "" -> (readFile nodef,
176
                   readFile instf)
177
            host -> (readData getNodes host,
178
                     readData getInstances host)
179

    
180
  (loaded_nl, il, csf, ktn, kti) <- liftM2 Cluster.loadData node_data inst_data
181
  let (fix_msgs, fixed_nl) = Cluster.checkData loaded_nl il ktn kti
182

    
183
  unless (null fix_msgs) $ do
184
         putStrLn "Warning: cluster has inconsistent data:"
185
         putStrLn . unlines . map (\s -> printf "  - %s" s) $ fix_msgs
186

    
187
  let offline_names = optOffline opts
188
      all_names = snd . unzip $ ktn
189
      offline_wrong = filter (\n -> not $ elem n all_names) offline_names
190
      offline_indices = fst . unzip .
191
                        filter (\(_, n) -> elem n offline_names) $ ktn
192

    
193
  when (length offline_wrong > 0) $ do
194
         printf "Wrong node name(s) set as offline: %s\n"
195
                (commaJoin offline_wrong)
196
         exitWith $ ExitFailure 1
197

    
198
  let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
199
                                then Node.setOffline n True
200
                                else n) fixed_nl
201

    
202
  when (Container.size il == 0) $ do
203
         (if oneline then
204
              printf "%.8f %d %.8f %8.3f\n"
205
                         (0::Double) (0::Integer) (0::Double) (1::Double)
206
          else
207
              printf "Cluster is empty, exiting.\n")
208
         exitWith ExitSuccess
209

    
210

    
211
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
212
             (Container.size nl)
213
             (Container.size il)
214

    
215
  when (length csf > 0 && not oneline && verbose > 0) $ do
216
         printf "Note: Stripping common suffix of '%s' from names\n" csf
217

    
218
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
219
  unless (oneline || verbose == 0) $ printf
220
             "Initial check done: %d bad nodes, %d bad instances.\n"
221
             (length bad_nodes) (length bad_instances)
222

    
223
  when (length bad_nodes > 0) $ do
224
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
225
                  \that the cluster will end N+1 happy."
226

    
227
  when (optShowNodes opts) $
228
       do
229
         putStrLn "Initial cluster status:"
230
         putStrLn $ Cluster.printNodes ktn nl
231

    
232
  let ini_cv = Cluster.compCV nl
233
      ini_tbl = Cluster.Table nl il ini_cv []
234
      min_cv = optMinScore opts
235

    
236
  when (ini_cv < min_cv) $ do
237
         (if oneline then
238
              printf "%.8f %d %.8f %8.3f\n"
239
                     ini_cv (0::Integer) ini_cv (1::Double)
240
          else printf "Cluster is already well balanced (initial score %.6g,\n\
241
                      \minimum score %.6g).\nNothing to do, exiting\n"
242
                      ini_cv min_cv)
243
         exitWith ExitSuccess
244

    
245
  unless oneline (if verbose > 1 then
246
                      printf "Initial coefficients: overall %.8f, %s\n"
247
                      ini_cv (Cluster.printStats nl)
248
                  else
249
                      printf "Initial score: %.8f\n" ini_cv)
250

    
251
  unless oneline $ putStrLn "Trying to minimize the CV..."
252
  let mlen_fn = maximum . (map length) . snd . unzip
253
      imlen = mlen_fn kti
254
      nmlen = mlen_fn ktn
255

    
256
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
257
                         ktn kti nmlen imlen [] oneline min_cv
258
  let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
259
      ord_plc = reverse fin_plc
260
      sol_msg = if null fin_plc
261
                then printf "No solution found\n"
262
                else (if verbose > 1
263
                      then printf "Final coefficients:   overall %.8f, %s\n"
264
                           fin_cv (Cluster.printStats fin_nl)
265
                      else printf "Cluster score improved from %.8f to %.8f\n"
266
                           ini_cv fin_cv
267
                     )
268

    
269
  unless oneline $ putStr sol_msg
270

    
271
  unless (oneline || verbose == 0) $
272
         printf "Solution length=%d\n" (length ord_plc)
273

    
274
  let cmd_data = Cluster.formatCmds . reverse $ cmd_strs
275

    
276
  when (isJust $ optShowCmds opts) $
277
       do
278
         let out_path = fromJust $ optShowCmds opts
279
         putStrLn ""
280
         (if out_path == "-" then
281
              printf "Commands to run to reach the above solution:\n%s"
282
                     (unlines . map ("  " ++) .
283
                      filter (/= "check") .
284
                      lines $ cmd_data)
285
          else do
286
            writeFile out_path (CLI.shTemplate ++ cmd_data)
287
            printf "The commands have been written to file '%s'\n" out_path)
288

    
289
  when (optShowNodes opts) $
290
       do
291
         let (orig_mem, orig_disk) = Cluster.totalResources nl
292
             (final_mem, final_disk) = Cluster.totalResources fin_nl
293
         putStrLn ""
294
         putStrLn "Final cluster status:"
295
         putStrLn $ Cluster.printNodes ktn fin_nl
296
         when (verbose > 2) $
297
              do
298
                printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
299
                printf "Final:    mem=%d disk=%d\n" final_mem final_disk
300
  when oneline $ do
301
         printf "%.8f %d %.8f %8.3f\n"
302
                ini_cv (length ord_plc) fin_cv (ini_cv / fin_cv)