Statistics
| Branch: | Tag: | Revision:

root / hbal.hs @ 45f01962

History | View | Annotate | Download (10.1 kB)

1
{-| Solver for N+1 cluster errors
2

    
3
-}
4

    
5
module Main (main) where
6

    
7
import Data.List
8
import Data.Function
9
import Data.Maybe (isJust, fromJust, fromMaybe)
10
import Monad
11
import System
12
import System.IO
13
import System.Console.GetOpt
14
import qualified System
15

    
16
import Text.Printf (printf)
17

    
18
import qualified Ganeti.HTools.Container as Container
19
import qualified Ganeti.HTools.Cluster as Cluster
20
import qualified Ganeti.HTools.Node as Node
21
import qualified Ganeti.HTools.CLI as CLI
22
import Ganeti.HTools.Rapi
23
import Ganeti.HTools.Utils
24

    
25
-- | Command line options structure.
26
data Options = Options
27
    { optShowNodes :: Bool           -- ^ Whether to show node status
28
    , optShowCmds  :: Maybe FilePath -- ^ Whether to show the command list
29
    , optOneline   :: Bool           -- ^ Switch output to a single line
30
    , optNodef     :: FilePath       -- ^ Path to the nodes file
31
    , optInstf     :: FilePath       -- ^ Path to the instances file
32
    , optMaxLength :: Int            -- ^ Stop after this many steps
33
    , optMaster    :: String         -- ^ Collect data from RAPI
34
    , optVerbose   :: Int            -- ^ Verbosity level
35
    , optOffline   :: [String]       -- ^ Names of offline nodes
36
    , optShowVer   :: Bool           -- ^ Just show the program version
37
    , optShowHelp  :: Bool           -- ^ Just show the help
38
    } deriving Show
39

    
40
-- | Default values for the command line options.
41
defaultOptions :: Options
42
defaultOptions  = Options
43
 { optShowNodes = False
44
 , optShowCmds  = Nothing
45
 , optOneline   = False
46
 , optNodef     = "nodes"
47
 , optInstf     = "instances"
48
 , optMaxLength = -1
49
 , optMaster    = ""
50
 , optVerbose   = 0
51
 , optOffline   = []
52
 , optShowVer   = False
53
 , optShowHelp  = False
54
 }
55

    
56
-- | Options list and functions
57
options :: [OptDescr (Options -> Options)]
58
options =
59
    [ Option ['p']     ["print-nodes"]
60
      (NoArg (\ opts -> opts { optShowNodes = True }))
61
      "print the final node list"
62
    , Option ['C']     ["print-commands"]
63
      (OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-")
64
                  "FILE")
65
      "print the ganeti command list for reaching the solution,\
66
      \if an argument is passed then write the commands to a file named\
67
      \ as such"
68
    , Option ['o']     ["oneline"]
69
      (NoArg (\ opts -> opts { optOneline = True }))
70
      "print the ganeti command list for reaching the solution"
71
    , Option ['n']     ["nodes"]
72
      (ReqArg (\ f opts -> opts { optNodef = f }) "FILE")
73
      "the node list FILE"
74
    , Option ['i']     ["instances"]
75
      (ReqArg (\ f opts -> opts { optInstf =  f }) "FILE")
76
      "the instance list FILE"
77
    , Option ['m']     ["master"]
78
      (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
79
      "collect data via RAPI at the given ADDRESS"
80
    , Option ['l']     ["max-length"]
81
      (ReqArg (\ i opts -> opts { optMaxLength =  (read i)::Int }) "N")
82
      "cap the solution at this many moves (useful for very unbalanced \
83
      \clusters)"
84
    , Option ['v']     ["verbose"]
85
      (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
86
      "increase the verbosity level"
87
    , Option ['O']     ["offline"]
88
      (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
89
       " set node as offline"
90
    , Option ['V']     ["version"]
91
      (NoArg (\ opts -> opts { optShowVer = True}))
92
      "show the version of the program"
93
    , Option ['h']     ["help"]
94
      (NoArg (\ opts -> opts { optShowHelp = True}))
95
      "show help"
96
    ]
97

    
98
{- | Start computing the solution at the given depth and recurse until
99
we find a valid solution or we exceed the maximum depth.
100

    
101
-}
102
iterateDepth :: Cluster.Table    -- ^ The starting table
103
             -> Int              -- ^ Remaining length
104
             -> Cluster.NameList -- ^ Node idx to name list
105
             -> Cluster.NameList -- ^ Inst idx to name list
106
             -> Int              -- ^ Max node name len
107
             -> Int              -- ^ Max instance name len
108
             -> [[String]]       -- ^ Current command list
109
             -> Bool             -- ^ Wheter to be silent
110
             -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
111
                                               -- commands
112
iterateDepth ini_tbl max_rounds ktn kti nmlen imlen cmd_strs oneline =
113
    let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
114
        all_inst = Container.elems ini_il
115
        node_idx = map Node.idx . filter (not . Node.offline) $
116
                   Container.elems ini_nl
117
        fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
118
        (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
119
        ini_plc_len = length ini_plc
120
        fin_plc_len = length fin_plc
121
        allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
122
    in
123
      do
124
        let
125
            (sol_line, cmds) = Cluster.printSolutionLine ini_il ktn kti
126
                               nmlen imlen (head fin_plc) fin_plc_len
127
            upd_cmd_strs = cmds:cmd_strs
128
        unless (oneline || fin_plc_len == ini_plc_len) $ do
129
          putStrLn sol_line
130
          hFlush stdout
131
        (if fin_cv < ini_cv then -- this round made success, try deeper
132
             if allowed_next
133
             then iterateDepth fin_tbl max_rounds ktn kti
134
                  nmlen imlen upd_cmd_strs oneline
135
             -- don't go deeper, but return the better solution
136
             else return (fin_tbl, upd_cmd_strs)
137
         else
138
             return (ini_tbl, cmd_strs))
139

    
140
-- | Main function.
141
main :: IO ()
142
main = do
143
  cmd_args <- System.getArgs
144
  (opts, args) <- CLI.parseOpts cmd_args "hbal" options
145
                  defaultOptions optShowHelp
146

    
147
  unless (null args) $ do
148
         hPutStrLn stderr "Error: this program doesn't take any arguments."
149
         exitWith $ ExitFailure 1
150

    
151
  when (optShowVer opts) $ do
152
         putStr $ CLI.showVersion "hbal"
153
         exitWith ExitSuccess
154

    
155
  let oneline = optOneline opts
156
      verbose = optVerbose opts
157
      (node_data, inst_data) =
158
          case optMaster opts of
159
            "" -> (readFile $ optNodef opts,
160
                   readFile $ optInstf opts)
161
            host -> (readData getNodes host,
162
                     readData getInstances host)
163

    
164
  (loaded_nl, il, csf, ktn, kti) <- liftM2 Cluster.loadData node_data inst_data
165
  let (fix_msgs, fixed_nl) = Cluster.checkData loaded_nl il ktn kti
166

    
167
  unless (null fix_msgs) $ do
168
         putStrLn "Warning: cluster has inconsistent data:"
169
         putStrLn . unlines . map (\s -> printf "  - %s" s) $ fix_msgs
170

    
171
  let offline_names = optOffline opts
172
      all_names = snd . unzip $ ktn
173
      offline_wrong = filter (\n -> not $ elem n all_names) offline_names
174
      offline_indices = fst . unzip .
175
                        filter (\(_, n) -> elem n offline_names) $ ktn
176

    
177
  when (length offline_wrong > 0) $ do
178
         printf "Wrong node name(s) set as offline: %s\n"
179
                (commaJoin offline_wrong)
180
         exitWith $ ExitFailure 1
181

    
182
  let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
183
                                then Node.setOffline n True
184
                                else n) fixed_nl
185

    
186
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
187
             (Container.size nl)
188
             (Container.size il)
189

    
190
  when (length csf > 0 && not oneline && verbose > 0) $ do
191
         printf "Note: Stripping common suffix of '%s' from names\n" csf
192

    
193
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
194
  unless (oneline || verbose == 0) $ printf
195
             "Initial check done: %d bad nodes, %d bad instances.\n"
196
             (length bad_nodes) (length bad_instances)
197

    
198
  when (length bad_nodes > 0) $ do
199
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
200
                  \that the cluster will end N+1 happy."
201

    
202
  when (optShowNodes opts) $
203
       do
204
         putStrLn "Initial cluster status:"
205
         putStrLn $ Cluster.printNodes ktn nl
206

    
207
  let ini_cv = Cluster.compCV nl
208
      ini_tbl = Cluster.Table nl il ini_cv []
209
  unless oneline (if verbose > 1 then
210
                      printf "Initial coefficients: overall %.8f, %s\n"
211
                      ini_cv (Cluster.printStats nl)
212
                  else
213
                      printf "Initial score: %.8f\n" ini_cv)
214

    
215
  unless oneline $ putStrLn "Trying to minimize the CV..."
216
  let mlen_fn = maximum . (map length) . snd . unzip
217
      imlen = mlen_fn kti
218
      nmlen = mlen_fn ktn
219

    
220
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
221
                         ktn kti nmlen imlen [] oneline
222
  let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
223
      ord_plc = reverse fin_plc
224
      sol_msg = if null fin_plc
225
                then printf "No solution found\n"
226
                else (if verbose > 1
227
                      then printf "Final coefficients:   overall %.8f, %s\n"
228
                           fin_cv (Cluster.printStats fin_nl)
229
                      else printf "Cluster score improved from %.8f to %.8f\n"
230
                           ini_cv fin_cv
231
                     )
232

    
233
  unless oneline $ putStr sol_msg
234

    
235
  unless (oneline || verbose == 0) $
236
         printf "Solution length=%d\n" (length ord_plc)
237

    
238
  let cmd_data = Cluster.formatCmds . reverse $ cmd_strs
239

    
240
  when (isJust $ optShowCmds opts) $
241
       do
242
         let out_path = fromJust $ optShowCmds opts
243
         putStrLn ""
244
         (if out_path == "-" then
245
              printf "Commands to run to reach the above solution:\n%s"
246
                     (unlines . map ("  " ++) .
247
                      filter (/= "check") .
248
                      lines $ cmd_data)
249
          else do
250
            writeFile out_path (CLI.shTemplate ++ cmd_data)
251
            printf "The commands have been written to file '%s'\n" out_path)
252

    
253
  when (optShowNodes opts) $
254
       do
255
         let (orig_mem, orig_disk) = Cluster.totalResources nl
256
             (final_mem, final_disk) = Cluster.totalResources fin_nl
257
         putStrLn ""
258
         putStrLn "Final cluster status:"
259
         putStrLn $ Cluster.printNodes ktn fin_nl
260
         when (verbose > 2) $
261
              do
262
                printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
263
                printf "Final:    mem=%d disk=%d\n" final_mem final_disk
264
  when oneline $ do
265
         printf "%.8f %d %.8f %8.3f\n"
266
                ini_cv (length ord_plc) fin_cv (ini_cv / fin_cv)