Statistics
| Branch: | Tag: | Revision:

root / hbal.hs @ 3d7cd10b

History | View | Annotate | Download (9.3 kB)

1
{-| Solver for N+1 cluster errors
2

    
3
-}
4

    
5
module Main (main) where
6

    
7
import Data.List
8
import Data.Function
9
import Monad
10
import System
11
import System.IO
12
import System.Console.GetOpt
13
import qualified System
14

    
15
import Text.Printf (printf)
16

    
17
import qualified Ganeti.HTools.Container as Container
18
import qualified Ganeti.HTools.Cluster as Cluster
19
import qualified Ganeti.HTools.Node as Node
20
import qualified Ganeti.HTools.CLI as CLI
21
import Ganeti.HTools.Rapi
22
import Ganeti.HTools.Utils
23

    
24
-- | Command line options structure.
25
data Options = Options
26
    { optShowNodes :: Bool     -- ^ Whether to show node status
27
    , optShowCmds  :: Bool     -- ^ Whether to show the command list
28
    , optOneline   :: Bool     -- ^ Switch output to a single line
29
    , optNodef     :: FilePath -- ^ Path to the nodes file
30
    , optInstf     :: FilePath -- ^ Path to the instances file
31
    , optMaxLength :: Int      -- ^ Stop after this many steps
32
    , optMaster    :: String   -- ^ Collect data from RAPI
33
    , optVerbose   :: Int      -- ^ Verbosity level
34
    , optOffline   :: [String] -- ^ Names of offline nodes
35
    , optShowVer   :: Bool     -- ^ Just show the program version
36
    , optShowHelp  :: Bool     -- ^ Just show the help
37
    } deriving Show
38

    
39
-- | Default values for the command line options.
40
defaultOptions :: Options
41
defaultOptions  = Options
42
 { optShowNodes = False
43
 , optShowCmds  = False
44
 , optOneline   = False
45
 , optNodef     = "nodes"
46
 , optInstf     = "instances"
47
 , optMaxLength = -1
48
 , optMaster    = ""
49
 , optVerbose   = 0
50
 , optOffline   = []
51
 , optShowVer   = False
52
 , optShowHelp  = False
53
 }
54

    
55
-- | Options list and functions
56
options :: [OptDescr (Options -> Options)]
57
options =
58
    [ Option ['p']     ["print-nodes"]
59
      (NoArg (\ opts -> opts { optShowNodes = True }))
60
      "print the final node list"
61
    , Option ['C']     ["print-commands"]
62
      (NoArg (\ opts -> opts { optShowCmds = True }))
63
      "print the ganeti command list for reaching the solution"
64
    , Option ['o']     ["oneline"]
65
      (NoArg (\ opts -> opts { optOneline = True }))
66
      "print the ganeti command list for reaching the solution"
67
    , Option ['n']     ["nodes"]
68
      (ReqArg (\ f opts -> opts { optNodef = f }) "FILE")
69
      "the node list FILE"
70
    , Option ['i']     ["instances"]
71
      (ReqArg (\ f opts -> opts { optInstf =  f }) "FILE")
72
      "the instance list FILE"
73
    , Option ['m']     ["master"]
74
      (ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS")
75
      "collect data via RAPI at the given ADDRESS"
76
    , Option ['l']     ["max-length"]
77
      (ReqArg (\ i opts -> opts { optMaxLength =  (read i)::Int }) "N")
78
      "cap the solution at this many moves (useful for very unbalanced \
79
      \clusters)"
80
    , Option ['v']     ["verbose"]
81
      (NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 }))
82
      "increase the verbosity level"
83
    , Option ['O']     ["offline"]
84
      (ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE")
85
       " set node as offline"
86
    , Option ['V']     ["version"]
87
      (NoArg (\ opts -> opts { optShowVer = True}))
88
      "show the version of the program"
89
    , Option ['h']     ["help"]
90
      (NoArg (\ opts -> opts { optShowHelp = True}))
91
      "show help"
92
    ]
93

    
94
{- | Start computing the solution at the given depth and recurse until
95
we find a valid solution or we exceed the maximum depth.
96

    
97
-}
98
iterateDepth :: Cluster.Table    -- ^ The starting table
99
             -> Int              -- ^ Remaining length
100
             -> Cluster.NameList -- ^ Node idx to name list
101
             -> Cluster.NameList -- ^ Inst idx to name list
102
             -> Int              -- ^ Max node name len
103
             -> Int              -- ^ Max instance name len
104
             -> [[String]]       -- ^ Current command list
105
             -> Bool             -- ^ Wheter to be silent
106
             -> IO (Cluster.Table, [[String]]) -- ^ The resulting table and
107
                                               -- commands
108
iterateDepth ini_tbl max_rounds ktn kti nmlen imlen cmd_strs oneline =
109
    let Cluster.Table ini_nl ini_il ini_cv ini_plc = ini_tbl
110
        all_inst = Container.elems ini_il
111
        node_idx = map Node.idx . filter (not . Node.offline) $
112
                   Container.elems ini_nl
113
        fin_tbl = Cluster.checkMove node_idx ini_tbl all_inst
114
        (Cluster.Table _ _ fin_cv fin_plc) = fin_tbl
115
        ini_plc_len = length ini_plc
116
        fin_plc_len = length fin_plc
117
        allowed_next = (max_rounds < 0 || length fin_plc < max_rounds)
118
    in
119
      do
120
        let
121
            (sol_line, cmds) = Cluster.printSolutionLine ini_il ktn kti
122
                               nmlen imlen (head fin_plc) fin_plc_len
123
            upd_cmd_strs = cmds:cmd_strs
124
        unless (oneline || fin_plc_len == ini_plc_len) $ do
125
          putStrLn sol_line
126
          hFlush stdout
127
        (if fin_cv < ini_cv then -- this round made success, try deeper
128
             if allowed_next
129
             then iterateDepth fin_tbl max_rounds ktn kti
130
                  nmlen imlen upd_cmd_strs oneline
131
             -- don't go deeper, but return the better solution
132
             else return (fin_tbl, upd_cmd_strs)
133
         else
134
             return (ini_tbl, cmd_strs))
135

    
136
-- | Main function.
137
main :: IO ()
138
main = do
139
  cmd_args <- System.getArgs
140
  (opts, _) <- CLI.parseOpts cmd_args "hbal" options defaultOptions optShowHelp
141

    
142
  when (optShowVer opts) $ do
143
         putStr $ CLI.showVersion "hbal"
144
         exitWith ExitSuccess
145

    
146
  let oneline = optOneline opts
147
      verbose = optVerbose opts
148
      (node_data, inst_data) =
149
          case optMaster opts of
150
            "" -> (readFile $ optNodef opts,
151
                   readFile $ optInstf opts)
152
            host -> (readData getNodes host,
153
                     readData getInstances host)
154

    
155
  (loaded_nl, il, csf, ktn, kti) <- liftM2 Cluster.loadData node_data inst_data
156
  let (fix_msgs, fixed_nl) = Cluster.checkData loaded_nl il ktn kti
157

    
158
  unless (null fix_msgs) $ do
159
         putStrLn "Warning: cluster has inconsistent data:"
160
         putStrLn . unlines . map (\s -> printf "  - %s" s) $ fix_msgs
161

    
162
  let offline_names = optOffline opts
163
      all_names = snd . unzip $ ktn
164
      offline_wrong = filter (\n -> not $ elem n all_names) offline_names
165
      offline_indices = fst . unzip .
166
                        filter (\(_, n) -> elem n offline_names) $ ktn
167

    
168
  when (length offline_wrong > 0) $ do
169
         printf "Wrong node name(s) set as offline: %s\n"
170
                (commaJoin offline_wrong)
171
         exitWith $ ExitFailure 1
172

    
173
  let nl = Container.map (\n -> if elem (Node.idx n) offline_indices
174
                                then Node.setOffline n True
175
                                else n) fixed_nl
176

    
177
  unless oneline $ printf "Loaded %d nodes, %d instances\n"
178
             (Container.size nl)
179
             (Container.size il)
180

    
181
  when (length csf > 0 && not oneline && verbose > 0) $ do
182
         printf "Note: Stripping common suffix of '%s' from names\n" csf
183

    
184
  let (bad_nodes, bad_instances) = Cluster.computeBadItems nl il
185
  unless (oneline || verbose == 0) $ printf
186
             "Initial check done: %d bad nodes, %d bad instances.\n"
187
             (length bad_nodes) (length bad_instances)
188

    
189
  when (length bad_nodes > 0) $ do
190
         putStrLn "Cluster is not N+1 happy, continuing but no guarantee \
191
                  \that the cluster will end N+1 happy."
192

    
193
  when (optShowNodes opts) $
194
       do
195
         putStrLn "Initial cluster status:"
196
         putStrLn $ Cluster.printNodes ktn nl
197

    
198
  let ini_cv = Cluster.compCV nl
199
      ini_tbl = Cluster.Table nl il ini_cv []
200
  unless oneline (if verbose > 1 then
201
                      printf "Initial coefficients: overall %.8f, %s\n"
202
                      ini_cv (Cluster.printStats nl)
203
                  else
204
                      printf "Initial score: %.8f\n" ini_cv)
205

    
206
  unless oneline $ putStrLn "Trying to minimize the CV..."
207
  let mlen_fn = maximum . (map length) . snd . unzip
208
      imlen = mlen_fn kti
209
      nmlen = mlen_fn ktn
210

    
211
  (fin_tbl, cmd_strs) <- iterateDepth ini_tbl (optMaxLength opts)
212
                         ktn kti nmlen imlen [] oneline
213
  let (Cluster.Table fin_nl _ fin_cv fin_plc) = fin_tbl
214
      ord_plc = reverse fin_plc
215
      sol_msg = if null fin_plc
216
                then printf "No solution found\n"
217
                else (if verbose > 1
218
                      then printf "Final coefficients:   overall %.8f, %s\n"
219
                           fin_cv (Cluster.printStats fin_nl)
220
                      else printf "Cluster score improved from %.8f to %.8f\n"
221
                           ini_cv fin_cv
222
                     )
223

    
224
  unless oneline $ putStr sol_msg
225

    
226
  unless (oneline || verbose == 0) $
227
         printf "Solution length=%d\n" (length ord_plc)
228

    
229
  when (optShowCmds opts) $
230
       do
231
         putStrLn ""
232
         putStrLn "Commands to run to reach the above solution:"
233
         putStr . Cluster.formatCmds . reverse $ cmd_strs
234
  when (optShowNodes opts) $
235
       do
236
         let (orig_mem, orig_disk) = Cluster.totalResources nl
237
             (final_mem, final_disk) = Cluster.totalResources fin_nl
238
         putStrLn ""
239
         putStrLn "Final cluster status:"
240
         putStrLn $ Cluster.printNodes ktn fin_nl
241
         when (verbose > 2) $
242
              do
243
                printf "Original: mem=%d disk=%d\n" orig_mem orig_disk
244
                printf "Final:    mem=%d disk=%d\n" final_mem final_disk
245
  when oneline $ do
246
         printf "%.8f %d %.8f %8.3f\n"
247
                ini_cv (length ord_plc) fin_cv (ini_cv / fin_cv)