root / hspace.hs @ 8c4c6a8a
History | View | Annotate | Download (13.1 kB)
1 |
{-| Cluster space sizing |
---|---|
2 |
|
3 |
-} |
4 |
|
5 |
{- |
6 |
|
7 |
Copyright (C) 2009 Google Inc. |
8 |
|
9 |
This program is free software; you can redistribute it and/or modify |
10 |
it under the terms of the GNU General Public License as published by |
11 |
the Free Software Foundation; either version 2 of the License, or |
12 |
(at your option) any later version. |
13 |
|
14 |
This program is distributed in the hope that it will be useful, but |
15 |
WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 |
General Public License for more details. |
18 |
|
19 |
You should have received a copy of the GNU General Public License |
20 |
along with this program; if not, write to the Free Software |
21 |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
22 |
02110-1301, USA. |
23 |
|
24 |
-} |
25 |
|
26 |
module Main (main) where |
27 |
|
28 |
import Data.List |
29 |
import Data.Function |
30 |
import Monad |
31 |
import System |
32 |
import System.IO |
33 |
import System.Console.GetOpt |
34 |
import qualified System |
35 |
|
36 |
import Text.Printf (printf, hPrintf) |
37 |
|
38 |
import qualified Ganeti.HTools.Container as Container |
39 |
import qualified Ganeti.HTools.Cluster as Cluster |
40 |
import qualified Ganeti.HTools.Node as Node |
41 |
import qualified Ganeti.HTools.Instance as Instance |
42 |
import qualified Ganeti.HTools.CLI as CLI |
43 |
|
44 |
import Ganeti.HTools.Utils |
45 |
import Ganeti.HTools.Types |
46 |
|
47 |
-- | Command line options structure. |
48 |
data Options = Options |
49 |
{ optShowNodes :: Bool -- ^ Whether to show node status |
50 |
, optNodef :: FilePath -- ^ Path to the nodes file |
51 |
, optNodeSet :: Bool -- ^ The nodes have been set by options |
52 |
, optInstf :: FilePath -- ^ Path to the instances file |
53 |
, optInstSet :: Bool -- ^ The insts have been set by options |
54 |
, optMaster :: String -- ^ Collect data from RAPI |
55 |
, optVerbose :: Int -- ^ Verbosity level |
56 |
, optOffline :: [String] -- ^ Names of offline nodes |
57 |
, optIMem :: Int -- ^ Instance memory |
58 |
, optIDsk :: Int -- ^ Instance disk |
59 |
, optIVCPUs :: Int -- ^ Instance VCPUs |
60 |
, optINodes :: Int -- ^ Nodes required for an instance |
61 |
, optMcpu :: Double -- ^ Max cpu ratio for nodes |
62 |
, optMdsk :: Double -- ^ Max disk usage ratio for nodes |
63 |
, optShowVer :: Bool -- ^ Just show the program version |
64 |
, optShowHelp :: Bool -- ^ Just show the help |
65 |
} deriving Show |
66 |
|
67 |
instance CLI.CLIOptions Options where |
68 |
showVersion = optShowVer |
69 |
showHelp = optShowHelp |
70 |
|
71 |
instance CLI.EToolOptions Options where |
72 |
nodeFile = optNodef |
73 |
nodeSet = optNodeSet |
74 |
instFile = optInstf |
75 |
instSet = optInstSet |
76 |
masterName = optMaster |
77 |
silent a = optVerbose a == 0 |
78 |
|
79 |
-- | Default values for the command line options. |
80 |
defaultOptions :: Options |
81 |
defaultOptions = Options |
82 |
{ optShowNodes = False |
83 |
, optNodef = "nodes" |
84 |
, optNodeSet = False |
85 |
, optInstf = "instances" |
86 |
, optInstSet = False |
87 |
, optMaster = "" |
88 |
, optVerbose = 1 |
89 |
, optOffline = [] |
90 |
, optIMem = 4096 |
91 |
, optIDsk = 102400 |
92 |
, optIVCPUs = 1 |
93 |
, optINodes = 2 |
94 |
, optMcpu = -1 |
95 |
, optMdsk = -1 |
96 |
, optShowVer = False |
97 |
, optShowHelp = False |
98 |
} |
99 |
|
100 |
-- | Options list and functions |
101 |
options :: [OptDescr (Options -> Options)] |
102 |
options = |
103 |
[ Option ['p'] ["print-nodes"] |
104 |
(NoArg (\ opts -> opts { optShowNodes = True })) |
105 |
"print the final node list" |
106 |
, Option ['n'] ["nodes"] |
107 |
(ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE") |
108 |
"the node list FILE" |
109 |
, Option ['i'] ["instances"] |
110 |
(ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE") |
111 |
"the instance list FILE" |
112 |
, Option ['m'] ["master"] |
113 |
(ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS") |
114 |
"collect data via RAPI at the given ADDRESS" |
115 |
, Option ['v'] ["verbose"] |
116 |
(NoArg (\ opts -> opts { optVerbose = optVerbose opts + 1 })) |
117 |
"increase the verbosity level" |
118 |
, Option ['q'] ["quiet"] |
119 |
(NoArg (\ opts -> opts { optVerbose = optVerbose opts - 1 })) |
120 |
"decrease the verbosity level" |
121 |
, Option ['O'] ["offline"] |
122 |
(ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE") |
123 |
"set node as offline" |
124 |
, Option [] ["memory"] |
125 |
(ReqArg (\ m opts -> opts { optIMem = read m }) "MEMORY") |
126 |
"memory size for instances" |
127 |
, Option [] ["disk"] |
128 |
(ReqArg (\ d opts -> opts { optIDsk = read d }) "DISK") |
129 |
"disk size for instances" |
130 |
, Option [] ["vcpus"] |
131 |
(ReqArg (\ p opts -> opts { optIVCPUs = read p }) "NUM") |
132 |
"number of virtual cpus for instances" |
133 |
, Option [] ["req-nodes"] |
134 |
(ReqArg (\ n opts -> opts { optINodes = read n }) "NODES") |
135 |
"number of nodes for the new instances (1=plain, 2=mirrored)" |
136 |
, Option [] ["max-cpu"] |
137 |
(ReqArg (\ n opts -> opts { optMcpu = read n }) "RATIO") |
138 |
"maximum virtual-to-physical cpu ratio for nodes" |
139 |
, Option [] ["min-disk"] |
140 |
(ReqArg (\ n opts -> opts { optMdsk = read n }) "RATIO") |
141 |
"minimum free disk space for nodes (between 0 and 1)" |
142 |
, Option ['V'] ["version"] |
143 |
(NoArg (\ opts -> opts { optShowVer = True})) |
144 |
"show the version of the program" |
145 |
, Option ['h'] ["help"] |
146 |
(NoArg (\ opts -> opts { optShowHelp = True})) |
147 |
"show help" |
148 |
] |
149 |
|
150 |
-- | Build failure stats out of a list of failure reasons |
151 |
concatFailure :: [(FailMode, Int)] -> FailMode -> [(FailMode, Int)] |
152 |
concatFailure flst reason = |
153 |
let cval = lookup reason flst |
154 |
in case cval of |
155 |
Nothing -> (reason, 1):flst |
156 |
Just val -> let plain = filter (\(x, _) -> x /= reason) flst |
157 |
in (reason, val+1):plain |
158 |
|
159 |
-- | Build list of failures and placements out of an list of possible |
160 |
-- | allocations |
161 |
filterFails :: Cluster.AllocSolution |
162 |
-> ([(FailMode, Int)], |
163 |
[(Node.List, Instance.Instance, [Node.Node])]) |
164 |
filterFails sols = |
165 |
let (alst, blst) = unzip . map (\ (onl, i, nn) -> |
166 |
case onl of |
167 |
OpFail reason -> ([reason], []) |
168 |
OpGood gnl -> ([], [(gnl, i, nn)]) |
169 |
) $ sols |
170 |
aval = concat alst |
171 |
bval = concat blst |
172 |
in (foldl' concatFailure [] aval, bval) |
173 |
|
174 |
-- | Get the placement with best score out of a list of possible placements |
175 |
processResults :: [(Node.List, Instance.Instance, [Node.Node])] |
176 |
-> (Node.List, Instance.Instance, [Node.Node]) |
177 |
processResults sols = |
178 |
let sols' = map (\e@(nl', _, _) -> (Cluster.compCV nl', e)) sols |
179 |
sols'' = sortBy (compare `on` fst) sols' |
180 |
in snd $ head sols'' |
181 |
|
182 |
-- | Recursively place instances on the cluster until we're out of space |
183 |
iterateDepth :: Node.List |
184 |
-> Instance.List |
185 |
-> Instance.Instance |
186 |
-> Int |
187 |
-> [Instance.Instance] |
188 |
-> ([(FailMode, Int)], Node.List, [Instance.Instance]) |
189 |
iterateDepth nl il newinst nreq ixes = |
190 |
let depth = length ixes |
191 |
newname = printf "new-%d" depth::String |
192 |
newidx = length (Container.elems il) + depth |
193 |
newi2 = Instance.setIdx (Instance.setName newinst newname) newidx |
194 |
sols = Cluster.tryAlloc nl il newi2 nreq:: |
195 |
OpResult Cluster.AllocSolution |
196 |
in case sols of |
197 |
OpFail _ -> ([], nl, ixes) |
198 |
OpGood sols' -> |
199 |
let (errs, sols3) = filterFails sols' |
200 |
in if null sols3 |
201 |
then (errs, nl, ixes) |
202 |
else let (xnl, xi, _) = processResults sols3 |
203 |
in iterateDepth xnl il newinst nreq (xi:ixes) |
204 |
|
205 |
-- | Function to print stats for a given phase |
206 |
printStats :: String -> Cluster.CStats -> IO () |
207 |
printStats kind cs = do |
208 |
printf "%s free RAM: %d\n" kind (Cluster.cs_fmem cs) |
209 |
printf "%s allocatable RAM: %d\n" kind (Cluster.cs_amem cs) |
210 |
printf "%s reserved RAM: %d\n" kind (Cluster.cs_fmem cs - |
211 |
Cluster.cs_amem cs) |
212 |
printf "%s instance RAM: %d\n" kind (Cluster.cs_imem cs) |
213 |
printf "%s overhead RAM: %d\n" kind (Cluster.cs_xmem cs + Cluster.cs_nmem cs) |
214 |
printf "%s RAM usage efficiency: %.8f\n" |
215 |
kind (fromIntegral (Cluster.cs_imem cs) / Cluster.cs_tmem cs) |
216 |
printf "%s free disk: %d\n" kind (Cluster.cs_fdsk cs) |
217 |
printf "%s allocatable disk: %d\n" kind (Cluster.cs_adsk cs) |
218 |
printf "%s reserved disk: %d\n" kind (Cluster.cs_fdsk cs - |
219 |
Cluster.cs_adsk cs) |
220 |
printf "%s instance disk: %d\n" kind (Cluster.cs_idsk cs) |
221 |
printf "%s disk usage efficiency: %.8f\n" |
222 |
kind (fromIntegral (Cluster.cs_idsk cs) / Cluster.cs_tdsk cs) |
223 |
printf "%s instance cpus: %d\n" kind (Cluster.cs_icpu cs) |
224 |
printf "%s cpu usage efficiency: %.8f\n" |
225 |
kind (fromIntegral (Cluster.cs_icpu cs) / Cluster.cs_tcpu cs) |
226 |
printf "%s max node allocatable RAM: %d\n" kind (Cluster.cs_mmem cs) |
227 |
printf "%s max node allocatable disk: %d\n" kind (Cluster.cs_mdsk cs) |
228 |
|
229 |
-- | Print final stats and related metrics |
230 |
printResults :: Node.List -> Int -> Int -> [(FailMode, Int)] -> IO () |
231 |
printResults fin_nl num_instances allocs sreason = do |
232 |
let fin_stats = Cluster.totalResources fin_nl |
233 |
fin_instances = num_instances + allocs |
234 |
|
235 |
printf "Final score: %.8f\n" (Cluster.compCV fin_nl) |
236 |
printf "Final instances: %d\n" (num_instances + allocs) |
237 |
printStats "Final" fin_stats |
238 |
printf "Usage: %.5f\n" ((fromIntegral num_instances::Double) / |
239 |
fromIntegral fin_instances) |
240 |
printf "Allocations: %d\n" allocs |
241 |
putStr (unlines . map (\(x, y) -> printf "%s: %d" (show x) y) $ sreason) |
242 |
printf "Most likely fail reason: %s\n" (show . fst . head $ sreason) |
243 |
|
244 |
-- | Main function. |
245 |
main :: IO () |
246 |
main = do |
247 |
cmd_args <- System.getArgs |
248 |
(opts, args) <- CLI.parseOpts cmd_args "hspace" options defaultOptions |
249 |
|
250 |
unless (null args) $ do |
251 |
hPutStrLn stderr "Error: this program doesn't take any arguments." |
252 |
exitWith $ ExitFailure 1 |
253 |
|
254 |
let verbose = optVerbose opts |
255 |
|
256 |
(fixed_nl, il, csf) <- CLI.loadExternalData opts |
257 |
|
258 |
printf "Spec RAM: %d\n" (optIMem opts) |
259 |
printf "Spec disk: %d\n" (optIDsk opts) |
260 |
printf "Spec CPUs: %d\n" (optIVCPUs opts) |
261 |
printf "Spec nodes: %d\n" (optINodes opts) |
262 |
|
263 |
let num_instances = length $ Container.elems il |
264 |
|
265 |
let offline_names = optOffline opts |
266 |
all_nodes = Container.elems fixed_nl |
267 |
all_names = map Node.name all_nodes |
268 |
offline_wrong = filter (flip notElem all_names) offline_names |
269 |
offline_indices = map Node.idx $ |
270 |
filter (\n -> elem (Node.name n) offline_names) |
271 |
all_nodes |
272 |
req_nodes = optINodes opts |
273 |
m_cpu = optMcpu opts |
274 |
m_dsk = optMdsk opts |
275 |
|
276 |
when (length offline_wrong > 0) $ do |
277 |
hPrintf stderr "Error: Wrong node name(s) set as offline: %s\n" |
278 |
(commaJoin offline_wrong) |
279 |
exitWith $ ExitFailure 1 |
280 |
|
281 |
when (req_nodes /= 1 && req_nodes /= 2) $ do |
282 |
hPrintf stderr "Error: Invalid required nodes (%d)\n" req_nodes |
283 |
exitWith $ ExitFailure 1 |
284 |
|
285 |
let nm = Container.map (\n -> if elem (Node.idx n) offline_indices |
286 |
then Node.setOffline n True |
287 |
else n) fixed_nl |
288 |
nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu) |
289 |
nm |
290 |
|
291 |
when (length csf > 0 && verbose > 1) $ |
292 |
printf "Note: Stripping common suffix of '%s' from names\n" csf |
293 |
|
294 |
when (optShowNodes opts) $ |
295 |
do |
296 |
putStrLn "Initial cluster status:" |
297 |
putStrLn $ Cluster.printNodes nl |
298 |
|
299 |
let ini_cv = Cluster.compCV nl |
300 |
ini_stats = Cluster.totalResources nl |
301 |
|
302 |
(if verbose > 2 then |
303 |
printf "Initial coefficients: overall %.8f, %s\n" |
304 |
ini_cv (Cluster.printStats nl) |
305 |
else |
306 |
printf "Initial score: %.8f\n" ini_cv) |
307 |
printf "Cluster RAM: %.0f\n" (Cluster.cs_tmem ini_stats) |
308 |
printf "Cluster disk: %.0f\n" (Cluster.cs_tdsk ini_stats) |
309 |
printf "Cluster cpus: %.0f\n" (Cluster.cs_tcpu ini_stats) |
310 |
printf "Initial instances: %d\n" num_instances |
311 |
printStats "Initial" ini_stats |
312 |
|
313 |
let bad_nodes = fst $ Cluster.computeBadItems nl il |
314 |
when (length bad_nodes > 0) $ do |
315 |
-- This is failn1 case, so we print the same final stats and |
316 |
-- exit early |
317 |
printResults nl num_instances 0 [(FailN1, 1)] |
318 |
exitWith ExitSuccess |
319 |
|
320 |
let nmlen = Container.maxNameLen nl |
321 |
newinst = Instance.create "new" (optIMem opts) (optIDsk opts) |
322 |
(optIVCPUs opts) "ADMIN_down" (-1) (-1) |
323 |
|
324 |
let (ereason, fin_nl, ixes) = iterateDepth nl il newinst req_nodes [] |
325 |
allocs = length ixes |
326 |
fin_ixes = reverse ixes |
327 |
ix_namelen = maximum . map (length . Instance.name) $ fin_ixes |
328 |
sreason = reverse $ sortBy (compare `on` snd) ereason |
329 |
|
330 |
printResults fin_nl num_instances allocs sreason |
331 |
|
332 |
when (verbose > 1) $ |
333 |
putStr . unlines . map (\i -> printf "Inst: %*s %-*s %-*s" |
334 |
ix_namelen (Instance.name i) |
335 |
nmlen (Container.nameOf fin_nl $ Instance.pnode i) |
336 |
nmlen (let sdx = Instance.snode i |
337 |
in if sdx == Node.noSecondary then "" |
338 |
else Container.nameOf fin_nl sdx)) |
339 |
$ fin_ixes |
340 |
|
341 |
when (optShowNodes opts) $ |
342 |
do |
343 |
putStrLn "" |
344 |
putStrLn "Final cluster status:" |
345 |
putStrLn $ Cluster.printNodes fin_nl |