root / hspace.hs @ 44763b51
History | View | Annotate | Download (11.6 kB)
1 |
{-| Cluster space sizing |
---|---|
2 |
|
3 |
-} |
4 |
|
5 |
{- |
6 |
|
7 |
Copyright (C) 2009 Google Inc. |
8 |
|
9 |
This program is free software; you can redistribute it and/or modify |
10 |
it under the terms of the GNU General Public License as published by |
11 |
the Free Software Foundation; either version 2 of the License, or |
12 |
(at your option) any later version. |
13 |
|
14 |
This program is distributed in the hope that it will be useful, but |
15 |
WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 |
General Public License for more details. |
18 |
|
19 |
You should have received a copy of the GNU General Public License |
20 |
along with this program; if not, write to the Free Software |
21 |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
22 |
02110-1301, USA. |
23 |
|
24 |
-} |
25 |
|
26 |
module Main (main) where |
27 |
|
28 |
import Data.List |
29 |
import Data.Function |
30 |
import Monad |
31 |
import System |
32 |
import System.IO |
33 |
import System.Console.GetOpt |
34 |
import qualified System |
35 |
|
36 |
import Text.Printf (printf) |
37 |
|
38 |
import qualified Ganeti.HTools.Container as Container |
39 |
import qualified Ganeti.HTools.Cluster as Cluster |
40 |
import qualified Ganeti.HTools.Node as Node |
41 |
import qualified Ganeti.HTools.Instance as Instance |
42 |
import qualified Ganeti.HTools.CLI as CLI |
43 |
|
44 |
import Ganeti.HTools.Utils |
45 |
import Ganeti.HTools.Types |
46 |
|
47 |
-- | Command line options structure. |
48 |
data Options = Options |
49 |
{ optShowNodes :: Bool -- ^ Whether to show node status |
50 |
, optNodef :: FilePath -- ^ Path to the nodes file |
51 |
, optNodeSet :: Bool -- ^ The nodes have been set by options |
52 |
, optInstf :: FilePath -- ^ Path to the instances file |
53 |
, optInstSet :: Bool -- ^ The insts have been set by options |
54 |
, optMaster :: String -- ^ Collect data from RAPI |
55 |
, optVerbose :: Int -- ^ Verbosity level |
56 |
, optOffline :: [String] -- ^ Names of offline nodes |
57 |
, optIMem :: Int -- ^ Instance memory |
58 |
, optIDsk :: Int -- ^ Instance disk |
59 |
, optIVCPUs :: Int -- ^ Instance VCPUs |
60 |
, optINodes :: Int -- ^ Nodes required for an instance |
61 |
, optMcpu :: Double -- ^ Max cpu ratio for nodes |
62 |
, optMdsk :: Double -- ^ Max disk usage ratio for nodes |
63 |
, optShowVer :: Bool -- ^ Just show the program version |
64 |
, optShowHelp :: Bool -- ^ Just show the help |
65 |
} deriving Show |
66 |
|
67 |
instance CLI.CLIOptions Options where |
68 |
showVersion = optShowVer |
69 |
showHelp = optShowHelp |
70 |
|
71 |
instance CLI.EToolOptions Options where |
72 |
nodeFile = optNodef |
73 |
nodeSet = optNodeSet |
74 |
instFile = optInstf |
75 |
instSet = optInstSet |
76 |
masterName = optMaster |
77 |
silent a = (optVerbose a) == 0 |
78 |
|
79 |
-- | Default values for the command line options. |
80 |
defaultOptions :: Options |
81 |
defaultOptions = Options |
82 |
{ optShowNodes = False |
83 |
, optNodef = "nodes" |
84 |
, optNodeSet = False |
85 |
, optInstf = "instances" |
86 |
, optInstSet = False |
87 |
, optMaster = "" |
88 |
, optVerbose = 1 |
89 |
, optOffline = [] |
90 |
, optIMem = 4096 |
91 |
, optIDsk = 102400 |
92 |
, optIVCPUs = 1 |
93 |
, optINodes = 2 |
94 |
, optMcpu = -1 |
95 |
, optMdsk = -1 |
96 |
, optShowVer = False |
97 |
, optShowHelp = False |
98 |
} |
99 |
|
100 |
-- | Options list and functions |
101 |
options :: [OptDescr (Options -> Options)] |
102 |
options = |
103 |
[ Option ['p'] ["print-nodes"] |
104 |
(NoArg (\ opts -> opts { optShowNodes = True })) |
105 |
"print the final node list" |
106 |
, Option ['n'] ["nodes"] |
107 |
(ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE") |
108 |
"the node list FILE" |
109 |
, Option ['i'] ["instances"] |
110 |
(ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE") |
111 |
"the instance list FILE" |
112 |
, Option ['m'] ["master"] |
113 |
(ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS") |
114 |
"collect data via RAPI at the given ADDRESS" |
115 |
, Option ['v'] ["verbose"] |
116 |
(NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 })) |
117 |
"increase the verbosity level" |
118 |
, Option ['q'] ["quiet"] |
119 |
(NoArg (\ opts -> opts { optVerbose = (optVerbose opts) - 1 })) |
120 |
"decrease the verbosity level" |
121 |
, Option ['O'] ["offline"] |
122 |
(ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE") |
123 |
"set node as offline" |
124 |
, Option [] ["memory"] |
125 |
(ReqArg (\ m opts -> opts { optIMem = read m }) "MEMORY") |
126 |
"memory size for instances" |
127 |
, Option [] ["disk"] |
128 |
(ReqArg (\ d opts -> opts { optIDsk = read d }) "DISK") |
129 |
"disk size for instances" |
130 |
, Option [] ["vcpus"] |
131 |
(ReqArg (\ p opts -> opts { optIVCPUs = read p }) "NUM") |
132 |
"number of virtual cpus for instances" |
133 |
, Option [] ["req-nodes"] |
134 |
(ReqArg (\ n opts -> opts { optINodes = read n }) "NODES") |
135 |
"number of nodes for the new instances (1=plain, 2=mirrored)" |
136 |
, Option [] ["max-cpu"] |
137 |
(ReqArg (\ n opts -> opts { optMcpu = read n }) "RATIO") |
138 |
"maximum virtual-to-physical cpu ratio for nodes" |
139 |
, Option [] ["min-disk"] |
140 |
(ReqArg (\ n opts -> opts { optMdsk = read n }) "RATIO") |
141 |
"minimum free disk space for nodes (between 0 and 1)" |
142 |
, Option ['V'] ["version"] |
143 |
(NoArg (\ opts -> opts { optShowVer = True})) |
144 |
"show the version of the program" |
145 |
, Option ['h'] ["help"] |
146 |
(NoArg (\ opts -> opts { optShowHelp = True})) |
147 |
"show help" |
148 |
] |
149 |
|
150 |
concatFailure :: [(FailMode, Int)] -> FailMode -> [(FailMode, Int)] |
151 |
concatFailure flst reason = |
152 |
let cval = lookup reason flst |
153 |
in case cval of |
154 |
Nothing -> (reason, 1):flst |
155 |
Just val -> let plain = filter (\(x, _) -> x /= reason) flst |
156 |
in (reason, val+1):plain |
157 |
|
158 |
filterFails :: Cluster.AllocSolution |
159 |
-> ([(FailMode, Int)], |
160 |
[(Node.List, Instance.Instance, [Node.Node])]) |
161 |
filterFails sols = |
162 |
let (alst, blst) = unzip . map (\ (onl, i, nn) -> |
163 |
case onl of |
164 |
OpFail reason -> ([reason], []) |
165 |
OpGood gnl -> ([], [(gnl, i, nn)]) |
166 |
) $ sols |
167 |
aval = concat alst |
168 |
bval = concat blst |
169 |
in (foldl' concatFailure [] aval, bval) |
170 |
|
171 |
processResults :: [(Node.List, Instance.Instance, [Node.Node])] |
172 |
-> (Node.List, Instance.Instance, [Node.Node]) |
173 |
processResults sols = |
174 |
let sols' = map (\e@(nl', _, _) -> (Cluster.compCV nl', e)) sols |
175 |
sols'' = sortBy (compare `on` fst) sols' |
176 |
in snd $ head sols'' |
177 |
|
178 |
iterateDepth :: Node.List |
179 |
-> Instance.List |
180 |
-> Instance.Instance |
181 |
-> Int |
182 |
-> [Instance.Instance] |
183 |
-> ([(FailMode, Int)], Node.List, [Instance.Instance]) |
184 |
iterateDepth nl il newinst nreq ixes = |
185 |
let depth = length ixes |
186 |
newname = (printf "new-%d" depth)::String |
187 |
newidx = (length $ Container.elems il) + depth |
188 |
newi2 = Instance.setIdx (Instance.setName newinst newname) newidx |
189 |
sols = (Cluster.tryAlloc nl il newi2 nreq):: |
190 |
OpResult Cluster.AllocSolution |
191 |
in case sols of |
192 |
OpFail _ -> ([], nl, ixes) |
193 |
OpGood sols' -> |
194 |
let (errs, sols3) = filterFails sols' |
195 |
in if null sols3 |
196 |
then (errs, nl, ixes) |
197 |
else let (xnl, xi, _) = processResults sols3 |
198 |
in iterateDepth xnl il newinst nreq (xi:ixes) |
199 |
|
200 |
printStats :: String -> Cluster.CStats -> IO () |
201 |
printStats kind cs = do |
202 |
printf "%s free RAM: %d\n" kind (Cluster.cs_fmem cs) |
203 |
printf "%s allocatable RAM: %d\n" kind (Cluster.cs_amem cs) |
204 |
printf "%s reserved RAM: %d\n" kind ((Cluster.cs_fmem cs) - |
205 |
(Cluster.cs_amem cs)) |
206 |
printf "%s free disk: %d\n" kind (Cluster.cs_fdsk cs) |
207 |
printf "%s allocatable disk: %d\n" kind (Cluster.cs_adsk cs) |
208 |
printf "%s reserved disk: %d\n" kind ((Cluster.cs_fdsk cs) - |
209 |
(Cluster.cs_adsk cs)) |
210 |
printf "%s max node allocatable RAM: %d\n" kind (Cluster.cs_mmem cs) |
211 |
printf "%s max node allocatable disk: %d\n" kind (Cluster.cs_mdsk cs) |
212 |
|
213 |
-- | Main function. |
214 |
main :: IO () |
215 |
main = do |
216 |
cmd_args <- System.getArgs |
217 |
(opts, args) <- CLI.parseOpts cmd_args "hspace" options defaultOptions |
218 |
|
219 |
unless (null args) $ do |
220 |
hPutStrLn stderr "Error: this program doesn't take any arguments." |
221 |
exitWith $ ExitFailure 1 |
222 |
|
223 |
let verbose = optVerbose opts |
224 |
|
225 |
(fixed_nl, il, csf) <- CLI.loadExternalData opts |
226 |
let num_instances = length $ Container.elems il |
227 |
|
228 |
let offline_names = optOffline opts |
229 |
all_nodes = Container.elems fixed_nl |
230 |
all_names = map Node.name all_nodes |
231 |
offline_wrong = filter (\n -> not $ elem n all_names) offline_names |
232 |
offline_indices = map Node.idx $ |
233 |
filter (\n -> elem (Node.name n) offline_names) |
234 |
all_nodes |
235 |
req_nodes = optINodes opts |
236 |
m_cpu = optMcpu opts |
237 |
m_dsk = optMdsk opts |
238 |
|
239 |
when (length offline_wrong > 0) $ do |
240 |
printf "Error: Wrong node name(s) set as offline: %s\n" |
241 |
(commaJoin offline_wrong) |
242 |
exitWith $ ExitFailure 1 |
243 |
|
244 |
when (req_nodes /= 1 && req_nodes /= 2) $ do |
245 |
printf "Error: Invalid required nodes (%d)\n" req_nodes |
246 |
exitWith $ ExitFailure 1 |
247 |
|
248 |
let nm = Container.map (\n -> if elem (Node.idx n) offline_indices |
249 |
then Node.setOffline n True |
250 |
else n) fixed_nl |
251 |
nl = Container.map (flip Node.setMdsk m_dsk . flip Node.setMcpu m_cpu) |
252 |
nm |
253 |
|
254 |
when (length csf > 0 && verbose > 1) $ do |
255 |
printf "Note: Stripping common suffix of '%s' from names\n" csf |
256 |
|
257 |
let bad_nodes = fst $ Cluster.computeBadItems nl il |
258 |
when (length bad_nodes > 0) $ do |
259 |
putStrLn "Error: Cluster not N+1, no space to allocate." |
260 |
exitWith $ ExitFailure 1 |
261 |
|
262 |
when (optShowNodes opts) $ |
263 |
do |
264 |
putStrLn "Initial cluster status:" |
265 |
putStrLn $ Cluster.printNodes nl |
266 |
|
267 |
let ini_cv = Cluster.compCV nl |
268 |
ini_stats = Cluster.totalResources nl |
269 |
|
270 |
(if verbose > 2 then |
271 |
printf "Initial coefficients: overall %.8f, %s\n" |
272 |
ini_cv (Cluster.printStats nl) |
273 |
else |
274 |
printf "Initial score: %.8f\n" ini_cv) |
275 |
printf "Initial instances: %d\n" num_instances |
276 |
printStats "Initial" ini_stats |
277 |
|
278 |
let nmlen = Container.maxNameLen nl |
279 |
newinst = Instance.create "new" (optIMem opts) (optIDsk opts) |
280 |
(optIVCPUs opts) "ADMIN_down" (-1) (-1) |
281 |
|
282 |
let (ereason, fin_nl, ixes) = iterateDepth nl il newinst req_nodes [] |
283 |
allocs = length ixes |
284 |
fin_instances = num_instances + allocs |
285 |
fin_ixes = reverse ixes |
286 |
ix_namelen = maximum . map (length . Instance.name) $ fin_ixes |
287 |
fin_stats = Cluster.totalResources fin_nl |
288 |
sreason = reverse $ sortBy (compare `on` snd) ereason |
289 |
|
290 |
printf "Final score: %.8f\n" (Cluster.compCV fin_nl) |
291 |
printf "Final instances: %d\n" (num_instances + allocs) |
292 |
printStats "Final" fin_stats |
293 |
printf "Usage: %.5f\n" (((fromIntegral num_instances)::Double) / |
294 |
(fromIntegral fin_instances)) |
295 |
printf "Allocations: %d\n" allocs |
296 |
putStr (unlines . map (\(x, y) -> printf "%s: %d" (show x) y) $ sreason) |
297 |
printf "Most likely fail reason: %s\n" (show . fst . head $ sreason) |
298 |
|
299 |
when (verbose > 1) $ do |
300 |
putStr . unlines . map (\i -> printf "Inst: %*s %-*s %-*s" |
301 |
ix_namelen (Instance.name i) |
302 |
nmlen (Container.nameOf fin_nl $ Instance.pnode i) |
303 |
nmlen (let sdx = Instance.snode i |
304 |
in if sdx == Node.noSecondary then "" |
305 |
else Container.nameOf fin_nl sdx)) |
306 |
$ fin_ixes |
307 |
|
308 |
when (optShowNodes opts) $ |
309 |
do |
310 |
putStrLn "" |
311 |
putStrLn "Final cluster status:" |
312 |
putStrLn $ Cluster.printNodes fin_nl |