root / hail.hs @ f826c5e0
History | View | Annotate | Download (8.3 kB)
1 |
{-| Solver for N+1 cluster errors |
---|---|
2 |
|
3 |
-} |
4 |
|
5 |
module Main (main) where |
6 |
|
7 |
import Data.List |
8 |
import Data.Function |
9 |
import Data.Maybe (isJust, fromJust, fromMaybe) |
10 |
import Monad |
11 |
import System |
12 |
import System.IO |
13 |
import System.Console.GetOpt |
14 |
import qualified System |
15 |
|
16 |
import Text.Printf (printf) |
17 |
|
18 |
import qualified Ganeti.HTools.Container as Container |
19 |
import qualified Ganeti.HTools.Cluster as Cluster |
20 |
import qualified Ganeti.HTools.Node as Node |
21 |
import qualified Ganeti.HTools.Instance as Instance |
22 |
import qualified Ganeti.HTools.CLI as CLI |
23 |
import Ganeti.HTools.IAlloc |
24 |
import Ganeti.HTools.Utils |
25 |
import Ganeti.HTools.Types |
26 |
|
27 |
-- | Command line options structure. |
28 |
data Options = Options |
29 |
{ optShowNodes :: Bool -- ^ Whether to show node status |
30 |
, optShowCmds :: Maybe FilePath -- ^ Whether to show the command list |
31 |
, optOneline :: Bool -- ^ Switch output to a single line |
32 |
, optNodef :: FilePath -- ^ Path to the nodes file |
33 |
, optNodeSet :: Bool -- ^ The nodes have been set by options |
34 |
, optInstf :: FilePath -- ^ Path to the instances file |
35 |
, optInstSet :: Bool -- ^ The insts have been set by options |
36 |
, optMaxLength :: Int -- ^ Stop after this many steps |
37 |
, optMaster :: String -- ^ Collect data from RAPI |
38 |
, optVerbose :: Int -- ^ Verbosity level |
39 |
, optOffline :: [String] -- ^ Names of offline nodes |
40 |
, optMinScore :: Cluster.Score -- ^ The minimum score we aim for |
41 |
, optShowVer :: Bool -- ^ Just show the program version |
42 |
, optShowHelp :: Bool -- ^ Just show the help |
43 |
} deriving Show |
44 |
|
45 |
instance CLI.CLIOptions Options where |
46 |
showVersion = optShowVer |
47 |
showHelp = optShowHelp |
48 |
|
49 |
-- | Default values for the command line options. |
50 |
defaultOptions :: Options |
51 |
defaultOptions = Options |
52 |
{ optShowNodes = False |
53 |
, optShowCmds = Nothing |
54 |
, optOneline = False |
55 |
, optNodef = "nodes" |
56 |
, optNodeSet = False |
57 |
, optInstf = "instances" |
58 |
, optInstSet = False |
59 |
, optMaxLength = -1 |
60 |
, optMaster = "" |
61 |
, optVerbose = 1 |
62 |
, optOffline = [] |
63 |
, optMinScore = 1e-9 |
64 |
, optShowVer = False |
65 |
, optShowHelp = False |
66 |
} |
67 |
|
68 |
-- | Options list and functions |
69 |
options :: [OptDescr (Options -> Options)] |
70 |
options = |
71 |
[ Option ['p'] ["print-nodes"] |
72 |
(NoArg (\ opts -> opts { optShowNodes = True })) |
73 |
"print the final node list" |
74 |
, Option ['C'] ["print-commands"] |
75 |
(OptArg ((\ f opts -> opts { optShowCmds = Just f }) . fromMaybe "-") |
76 |
"FILE") |
77 |
"print the ganeti command list for reaching the solution,\ |
78 |
\if an argument is passed then write the commands to a file named\ |
79 |
\ as such" |
80 |
, Option ['o'] ["oneline"] |
81 |
(NoArg (\ opts -> opts { optOneline = True })) |
82 |
"print the ganeti command list for reaching the solution" |
83 |
, Option ['n'] ["nodes"] |
84 |
(ReqArg (\ f opts -> opts { optNodef = f, optNodeSet = True }) "FILE") |
85 |
"the node list FILE" |
86 |
, Option ['i'] ["instances"] |
87 |
(ReqArg (\ f opts -> opts { optInstf = f, optInstSet = True }) "FILE") |
88 |
"the instance list FILE" |
89 |
, Option ['m'] ["master"] |
90 |
(ReqArg (\ m opts -> opts { optMaster = m }) "ADDRESS") |
91 |
"collect data via RAPI at the given ADDRESS" |
92 |
, Option ['l'] ["max-length"] |
93 |
(ReqArg (\ i opts -> opts { optMaxLength = (read i)::Int }) "N") |
94 |
"cap the solution at this many moves (useful for very unbalanced \ |
95 |
\clusters)" |
96 |
, Option ['v'] ["verbose"] |
97 |
(NoArg (\ opts -> opts { optVerbose = (optVerbose opts) + 1 })) |
98 |
"increase the verbosity level" |
99 |
, Option ['q'] ["quiet"] |
100 |
(NoArg (\ opts -> opts { optVerbose = (optVerbose opts) - 1 })) |
101 |
"decrease the verbosity level" |
102 |
, Option ['O'] ["offline"] |
103 |
(ReqArg (\ n opts -> opts { optOffline = n:optOffline opts }) "NODE") |
104 |
" set node as offline" |
105 |
, Option ['e'] ["min-score"] |
106 |
(ReqArg (\ e opts -> opts { optMinScore = read e }) "EPSILON") |
107 |
" mininum score to aim for" |
108 |
, Option ['V'] ["version"] |
109 |
(NoArg (\ opts -> opts { optShowVer = True})) |
110 |
"show the version of the program" |
111 |
, Option ['h'] ["help"] |
112 |
(NoArg (\ opts -> opts { optShowHelp = True})) |
113 |
"show help" |
114 |
] |
115 |
|
116 |
-- | Try to allocate an instance on the cluster |
117 |
tryAlloc :: (Monad m) => |
118 |
NodeList |
119 |
-> InstanceList |
120 |
-> Instance.Instance |
121 |
-> Int |
122 |
-> m [(Maybe NodeList, [Node.Node])] |
123 |
tryAlloc nl il inst 2 = |
124 |
let all_nodes = Container.elems nl |
125 |
all_pairs = liftM2 (,) all_nodes all_nodes |
126 |
ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs |
127 |
sols1 = map (\(p, s) -> let pdx = Node.idx p |
128 |
sdx = Node.idx s |
129 |
(mnl, _) = Cluster.allocateOn nl |
130 |
inst pdx sdx |
131 |
in (mnl, [p, s]) |
132 |
) ok_pairs |
133 |
in return sols1 |
134 |
|
135 |
tryAlloc _ _ _ reqn = fail $ "Unsupported number of alllocation \ |
136 |
\destinations required (" ++ (show reqn) ++ |
137 |
"), only two supported" |
138 |
|
139 |
-- | Try to allocate an instance on the cluster |
140 |
tryReloc :: (Monad m) => |
141 |
NodeList |
142 |
-> InstanceList |
143 |
-> Int |
144 |
-> Int |
145 |
-> [Int] |
146 |
-> m [(Maybe NodeList, [Node.Node])] |
147 |
tryReloc nl il xid 1 ex_idx = |
148 |
let all_nodes = Container.elems nl |
149 |
inst = Container.find xid il |
150 |
valid_nodes = filter (not . flip elem ex_idx . idx) all_nodes |
151 |
valid_idxes = map Node.idx valid_nodes |
152 |
nl' = Container.map (\n -> if elem (Node.idx n) ex_idx then |
153 |
Node.setOffline n True |
154 |
else n) nl |
155 |
sols1 = map (\x -> let (mnl, _, _, _) = |
156 |
Cluster.applyMove nl' inst |
157 |
(Cluster.ReplaceSecondary x) |
158 |
in (mnl, [Container.find x nl']) |
159 |
) valid_idxes |
160 |
in return sols1 |
161 |
|
162 |
tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \ |
163 |
\destinations required (" ++ (show reqn) ++ |
164 |
"), only one supported" |
165 |
|
166 |
filterFails :: (Monad m) => [(Maybe NodeList, [Node.Node])] |
167 |
-> m [(NodeList, [Node.Node])] |
168 |
filterFails sols = |
169 |
if null sols then fail "No nodes onto which to allocate at all" |
170 |
else let sols' = filter (isJust . fst) sols |
171 |
in if null sols' then |
172 |
fail "No valid allocation solutions" |
173 |
else |
174 |
return $ map (\(x, y) -> (fromJust x, y)) sols' |
175 |
|
176 |
processResults :: (Monad m) => [(NodeList, [Node.Node])] |
177 |
-> m (String, [Node.Node]) |
178 |
processResults sols = |
179 |
let sols' = map (\(nl', ns) -> (Cluster.compCV nl', ns)) sols |
180 |
sols'' = sortBy (compare `on` fst) sols' |
181 |
(best, w) = head sols'' |
182 |
(worst, l) = last sols'' |
183 |
info = printf "Valid results: %d, best score: %.8f (nodes %s), \ |
184 |
\worst score: %.8f (nodes %s)" (length sols'') |
185 |
best (intercalate "/" . map Node.name $ w) |
186 |
worst (intercalate "/" . map Node.name $ l) |
187 |
in return (info, w) |
188 |
|
189 |
-- | Main function. |
190 |
main :: IO () |
191 |
main = do |
192 |
cmd_args <- System.getArgs |
193 |
(opts, args) <- CLI.parseOpts cmd_args "hail" options |
194 |
defaultOptions |
195 |
|
196 |
when (null args) $ do |
197 |
hPutStrLn stderr "Error: this program needs an input file." |
198 |
exitWith $ ExitFailure 1 |
199 |
|
200 |
let input_file = head args |
201 |
input_data <- readFile input_file |
202 |
|
203 |
request <- case (parseData input_data) of |
204 |
Bad err -> do |
205 |
putStrLn $ "Error: " ++ err |
206 |
exitWith $ ExitFailure 1 |
207 |
Ok rq -> return rq |
208 |
|
209 |
let Request rqtype nl il csf = request |
210 |
new_nodes = case rqtype of |
211 |
Allocate xi reqn -> tryAlloc nl il xi reqn |
212 |
Relocate idx reqn exnodes -> |
213 |
tryReloc nl il idx reqn exnodes |
214 |
let sols = new_nodes >>= filterFails >>= processResults |
215 |
let (ok, info, rn) = case sols of |
216 |
Ok (info, sn) -> (True, "Request successful: " ++ info, |
217 |
map ((++ csf) . name) sn) |
218 |
Bad s -> (False, "Request failed: " ++ s, []) |
219 |
resp = formatResponse ok info rn |
220 |
putStrLn resp |