root / Ganeti / HTools / Cluster.hs @ e2af3156
History | View | Annotate | Download (30 kB)
1 |
{-| Implementation of cluster-wide logic. |
---|---|
2 |
|
3 |
This module holds all pure cluster-logic; I\/O related functionality |
4 |
goes into the "Main" module for the individual binaries. |
5 |
|
6 |
-} |
7 |
|
8 |
{- |
9 |
|
10 |
Copyright (C) 2009 Google Inc. |
11 |
|
12 |
This program is free software; you can redistribute it and/or modify |
13 |
it under the terms of the GNU General Public License as published by |
14 |
the Free Software Foundation; either version 2 of the License, or |
15 |
(at your option) any later version. |
16 |
|
17 |
This program is distributed in the hope that it will be useful, but |
18 |
WITHOUT ANY WARRANTY; without even the implied warranty of |
19 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
20 |
General Public License for more details. |
21 |
|
22 |
You should have received a copy of the GNU General Public License |
23 |
along with this program; if not, write to the Free Software |
24 |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
25 |
02110-1301, USA. |
26 |
|
27 |
-} |
28 |
|
29 |
module Ganeti.HTools.Cluster |
30 |
( |
31 |
-- * Types |
32 |
Placement |
33 |
, AllocSolution |
34 |
, Solution(..) |
35 |
, Table(..) |
36 |
, Removal |
37 |
, Score |
38 |
, IMove(..) |
39 |
-- * Generic functions |
40 |
, totalResources |
41 |
-- * First phase functions |
42 |
, computeBadItems |
43 |
-- * Second phase functions |
44 |
, computeSolution |
45 |
, applySolution |
46 |
, printSolution |
47 |
, printSolutionLine |
48 |
, formatCmds |
49 |
, printNodes |
50 |
-- * Balacing functions |
51 |
, applyMove |
52 |
, checkMove |
53 |
, compCV |
54 |
, printStats |
55 |
-- * IAllocator functions |
56 |
, allocateOnSingle |
57 |
, allocateOnPair |
58 |
, tryAlloc |
59 |
, tryReloc |
60 |
) where |
61 |
|
62 |
import Data.List |
63 |
import Data.Maybe (isNothing, fromJust) |
64 |
import Text.Printf (printf) |
65 |
import Data.Function |
66 |
import Control.Monad |
67 |
|
68 |
import qualified Ganeti.HTools.Container as Container |
69 |
import qualified Ganeti.HTools.Instance as Instance |
70 |
import qualified Ganeti.HTools.Node as Node |
71 |
import Ganeti.HTools.Types |
72 |
import Ganeti.HTools.Utils |
73 |
|
74 |
-- * Types |
75 |
|
76 |
-- | A separate name for the cluster score type. |
77 |
type Score = Double |
78 |
|
79 |
-- | The description of an instance placement. |
80 |
type Placement = (Idx, Ndx, Ndx, Score) |
81 |
|
82 |
-- | Allocation\/relocation solution. |
83 |
type AllocSolution = [(Maybe Node.List, Instance.Instance, [Node.Node])] |
84 |
|
85 |
-- | A cluster solution described as the solution delta and the list |
86 |
-- of placements. |
87 |
data Solution = Solution Int [Placement] |
88 |
deriving (Eq, Ord, Show) |
89 |
|
90 |
-- | A removal set. |
91 |
data Removal = Removal Node.List [Instance.Instance] |
92 |
|
93 |
-- | An instance move definition |
94 |
data IMove = Failover -- ^ Failover the instance (f) |
95 |
| ReplacePrimary Ndx -- ^ Replace primary (f, r:np, f) |
96 |
| ReplaceSecondary Ndx -- ^ Replace secondary (r:ns) |
97 |
| ReplaceAndFailover Ndx -- ^ Replace secondary, failover (r:np, f) |
98 |
| FailoverAndReplace Ndx -- ^ Failover, replace secondary (f, r:ns) |
99 |
deriving (Show) |
100 |
|
101 |
-- | The complete state for the balancing solution |
102 |
data Table = Table Node.List Instance.List Score [Placement] |
103 |
deriving (Show) |
104 |
|
105 |
-- * Utility functions |
106 |
|
107 |
-- | Returns the delta of a solution or -1 for Nothing. |
108 |
solutionDelta :: Maybe Solution -> Int |
109 |
solutionDelta sol = case sol of |
110 |
Just (Solution d _) -> d |
111 |
_ -> -1 |
112 |
|
113 |
-- | Cap the removal list if needed. |
114 |
capRemovals :: [a] -> Int -> [a] |
115 |
capRemovals removals max_removals = |
116 |
if max_removals > 0 then |
117 |
take max_removals removals |
118 |
else |
119 |
removals |
120 |
|
121 |
-- | Check if the given node list fails the N+1 check. |
122 |
verifyN1Check :: [Node.Node] -> Bool |
123 |
verifyN1Check nl = any Node.failN1 nl |
124 |
|
125 |
-- | Verifies the N+1 status and return the affected nodes. |
126 |
verifyN1 :: [Node.Node] -> [Node.Node] |
127 |
verifyN1 nl = filter Node.failN1 nl |
128 |
|
129 |
{-| Computes the pair of bad nodes and instances. |
130 |
|
131 |
The bad node list is computed via a simple 'verifyN1' check, and the |
132 |
bad instance list is the list of primary and secondary instances of |
133 |
those nodes. |
134 |
|
135 |
-} |
136 |
computeBadItems :: Node.List -> Instance.List -> |
137 |
([Node.Node], [Instance.Instance]) |
138 |
computeBadItems nl il = |
139 |
let bad_nodes = verifyN1 $ getOnline nl |
140 |
bad_instances = map (\idx -> Container.find idx il) $ |
141 |
sort $ nub $ concat $ |
142 |
map (\ n -> (Node.slist n) ++ (Node.plist n)) bad_nodes |
143 |
in |
144 |
(bad_nodes, bad_instances) |
145 |
|
146 |
-- | Compute the total free disk and memory in the cluster. |
147 |
totalResources :: Node.List -> (Int, Int, Int, Int, Int) |
148 |
totalResources nl = |
149 |
foldl' |
150 |
(\ (mem, dsk, amem, mmem, mdsk) node -> |
151 |
let inc_amem = (Node.f_mem node) - (Node.r_mem node) |
152 |
in (mem + (Node.f_mem node), |
153 |
dsk + (Node.f_dsk node), |
154 |
amem + (if inc_amem > 0 then inc_amem else 0), |
155 |
max mmem inc_amem, |
156 |
max mdsk (Node.f_dsk node) |
157 |
) |
158 |
) (0, 0, 0, 0, 0) (Container.elems nl) |
159 |
|
160 |
-- | Compute the mem and disk covariance. |
161 |
compDetailedCV :: Node.List -> (Double, Double, Double, Double, Double, Double) |
162 |
compDetailedCV nl = |
163 |
let |
164 |
all_nodes = Container.elems nl |
165 |
(offline, nodes) = partition Node.offline all_nodes |
166 |
mem_l = map Node.p_mem nodes |
167 |
dsk_l = map Node.p_dsk nodes |
168 |
mem_cv = varianceCoeff mem_l |
169 |
dsk_cv = varianceCoeff dsk_l |
170 |
n1_l = length $ filter Node.failN1 nodes |
171 |
n1_score = ((fromIntegral n1_l) / |
172 |
(fromIntegral $ length nodes))::Double |
173 |
res_l = map Node.p_rem nodes |
174 |
res_cv = varianceCoeff res_l |
175 |
offline_inst = sum . map (\n -> (length . Node.plist $ n) + |
176 |
(length . Node.slist $ n)) $ offline |
177 |
online_inst = sum . map (\n -> (length . Node.plist $ n) + |
178 |
(length . Node.slist $ n)) $ nodes |
179 |
off_score = ((fromIntegral offline_inst) / |
180 |
(fromIntegral $ online_inst + offline_inst))::Double |
181 |
cpu_l = map Node.p_cpu nodes |
182 |
cpu_cv = varianceCoeff cpu_l |
183 |
in (mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv) |
184 |
|
185 |
-- | Compute the /total/ variance. |
186 |
compCV :: Node.List -> Double |
187 |
compCV nl = |
188 |
let (mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv) = |
189 |
compDetailedCV nl |
190 |
in mem_cv + dsk_cv + n1_score + res_cv + off_score + cpu_cv |
191 |
|
192 |
-- | Compute online nodes from a Node.List |
193 |
getOnline :: Node.List -> [Node.Node] |
194 |
getOnline = filter (not . Node.offline) . Container.elems |
195 |
|
196 |
-- * hn1 functions |
197 |
|
198 |
-- | Add an instance and return the new node and instance maps. |
199 |
addInstance :: Node.List -> Instance.Instance -> |
200 |
Node.Node -> Node.Node -> Maybe Node.List |
201 |
addInstance nl idata pri sec = |
202 |
let pdx = Node.idx pri |
203 |
sdx = Node.idx sec |
204 |
in do |
205 |
pnode <- Node.addPri pri idata |
206 |
snode <- Node.addSec sec idata pdx |
207 |
new_nl <- return $ Container.addTwo sdx snode |
208 |
pdx pnode nl |
209 |
return new_nl |
210 |
|
211 |
-- | Remove an instance and return the new node and instance maps. |
212 |
removeInstance :: Node.List -> Instance.Instance -> Node.List |
213 |
removeInstance nl idata = |
214 |
let pnode = Instance.pnode idata |
215 |
snode = Instance.snode idata |
216 |
pn = Container.find pnode nl |
217 |
sn = Container.find snode nl |
218 |
new_nl = Container.addTwo |
219 |
pnode (Node.removePri pn idata) |
220 |
snode (Node.removeSec sn idata) nl in |
221 |
new_nl |
222 |
|
223 |
-- | Remove an instance and return the new node map. |
224 |
removeInstances :: Node.List -> [Instance.Instance] -> Node.List |
225 |
removeInstances = foldl' removeInstance |
226 |
|
227 |
|
228 |
{-| Compute a new version of a cluster given a solution. |
229 |
|
230 |
This is not used for computing the solutions, but for applying a |
231 |
(known-good) solution to the original cluster for final display. |
232 |
|
233 |
It first removes the relocated instances after which it places them on |
234 |
their new nodes. |
235 |
|
236 |
-} |
237 |
applySolution :: Node.List -> Instance.List -> [Placement] -> Node.List |
238 |
applySolution nl il sol = |
239 |
let odxes = map (\ (a, b, c, _) -> (Container.find a il, |
240 |
Node.idx (Container.find b nl), |
241 |
Node.idx (Container.find c nl)) |
242 |
) sol |
243 |
idxes = (\ (x, _, _) -> x) (unzip3 odxes) |
244 |
nc = removeInstances nl idxes |
245 |
in |
246 |
foldl' (\ nz (a, b, c) -> |
247 |
let new_p = Container.find b nz |
248 |
new_s = Container.find c nz in |
249 |
fromJust (addInstance nz a new_p new_s) |
250 |
) nc odxes |
251 |
|
252 |
|
253 |
-- ** First phase functions |
254 |
|
255 |
{-| Given a list 1,2,3..n build a list of pairs [(1, [2..n]), (2, |
256 |
[3..n]), ...] |
257 |
|
258 |
-} |
259 |
genParts :: [a] -> Int -> [(a, [a])] |
260 |
genParts l count = |
261 |
case l of |
262 |
[] -> [] |
263 |
x:xs -> |
264 |
if length l < count then |
265 |
[] |
266 |
else |
267 |
(x, xs) : (genParts xs count) |
268 |
|
269 |
-- | Generates combinations of count items from the names list. |
270 |
genNames :: Int -> [b] -> [[b]] |
271 |
genNames count1 names1 = |
272 |
let aux_fn count names current = |
273 |
case count of |
274 |
0 -> [current] |
275 |
_ -> |
276 |
concatMap |
277 |
(\ (x, xs) -> aux_fn (count - 1) xs (x:current)) |
278 |
(genParts names count) |
279 |
in |
280 |
aux_fn count1 names1 [] |
281 |
|
282 |
{-| Checks if removal of instances results in N+1 pass. |
283 |
|
284 |
Note: the check removal cannot optimize by scanning only the affected |
285 |
nodes, since the cluster is known to be not healthy; only the check |
286 |
placement can make this shortcut. |
287 |
|
288 |
-} |
289 |
checkRemoval :: Node.List -> [Instance.Instance] -> Maybe Removal |
290 |
checkRemoval nl victims = |
291 |
let nx = removeInstances nl victims |
292 |
failN1 = verifyN1Check (Container.elems nx) |
293 |
in |
294 |
if failN1 then |
295 |
Nothing |
296 |
else |
297 |
Just $ Removal nx victims |
298 |
|
299 |
|
300 |
-- | Computes the removals list for a given depth. |
301 |
computeRemovals :: Node.List |
302 |
-> [Instance.Instance] |
303 |
-> Int |
304 |
-> [Maybe Removal] |
305 |
computeRemovals nl bad_instances depth = |
306 |
map (checkRemoval nl) $ genNames depth bad_instances |
307 |
|
308 |
-- ** Second phase functions |
309 |
|
310 |
-- | Single-node relocation cost. |
311 |
nodeDelta :: Ndx -> Ndx -> Ndx -> Int |
312 |
nodeDelta i p s = |
313 |
if i == p || i == s then |
314 |
0 |
315 |
else |
316 |
1 |
317 |
|
318 |
-- | Compute best solution. |
319 |
-- |
320 |
-- This function compares two solutions, choosing the minimum valid |
321 |
-- solution. |
322 |
compareSolutions :: Maybe Solution -> Maybe Solution -> Maybe Solution |
323 |
compareSolutions a b = case (a, b) of |
324 |
(Nothing, x) -> x |
325 |
(x, Nothing) -> x |
326 |
(x, y) -> min x y |
327 |
|
328 |
-- | Check if a given delta is worse then an existing solution. |
329 |
tooHighDelta :: Maybe Solution -> Int -> Int -> Bool |
330 |
tooHighDelta sol new_delta max_delta = |
331 |
if new_delta > max_delta && max_delta >=0 then |
332 |
True |
333 |
else |
334 |
case sol of |
335 |
Nothing -> False |
336 |
Just (Solution old_delta _) -> old_delta <= new_delta |
337 |
|
338 |
{-| Check if placement of instances still keeps the cluster N+1 compliant. |
339 |
|
340 |
This is the workhorse of the allocation algorithm: given the |
341 |
current node and instance maps, the list of instances to be |
342 |
placed, and the current solution, this will return all possible |
343 |
solution by recursing until all target instances are placed. |
344 |
|
345 |
-} |
346 |
checkPlacement :: Node.List -- ^ The current node list |
347 |
-> [Instance.Instance] -- ^ List of instances still to place |
348 |
-> [Placement] -- ^ Partial solution until now |
349 |
-> Int -- ^ The delta of the partial solution |
350 |
-> Maybe Solution -- ^ The previous solution |
351 |
-> Int -- ^ Abort if the we go above this delta |
352 |
-> Maybe Solution -- ^ The new solution |
353 |
checkPlacement nl victims current current_delta prev_sol max_delta = |
354 |
let target = head victims |
355 |
opdx = Instance.pnode target |
356 |
osdx = Instance.snode target |
357 |
vtail = tail victims |
358 |
have_tail = (length vtail) > 0 |
359 |
nodes = Container.elems nl |
360 |
iidx = Instance.idx target |
361 |
in |
362 |
foldl' |
363 |
(\ accu_p pri -> |
364 |
let |
365 |
pri_idx = Node.idx pri |
366 |
upri_delta = current_delta + nodeDelta pri_idx opdx osdx |
367 |
new_pri = Node.addPri pri target |
368 |
fail_delta1 = tooHighDelta accu_p upri_delta max_delta |
369 |
in |
370 |
if fail_delta1 || isNothing(new_pri) then accu_p |
371 |
else let pri_nl = Container.add pri_idx (fromJust new_pri) nl in |
372 |
foldl' |
373 |
(\ accu sec -> |
374 |
let |
375 |
sec_idx = Node.idx sec |
376 |
upd_delta = upri_delta + |
377 |
nodeDelta sec_idx opdx osdx |
378 |
fail_delta2 = tooHighDelta accu upd_delta max_delta |
379 |
new_sec = Node.addSec sec target pri_idx |
380 |
in |
381 |
if sec_idx == pri_idx || fail_delta2 || |
382 |
isNothing new_sec then accu |
383 |
else let |
384 |
nx = Container.add sec_idx (fromJust new_sec) pri_nl |
385 |
upd_cv = compCV nx |
386 |
plc = (iidx, pri_idx, sec_idx, upd_cv) |
387 |
c2 = plc:current |
388 |
result = |
389 |
if have_tail then |
390 |
checkPlacement nx vtail c2 upd_delta |
391 |
accu max_delta |
392 |
else |
393 |
Just (Solution upd_delta c2) |
394 |
in compareSolutions accu result |
395 |
) accu_p nodes |
396 |
) prev_sol nodes |
397 |
|
398 |
{-| Auxiliary function for solution computation. |
399 |
|
400 |
We write this in an explicit recursive fashion in order to control |
401 |
early-abort in case we have met the min delta. We can't use foldr |
402 |
instead of explicit recursion since we need the accumulator for the |
403 |
abort decision. |
404 |
|
405 |
-} |
406 |
advanceSolution :: [Maybe Removal] -- ^ The removal to process |
407 |
-> Int -- ^ Minimum delta parameter |
408 |
-> Int -- ^ Maximum delta parameter |
409 |
-> Maybe Solution -- ^ Current best solution |
410 |
-> Maybe Solution -- ^ New best solution |
411 |
advanceSolution [] _ _ sol = sol |
412 |
advanceSolution (Nothing:xs) m n sol = advanceSolution xs m n sol |
413 |
advanceSolution ((Just (Removal nx removed)):xs) min_d max_d prev_sol = |
414 |
let new_sol = checkPlacement nx removed [] 0 prev_sol max_d |
415 |
new_delta = solutionDelta $! new_sol |
416 |
in |
417 |
if new_delta >= 0 && new_delta <= min_d then |
418 |
new_sol |
419 |
else |
420 |
advanceSolution xs min_d max_d new_sol |
421 |
|
422 |
-- | Computes the placement solution. |
423 |
solutionFromRemovals :: [Maybe Removal] -- ^ The list of (possible) removals |
424 |
-> Int -- ^ Minimum delta parameter |
425 |
-> Int -- ^ Maximum delta parameter |
426 |
-> Maybe Solution -- ^ The best solution found |
427 |
solutionFromRemovals removals min_delta max_delta = |
428 |
advanceSolution removals min_delta max_delta Nothing |
429 |
|
430 |
{-| Computes the solution at the given depth. |
431 |
|
432 |
This is a wrapper over both computeRemovals and |
433 |
solutionFromRemovals. In case we have no solution, we return Nothing. |
434 |
|
435 |
-} |
436 |
computeSolution :: Node.List -- ^ The original node data |
437 |
-> [Instance.Instance] -- ^ The list of /bad/ instances |
438 |
-> Int -- ^ The /depth/ of removals |
439 |
-> Int -- ^ Maximum number of removals to process |
440 |
-> Int -- ^ Minimum delta parameter |
441 |
-> Int -- ^ Maximum delta parameter |
442 |
-> Maybe Solution -- ^ The best solution found (or Nothing) |
443 |
computeSolution nl bad_instances depth max_removals min_delta max_delta = |
444 |
let |
445 |
removals = computeRemovals nl bad_instances depth |
446 |
removals' = capRemovals removals max_removals |
447 |
in |
448 |
solutionFromRemovals removals' min_delta max_delta |
449 |
|
450 |
-- * hbal functions |
451 |
|
452 |
-- | Compute best table. Note that the ordering of the arguments is important. |
453 |
compareTables :: Table -> Table -> Table |
454 |
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) = |
455 |
if a_cv > b_cv then b else a |
456 |
|
457 |
-- | Applies an instance move to a given node list and instance. |
458 |
applyMove :: Node.List -> Instance.Instance |
459 |
-> IMove -> (Maybe Node.List, Instance.Instance, Ndx, Ndx) |
460 |
-- Failover (f) |
461 |
applyMove nl inst Failover = |
462 |
let old_pdx = Instance.pnode inst |
463 |
old_sdx = Instance.snode inst |
464 |
old_p = Container.find old_pdx nl |
465 |
old_s = Container.find old_sdx nl |
466 |
int_p = Node.removePri old_p inst |
467 |
int_s = Node.removeSec old_s inst |
468 |
new_nl = do -- Maybe monad |
469 |
new_p <- Node.addPri int_s inst |
470 |
new_s <- Node.addSec int_p inst old_sdx |
471 |
return $ Container.addTwo old_pdx new_s old_sdx new_p nl |
472 |
in (new_nl, Instance.setBoth inst old_sdx old_pdx, old_sdx, old_pdx) |
473 |
|
474 |
-- Replace the primary (f:, r:np, f) |
475 |
applyMove nl inst (ReplacePrimary new_pdx) = |
476 |
let old_pdx = Instance.pnode inst |
477 |
old_sdx = Instance.snode inst |
478 |
old_p = Container.find old_pdx nl |
479 |
old_s = Container.find old_sdx nl |
480 |
tgt_n = Container.find new_pdx nl |
481 |
int_p = Node.removePri old_p inst |
482 |
int_s = Node.removeSec old_s inst |
483 |
new_nl = do -- Maybe monad |
484 |
-- check that the current secondary can host the instance |
485 |
-- during the migration |
486 |
tmp_s <- Node.addPri int_s inst |
487 |
let tmp_s' = Node.removePri tmp_s inst |
488 |
new_p <- Node.addPri tgt_n inst |
489 |
new_s <- Node.addSec tmp_s' inst new_pdx |
490 |
return $ Container.add new_pdx new_p $ |
491 |
Container.addTwo old_pdx int_p old_sdx new_s nl |
492 |
in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx) |
493 |
|
494 |
-- Replace the secondary (r:ns) |
495 |
applyMove nl inst (ReplaceSecondary new_sdx) = |
496 |
let old_pdx = Instance.pnode inst |
497 |
old_sdx = Instance.snode inst |
498 |
old_s = Container.find old_sdx nl |
499 |
tgt_n = Container.find new_sdx nl |
500 |
int_s = Node.removeSec old_s inst |
501 |
new_nl = Node.addSec tgt_n inst old_pdx >>= |
502 |
\new_s -> return $ Container.addTwo new_sdx |
503 |
new_s old_sdx int_s nl |
504 |
in (new_nl, Instance.setSec inst new_sdx, old_pdx, new_sdx) |
505 |
|
506 |
-- Replace the secondary and failover (r:np, f) |
507 |
applyMove nl inst (ReplaceAndFailover new_pdx) = |
508 |
let old_pdx = Instance.pnode inst |
509 |
old_sdx = Instance.snode inst |
510 |
old_p = Container.find old_pdx nl |
511 |
old_s = Container.find old_sdx nl |
512 |
tgt_n = Container.find new_pdx nl |
513 |
int_p = Node.removePri old_p inst |
514 |
int_s = Node.removeSec old_s inst |
515 |
new_nl = do -- Maybe monad |
516 |
new_p <- Node.addPri tgt_n inst |
517 |
new_s <- Node.addSec int_p inst new_pdx |
518 |
return $ Container.add new_pdx new_p $ |
519 |
Container.addTwo old_pdx new_s old_sdx int_s nl |
520 |
in (new_nl, Instance.setBoth inst new_pdx old_pdx, new_pdx, old_pdx) |
521 |
|
522 |
-- Failver and replace the secondary (f, r:ns) |
523 |
applyMove nl inst (FailoverAndReplace new_sdx) = |
524 |
let old_pdx = Instance.pnode inst |
525 |
old_sdx = Instance.snode inst |
526 |
old_p = Container.find old_pdx nl |
527 |
old_s = Container.find old_sdx nl |
528 |
tgt_n = Container.find new_sdx nl |
529 |
int_p = Node.removePri old_p inst |
530 |
int_s = Node.removeSec old_s inst |
531 |
new_nl = do -- Maybe monad |
532 |
new_p <- Node.addPri int_s inst |
533 |
new_s <- Node.addSec tgt_n inst old_sdx |
534 |
return $ Container.add new_sdx new_s $ |
535 |
Container.addTwo old_sdx new_p old_pdx int_p nl |
536 |
in (new_nl, Instance.setBoth inst old_sdx new_sdx, old_sdx, new_sdx) |
537 |
|
538 |
-- | Tries to allocate an instance on one given node. |
539 |
allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node |
540 |
-> (Maybe Node.List, Instance.Instance) |
541 |
allocateOnSingle nl inst p = |
542 |
let new_pdx = Node.idx p |
543 |
new_nl = Node.addPri p inst >>= \new_p -> |
544 |
return $ Container.add new_pdx new_p nl |
545 |
in (new_nl, Instance.setBoth inst new_pdx Node.noSecondary) |
546 |
|
547 |
-- | Tries to allocate an instance on a given pair of nodes. |
548 |
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node |
549 |
-> (Maybe Node.List, Instance.Instance) |
550 |
allocateOnPair nl inst tgt_p tgt_s = |
551 |
let new_pdx = Node.idx tgt_p |
552 |
new_sdx = Node.idx tgt_s |
553 |
new_nl = do -- Maybe monad |
554 |
new_p <- Node.addPri tgt_p inst |
555 |
new_s <- Node.addSec tgt_s inst new_pdx |
556 |
return $ Container.addTwo new_pdx new_p new_sdx new_s nl |
557 |
in (new_nl, Instance.setBoth inst new_pdx new_sdx) |
558 |
|
559 |
-- | Tries to perform an instance move and returns the best table |
560 |
-- between the original one and the new one. |
561 |
checkSingleStep :: Table -- ^ The original table |
562 |
-> Instance.Instance -- ^ The instance to move |
563 |
-> Table -- ^ The current best table |
564 |
-> IMove -- ^ The move to apply |
565 |
-> Table -- ^ The final best table |
566 |
checkSingleStep ini_tbl target cur_tbl move = |
567 |
let |
568 |
Table ini_nl ini_il _ ini_plc = ini_tbl |
569 |
(tmp_nl, new_inst, pri_idx, sec_idx) = applyMove ini_nl target move |
570 |
in |
571 |
if isNothing tmp_nl then cur_tbl |
572 |
else |
573 |
let tgt_idx = Instance.idx target |
574 |
upd_nl = fromJust tmp_nl |
575 |
upd_cvar = compCV upd_nl |
576 |
upd_il = Container.add tgt_idx new_inst ini_il |
577 |
upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):ini_plc |
578 |
upd_tbl = Table upd_nl upd_il upd_cvar upd_plc |
579 |
in |
580 |
compareTables cur_tbl upd_tbl |
581 |
|
582 |
-- | Given the status of the current secondary as a valid new node |
583 |
-- and the current candidate target node, |
584 |
-- generate the possible moves for a instance. |
585 |
possibleMoves :: Bool -> Ndx -> [IMove] |
586 |
possibleMoves True tdx = |
587 |
[ReplaceSecondary tdx, |
588 |
ReplaceAndFailover tdx, |
589 |
ReplacePrimary tdx, |
590 |
FailoverAndReplace tdx] |
591 |
|
592 |
possibleMoves False tdx = |
593 |
[ReplaceSecondary tdx, |
594 |
ReplaceAndFailover tdx] |
595 |
|
596 |
-- | Compute the best move for a given instance. |
597 |
checkInstanceMove :: [Ndx] -- Allowed target node indices |
598 |
-> Table -- Original table |
599 |
-> Instance.Instance -- Instance to move |
600 |
-> Table -- Best new table for this instance |
601 |
checkInstanceMove nodes_idx ini_tbl target = |
602 |
let |
603 |
opdx = Instance.pnode target |
604 |
osdx = Instance.snode target |
605 |
nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx |
606 |
use_secondary = elem osdx nodes_idx |
607 |
aft_failover = if use_secondary -- if allowed to failover |
608 |
then checkSingleStep ini_tbl target ini_tbl Failover |
609 |
else ini_tbl |
610 |
all_moves = concatMap (possibleMoves use_secondary) nodes |
611 |
in |
612 |
-- iterate over the possible nodes for this instance |
613 |
foldl' (checkSingleStep ini_tbl target) aft_failover all_moves |
614 |
|
615 |
-- | Compute the best next move. |
616 |
checkMove :: [Ndx] -- ^ Allowed target node indices |
617 |
-> Table -- ^ The current solution |
618 |
-> [Instance.Instance] -- ^ List of instances still to move |
619 |
-> Table -- ^ The new solution |
620 |
checkMove nodes_idx ini_tbl victims = |
621 |
let Table _ _ _ ini_plc = ini_tbl |
622 |
-- iterate over all instances, computing the best move |
623 |
best_tbl = |
624 |
foldl' |
625 |
(\ step_tbl elem -> |
626 |
if Instance.snode elem == Node.noSecondary then step_tbl |
627 |
else compareTables step_tbl $ |
628 |
checkInstanceMove nodes_idx ini_tbl elem) |
629 |
ini_tbl victims |
630 |
Table _ _ _ best_plc = best_tbl |
631 |
in |
632 |
if length best_plc == length ini_plc then -- no advancement |
633 |
ini_tbl |
634 |
else |
635 |
best_tbl |
636 |
|
637 |
-- * Alocation functions |
638 |
|
639 |
-- | Try to allocate an instance on the cluster. |
640 |
tryAlloc :: (Monad m) => |
641 |
Node.List -- ^ The node list |
642 |
-> Instance.List -- ^ The instance list |
643 |
-> Instance.Instance -- ^ The instance to allocate |
644 |
-> Int -- ^ Required number of nodes |
645 |
-> m AllocSolution -- ^ Possible solution list |
646 |
tryAlloc nl _ inst 2 = |
647 |
let all_nodes = getOnline nl |
648 |
all_pairs = liftM2 (,) all_nodes all_nodes |
649 |
ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs |
650 |
sols = map (\(p, s) -> let (mnl, i) = allocateOnPair nl inst p s |
651 |
in (mnl, i, [p, s])) |
652 |
ok_pairs |
653 |
in return sols |
654 |
|
655 |
tryAlloc nl _ inst 1 = |
656 |
let all_nodes = getOnline nl |
657 |
sols = map (\p -> let (mnl, i) = allocateOnSingle nl inst p |
658 |
in (mnl, i, [p])) |
659 |
all_nodes |
660 |
in return sols |
661 |
|
662 |
tryAlloc _ _ _ reqn = fail $ "Unsupported number of alllocation \ |
663 |
\destinations required (" ++ (show reqn) ++ |
664 |
"), only two supported" |
665 |
|
666 |
-- | Try to allocate an instance on the cluster. |
667 |
tryReloc :: (Monad m) => |
668 |
Node.List -- ^ The node list |
669 |
-> Instance.List -- ^ The instance list |
670 |
-> Idx -- ^ The index of the instance to move |
671 |
-> Int -- ^ The numver of nodes required |
672 |
-> [Ndx] -- ^ Nodes which should not be used |
673 |
-> m AllocSolution -- ^ Solution list |
674 |
tryReloc nl il xid 1 ex_idx = |
675 |
let all_nodes = getOnline nl |
676 |
inst = Container.find xid il |
677 |
ex_idx' = (Instance.pnode inst):ex_idx |
678 |
valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes |
679 |
valid_idxes = map Node.idx valid_nodes |
680 |
sols1 = map (\x -> let (mnl, i, _, _) = |
681 |
applyMove nl inst (ReplaceSecondary x) |
682 |
in (mnl, i, [Container.find x nl]) |
683 |
) valid_idxes |
684 |
in return sols1 |
685 |
|
686 |
tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \ |
687 |
\destinations required (" ++ (show reqn) ++ |
688 |
"), only one supported" |
689 |
|
690 |
-- * Formatting functions |
691 |
|
692 |
-- | Given the original and final nodes, computes the relocation description. |
693 |
computeMoves :: String -- ^ The instance name |
694 |
-> String -- ^ Original primary |
695 |
-> String -- ^ Original secondary |
696 |
-> String -- ^ New primary |
697 |
-> String -- ^ New secondary |
698 |
-> (String, [String]) |
699 |
-- ^ Tuple of moves and commands list; moves is containing |
700 |
-- either @/f/@ for failover or @/r:name/@ for replace |
701 |
-- secondary, while the command list holds gnt-instance |
702 |
-- commands (without that prefix), e.g \"@failover instance1@\" |
703 |
computeMoves i a b c d = |
704 |
if c == a then {- Same primary -} |
705 |
if d == b then {- Same sec??! -} |
706 |
("-", []) |
707 |
else {- Change of secondary -} |
708 |
(printf "r:%s" d, |
709 |
[printf "replace-disks -n %s %s" d i]) |
710 |
else |
711 |
if c == b then {- Failover and ... -} |
712 |
if d == a then {- that's all -} |
713 |
("f", [printf "migrate -f %s" i]) |
714 |
else |
715 |
(printf "f r:%s" d, |
716 |
[printf "migrate -f %s" i, |
717 |
printf "replace-disks -n %s %s" d i]) |
718 |
else |
719 |
if d == a then {- ... and keep primary as secondary -} |
720 |
(printf "r:%s f" c, |
721 |
[printf "replace-disks -n %s %s" c i, |
722 |
printf "migrate -f %s" i]) |
723 |
else |
724 |
if d == b then {- ... keep same secondary -} |
725 |
(printf "f r:%s f" c, |
726 |
[printf "migrate -f %s" i, |
727 |
printf "replace-disks -n %s %s" c i, |
728 |
printf "migrate -f %s" i]) |
729 |
|
730 |
else {- Nothing in common -} |
731 |
(printf "r:%s f r:%s" c d, |
732 |
[printf "replace-disks -n %s %s" c i, |
733 |
printf "migrate -f %s" i, |
734 |
printf "replace-disks -n %s %s" d i]) |
735 |
|
736 |
-- | Converts a placement to string format. |
737 |
printSolutionLine :: Node.List -- ^ The node list |
738 |
-> Instance.List -- ^ The instance list |
739 |
-> Int -- ^ Maximum node name length |
740 |
-> Int -- ^ Maximum instance name length |
741 |
-> Placement -- ^ The current placement |
742 |
-> Int -- ^ The index of the placement in |
743 |
-- the solution |
744 |
-> (String, [String]) |
745 |
printSolutionLine nl il nmlen imlen plc pos = |
746 |
let |
747 |
pmlen = (2*nmlen + 1) |
748 |
(i, p, s, c) = plc |
749 |
inst = Container.find i il |
750 |
inam = Instance.name inst |
751 |
npri = Container.nameOf nl p |
752 |
nsec = Container.nameOf nl s |
753 |
opri = Container.nameOf nl $ Instance.pnode inst |
754 |
osec = Container.nameOf nl $ Instance.snode inst |
755 |
(moves, cmds) = computeMoves inam opri osec npri nsec |
756 |
ostr = (printf "%s:%s" opri osec)::String |
757 |
nstr = (printf "%s:%s" npri nsec)::String |
758 |
in |
759 |
(printf " %3d. %-*s %-*s => %-*s %.8f a=%s" |
760 |
pos imlen inam pmlen ostr |
761 |
pmlen nstr c moves, |
762 |
cmds) |
763 |
|
764 |
-- | Given a list of commands, prefix them with @gnt-instance@ and |
765 |
-- also beautify the display a little. |
766 |
formatCmds :: [[String]] -> String |
767 |
formatCmds cmd_strs = |
768 |
unlines $ |
769 |
concat $ map (\(a, b) -> |
770 |
(printf "echo step %d" (a::Int)): |
771 |
(printf "check"): |
772 |
(map ("gnt-instance " ++) b)) $ |
773 |
zip [1..] cmd_strs |
774 |
|
775 |
-- | Converts a solution to string format. |
776 |
printSolution :: Node.List |
777 |
-> Instance.List |
778 |
-> [Placement] |
779 |
-> ([String], [[String]]) |
780 |
printSolution nl il sol = |
781 |
let |
782 |
nmlen = Container.maxNameLen nl |
783 |
imlen = Container.maxNameLen il |
784 |
in |
785 |
unzip $ map (uncurry $ printSolutionLine nl il nmlen imlen) $ |
786 |
zip sol [1..] |
787 |
|
788 |
-- | Print the node list. |
789 |
printNodes :: Node.List -> String |
790 |
printNodes nl = |
791 |
let snl = sortBy (compare `on` Node.idx) (Container.elems nl) |
792 |
m_name = maximum . map (length . Node.name) $ snl |
793 |
helper = Node.list m_name |
794 |
header = (printf |
795 |
"%2s %-*s %5s %5s %5s %5s %5s %5s %5s %5s %4s %4s \ |
796 |
\%3s %3s %6s %6s %5s" |
797 |
" F" m_name "Name" |
798 |
"t_mem" "n_mem" "i_mem" "x_mem" "f_mem" "r_mem" |
799 |
"t_dsk" "f_dsk" "pcpu" "vcpu" |
800 |
"pri" "sec" "p_fmem" "p_fdsk" "r_cpu")::String |
801 |
in unlines $ (header:map helper snl) |
802 |
|
803 |
-- | Shows statistics for a given node list. |
804 |
printStats :: Node.List -> String |
805 |
printStats nl = |
806 |
let (mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv) = |
807 |
compDetailedCV nl |
808 |
in printf "f_mem=%.8f, r_mem=%.8f, f_dsk=%.8f, n1=%.3f, \ |
809 |
\uf=%.3f, r_cpu=%.3f" |
810 |
mem_cv res_cv dsk_cv n1_score off_score cpu_cv |