root / Ganeti / HTools / Cluster.hs @ a182df55
History | View | Annotate | Download (30.4 kB)
1 |
{-| Implementation of cluster-wide logic. |
---|---|
2 |
|
3 |
This module holds all pure cluster-logic; I\/O related functionality |
4 |
goes into the "Main" module for the individual binaries. |
5 |
|
6 |
-} |
7 |
|
8 |
{- |
9 |
|
10 |
Copyright (C) 2009 Google Inc. |
11 |
|
12 |
This program is free software; you can redistribute it and/or modify |
13 |
it under the terms of the GNU General Public License as published by |
14 |
the Free Software Foundation; either version 2 of the License, or |
15 |
(at your option) any later version. |
16 |
|
17 |
This program is distributed in the hope that it will be useful, but |
18 |
WITHOUT ANY WARRANTY; without even the implied warranty of |
19 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
20 |
General Public License for more details. |
21 |
|
22 |
You should have received a copy of the GNU General Public License |
23 |
along with this program; if not, write to the Free Software |
24 |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
25 |
02110-1301, USA. |
26 |
|
27 |
-} |
28 |
|
29 |
module Ganeti.HTools.Cluster |
30 |
( |
31 |
-- * Types |
32 |
AllocSolution |
33 |
, Table(..) |
34 |
, CStats(..) |
35 |
-- * Generic functions |
36 |
, totalResources |
37 |
-- * First phase functions |
38 |
, computeBadItems |
39 |
-- * Second phase functions |
40 |
, printSolution |
41 |
, printSolutionLine |
42 |
, formatCmds |
43 |
, involvedNodes |
44 |
, splitJobs |
45 |
-- * Display functions |
46 |
, printNodes |
47 |
, printInsts |
48 |
-- * Balacing functions |
49 |
, checkMove |
50 |
, doNextBalance |
51 |
, tryBalance |
52 |
, compCV |
53 |
, printStats |
54 |
, iMoveToJob |
55 |
-- * IAllocator functions |
56 |
, tryAlloc |
57 |
, tryReloc |
58 |
, tryEvac |
59 |
, collapseFailures |
60 |
) where |
61 |
|
62 |
import Data.List |
63 |
import Text.Printf (printf) |
64 |
import Data.Function |
65 |
import Control.Monad |
66 |
|
67 |
import qualified Ganeti.HTools.Container as Container |
68 |
import qualified Ganeti.HTools.Instance as Instance |
69 |
import qualified Ganeti.HTools.Node as Node |
70 |
import Ganeti.HTools.Types |
71 |
import Ganeti.HTools.Utils |
72 |
import qualified Ganeti.OpCodes as OpCodes |
73 |
|
74 |
-- * Types |
75 |
|
76 |
-- | Allocation\/relocation solution. |
77 |
type AllocSolution = ([FailMode], Int, [(Score, Node.AllocElement)]) |
78 |
|
79 |
-- | The complete state for the balancing solution |
80 |
data Table = Table Node.List Instance.List Score [Placement] |
81 |
deriving (Show) |
82 |
|
83 |
data CStats = CStats { csFmem :: Int -- ^ Cluster free mem |
84 |
, csFdsk :: Int -- ^ Cluster free disk |
85 |
, csAmem :: Int -- ^ Cluster allocatable mem |
86 |
, csAdsk :: Int -- ^ Cluster allocatable disk |
87 |
, csAcpu :: Int -- ^ Cluster allocatable cpus |
88 |
, csMmem :: Int -- ^ Max node allocatable mem |
89 |
, csMdsk :: Int -- ^ Max node allocatable disk |
90 |
, csMcpu :: Int -- ^ Max node allocatable cpu |
91 |
, csImem :: Int -- ^ Instance used mem |
92 |
, csIdsk :: Int -- ^ Instance used disk |
93 |
, csIcpu :: Int -- ^ Instance used cpu |
94 |
, csTmem :: Double -- ^ Cluster total mem |
95 |
, csTdsk :: Double -- ^ Cluster total disk |
96 |
, csTcpu :: Double -- ^ Cluster total cpus |
97 |
, csXmem :: Int -- ^ Unnacounted for mem |
98 |
, csNmem :: Int -- ^ Node own memory |
99 |
, csScore :: Score -- ^ The cluster score |
100 |
, csNinst :: Int -- ^ The total number of instances |
101 |
} |
102 |
|
103 |
-- * Utility functions |
104 |
|
105 |
-- | Verifies the N+1 status and return the affected nodes. |
106 |
verifyN1 :: [Node.Node] -> [Node.Node] |
107 |
verifyN1 = filter Node.failN1 |
108 |
|
109 |
{-| Computes the pair of bad nodes and instances. |
110 |
|
111 |
The bad node list is computed via a simple 'verifyN1' check, and the |
112 |
bad instance list is the list of primary and secondary instances of |
113 |
those nodes. |
114 |
|
115 |
-} |
116 |
computeBadItems :: Node.List -> Instance.List -> |
117 |
([Node.Node], [Instance.Instance]) |
118 |
computeBadItems nl il = |
119 |
let bad_nodes = verifyN1 $ getOnline nl |
120 |
bad_instances = map (\idx -> Container.find idx il) . |
121 |
sort . nub $ |
122 |
concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes |
123 |
in |
124 |
(bad_nodes, bad_instances) |
125 |
|
126 |
-- | Zero-initializer for the CStats type |
127 |
emptyCStats :: CStats |
128 |
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 |
129 |
|
130 |
-- | Update stats with data from a new node |
131 |
updateCStats :: CStats -> Node.Node -> CStats |
132 |
updateCStats cs node = |
133 |
let CStats { csFmem = x_fmem, csFdsk = x_fdsk, |
134 |
csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk, |
135 |
csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu, |
136 |
csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu, |
137 |
csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu, |
138 |
csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst |
139 |
} |
140 |
= cs |
141 |
inc_amem = Node.fMem node - Node.rMem node |
142 |
inc_amem' = if inc_amem > 0 then inc_amem else 0 |
143 |
inc_adsk = Node.availDisk node |
144 |
inc_imem = truncate (Node.tMem node) - Node.nMem node |
145 |
- Node.xMem node - Node.fMem node |
146 |
inc_icpu = Node.uCpu node |
147 |
inc_idsk = truncate (Node.tDsk node) - Node.fDsk node |
148 |
|
149 |
in cs { csFmem = x_fmem + Node.fMem node |
150 |
, csFdsk = x_fdsk + Node.fDsk node |
151 |
, csAmem = x_amem + inc_amem' |
152 |
, csAdsk = x_adsk + inc_adsk |
153 |
, csAcpu = x_acpu |
154 |
, csMmem = max x_mmem inc_amem' |
155 |
, csMdsk = max x_mdsk inc_adsk |
156 |
, csMcpu = x_mcpu |
157 |
, csImem = x_imem + inc_imem |
158 |
, csIdsk = x_idsk + inc_idsk |
159 |
, csIcpu = x_icpu + inc_icpu |
160 |
, csTmem = x_tmem + Node.tMem node |
161 |
, csTdsk = x_tdsk + Node.tDsk node |
162 |
, csTcpu = x_tcpu + Node.tCpu node |
163 |
, csXmem = x_xmem + Node.xMem node |
164 |
, csNmem = x_nmem + Node.nMem node |
165 |
, csNinst = x_ninst + length (Node.pList node) |
166 |
} |
167 |
|
168 |
-- | Compute the total free disk and memory in the cluster. |
169 |
totalResources :: Node.List -> CStats |
170 |
totalResources nl = |
171 |
let cs = foldl' updateCStats emptyCStats . Container.elems $ nl |
172 |
in cs { csScore = compCV nl } |
173 |
|
174 |
-- | The names of the individual elements in the CV list |
175 |
detailedCVNames :: [String] |
176 |
detailedCVNames = [ "free_mem_cv" |
177 |
, "free_disk_cv" |
178 |
, "n1_cnt" |
179 |
, "reserved_mem_cv" |
180 |
, "offline_all_cnt" |
181 |
, "offline_pri_cnt" |
182 |
, "vcpu_ratio_cv" |
183 |
, "cpu_load_cv" |
184 |
, "mem_load_cv" |
185 |
, "disk_load_cv" |
186 |
, "net_load_cv" |
187 |
, "pri_tags_score" |
188 |
] |
189 |
|
190 |
-- | Compute the mem and disk covariance. |
191 |
compDetailedCV :: Node.List -> [Double] |
192 |
compDetailedCV nl = |
193 |
let |
194 |
all_nodes = Container.elems nl |
195 |
(offline, nodes) = partition Node.offline all_nodes |
196 |
mem_l = map Node.pMem nodes |
197 |
dsk_l = map Node.pDsk nodes |
198 |
-- metric: memory covariance |
199 |
mem_cv = varianceCoeff mem_l |
200 |
-- metric: disk covariance |
201 |
dsk_cv = varianceCoeff dsk_l |
202 |
n1_l = length $ filter Node.failN1 nodes |
203 |
-- metric: count of failN1 nodes |
204 |
n1_score = fromIntegral n1_l::Double |
205 |
res_l = map Node.pRem nodes |
206 |
-- metric: reserved memory covariance |
207 |
res_cv = varianceCoeff res_l |
208 |
-- offline instances metrics |
209 |
offline_ipri = sum . map (length . Node.pList) $ offline |
210 |
offline_isec = sum . map (length . Node.sList) $ offline |
211 |
-- metric: count of instances on offline nodes |
212 |
off_score = fromIntegral (offline_ipri + offline_isec)::Double |
213 |
-- metric: count of primary instances on offline nodes (this |
214 |
-- helps with evacuation/failover of primary instances on |
215 |
-- 2-node clusters with one node offline) |
216 |
off_pri_score = fromIntegral offline_ipri::Double |
217 |
cpu_l = map Node.pCpu nodes |
218 |
-- metric: covariance of vcpu/pcpu ratio |
219 |
cpu_cv = varianceCoeff cpu_l |
220 |
-- metrics: covariance of cpu, memory, disk and network load |
221 |
(c_load, m_load, d_load, n_load) = unzip4 $ |
222 |
map (\n -> |
223 |
let DynUtil c1 m1 d1 n1 = Node.utilLoad n |
224 |
DynUtil c2 m2 d2 n2 = Node.utilPool n |
225 |
in (c1/c2, m1/m2, d1/d2, n1/n2) |
226 |
) nodes |
227 |
-- metric: conflicting instance count |
228 |
pri_tags_inst = sum $ map Node.conflictingPrimaries nodes |
229 |
pri_tags_score = fromIntegral pri_tags_inst::Double |
230 |
in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv |
231 |
, varianceCoeff c_load, varianceCoeff m_load |
232 |
, varianceCoeff d_load, varianceCoeff n_load |
233 |
, pri_tags_score ] |
234 |
|
235 |
-- | Compute the /total/ variance. |
236 |
compCV :: Node.List -> Double |
237 |
compCV = sum . compDetailedCV |
238 |
|
239 |
-- | Compute online nodes from a Node.List |
240 |
getOnline :: Node.List -> [Node.Node] |
241 |
getOnline = filter (not . Node.offline) . Container.elems |
242 |
|
243 |
-- * hbal functions |
244 |
|
245 |
-- | Compute best table. Note that the ordering of the arguments is important. |
246 |
compareTables :: Table -> Table -> Table |
247 |
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) = |
248 |
if a_cv > b_cv then b else a |
249 |
|
250 |
-- | Applies an instance move to a given node list and instance. |
251 |
applyMove :: Node.List -> Instance.Instance |
252 |
-> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx) |
253 |
-- Failover (f) |
254 |
applyMove nl inst Failover = |
255 |
let old_pdx = Instance.pNode inst |
256 |
old_sdx = Instance.sNode inst |
257 |
old_p = Container.find old_pdx nl |
258 |
old_s = Container.find old_sdx nl |
259 |
int_p = Node.removePri old_p inst |
260 |
int_s = Node.removeSec old_s inst |
261 |
new_nl = do -- Maybe monad |
262 |
new_p <- Node.addPri int_s inst |
263 |
new_s <- Node.addSec int_p inst old_sdx |
264 |
let new_inst = Instance.setBoth inst old_sdx old_pdx |
265 |
return (Container.addTwo old_pdx new_s old_sdx new_p nl, |
266 |
new_inst, old_sdx, old_pdx) |
267 |
in new_nl |
268 |
|
269 |
-- Replace the primary (f:, r:np, f) |
270 |
applyMove nl inst (ReplacePrimary new_pdx) = |
271 |
let old_pdx = Instance.pNode inst |
272 |
old_sdx = Instance.sNode inst |
273 |
old_p = Container.find old_pdx nl |
274 |
old_s = Container.find old_sdx nl |
275 |
tgt_n = Container.find new_pdx nl |
276 |
int_p = Node.removePri old_p inst |
277 |
int_s = Node.removeSec old_s inst |
278 |
new_nl = do -- Maybe monad |
279 |
-- check that the current secondary can host the instance |
280 |
-- during the migration |
281 |
tmp_s <- Node.addPri int_s inst |
282 |
let tmp_s' = Node.removePri tmp_s inst |
283 |
new_p <- Node.addPri tgt_n inst |
284 |
new_s <- Node.addSec tmp_s' inst new_pdx |
285 |
let new_inst = Instance.setPri inst new_pdx |
286 |
return (Container.add new_pdx new_p $ |
287 |
Container.addTwo old_pdx int_p old_sdx new_s nl, |
288 |
new_inst, new_pdx, old_sdx) |
289 |
in new_nl |
290 |
|
291 |
-- Replace the secondary (r:ns) |
292 |
applyMove nl inst (ReplaceSecondary new_sdx) = |
293 |
let old_pdx = Instance.pNode inst |
294 |
old_sdx = Instance.sNode inst |
295 |
old_s = Container.find old_sdx nl |
296 |
tgt_n = Container.find new_sdx nl |
297 |
int_s = Node.removeSec old_s inst |
298 |
new_inst = Instance.setSec inst new_sdx |
299 |
new_nl = Node.addSec tgt_n inst old_pdx >>= |
300 |
\new_s -> return (Container.addTwo new_sdx |
301 |
new_s old_sdx int_s nl, |
302 |
new_inst, old_pdx, new_sdx) |
303 |
in new_nl |
304 |
|
305 |
-- Replace the secondary and failover (r:np, f) |
306 |
applyMove nl inst (ReplaceAndFailover new_pdx) = |
307 |
let old_pdx = Instance.pNode inst |
308 |
old_sdx = Instance.sNode inst |
309 |
old_p = Container.find old_pdx nl |
310 |
old_s = Container.find old_sdx nl |
311 |
tgt_n = Container.find new_pdx nl |
312 |
int_p = Node.removePri old_p inst |
313 |
int_s = Node.removeSec old_s inst |
314 |
new_nl = do -- Maybe monad |
315 |
new_p <- Node.addPri tgt_n inst |
316 |
new_s <- Node.addSec int_p inst new_pdx |
317 |
let new_inst = Instance.setBoth inst new_pdx old_pdx |
318 |
return (Container.add new_pdx new_p $ |
319 |
Container.addTwo old_pdx new_s old_sdx int_s nl, |
320 |
new_inst, new_pdx, old_pdx) |
321 |
in new_nl |
322 |
|
323 |
-- Failver and replace the secondary (f, r:ns) |
324 |
applyMove nl inst (FailoverAndReplace new_sdx) = |
325 |
let old_pdx = Instance.pNode inst |
326 |
old_sdx = Instance.sNode inst |
327 |
old_p = Container.find old_pdx nl |
328 |
old_s = Container.find old_sdx nl |
329 |
tgt_n = Container.find new_sdx nl |
330 |
int_p = Node.removePri old_p inst |
331 |
int_s = Node.removeSec old_s inst |
332 |
new_nl = do -- Maybe monad |
333 |
new_p <- Node.addPri int_s inst |
334 |
new_s <- Node.addSec tgt_n inst old_sdx |
335 |
let new_inst = Instance.setBoth inst old_sdx new_sdx |
336 |
return (Container.add new_sdx new_s $ |
337 |
Container.addTwo old_sdx new_p old_pdx int_p nl, |
338 |
new_inst, old_sdx, new_sdx) |
339 |
in new_nl |
340 |
|
341 |
-- | Tries to allocate an instance on one given node. |
342 |
allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node |
343 |
-> OpResult Node.AllocElement |
344 |
allocateOnSingle nl inst p = |
345 |
let new_pdx = Node.idx p |
346 |
new_inst = Instance.setBoth inst new_pdx Node.noSecondary |
347 |
new_nl = Node.addPri p inst >>= \new_p -> |
348 |
return (Container.add new_pdx new_p nl, new_inst, [new_p]) |
349 |
in new_nl |
350 |
|
351 |
-- | Tries to allocate an instance on a given pair of nodes. |
352 |
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node |
353 |
-> OpResult Node.AllocElement |
354 |
allocateOnPair nl inst tgt_p tgt_s = |
355 |
let new_pdx = Node.idx tgt_p |
356 |
new_sdx = Node.idx tgt_s |
357 |
new_nl = do -- Maybe monad |
358 |
new_p <- Node.addPri tgt_p inst |
359 |
new_s <- Node.addSec tgt_s inst new_pdx |
360 |
let new_inst = Instance.setBoth inst new_pdx new_sdx |
361 |
return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst, |
362 |
[new_p, new_s]) |
363 |
in new_nl |
364 |
|
365 |
-- | Tries to perform an instance move and returns the best table |
366 |
-- between the original one and the new one. |
367 |
checkSingleStep :: Table -- ^ The original table |
368 |
-> Instance.Instance -- ^ The instance to move |
369 |
-> Table -- ^ The current best table |
370 |
-> IMove -- ^ The move to apply |
371 |
-> Table -- ^ The final best table |
372 |
checkSingleStep ini_tbl target cur_tbl move = |
373 |
let |
374 |
Table ini_nl ini_il _ ini_plc = ini_tbl |
375 |
tmp_resu = applyMove ini_nl target move |
376 |
in |
377 |
case tmp_resu of |
378 |
OpFail _ -> cur_tbl |
379 |
OpGood (upd_nl, new_inst, pri_idx, sec_idx) -> |
380 |
let tgt_idx = Instance.idx target |
381 |
upd_cvar = compCV upd_nl |
382 |
upd_il = Container.add tgt_idx new_inst ini_il |
383 |
upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc |
384 |
upd_tbl = Table upd_nl upd_il upd_cvar upd_plc |
385 |
in |
386 |
compareTables cur_tbl upd_tbl |
387 |
|
388 |
-- | Given the status of the current secondary as a valid new node and |
389 |
-- the current candidate target node, generate the possible moves for |
390 |
-- a instance. |
391 |
possibleMoves :: Bool -- ^ Whether the secondary node is a valid new node |
392 |
-> Ndx -- ^ Target node candidate |
393 |
-> [IMove] -- ^ List of valid result moves |
394 |
possibleMoves True tdx = |
395 |
[ReplaceSecondary tdx, |
396 |
ReplaceAndFailover tdx, |
397 |
ReplacePrimary tdx, |
398 |
FailoverAndReplace tdx] |
399 |
|
400 |
possibleMoves False tdx = |
401 |
[ReplaceSecondary tdx, |
402 |
ReplaceAndFailover tdx] |
403 |
|
404 |
-- | Compute the best move for a given instance. |
405 |
checkInstanceMove :: [Ndx] -- ^ Allowed target node indices |
406 |
-> Bool -- ^ Whether disk moves are allowed |
407 |
-> Table -- ^ Original table |
408 |
-> Instance.Instance -- ^ Instance to move |
409 |
-> Table -- ^ Best new table for this instance |
410 |
checkInstanceMove nodes_idx disk_moves ini_tbl target = |
411 |
let |
412 |
opdx = Instance.pNode target |
413 |
osdx = Instance.sNode target |
414 |
nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx |
415 |
use_secondary = elem osdx nodes_idx |
416 |
aft_failover = if use_secondary -- if allowed to failover |
417 |
then checkSingleStep ini_tbl target ini_tbl Failover |
418 |
else ini_tbl |
419 |
all_moves = if disk_moves |
420 |
then concatMap (possibleMoves use_secondary) nodes |
421 |
else [] |
422 |
in |
423 |
-- iterate over the possible nodes for this instance |
424 |
foldl' (checkSingleStep ini_tbl target) aft_failover all_moves |
425 |
|
426 |
-- | Compute the best next move. |
427 |
checkMove :: [Ndx] -- ^ Allowed target node indices |
428 |
-> Bool -- ^ Whether disk moves are allowed |
429 |
-> Table -- ^ The current solution |
430 |
-> [Instance.Instance] -- ^ List of instances still to move |
431 |
-> Table -- ^ The new solution |
432 |
checkMove nodes_idx disk_moves ini_tbl victims = |
433 |
let Table _ _ _ ini_plc = ini_tbl |
434 |
-- iterate over all instances, computing the best move |
435 |
best_tbl = |
436 |
foldl' |
437 |
(\ step_tbl em -> |
438 |
compareTables step_tbl $ |
439 |
checkInstanceMove nodes_idx disk_moves ini_tbl em) |
440 |
ini_tbl victims |
441 |
Table _ _ _ best_plc = best_tbl |
442 |
in if length best_plc == length ini_plc |
443 |
then ini_tbl -- no advancement |
444 |
else best_tbl |
445 |
|
446 |
-- | Check if we are allowed to go deeper in the balancing |
447 |
|
448 |
doNextBalance :: Table -- ^ The starting table |
449 |
-> Int -- ^ Remaining length |
450 |
-> Score -- ^ Score at which to stop |
451 |
-> Bool -- ^ The resulting table and commands |
452 |
doNextBalance ini_tbl max_rounds min_score = |
453 |
let Table _ _ ini_cv ini_plc = ini_tbl |
454 |
ini_plc_len = length ini_plc |
455 |
in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score |
456 |
|
457 |
-- | Run a balance move |
458 |
|
459 |
tryBalance :: Table -- ^ The starting table |
460 |
-> Bool -- ^ Allow disk moves |
461 |
-> Bool -- ^ Only evacuate moves |
462 |
-> Maybe Table -- ^ The resulting table and commands |
463 |
tryBalance ini_tbl disk_moves evac_mode = |
464 |
let Table ini_nl ini_il ini_cv _ = ini_tbl |
465 |
all_inst = Container.elems ini_il |
466 |
all_inst' = if evac_mode |
467 |
then let bad_nodes = map Node.idx . filter Node.offline $ |
468 |
Container.elems ini_nl |
469 |
in filter (\e -> Instance.sNode e `elem` bad_nodes || |
470 |
Instance.pNode e `elem` bad_nodes) |
471 |
all_inst |
472 |
else all_inst |
473 |
reloc_inst = filter (\e -> Instance.sNode e /= Node.noSecondary) |
474 |
all_inst' |
475 |
node_idx = map Node.idx . filter (not . Node.offline) $ |
476 |
Container.elems ini_nl |
477 |
fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst |
478 |
(Table _ _ fin_cv _) = fin_tbl |
479 |
in |
480 |
if fin_cv < ini_cv |
481 |
then Just fin_tbl -- this round made success, return the new table |
482 |
else Nothing |
483 |
|
484 |
-- * Allocation functions |
485 |
|
486 |
-- | Build failure stats out of a list of failures |
487 |
collapseFailures :: [FailMode] -> FailStats |
488 |
collapseFailures flst = |
489 |
map (\k -> (k, length $ filter ((==) k) flst)) [minBound..maxBound] |
490 |
|
491 |
-- | Update current Allocation solution and failure stats with new |
492 |
-- elements |
493 |
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution |
494 |
concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols) |
495 |
|
496 |
concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) = |
497 |
let nscore = compCV nl |
498 |
-- Choose the old or new solution, based on the cluster score |
499 |
nsols = case osols of |
500 |
[] -> [(nscore, ns)] |
501 |
(oscore, _):[] -> |
502 |
if oscore < nscore |
503 |
then osols |
504 |
else [(nscore, ns)] |
505 |
-- FIXME: here we simply concat to lists with more |
506 |
-- than one element; we should instead abort, since |
507 |
-- this is not a valid usage of this function |
508 |
xs -> (nscore, ns):xs |
509 |
nsuc = cntok + 1 |
510 |
-- Note: we force evaluation of nsols here in order to keep the |
511 |
-- memory profile low - we know that we will need nsols for sure |
512 |
-- in the next cycle, so we force evaluation of nsols, since the |
513 |
-- foldl' in the caller will only evaluate the tuple, but not the |
514 |
-- elements of the tuple |
515 |
in nsols `seq` nsuc `seq` (flst, nsuc, nsols) |
516 |
|
517 |
-- | Try to allocate an instance on the cluster. |
518 |
tryAlloc :: (Monad m) => |
519 |
Node.List -- ^ The node list |
520 |
-> Instance.List -- ^ The instance list |
521 |
-> Instance.Instance -- ^ The instance to allocate |
522 |
-> Int -- ^ Required number of nodes |
523 |
-> m AllocSolution -- ^ Possible solution list |
524 |
tryAlloc nl _ inst 2 = |
525 |
let all_nodes = getOnline nl |
526 |
all_pairs = liftM2 (,) all_nodes all_nodes |
527 |
ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs |
528 |
sols = foldl' (\cstate (p, s) -> |
529 |
concatAllocs cstate $ allocateOnPair nl inst p s |
530 |
) ([], 0, []) ok_pairs |
531 |
in return sols |
532 |
|
533 |
tryAlloc nl _ inst 1 = |
534 |
let all_nodes = getOnline nl |
535 |
sols = foldl' (\cstate -> |
536 |
concatAllocs cstate . allocateOnSingle nl inst |
537 |
) ([], 0, []) all_nodes |
538 |
in return sols |
539 |
|
540 |
tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \ |
541 |
\destinations required (" ++ show reqn ++ |
542 |
"), only two supported" |
543 |
|
544 |
-- | Try to allocate an instance on the cluster. |
545 |
tryReloc :: (Monad m) => |
546 |
Node.List -- ^ The node list |
547 |
-> Instance.List -- ^ The instance list |
548 |
-> Idx -- ^ The index of the instance to move |
549 |
-> Int -- ^ The number of nodes required |
550 |
-> [Ndx] -- ^ Nodes which should not be used |
551 |
-> m AllocSolution -- ^ Solution list |
552 |
tryReloc nl il xid 1 ex_idx = |
553 |
let all_nodes = getOnline nl |
554 |
inst = Container.find xid il |
555 |
ex_idx' = Instance.pNode inst:ex_idx |
556 |
valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes |
557 |
valid_idxes = map Node.idx valid_nodes |
558 |
sols1 = foldl' (\cstate x -> |
559 |
let em = do |
560 |
(mnl, i, _, _) <- |
561 |
applyMove nl inst (ReplaceSecondary x) |
562 |
return (mnl, i, [Container.find x mnl]) |
563 |
in concatAllocs cstate em |
564 |
) ([], 0, []) valid_idxes |
565 |
in return sols1 |
566 |
|
567 |
tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \ |
568 |
\destinations required (" ++ show reqn ++ |
569 |
"), only one supported" |
570 |
|
571 |
-- | Try to allocate an instance on the cluster. |
572 |
tryEvac :: (Monad m) => |
573 |
Node.List -- ^ The node list |
574 |
-> Instance.List -- ^ The instance list |
575 |
-> [Ndx] -- ^ Nodes to be evacuated |
576 |
-> m AllocSolution -- ^ Solution list |
577 |
tryEvac nl il ex_ndx = |
578 |
let ex_nodes = map (flip Container.find nl) ex_ndx |
579 |
all_insts = nub . concat . map Node.sList $ ex_nodes |
580 |
in do |
581 |
(_, sol) <- foldM (\(nl', (_, _, rsols)) idx -> do |
582 |
-- FIXME: hardcoded one node here |
583 |
(fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx |
584 |
case aes of |
585 |
csol@(_, (nl'', _, _)):_ -> |
586 |
return (nl'', (fm, cs, csol:rsols)) |
587 |
_ -> fail $ "Can't evacuate instance " ++ |
588 |
show idx |
589 |
) (nl, ([], 0, [])) all_insts |
590 |
return sol |
591 |
|
592 |
-- * Formatting functions |
593 |
|
594 |
-- | Given the original and final nodes, computes the relocation description. |
595 |
computeMoves :: Instance.Instance -- ^ The instance to be moved |
596 |
-> String -- ^ The instance name |
597 |
-> IMove -- ^ The move being performed |
598 |
-> String -- ^ New primary |
599 |
-> String -- ^ New secondary |
600 |
-> (String, [String]) |
601 |
-- ^ Tuple of moves and commands list; moves is containing |
602 |
-- either @/f/@ for failover or @/r:name/@ for replace |
603 |
-- secondary, while the command list holds gnt-instance |
604 |
-- commands (without that prefix), e.g \"@failover instance1@\" |
605 |
computeMoves i inam mv c d = |
606 |
case mv of |
607 |
Failover -> ("f", [mig]) |
608 |
FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d]) |
609 |
ReplaceSecondary _ -> (printf "r:%s" d, [rep d]) |
610 |
ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig]) |
611 |
ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig]) |
612 |
where morf = if Instance.running i then "migrate" else "failover" |
613 |
mig = printf "%s -f %s" morf inam::String |
614 |
rep n = printf "replace-disks -n %s %s" n inam |
615 |
|
616 |
-- | Converts a placement to string format. |
617 |
printSolutionLine :: Node.List -- ^ The node list |
618 |
-> Instance.List -- ^ The instance list |
619 |
-> Int -- ^ Maximum node name length |
620 |
-> Int -- ^ Maximum instance name length |
621 |
-> Placement -- ^ The current placement |
622 |
-> Int -- ^ The index of the placement in |
623 |
-- the solution |
624 |
-> (String, [String]) |
625 |
printSolutionLine nl il nmlen imlen plc pos = |
626 |
let |
627 |
pmlen = (2*nmlen + 1) |
628 |
(i, p, s, mv, c) = plc |
629 |
inst = Container.find i il |
630 |
inam = Instance.name inst |
631 |
npri = Container.nameOf nl p |
632 |
nsec = Container.nameOf nl s |
633 |
opri = Container.nameOf nl $ Instance.pNode inst |
634 |
osec = Container.nameOf nl $ Instance.sNode inst |
635 |
(moves, cmds) = computeMoves inst inam mv npri nsec |
636 |
ostr = printf "%s:%s" opri osec::String |
637 |
nstr = printf "%s:%s" npri nsec::String |
638 |
in |
639 |
(printf " %3d. %-*s %-*s => %-*s %.8f a=%s" |
640 |
pos imlen inam pmlen ostr |
641 |
pmlen nstr c moves, |
642 |
cmds) |
643 |
|
644 |
-- | Return the instance and involved nodes in an instance move. |
645 |
involvedNodes :: Instance.List -> Placement -> [Ndx] |
646 |
involvedNodes il plc = |
647 |
let (i, np, ns, _, _) = plc |
648 |
inst = Container.find i il |
649 |
op = Instance.pNode inst |
650 |
os = Instance.sNode inst |
651 |
in nub [np, ns, op, os] |
652 |
|
653 |
-- | Inner function for splitJobs, that either appends the next job to |
654 |
-- the current jobset, or starts a new jobset. |
655 |
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx]) |
656 |
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx) |
657 |
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _) |
658 |
| null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf) |
659 |
| otherwise = ([n]:cjs, ndx) |
660 |
|
661 |
-- | Break a list of moves into independent groups. Note that this |
662 |
-- will reverse the order of jobs. |
663 |
splitJobs :: [MoveJob] -> [JobSet] |
664 |
splitJobs = fst . foldl mergeJobs ([], []) |
665 |
|
666 |
-- | Given a list of commands, prefix them with @gnt-instance@ and |
667 |
-- also beautify the display a little. |
668 |
formatJob :: Int -> Int -> (Int, MoveJob) -> [String] |
669 |
formatJob jsn jsl (sn, (_, _, _, cmds)) = |
670 |
let out = |
671 |
printf " echo job %d/%d" jsn sn: |
672 |
printf " check": |
673 |
map (" gnt-instance " ++) cmds |
674 |
in if sn == 1 |
675 |
then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out |
676 |
else out |
677 |
|
678 |
-- | Given a list of commands, prefix them with @gnt-instance@ and |
679 |
-- also beautify the display a little. |
680 |
formatCmds :: [JobSet] -> String |
681 |
formatCmds = |
682 |
unlines . |
683 |
concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js)) |
684 |
(zip [1..] js)) . |
685 |
zip [1..] |
686 |
|
687 |
-- | Converts a solution to string format. |
688 |
printSolution :: Node.List |
689 |
-> Instance.List |
690 |
-> [Placement] |
691 |
-> ([String], [[String]]) |
692 |
printSolution nl il sol = |
693 |
let |
694 |
nmlen = Container.maxNameLen nl |
695 |
imlen = Container.maxNameLen il |
696 |
in |
697 |
unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..] |
698 |
|
699 |
-- | Print the node list. |
700 |
printNodes :: Node.List -> [String] -> String |
701 |
printNodes nl fs = |
702 |
let fields = if null fs |
703 |
then Node.defaultFields |
704 |
else fs |
705 |
snl = sortBy (compare `on` Node.idx) (Container.elems nl) |
706 |
(header, isnum) = unzip $ map Node.showHeader fields |
707 |
in unlines . map ((:) ' ' . intercalate " ") $ |
708 |
formatTable (header:map (Node.list fields) snl) isnum |
709 |
|
710 |
-- | Print the instance list. |
711 |
printInsts :: Node.List -> Instance.List -> String |
712 |
printInsts nl il = |
713 |
let sil = sortBy (compare `on` Instance.idx) (Container.elems il) |
714 |
helper inst = [ if Instance.running inst then "R" else " " |
715 |
, Instance.name inst |
716 |
, Container.nameOf nl (Instance.pNode inst) |
717 |
, (let sdx = Instance.sNode inst |
718 |
in if sdx == Node.noSecondary |
719 |
then "" |
720 |
else Container.nameOf nl sdx) |
721 |
, printf "%3d" $ Instance.vcpus inst |
722 |
, printf "%5d" $ Instance.mem inst |
723 |
, printf "%5d" $ Instance.dsk inst `div` 1024 |
724 |
, printf "%5.3f" lC |
725 |
, printf "%5.3f" lM |
726 |
, printf "%5.3f" lD |
727 |
, printf "%5.3f" lN |
728 |
] |
729 |
where DynUtil lC lM lD lN = Instance.util inst |
730 |
header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem" |
731 |
, "dsk", "lCpu", "lMem", "lDsk", "lNet" ] |
732 |
isnum = False:False:False:False:repeat True |
733 |
in unlines . map ((:) ' ' . intercalate " ") $ |
734 |
formatTable (header:map helper sil) isnum |
735 |
|
736 |
-- | Shows statistics for a given node list. |
737 |
printStats :: Node.List -> String |
738 |
printStats nl = |
739 |
let dcvs = compDetailedCV nl |
740 |
hd = zip (detailedCVNames ++ repeat "unknown") dcvs |
741 |
formatted = map (\(header, val) -> |
742 |
printf "%s=%.8f" header val::String) hd |
743 |
in intercalate ", " formatted |
744 |
|
745 |
-- | Convert a placement into a list of OpCodes (basically a job). |
746 |
iMoveToJob :: String -> Node.List -> Instance.List |
747 |
-> Idx -> IMove -> [OpCodes.OpCode] |
748 |
iMoveToJob csf nl il idx move = |
749 |
let inst = Container.find idx il |
750 |
iname = Instance.name inst ++ csf |
751 |
lookNode n = Just (Container.nameOf nl n ++ csf) |
752 |
opF = if Instance.running inst |
753 |
then OpCodes.OpMigrateInstance iname True False |
754 |
else OpCodes.OpFailoverInstance iname False |
755 |
opR n = OpCodes.OpReplaceDisks iname (lookNode n) |
756 |
OpCodes.ReplaceNewSecondary [] Nothing |
757 |
in case move of |
758 |
Failover -> [ opF ] |
759 |
ReplacePrimary np -> [ opF, opR np, opF ] |
760 |
ReplaceSecondary ns -> [ opR ns ] |
761 |
ReplaceAndFailover np -> [ opR np, opF ] |
762 |
FailoverAndReplace ns -> [ opF, opR ns ] |