Statistics
| Branch: | Tag: | Revision:

root / Ganeti / HTools / Cluster.hs @ 16103319

History | View | Annotate | Download (20.7 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the "Main" module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
    (
31
     -- * Types
32
      Placement
33
    , AllocSolution
34
    , Table(..)
35
    , Score
36
    , IMove(..)
37
    , CStats(..)
38
    -- * Generic functions
39
    , totalResources
40
    -- * First phase functions
41
    , computeBadItems
42
    -- * Second phase functions
43
    , printSolution
44
    , printSolutionLine
45
    , formatCmds
46
    , printNodes
47
    -- * Balacing functions
48
    , applyMove
49
    , checkMove
50
    , compCV
51
    , printStats
52
    -- * IAllocator functions
53
    , allocateOnSingle
54
    , allocateOnPair
55
    , tryAlloc
56
    , tryReloc
57
    ) where
58

    
59
import Data.List
60
import Text.Printf (printf)
61
import Data.Function
62
import Control.Monad
63

    
64
import qualified Ganeti.HTools.Container as Container
65
import qualified Ganeti.HTools.Instance as Instance
66
import qualified Ganeti.HTools.Node as Node
67
import Ganeti.HTools.Types
68
import Ganeti.HTools.Utils
69

    
70
-- * Types
71

    
72
-- | A separate name for the cluster score type.
73
type Score = Double
74

    
75
-- | The description of an instance placement.
76
type Placement = (Idx, Ndx, Ndx, Score)
77

    
78
-- | Allocation\/relocation solution.
79
type AllocSolution = [(OpResult Node.List, Instance.Instance, [Node.Node])]
80

    
81
-- | An instance move definition
82
data IMove = Failover                -- ^ Failover the instance (f)
83
           | ReplacePrimary Ndx      -- ^ Replace primary (f, r:np, f)
84
           | ReplaceSecondary Ndx    -- ^ Replace secondary (r:ns)
85
           | ReplaceAndFailover Ndx  -- ^ Replace secondary, failover (r:np, f)
86
           | FailoverAndReplace Ndx  -- ^ Failover, replace secondary (f, r:ns)
87
             deriving (Show)
88

    
89
-- | The complete state for the balancing solution
90
data Table = Table Node.List Instance.List Score [Placement]
91
             deriving (Show)
92

    
93
data CStats = CStats { cs_fmem :: Int -- ^ Cluster free mem
94
                     , cs_fdsk :: Int -- ^ Cluster free disk
95
                     , cs_amem :: Int -- ^ Cluster allocatable mem
96
                     , cs_adsk :: Int -- ^ Cluster allocatable disk
97
                     , cs_acpu :: Int -- ^ Cluster allocatable cpus
98
                     , cs_mmem :: Int -- ^ Max node allocatable mem
99
                     , cs_mdsk :: Int -- ^ Max node allocatable disk
100
                     , cs_mcpu :: Int -- ^ Max node allocatable cpu
101
                     }
102

    
103
-- * Utility functions
104

    
105
-- | Verifies the N+1 status and return the affected nodes.
106
verifyN1 :: [Node.Node] -> [Node.Node]
107
verifyN1 = filter Node.failN1
108

    
109
{-| Computes the pair of bad nodes and instances.
110

    
111
The bad node list is computed via a simple 'verifyN1' check, and the
112
bad instance list is the list of primary and secondary instances of
113
those nodes.
114

    
115
-}
116
computeBadItems :: Node.List -> Instance.List ->
117
                   ([Node.Node], [Instance.Instance])
118
computeBadItems nl il =
119
  let bad_nodes = verifyN1 $ getOnline nl
120
      bad_instances = map (\idx -> Container.find idx il) .
121
                      sort . nub $
122
                      concatMap (\ n -> Node.slist n ++ Node.plist n) bad_nodes
123
  in
124
    (bad_nodes, bad_instances)
125

    
126
emptyCStats :: CStats
127
emptyCStats = CStats { cs_fmem = 0
128
                     , cs_fdsk = 0
129
                     , cs_amem = 0
130
                     , cs_adsk = 0
131
                     , cs_acpu = 0
132
                     , cs_mmem = 0
133
                     , cs_mdsk = 0
134
                     , cs_mcpu = 0
135
                     }
136

    
137
updateCStats :: CStats -> Node.Node -> CStats
138
updateCStats cs node =
139
    let CStats { cs_fmem = x_fmem, cs_fdsk = x_fdsk,
140
                 cs_amem = x_amem, cs_acpu = x_acpu, cs_adsk = x_adsk,
141
                 cs_mmem = x_mmem, cs_mdsk = x_mdsk, cs_mcpu = x_mcpu }
142
            = cs
143
        inc_amem = Node.f_mem node - Node.r_mem node
144
        inc_amem' = if inc_amem > 0 then inc_amem else 0
145
        inc_adsk = Node.availDisk node
146
    in CStats { cs_fmem = x_fmem + Node.f_mem node
147
              , cs_fdsk = x_fdsk + Node.f_dsk node
148
              , cs_amem = x_amem + inc_amem'
149
              , cs_adsk = x_adsk + inc_adsk
150
              , cs_acpu = x_acpu
151
              , cs_mmem = max x_mmem inc_amem'
152
              , cs_mdsk = max x_mdsk inc_adsk
153
              , cs_mcpu = x_mcpu
154
              }
155

    
156
-- | Compute the total free disk and memory in the cluster.
157
totalResources :: Node.List -> CStats
158
totalResources = foldl' updateCStats emptyCStats . Container.elems
159

    
160
-- | Compute the mem and disk covariance.
161
compDetailedCV :: Node.List -> (Double, Double, Double, Double, Double, Double)
162
compDetailedCV nl =
163
    let
164
        all_nodes = Container.elems nl
165
        (offline, nodes) = partition Node.offline all_nodes
166
        mem_l = map Node.p_mem nodes
167
        dsk_l = map Node.p_dsk nodes
168
        mem_cv = varianceCoeff mem_l
169
        dsk_cv = varianceCoeff dsk_l
170
        n1_l = length $ filter Node.failN1 nodes
171
        n1_score = fromIntegral n1_l /
172
                   fromIntegral (length nodes)::Double
173
        res_l = map Node.p_rem nodes
174
        res_cv = varianceCoeff res_l
175
        offline_inst = sum . map (\n -> (length . Node.plist $ n) +
176
                                        (length . Node.slist $ n)) $ offline
177
        online_inst = sum . map (\n -> (length . Node.plist $ n) +
178
                                       (length . Node.slist $ n)) $ nodes
179
        off_score = if offline_inst == 0
180
                    then 0::Double
181
                    else fromIntegral offline_inst /
182
                         fromIntegral (offline_inst + online_inst)::Double
183
        cpu_l = map Node.p_cpu nodes
184
        cpu_cv = varianceCoeff cpu_l
185
    in (mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv)
186

    
187
-- | Compute the /total/ variance.
188
compCV :: Node.List -> Double
189
compCV nl =
190
    let (mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv) =
191
            compDetailedCV nl
192
    in mem_cv + dsk_cv + n1_score + res_cv + off_score + cpu_cv
193

    
194
-- | Compute online nodes from a Node.List
195
getOnline :: Node.List -> [Node.Node]
196
getOnline = filter (not . Node.offline) . Container.elems
197

    
198
-- * hbal functions
199

    
200
-- | Compute best table. Note that the ordering of the arguments is important.
201
compareTables :: Table -> Table -> Table
202
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
203
    if a_cv > b_cv then b else a
204

    
205
-- | Applies an instance move to a given node list and instance.
206
applyMove :: Node.List -> Instance.Instance
207
          -> IMove -> (OpResult Node.List, Instance.Instance, Ndx, Ndx)
208
-- Failover (f)
209
applyMove nl inst Failover =
210
    let old_pdx = Instance.pnode inst
211
        old_sdx = Instance.snode inst
212
        old_p = Container.find old_pdx nl
213
        old_s = Container.find old_sdx nl
214
        int_p = Node.removePri old_p inst
215
        int_s = Node.removeSec old_s inst
216
        new_nl = do -- Maybe monad
217
          new_p <- Node.addPri int_s inst
218
          new_s <- Node.addSec int_p inst old_sdx
219
          return $ Container.addTwo old_pdx new_s old_sdx new_p nl
220
    in (new_nl, Instance.setBoth inst old_sdx old_pdx, old_sdx, old_pdx)
221

    
222
-- Replace the primary (f:, r:np, f)
223
applyMove nl inst (ReplacePrimary new_pdx) =
224
    let old_pdx = Instance.pnode inst
225
        old_sdx = Instance.snode inst
226
        old_p = Container.find old_pdx nl
227
        old_s = Container.find old_sdx nl
228
        tgt_n = Container.find new_pdx nl
229
        int_p = Node.removePri old_p inst
230
        int_s = Node.removeSec old_s inst
231
        new_nl = do -- Maybe monad
232
          -- check that the current secondary can host the instance
233
          -- during the migration
234
          tmp_s <- Node.addPri int_s inst
235
          let tmp_s' = Node.removePri tmp_s inst
236
          new_p <- Node.addPri tgt_n inst
237
          new_s <- Node.addSec tmp_s' inst new_pdx
238
          return . Container.add new_pdx new_p $
239
                 Container.addTwo old_pdx int_p old_sdx new_s nl
240
    in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx)
241

    
242
-- Replace the secondary (r:ns)
243
applyMove nl inst (ReplaceSecondary new_sdx) =
244
    let old_pdx = Instance.pnode inst
245
        old_sdx = Instance.snode inst
246
        old_s = Container.find old_sdx nl
247
        tgt_n = Container.find new_sdx nl
248
        int_s = Node.removeSec old_s inst
249
        new_nl = Node.addSec tgt_n inst old_pdx >>=
250
                 \new_s -> return $ Container.addTwo new_sdx
251
                           new_s old_sdx int_s nl
252
    in (new_nl, Instance.setSec inst new_sdx, old_pdx, new_sdx)
253

    
254
-- Replace the secondary and failover (r:np, f)
255
applyMove nl inst (ReplaceAndFailover new_pdx) =
256
    let old_pdx = Instance.pnode inst
257
        old_sdx = Instance.snode inst
258
        old_p = Container.find old_pdx nl
259
        old_s = Container.find old_sdx nl
260
        tgt_n = Container.find new_pdx nl
261
        int_p = Node.removePri old_p inst
262
        int_s = Node.removeSec old_s inst
263
        new_nl = do -- Maybe monad
264
          new_p <- Node.addPri tgt_n inst
265
          new_s <- Node.addSec int_p inst new_pdx
266
          return . Container.add new_pdx new_p $
267
                 Container.addTwo old_pdx new_s old_sdx int_s nl
268
    in (new_nl, Instance.setBoth inst new_pdx old_pdx, new_pdx, old_pdx)
269

    
270
-- Failver and replace the secondary (f, r:ns)
271
applyMove nl inst (FailoverAndReplace new_sdx) =
272
    let old_pdx = Instance.pnode inst
273
        old_sdx = Instance.snode inst
274
        old_p = Container.find old_pdx nl
275
        old_s = Container.find old_sdx nl
276
        tgt_n = Container.find new_sdx nl
277
        int_p = Node.removePri old_p inst
278
        int_s = Node.removeSec old_s inst
279
        new_nl = do -- Maybe monad
280
          new_p <- Node.addPri int_s inst
281
          new_s <- Node.addSec tgt_n inst old_sdx
282
          return . Container.add new_sdx new_s $
283
                 Container.addTwo old_sdx new_p old_pdx int_p nl
284
    in (new_nl, Instance.setBoth inst old_sdx new_sdx, old_sdx, new_sdx)
285

    
286
-- | Tries to allocate an instance on one given node.
287
allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
288
                 -> (OpResult Node.List, Instance.Instance)
289
allocateOnSingle nl inst p =
290
    let new_pdx = Node.idx p
291
        new_nl = Node.addPri p inst >>= \new_p ->
292
                 return $ Container.add new_pdx new_p nl
293
    in (new_nl, Instance.setBoth inst new_pdx Node.noSecondary)
294

    
295
-- | Tries to allocate an instance on a given pair of nodes.
296
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
297
               -> (OpResult Node.List, Instance.Instance)
298
allocateOnPair nl inst tgt_p tgt_s =
299
    let new_pdx = Node.idx tgt_p
300
        new_sdx = Node.idx tgt_s
301
        new_nl = do -- Maybe monad
302
          new_p <- Node.addPri tgt_p inst
303
          new_s <- Node.addSec tgt_s inst new_pdx
304
          return $ Container.addTwo new_pdx new_p new_sdx new_s nl
305
    in (new_nl, Instance.setBoth inst new_pdx new_sdx)
306

    
307
-- | Tries to perform an instance move and returns the best table
308
-- between the original one and the new one.
309
checkSingleStep :: Table -- ^ The original table
310
                -> Instance.Instance -- ^ The instance to move
311
                -> Table -- ^ The current best table
312
                -> IMove -- ^ The move to apply
313
                -> Table -- ^ The final best table
314
checkSingleStep ini_tbl target cur_tbl move =
315
    let
316
        Table ini_nl ini_il _ ini_plc = ini_tbl
317
        (tmp_nl, new_inst, pri_idx, sec_idx) = applyMove ini_nl target move
318
    in
319
      case tmp_nl of
320
        OpFail _ -> cur_tbl
321
        OpGood upd_nl ->
322
            let tgt_idx = Instance.idx target
323
                upd_cvar = compCV upd_nl
324
                upd_il = Container.add tgt_idx new_inst ini_il
325
                upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):ini_plc
326
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
327
            in
328
              compareTables cur_tbl upd_tbl
329

    
330
-- | Given the status of the current secondary as a valid new node
331
-- and the current candidate target node,
332
-- generate the possible moves for a instance.
333
possibleMoves :: Bool -> Ndx -> [IMove]
334
possibleMoves True tdx =
335
    [ReplaceSecondary tdx,
336
     ReplaceAndFailover tdx,
337
     ReplacePrimary tdx,
338
     FailoverAndReplace tdx]
339

    
340
possibleMoves False tdx =
341
    [ReplaceSecondary tdx,
342
     ReplaceAndFailover tdx]
343

    
344
-- | Compute the best move for a given instance.
345
checkInstanceMove :: [Ndx]             -- Allowed target node indices
346
                  -> Table             -- Original table
347
                  -> Instance.Instance -- Instance to move
348
                  -> Table             -- Best new table for this instance
349
checkInstanceMove nodes_idx ini_tbl target =
350
    let
351
        opdx = Instance.pnode target
352
        osdx = Instance.snode target
353
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
354
        use_secondary = elem osdx nodes_idx
355
        aft_failover = if use_secondary -- if allowed to failover
356
                       then checkSingleStep ini_tbl target ini_tbl Failover
357
                       else ini_tbl
358
        all_moves = concatMap (possibleMoves use_secondary) nodes
359
    in
360
      -- iterate over the possible nodes for this instance
361
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
362

    
363
-- | Compute the best next move.
364
checkMove :: [Ndx]               -- ^ Allowed target node indices
365
          -> Table               -- ^ The current solution
366
          -> [Instance.Instance] -- ^ List of instances still to move
367
          -> Table               -- ^ The new solution
368
checkMove nodes_idx ini_tbl victims =
369
    let Table _ _ _ ini_plc = ini_tbl
370
        -- iterate over all instances, computing the best move
371
        best_tbl =
372
            foldl'
373
            (\ step_tbl elem ->
374
                 if Instance.snode elem == Node.noSecondary then step_tbl
375
                    else compareTables step_tbl $
376
                         checkInstanceMove nodes_idx ini_tbl elem)
377
            ini_tbl victims
378
        Table _ _ _ best_plc = best_tbl
379
    in
380
      if length best_plc == length ini_plc then -- no advancement
381
          ini_tbl
382
      else
383
          best_tbl
384

    
385
-- * Alocation functions
386

    
387
-- | Try to allocate an instance on the cluster.
388
tryAlloc :: (Monad m) =>
389
            Node.List         -- ^ The node list
390
         -> Instance.List     -- ^ The instance list
391
         -> Instance.Instance -- ^ The instance to allocate
392
         -> Int               -- ^ Required number of nodes
393
         -> m AllocSolution   -- ^ Possible solution list
394
tryAlloc nl _ inst 2 =
395
    let all_nodes = getOnline nl
396
        all_pairs = liftM2 (,) all_nodes all_nodes
397
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
398
        sols = map (\(p, s) -> let (mnl, i) = allocateOnPair nl inst p s
399
                               in (mnl, i, [p, s]))
400
               ok_pairs
401
    in return sols
402

    
403
tryAlloc nl _ inst 1 =
404
    let all_nodes = getOnline nl
405
        sols = map (\p -> let (mnl, i) = allocateOnSingle nl inst p
406
                          in (mnl, i, [p]))
407
               all_nodes
408
    in return sols
409

    
410
tryAlloc _ _ _ reqn = fail $ "Unsupported number of alllocation \
411
                             \destinations required (" ++ show reqn ++
412
                                               "), only two supported"
413

    
414
-- | Try to allocate an instance on the cluster.
415
tryReloc :: (Monad m) =>
416
            Node.List       -- ^ The node list
417
         -> Instance.List   -- ^ The instance list
418
         -> Idx             -- ^ The index of the instance to move
419
         -> Int             -- ^ The numver of nodes required
420
         -> [Ndx]           -- ^ Nodes which should not be used
421
         -> m AllocSolution -- ^ Solution list
422
tryReloc nl il xid 1 ex_idx =
423
    let all_nodes = getOnline nl
424
        inst = Container.find xid il
425
        ex_idx' = Instance.pnode inst:ex_idx
426
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
427
        valid_idxes = map Node.idx valid_nodes
428
        sols1 = map (\x -> let (mnl, i, _, _) =
429
                                   applyMove nl inst (ReplaceSecondary x)
430
                           in (mnl, i, [Container.find x nl])
431
                     ) valid_idxes
432
    in return sols1
433

    
434
tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
435
                                \destinations required (" ++ show reqn ++
436
                                                  "), only one supported"
437

    
438
-- * Formatting functions
439

    
440
-- | Given the original and final nodes, computes the relocation description.
441
computeMoves :: String -- ^ The instance name
442
             -> String -- ^ Original primary
443
             -> String -- ^ Original secondary
444
             -> String -- ^ New primary
445
             -> String -- ^ New secondary
446
             -> (String, [String])
447
                -- ^ Tuple of moves and commands list; moves is containing
448
                -- either @/f/@ for failover or @/r:name/@ for replace
449
                -- secondary, while the command list holds gnt-instance
450
                -- commands (without that prefix), e.g \"@failover instance1@\"
451
computeMoves i a b c d
452
    -- same primary
453
    | c == a =
454
        if d == b
455
        then {- Same sec??! -} ("-", [])
456
        else {- Change of secondary -}
457
            (printf "r:%s" d, [rep d])
458
    -- failover and ...
459
    | c == b =
460
        if d == a
461
        then {- that's all -} ("f", [mig])
462
        else (printf "f r:%s" d, [mig, rep d])
463
    -- ... and keep primary as secondary
464
    | d == a =
465
        (printf "r:%s f" c, [rep c, mig])
466
    -- ... keep same secondary
467
    | d == b =
468
        (printf "f r:%s f" c, [mig, rep c, mig])
469
    -- nothing in common -
470
    | otherwise =
471
        (printf "r:%s f r:%s" c d, [rep c, mig, rep d])
472
    where mig = printf "migrate -f %s" i::String
473
          rep n = printf "replace-disks -n %s %s" n i
474

    
475
-- | Converts a placement to string format.
476
printSolutionLine :: Node.List     -- ^ The node list
477
                  -> Instance.List -- ^ The instance list
478
                  -> Int           -- ^ Maximum node name length
479
                  -> Int           -- ^ Maximum instance name length
480
                  -> Placement     -- ^ The current placement
481
                  -> Int           -- ^ The index of the placement in
482
                                   -- the solution
483
                  -> (String, [String])
484
printSolutionLine nl il nmlen imlen plc pos =
485
    let
486
        pmlen = (2*nmlen + 1)
487
        (i, p, s, c) = plc
488
        inst = Container.find i il
489
        inam = Instance.name inst
490
        npri = Container.nameOf nl p
491
        nsec = Container.nameOf nl s
492
        opri = Container.nameOf nl $ Instance.pnode inst
493
        osec = Container.nameOf nl $ Instance.snode inst
494
        (moves, cmds) =  computeMoves inam opri osec npri nsec
495
        ostr = printf "%s:%s" opri osec::String
496
        nstr = printf "%s:%s" npri nsec::String
497
    in
498
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
499
       pos imlen inam pmlen ostr
500
       pmlen nstr c moves,
501
       cmds)
502

    
503
-- | Given a list of commands, prefix them with @gnt-instance@ and
504
-- also beautify the display a little.
505
formatCmds :: [[String]] -> String
506
formatCmds =
507
    unlines .
508
    concatMap (\(a, b) ->
509
               printf "echo step %d" (a::Int):
510
               printf "check":
511
               map ("gnt-instance " ++) b
512
              ) .
513
    zip [1..]
514

    
515
-- | Converts a solution to string format.
516
printSolution :: Node.List
517
              -> Instance.List
518
              -> [Placement]
519
              -> ([String], [[String]])
520
printSolution nl il sol =
521
    let
522
        nmlen = Container.maxNameLen nl
523
        imlen = Container.maxNameLen il
524
    in
525
      unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..]
526

    
527
-- | Print the node list.
528
printNodes :: Node.List -> String
529
printNodes nl =
530
    let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
531
        m_name = maximum . map (length . Node.name) $ snl
532
        helper = Node.list m_name
533
        header = printf
534
                 "%2s %-*s %5s %5s %5s %5s %5s %5s %5s %5s %4s %4s \
535
                 \%3s %3s %6s %6s %5s"
536
                 " F" m_name "Name"
537
                 "t_mem" "n_mem" "i_mem" "x_mem" "f_mem" "r_mem"
538
                 "t_dsk" "f_dsk" "pcpu" "vcpu"
539
                 "pri" "sec" "p_fmem" "p_fdsk" "r_cpu"::String
540
    in unlines (header:map helper snl)
541

    
542
-- | Shows statistics for a given node list.
543
printStats :: Node.List -> String
544
printStats nl =
545
    let (mem_cv, dsk_cv, n1_score, res_cv, off_score, cpu_cv) =
546
            compDetailedCV nl
547
    in printf "f_mem=%.8f, r_mem=%.8f, f_dsk=%.8f, n1=%.3f, \
548
              \uf=%.3f, r_cpu=%.3f"
549
       mem_cv res_cv dsk_cv n1_score off_score cpu_cv