Statistics
| Branch: | Tag: | Revision:

root / Ganeti / HTools / Cluster.hs @ 3ce8009a

History | View | Annotate | Download (33.2 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the "Main" module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
    (
31
     -- * Types
32
      AllocSolution
33
    , Table(..)
34
    , CStats(..)
35
    , AllocStats
36
    -- * Generic functions
37
    , totalResources
38
    , computeAllocationDelta
39
    -- * First phase functions
40
    , computeBadItems
41
    -- * Second phase functions
42
    , printSolution
43
    , printSolutionLine
44
    , formatCmds
45
    , involvedNodes
46
    , splitJobs
47
    -- * Display functions
48
    , printNodes
49
    , printInsts
50
    -- * Balacing functions
51
    , checkMove
52
    , doNextBalance
53
    , tryBalance
54
    , compCV
55
    , printStats
56
    , iMoveToJob
57
    -- * IAllocator functions
58
    , tryAlloc
59
    , tryReloc
60
    , tryEvac
61
    , collapseFailures
62
    -- * Allocation functions
63
    , iterateAlloc
64
    , tieredAlloc
65
    ) where
66

    
67
import Data.List
68
import Data.Ord (comparing)
69
import Text.Printf (printf)
70
import Control.Monad
71

    
72
import qualified Ganeti.HTools.Container as Container
73
import qualified Ganeti.HTools.Instance as Instance
74
import qualified Ganeti.HTools.Node as Node
75
import Ganeti.HTools.Types
76
import Ganeti.HTools.Utils
77
import qualified Ganeti.OpCodes as OpCodes
78

    
79
-- * Types
80

    
81
-- | Allocation\/relocation solution.
82
type AllocSolution = ([FailMode], Int, [(Score, Node.AllocElement)])
83

    
84
-- | The complete state for the balancing solution
85
data Table = Table Node.List Instance.List Score [Placement]
86
             deriving (Show)
87

    
88
data CStats = CStats { csFmem :: Int    -- ^ Cluster free mem
89
                     , csFdsk :: Int    -- ^ Cluster free disk
90
                     , csAmem :: Int    -- ^ Cluster allocatable mem
91
                     , csAdsk :: Int    -- ^ Cluster allocatable disk
92
                     , csAcpu :: Int    -- ^ Cluster allocatable cpus
93
                     , csMmem :: Int    -- ^ Max node allocatable mem
94
                     , csMdsk :: Int    -- ^ Max node allocatable disk
95
                     , csMcpu :: Int    -- ^ Max node allocatable cpu
96
                     , csImem :: Int    -- ^ Instance used mem
97
                     , csIdsk :: Int    -- ^ Instance used disk
98
                     , csIcpu :: Int    -- ^ Instance used cpu
99
                     , csTmem :: Double -- ^ Cluster total mem
100
                     , csTdsk :: Double -- ^ Cluster total disk
101
                     , csTcpu :: Double -- ^ Cluster total cpus
102
                     , csVcpu :: Int    -- ^ Cluster virtual cpus (if
103
                                        -- node pCpu has been set,
104
                                        -- otherwise -1)
105
                     , csXmem :: Int    -- ^ Unnacounted for mem
106
                     , csNmem :: Int    -- ^ Node own memory
107
                     , csScore :: Score -- ^ The cluster score
108
                     , csNinst :: Int   -- ^ The total number of instances
109
                     }
110
            deriving (Show)
111

    
112
-- | Currently used, possibly to allocate, unallocable
113
type AllocStats = (RSpec, RSpec, RSpec)
114

    
115
-- * Utility functions
116

    
117
-- | Verifies the N+1 status and return the affected nodes.
118
verifyN1 :: [Node.Node] -> [Node.Node]
119
verifyN1 = filter Node.failN1
120

    
121
{-| Computes the pair of bad nodes and instances.
122

    
123
The bad node list is computed via a simple 'verifyN1' check, and the
124
bad instance list is the list of primary and secondary instances of
125
those nodes.
126

    
127
-}
128
computeBadItems :: Node.List -> Instance.List ->
129
                   ([Node.Node], [Instance.Instance])
130
computeBadItems nl il =
131
  let bad_nodes = verifyN1 $ getOnline nl
132
      bad_instances = map (`Container.find` il) .
133
                      sort . nub $
134
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
135
  in
136
    (bad_nodes, bad_instances)
137

    
138
-- | Zero-initializer for the CStats type
139
emptyCStats :: CStats
140
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
141

    
142
-- | Update stats with data from a new node
143
updateCStats :: CStats -> Node.Node -> CStats
144
updateCStats cs node =
145
    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
146
                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
147
                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
148
                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
149
                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
150
                 csVcpu = x_vcpu,
151
                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
152
               }
153
            = cs
154
        inc_amem = Node.fMem node - Node.rMem node
155
        inc_amem' = if inc_amem > 0 then inc_amem else 0
156
        inc_adsk = Node.availDisk node
157
        inc_imem = truncate (Node.tMem node) - Node.nMem node
158
                   - Node.xMem node - Node.fMem node
159
        inc_icpu = Node.uCpu node
160
        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
161
        inc_vcpu = Node.hiCpu node
162

    
163
    in cs { csFmem = x_fmem + Node.fMem node
164
          , csFdsk = x_fdsk + Node.fDsk node
165
          , csAmem = x_amem + inc_amem'
166
          , csAdsk = x_adsk + inc_adsk
167
          , csAcpu = x_acpu
168
          , csMmem = max x_mmem inc_amem'
169
          , csMdsk = max x_mdsk inc_adsk
170
          , csMcpu = x_mcpu
171
          , csImem = x_imem + inc_imem
172
          , csIdsk = x_idsk + inc_idsk
173
          , csIcpu = x_icpu + inc_icpu
174
          , csTmem = x_tmem + Node.tMem node
175
          , csTdsk = x_tdsk + Node.tDsk node
176
          , csTcpu = x_tcpu + Node.tCpu node
177
          , csVcpu = x_vcpu + inc_vcpu
178
          , csXmem = x_xmem + Node.xMem node
179
          , csNmem = x_nmem + Node.nMem node
180
          , csNinst = x_ninst + length (Node.pList node)
181
          }
182

    
183
-- | Compute the total free disk and memory in the cluster.
184
totalResources :: Node.List -> CStats
185
totalResources nl =
186
    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
187
    in cs { csScore = compCV nl }
188

    
189
-- | Compute the delta between two cluster state.
190
--
191
-- This is used when doing allocations, to understand better the
192
-- available cluster resources. The return value is a triple of the
193
-- current used values, the delta that was still allocated, and what
194
-- was left unallocated.
195
computeAllocationDelta :: CStats -> CStats -> AllocStats
196
computeAllocationDelta cini cfin =
197
    let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
198
        CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
199
                csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
200
        rini = RSpec i_icpu i_imem i_idsk
201
        rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
202
        un_cpu = v_cpu - f_icpu
203
        runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
204
    in (rini, rfin, runa)
205

    
206
-- | The names of the individual elements in the CV list
207
detailedCVNames :: [String]
208
detailedCVNames = [ "free_mem_cv"
209
                  , "free_disk_cv"
210
                  , "n1_cnt"
211
                  , "reserved_mem_cv"
212
                  , "offline_all_cnt"
213
                  , "offline_pri_cnt"
214
                  , "vcpu_ratio_cv"
215
                  , "cpu_load_cv"
216
                  , "mem_load_cv"
217
                  , "disk_load_cv"
218
                  , "net_load_cv"
219
                  , "pri_tags_score"
220
                  ]
221

    
222
-- | Compute the mem and disk covariance.
223
compDetailedCV :: Node.List -> [Double]
224
compDetailedCV nl =
225
    let
226
        all_nodes = Container.elems nl
227
        (offline, nodes) = partition Node.offline all_nodes
228
        mem_l = map Node.pMem nodes
229
        dsk_l = map Node.pDsk nodes
230
        -- metric: memory covariance
231
        mem_cv = varianceCoeff mem_l
232
        -- metric: disk covariance
233
        dsk_cv = varianceCoeff dsk_l
234
        n1_l = length $ filter Node.failN1 nodes
235
        -- metric: count of failN1 nodes
236
        n1_score = fromIntegral n1_l::Double
237
        res_l = map Node.pRem nodes
238
        -- metric: reserved memory covariance
239
        res_cv = varianceCoeff res_l
240
        -- offline instances metrics
241
        offline_ipri = sum . map (length . Node.pList) $ offline
242
        offline_isec = sum . map (length . Node.sList) $ offline
243
        -- metric: count of instances on offline nodes
244
        off_score = fromIntegral (offline_ipri + offline_isec)::Double
245
        -- metric: count of primary instances on offline nodes (this
246
        -- helps with evacuation/failover of primary instances on
247
        -- 2-node clusters with one node offline)
248
        off_pri_score = fromIntegral offline_ipri::Double
249
        cpu_l = map Node.pCpu nodes
250
        -- metric: covariance of vcpu/pcpu ratio
251
        cpu_cv = varianceCoeff cpu_l
252
        -- metrics: covariance of cpu, memory, disk and network load
253
        (c_load, m_load, d_load, n_load) = unzip4 $
254
            map (\n ->
255
                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
256
                         DynUtil c2 m2 d2 n2 = Node.utilPool n
257
                     in (c1/c2, m1/m2, d1/d2, n1/n2)
258
                ) nodes
259
        -- metric: conflicting instance count
260
        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
261
        pri_tags_score = fromIntegral pri_tags_inst::Double
262
    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
263
       , varianceCoeff c_load, varianceCoeff m_load
264
       , varianceCoeff d_load, varianceCoeff n_load
265
       , pri_tags_score ]
266

    
267
-- | Compute the /total/ variance.
268
compCV :: Node.List -> Double
269
compCV = sum . compDetailedCV
270

    
271
-- | Compute online nodes from a Node.List
272
getOnline :: Node.List -> [Node.Node]
273
getOnline = filter (not . Node.offline) . Container.elems
274

    
275
-- * hbal functions
276

    
277
-- | Compute best table. Note that the ordering of the arguments is important.
278
compareTables :: Table -> Table -> Table
279
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
280
    if a_cv > b_cv then b else a
281

    
282
-- | Applies an instance move to a given node list and instance.
283
applyMove :: Node.List -> Instance.Instance
284
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
285
-- Failover (f)
286
applyMove nl inst Failover =
287
    let old_pdx = Instance.pNode inst
288
        old_sdx = Instance.sNode inst
289
        old_p = Container.find old_pdx nl
290
        old_s = Container.find old_sdx nl
291
        int_p = Node.removePri old_p inst
292
        int_s = Node.removeSec old_s inst
293
        new_nl = do -- Maybe monad
294
          new_p <- Node.addPri int_s inst
295
          new_s <- Node.addSec int_p inst old_sdx
296
          let new_inst = Instance.setBoth inst old_sdx old_pdx
297
          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
298
                  new_inst, old_sdx, old_pdx)
299
    in new_nl
300

    
301
-- Replace the primary (f:, r:np, f)
302
applyMove nl inst (ReplacePrimary new_pdx) =
303
    let old_pdx = Instance.pNode inst
304
        old_sdx = Instance.sNode inst
305
        old_p = Container.find old_pdx nl
306
        old_s = Container.find old_sdx nl
307
        tgt_n = Container.find new_pdx nl
308
        int_p = Node.removePri old_p inst
309
        int_s = Node.removeSec old_s inst
310
        new_nl = do -- Maybe monad
311
          -- check that the current secondary can host the instance
312
          -- during the migration
313
          tmp_s <- Node.addPri int_s inst
314
          let tmp_s' = Node.removePri tmp_s inst
315
          new_p <- Node.addPri tgt_n inst
316
          new_s <- Node.addSec tmp_s' inst new_pdx
317
          let new_inst = Instance.setPri inst new_pdx
318
          return (Container.add new_pdx new_p $
319
                  Container.addTwo old_pdx int_p old_sdx new_s nl,
320
                  new_inst, new_pdx, old_sdx)
321
    in new_nl
322

    
323
-- Replace the secondary (r:ns)
324
applyMove nl inst (ReplaceSecondary new_sdx) =
325
    let old_pdx = Instance.pNode inst
326
        old_sdx = Instance.sNode inst
327
        old_s = Container.find old_sdx nl
328
        tgt_n = Container.find new_sdx nl
329
        int_s = Node.removeSec old_s inst
330
        new_inst = Instance.setSec inst new_sdx
331
        new_nl = Node.addSec tgt_n inst old_pdx >>=
332
                 \new_s -> return (Container.addTwo new_sdx
333
                                   new_s old_sdx int_s nl,
334
                                   new_inst, old_pdx, new_sdx)
335
    in new_nl
336

    
337
-- Replace the secondary and failover (r:np, f)
338
applyMove nl inst (ReplaceAndFailover new_pdx) =
339
    let old_pdx = Instance.pNode inst
340
        old_sdx = Instance.sNode inst
341
        old_p = Container.find old_pdx nl
342
        old_s = Container.find old_sdx nl
343
        tgt_n = Container.find new_pdx nl
344
        int_p = Node.removePri old_p inst
345
        int_s = Node.removeSec old_s inst
346
        new_nl = do -- Maybe monad
347
          new_p <- Node.addPri tgt_n inst
348
          new_s <- Node.addSec int_p inst new_pdx
349
          let new_inst = Instance.setBoth inst new_pdx old_pdx
350
          return (Container.add new_pdx new_p $
351
                  Container.addTwo old_pdx new_s old_sdx int_s nl,
352
                  new_inst, new_pdx, old_pdx)
353
    in new_nl
354

    
355
-- Failver and replace the secondary (f, r:ns)
356
applyMove nl inst (FailoverAndReplace new_sdx) =
357
    let old_pdx = Instance.pNode inst
358
        old_sdx = Instance.sNode inst
359
        old_p = Container.find old_pdx nl
360
        old_s = Container.find old_sdx nl
361
        tgt_n = Container.find new_sdx nl
362
        int_p = Node.removePri old_p inst
363
        int_s = Node.removeSec old_s inst
364
        new_nl = do -- Maybe monad
365
          new_p <- Node.addPri int_s inst
366
          new_s <- Node.addSec tgt_n inst old_sdx
367
          let new_inst = Instance.setBoth inst old_sdx new_sdx
368
          return (Container.add new_sdx new_s $
369
                  Container.addTwo old_sdx new_p old_pdx int_p nl,
370
                  new_inst, old_sdx, new_sdx)
371
    in new_nl
372

    
373
-- | Tries to allocate an instance on one given node.
374
allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
375
                 -> OpResult Node.AllocElement
376
allocateOnSingle nl inst p =
377
    let new_pdx = Node.idx p
378
        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
379
        new_nl = Node.addPri p inst >>= \new_p ->
380
                 return (Container.add new_pdx new_p nl, new_inst, [new_p])
381
    in new_nl
382

    
383
-- | Tries to allocate an instance on a given pair of nodes.
384
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
385
               -> OpResult Node.AllocElement
386
allocateOnPair nl inst tgt_p tgt_s =
387
    let new_pdx = Node.idx tgt_p
388
        new_sdx = Node.idx tgt_s
389
        new_nl = do -- Maybe monad
390
          new_p <- Node.addPri tgt_p inst
391
          new_s <- Node.addSec tgt_s inst new_pdx
392
          let new_inst = Instance.setBoth inst new_pdx new_sdx
393
          return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst,
394
                 [new_p, new_s])
395
    in new_nl
396

    
397
-- | Tries to perform an instance move and returns the best table
398
-- between the original one and the new one.
399
checkSingleStep :: Table -- ^ The original table
400
                -> Instance.Instance -- ^ The instance to move
401
                -> Table -- ^ The current best table
402
                -> IMove -- ^ The move to apply
403
                -> Table -- ^ The final best table
404
checkSingleStep ini_tbl target cur_tbl move =
405
    let
406
        Table ini_nl ini_il _ ini_plc = ini_tbl
407
        tmp_resu = applyMove ini_nl target move
408
    in
409
      case tmp_resu of
410
        OpFail _ -> cur_tbl
411
        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
412
            let tgt_idx = Instance.idx target
413
                upd_cvar = compCV upd_nl
414
                upd_il = Container.add tgt_idx new_inst ini_il
415
                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
416
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
417
            in
418
              compareTables cur_tbl upd_tbl
419

    
420
-- | Given the status of the current secondary as a valid new node and
421
-- the current candidate target node, generate the possible moves for
422
-- a instance.
423
possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
424
              -> Ndx       -- ^ Target node candidate
425
              -> [IMove]   -- ^ List of valid result moves
426
possibleMoves True tdx =
427
    [ReplaceSecondary tdx,
428
     ReplaceAndFailover tdx,
429
     ReplacePrimary tdx,
430
     FailoverAndReplace tdx]
431

    
432
possibleMoves False tdx =
433
    [ReplaceSecondary tdx,
434
     ReplaceAndFailover tdx]
435

    
436
-- | Compute the best move for a given instance.
437
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
438
                  -> Bool              -- ^ Whether disk moves are allowed
439
                  -> Table             -- ^ Original table
440
                  -> Instance.Instance -- ^ Instance to move
441
                  -> Table             -- ^ Best new table for this instance
442
checkInstanceMove nodes_idx disk_moves ini_tbl target =
443
    let
444
        opdx = Instance.pNode target
445
        osdx = Instance.sNode target
446
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
447
        use_secondary = elem osdx nodes_idx
448
        aft_failover = if use_secondary -- if allowed to failover
449
                       then checkSingleStep ini_tbl target ini_tbl Failover
450
                       else ini_tbl
451
        all_moves = if disk_moves
452
                    then concatMap (possibleMoves use_secondary) nodes
453
                    else []
454
    in
455
      -- iterate over the possible nodes for this instance
456
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
457

    
458
-- | Compute the best next move.
459
checkMove :: [Ndx]               -- ^ Allowed target node indices
460
          -> Bool                -- ^ Whether disk moves are allowed
461
          -> Table               -- ^ The current solution
462
          -> [Instance.Instance] -- ^ List of instances still to move
463
          -> Table               -- ^ The new solution
464
checkMove nodes_idx disk_moves ini_tbl victims =
465
    let Table _ _ _ ini_plc = ini_tbl
466
        -- iterate over all instances, computing the best move
467
        best_tbl =
468
            foldl'
469
            (\ step_tbl em ->
470
                 compareTables step_tbl $
471
                 checkInstanceMove nodes_idx disk_moves ini_tbl em)
472
            ini_tbl victims
473
        Table _ _ _ best_plc = best_tbl
474
    in if length best_plc == length ini_plc
475
       then ini_tbl -- no advancement
476
       else best_tbl
477

    
478
-- | Check if we are allowed to go deeper in the balancing
479

    
480
doNextBalance :: Table       -- ^ The starting table
481
              -> Int         -- ^ Remaining length
482
              -> Score       -- ^ Score at which to stop
483
              -> Bool -- ^ The resulting table and commands
484
doNextBalance ini_tbl max_rounds min_score =
485
    let Table _ _ ini_cv ini_plc = ini_tbl
486
        ini_plc_len = length ini_plc
487
    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
488

    
489
-- | Run a balance move
490

    
491
tryBalance :: Table       -- ^ The starting table
492
           -> Bool        -- ^ Allow disk moves
493
           -> Bool        -- ^ Only evacuate moves
494
           -> Maybe Table -- ^ The resulting table and commands
495
tryBalance ini_tbl disk_moves evac_mode =
496
    let Table ini_nl ini_il ini_cv _ = ini_tbl
497
        all_inst = Container.elems ini_il
498
        all_inst' = if evac_mode
499
                    then let bad_nodes = map Node.idx . filter Node.offline $
500
                                         Container.elems ini_nl
501
                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
502
                                          Instance.pNode e `elem` bad_nodes)
503
                            all_inst
504
                    else all_inst
505
        reloc_inst = filter Instance.movable all_inst'
506
        node_idx = map Node.idx . filter (not . Node.offline) $
507
                   Container.elems ini_nl
508
        fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
509
        (Table _ _ fin_cv _) = fin_tbl
510
    in
511
      if fin_cv < ini_cv
512
      then Just fin_tbl -- this round made success, return the new table
513
      else Nothing
514

    
515
-- * Allocation functions
516

    
517
-- | Build failure stats out of a list of failures
518
collapseFailures :: [FailMode] -> FailStats
519
collapseFailures flst =
520
    map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
521

    
522
-- | Update current Allocation solution and failure stats with new
523
-- elements
524
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
525
concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols)
526

    
527
concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) =
528
    let nscore = compCV nl
529
        -- Choose the old or new solution, based on the cluster score
530
        nsols = case osols of
531
                  [] -> [(nscore, ns)]
532
                  (oscore, _):[] ->
533
                      if oscore < nscore
534
                      then osols
535
                      else [(nscore, ns)]
536
                  -- FIXME: here we simply concat to lists with more
537
                  -- than one element; we should instead abort, since
538
                  -- this is not a valid usage of this function
539
                  xs -> (nscore, ns):xs
540
        nsuc = cntok + 1
541
    -- Note: we force evaluation of nsols here in order to keep the
542
    -- memory profile low - we know that we will need nsols for sure
543
    -- in the next cycle, so we force evaluation of nsols, since the
544
    -- foldl' in the caller will only evaluate the tuple, but not the
545
    -- elements of the tuple
546
    in nsols `seq` nsuc `seq` (flst, nsuc, nsols)
547

    
548
-- | Try to allocate an instance on the cluster.
549
tryAlloc :: (Monad m) =>
550
            Node.List         -- ^ The node list
551
         -> Instance.List     -- ^ The instance list
552
         -> Instance.Instance -- ^ The instance to allocate
553
         -> Int               -- ^ Required number of nodes
554
         -> m AllocSolution   -- ^ Possible solution list
555
tryAlloc nl _ inst 2 =
556
    let all_nodes = getOnline nl
557
        all_pairs = liftM2 (,) all_nodes all_nodes
558
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
559
        sols = foldl' (\cstate (p, s) ->
560
                           concatAllocs cstate $ allocateOnPair nl inst p s
561
                      ) ([], 0, []) ok_pairs
562
    in return sols
563

    
564
tryAlloc nl _ inst 1 =
565
    let all_nodes = getOnline nl
566
        sols = foldl' (\cstate ->
567
                           concatAllocs cstate . allocateOnSingle nl inst
568
                      ) ([], 0, []) all_nodes
569
    in return sols
570

    
571
tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
572
                             \destinations required (" ++ show reqn ++
573
                                               "), only two supported"
574

    
575
-- | Try to allocate an instance on the cluster.
576
tryReloc :: (Monad m) =>
577
            Node.List       -- ^ The node list
578
         -> Instance.List   -- ^ The instance list
579
         -> Idx             -- ^ The index of the instance to move
580
         -> Int             -- ^ The number of nodes required
581
         -> [Ndx]           -- ^ Nodes which should not be used
582
         -> m AllocSolution -- ^ Solution list
583
tryReloc nl il xid 1 ex_idx =
584
    let all_nodes = getOnline nl
585
        inst = Container.find xid il
586
        ex_idx' = Instance.pNode inst:ex_idx
587
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
588
        valid_idxes = map Node.idx valid_nodes
589
        sols1 = foldl' (\cstate x ->
590
                            let em = do
591
                                  (mnl, i, _, _) <-
592
                                      applyMove nl inst (ReplaceSecondary x)
593
                                  return (mnl, i, [Container.find x mnl])
594
                            in concatAllocs cstate em
595
                       ) ([], 0, []) valid_idxes
596
    in return sols1
597

    
598
tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
599
                                \destinations required (" ++ show reqn ++
600
                                                  "), only one supported"
601

    
602
-- | Try to allocate an instance on the cluster.
603
tryEvac :: (Monad m) =>
604
            Node.List       -- ^ The node list
605
         -> Instance.List   -- ^ The instance list
606
         -> [Ndx]           -- ^ Nodes to be evacuated
607
         -> m AllocSolution -- ^ Solution list
608
tryEvac nl il ex_ndx =
609
    let ex_nodes = map (`Container.find` nl) ex_ndx
610
        all_insts = nub . concatMap Node.sList $ ex_nodes
611
    in do
612
      (_, sol) <- foldM (\(nl', (_, _, rsols)) idx -> do
613
                           -- FIXME: hardcoded one node here
614
                           (fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx
615
                           case aes of
616
                             csol@(_, (nl'', _, _)):_ ->
617
                                 return (nl'', (fm, cs, csol:rsols))
618
                             _ -> fail $ "Can't evacuate instance " ++
619
                                  show idx
620
                        ) (nl, ([], 0, [])) all_insts
621
      return sol
622

    
623
-- | Recursively place instances on the cluster until we're out of space
624
iterateAlloc :: Node.List
625
             -> Instance.List
626
             -> Instance.Instance
627
             -> Int
628
             -> [Instance.Instance]
629
             -> Result (FailStats, Node.List, [Instance.Instance])
630
iterateAlloc nl il newinst nreq ixes =
631
      let depth = length ixes
632
          newname = printf "new-%d" depth::String
633
          newidx = length (Container.elems il) + depth
634
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
635
      in case tryAlloc nl il newi2 nreq of
636
           Bad s -> Bad s
637
           Ok (errs, _, sols3) ->
638
               case sols3 of
639
                 [] -> Ok (collapseFailures errs, nl, ixes)
640
                 (_, (xnl, xi, _)):[] ->
641
                     iterateAlloc xnl il newinst nreq $! (xi:ixes)
642
                 _ -> Bad "Internal error: multiple solutions for single\
643
                          \ allocation"
644

    
645
tieredAlloc :: Node.List
646
            -> Instance.List
647
            -> Instance.Instance
648
            -> Int
649
            -> [Instance.Instance]
650
            -> Result (FailStats, Node.List, [Instance.Instance])
651
tieredAlloc nl il newinst nreq ixes =
652
    case iterateAlloc nl il newinst nreq ixes of
653
      Bad s -> Bad s
654
      Ok (errs, nl', ixes') ->
655
          case Instance.shrinkByType newinst . fst . last $
656
               sortBy (comparing snd) errs of
657
            Bad _ -> Ok (errs, nl', ixes')
658
            Ok newinst' ->
659
                tieredAlloc nl' il newinst' nreq ixes'
660

    
661
-- * Formatting functions
662

    
663
-- | Given the original and final nodes, computes the relocation description.
664
computeMoves :: Instance.Instance -- ^ The instance to be moved
665
             -> String -- ^ The instance name
666
             -> IMove  -- ^ The move being performed
667
             -> String -- ^ New primary
668
             -> String -- ^ New secondary
669
             -> (String, [String])
670
                -- ^ Tuple of moves and commands list; moves is containing
671
                -- either @/f/@ for failover or @/r:name/@ for replace
672
                -- secondary, while the command list holds gnt-instance
673
                -- commands (without that prefix), e.g \"@failover instance1@\"
674
computeMoves i inam mv c d =
675
    case mv of
676
      Failover -> ("f", [mig])
677
      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
678
      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
679
      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
680
      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
681
    where morf = if Instance.running i then "migrate" else "failover"
682
          mig = printf "%s -f %s" morf inam::String
683
          rep n = printf "replace-disks -n %s %s" n inam
684

    
685
-- | Converts a placement to string format.
686
printSolutionLine :: Node.List     -- ^ The node list
687
                  -> Instance.List -- ^ The instance list
688
                  -> Int           -- ^ Maximum node name length
689
                  -> Int           -- ^ Maximum instance name length
690
                  -> Placement     -- ^ The current placement
691
                  -> Int           -- ^ The index of the placement in
692
                                   -- the solution
693
                  -> (String, [String])
694
printSolutionLine nl il nmlen imlen plc pos =
695
    let
696
        pmlen = (2*nmlen + 1)
697
        (i, p, s, mv, c) = plc
698
        inst = Container.find i il
699
        inam = Instance.name inst
700
        npri = Container.nameOf nl p
701
        nsec = Container.nameOf nl s
702
        opri = Container.nameOf nl $ Instance.pNode inst
703
        osec = Container.nameOf nl $ Instance.sNode inst
704
        (moves, cmds) =  computeMoves inst inam mv npri nsec
705
        ostr = printf "%s:%s" opri osec::String
706
        nstr = printf "%s:%s" npri nsec::String
707
    in
708
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
709
       pos imlen inam pmlen ostr
710
       pmlen nstr c moves,
711
       cmds)
712

    
713
-- | Return the instance and involved nodes in an instance move.
714
involvedNodes :: Instance.List -> Placement -> [Ndx]
715
involvedNodes il plc =
716
    let (i, np, ns, _, _) = plc
717
        inst = Container.find i il
718
        op = Instance.pNode inst
719
        os = Instance.sNode inst
720
    in nub [np, ns, op, os]
721

    
722
-- | Inner function for splitJobs, that either appends the next job to
723
-- the current jobset, or starts a new jobset.
724
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
725
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
726
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
727
    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
728
    | otherwise = ([n]:cjs, ndx)
729

    
730
-- | Break a list of moves into independent groups. Note that this
731
-- will reverse the order of jobs.
732
splitJobs :: [MoveJob] -> [JobSet]
733
splitJobs = fst . foldl mergeJobs ([], [])
734

    
735
-- | Given a list of commands, prefix them with @gnt-instance@ and
736
-- also beautify the display a little.
737
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
738
formatJob jsn jsl (sn, (_, _, _, cmds)) =
739
    let out =
740
            printf "  echo job %d/%d" jsn sn:
741
            printf "  check":
742
            map ("  gnt-instance " ++) cmds
743
    in if sn == 1
744
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
745
       else out
746

    
747
-- | Given a list of commands, prefix them with @gnt-instance@ and
748
-- also beautify the display a little.
749
formatCmds :: [JobSet] -> String
750
formatCmds =
751
    unlines .
752
    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
753
                             (zip [1..] js)) .
754
    zip [1..]
755

    
756
-- | Converts a solution to string format.
757
printSolution :: Node.List
758
              -> Instance.List
759
              -> [Placement]
760
              -> ([String], [[String]])
761
printSolution nl il sol =
762
    let
763
        nmlen = Container.maxNameLen nl
764
        imlen = Container.maxNameLen il
765
    in
766
      unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..]
767

    
768
-- | Print the node list.
769
printNodes :: Node.List -> [String] -> String
770
printNodes nl fs =
771
    let fields = if null fs
772
                 then Node.defaultFields
773
                 else fs
774
        snl = sortBy (comparing Node.idx) (Container.elems nl)
775
        (header, isnum) = unzip $ map Node.showHeader fields
776
    in unlines . map ((:) ' ' .  intercalate " ") $
777
       formatTable (header:map (Node.list fields) snl) isnum
778

    
779
-- | Print the instance list.
780
printInsts :: Node.List -> Instance.List -> String
781
printInsts nl il =
782
    let sil = sortBy (comparing Instance.idx) (Container.elems il)
783
        helper inst = [ if Instance.running inst then "R" else " "
784
                      , Instance.name inst
785
                      , Container.nameOf nl (Instance.pNode inst)
786
                      , let sdx = Instance.sNode inst
787
                        in if sdx == Node.noSecondary
788
                           then  ""
789
                           else Container.nameOf nl sdx
790
                      , printf "%3d" $ Instance.vcpus inst
791
                      , printf "%5d" $ Instance.mem inst
792
                      , printf "%5d" $ Instance.dsk inst `div` 1024
793
                      , printf "%5.3f" lC
794
                      , printf "%5.3f" lM
795
                      , printf "%5.3f" lD
796
                      , printf "%5.3f" lN
797
                      ]
798
            where DynUtil lC lM lD lN = Instance.util inst
799
        header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
800
                 , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
801
        isnum = False:False:False:False:repeat True
802
    in unlines . map ((:) ' ' . intercalate " ") $
803
       formatTable (header:map helper sil) isnum
804

    
805
-- | Shows statistics for a given node list.
806
printStats :: Node.List -> String
807
printStats nl =
808
    let dcvs = compDetailedCV nl
809
        hd = zip (detailedCVNames ++ repeat "unknown") dcvs
810
        formatted = map (\(header, val) ->
811
                             printf "%s=%.8f" header val::String) hd
812
    in intercalate ", " formatted
813

    
814
-- | Convert a placement into a list of OpCodes (basically a job).
815
iMoveToJob :: Node.List -> Instance.List
816
          -> Idx -> IMove -> [OpCodes.OpCode]
817
iMoveToJob nl il idx move =
818
    let inst = Container.find idx il
819
        iname = Instance.name inst
820
        lookNode  = Just . Container.nameOf nl
821
        opF = if Instance.running inst
822
              then OpCodes.OpMigrateInstance iname True False
823
              else OpCodes.OpFailoverInstance iname False
824
        opR n = OpCodes.OpReplaceDisks iname (lookNode n)
825
                OpCodes.ReplaceNewSecondary [] Nothing
826
    in case move of
827
         Failover -> [ opF ]
828
         ReplacePrimary np -> [ opF, opR np, opF ]
829
         ReplaceSecondary ns -> [ opR ns ]
830
         ReplaceAndFailover np -> [ opR np, opF ]
831
         FailoverAndReplace ns -> [ opF, opR ns ]