Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Cluster.hs @ a3d1dc0a

History | View | Annotate | Download (64.3 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the /Main/ module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
  (
31
    -- * Types
32
    AllocSolution(..)
33
  , EvacSolution(..)
34
  , Table(..)
35
  , CStats(..)
36
  , AllocResult
37
  , AllocMethod
38
  -- * Generic functions
39
  , totalResources
40
  , computeAllocationDelta
41
  -- * First phase functions
42
  , computeBadItems
43
  -- * Second phase functions
44
  , printSolutionLine
45
  , formatCmds
46
  , involvedNodes
47
  , splitJobs
48
  -- * Display functions
49
  , printNodes
50
  , printInsts
51
  -- * Balacing functions
52
  , checkMove
53
  , doNextBalance
54
  , tryBalance
55
  , compCV
56
  , compCVNodes
57
  , compDetailedCV
58
  , printStats
59
  , iMoveToJob
60
  -- * IAllocator functions
61
  , genAllocNodes
62
  , tryAlloc
63
  , tryMGAlloc
64
  , tryNodeEvac
65
  , tryChangeGroup
66
  , collapseFailures
67
  -- * Allocation functions
68
  , iterateAlloc
69
  , tieredAlloc
70
  -- * Node group functions
71
  , instanceGroup
72
  , findSplitInstances
73
  , splitCluster
74
  ) where
75

    
76
import qualified Data.IntSet as IntSet
77
import Data.List
78
import Data.Maybe (fromJust, isNothing)
79
import Data.Ord (comparing)
80
import Text.Printf (printf)
81

    
82
import qualified Ganeti.HTools.Container as Container
83
import qualified Ganeti.HTools.Instance as Instance
84
import qualified Ganeti.HTools.Node as Node
85
import qualified Ganeti.HTools.Group as Group
86
import Ganeti.HTools.Types
87
import Ganeti.HTools.Utils
88
import Ganeti.HTools.Compat
89
import qualified Ganeti.OpCodes as OpCodes
90

    
91
-- * Types
92

    
93
-- | Allocation\/relocation solution.
94
data AllocSolution = AllocSolution
95
  { asFailures :: [FailMode]              -- ^ Failure counts
96
  , asAllocs   :: Int                     -- ^ Good allocation count
97
  , asSolution :: Maybe Node.AllocElement -- ^ The actual allocation result
98
  , asLog      :: [String]                -- ^ Informational messages
99
  }
100

    
101
-- | Node evacuation/group change iallocator result type. This result
102
-- type consists of actual opcodes (a restricted subset) that are
103
-- transmitted back to Ganeti.
104
data EvacSolution = EvacSolution
105
  { esMoved   :: [(Idx, Gdx, [Ndx])]  -- ^ Instances moved successfully
106
  , esFailed  :: [(Idx, String)]      -- ^ Instances which were not
107
                                      -- relocated
108
  , esOpCodes :: [[OpCodes.OpCode]]   -- ^ List of jobs
109
  } deriving (Show)
110

    
111
-- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
112
type AllocResult = (FailStats, Node.List, Instance.List,
113
                    [Instance.Instance], [CStats])
114

    
115
-- | A type denoting the valid allocation mode/pairs.
116
--
117
-- For a one-node allocation, this will be a @Left ['Ndx']@, whereas
118
-- for a two-node allocation, this will be a @Right [('Ndx',
119
-- ['Ndx'])]@. In the latter case, the list is basically an
120
-- association list, grouped by primary node and holding the potential
121
-- secondary nodes in the sub-list.
122
type AllocNodes = Either [Ndx] [(Ndx, [Ndx])]
123

    
124
-- | The empty solution we start with when computing allocations.
125
emptyAllocSolution :: AllocSolution
126
emptyAllocSolution = AllocSolution { asFailures = [], asAllocs = 0
127
                                   , asSolution = Nothing, asLog = [] }
128

    
129
-- | The empty evac solution.
130
emptyEvacSolution :: EvacSolution
131
emptyEvacSolution = EvacSolution { esMoved = []
132
                                 , esFailed = []
133
                                 , esOpCodes = []
134
                                 }
135

    
136
-- | The complete state for the balancing solution.
137
data Table = Table Node.List Instance.List Score [Placement]
138
             deriving (Show, Read)
139

    
140
-- | Cluster statistics data type.
141
data CStats = CStats
142
  { csFmem :: Integer -- ^ Cluster free mem
143
  , csFdsk :: Integer -- ^ Cluster free disk
144
  , csAmem :: Integer -- ^ Cluster allocatable mem
145
  , csAdsk :: Integer -- ^ Cluster allocatable disk
146
  , csAcpu :: Integer -- ^ Cluster allocatable cpus
147
  , csMmem :: Integer -- ^ Max node allocatable mem
148
  , csMdsk :: Integer -- ^ Max node allocatable disk
149
  , csMcpu :: Integer -- ^ Max node allocatable cpu
150
  , csImem :: Integer -- ^ Instance used mem
151
  , csIdsk :: Integer -- ^ Instance used disk
152
  , csIcpu :: Integer -- ^ Instance used cpu
153
  , csTmem :: Double  -- ^ Cluster total mem
154
  , csTdsk :: Double  -- ^ Cluster total disk
155
  , csTcpu :: Double  -- ^ Cluster total cpus
156
  , csVcpu :: Integer -- ^ Cluster total virtual cpus
157
  , csNcpu :: Double  -- ^ Equivalent to 'csIcpu' but in terms of
158
                      -- physical CPUs, i.e. normalised used phys CPUs
159
  , csXmem :: Integer -- ^ Unnacounted for mem
160
  , csNmem :: Integer -- ^ Node own memory
161
  , csScore :: Score  -- ^ The cluster score
162
  , csNinst :: Int    -- ^ The total number of instances
163
  } deriving (Show, Read)
164

    
165
-- | A simple type for allocation functions.
166
type AllocMethod =  Node.List           -- ^ Node list
167
                 -> Instance.List       -- ^ Instance list
168
                 -> Maybe Int           -- ^ Optional allocation limit
169
                 -> Instance.Instance   -- ^ Instance spec for allocation
170
                 -> AllocNodes          -- ^ Which nodes we should allocate on
171
                 -> [Instance.Instance] -- ^ Allocated instances
172
                 -> [CStats]            -- ^ Running cluster stats
173
                 -> Result AllocResult  -- ^ Allocation result
174

    
175
-- | A simple type for the running solution of evacuations.
176
type EvacInnerState =
177
  Either String (Node.List, Instance.Instance, Score, Ndx)
178

    
179
-- * Utility functions
180

    
181
-- | Verifies the N+1 status and return the affected nodes.
182
verifyN1 :: [Node.Node] -> [Node.Node]
183
verifyN1 = filter Node.failN1
184

    
185
{-| Computes the pair of bad nodes and instances.
186

    
187
The bad node list is computed via a simple 'verifyN1' check, and the
188
bad instance list is the list of primary and secondary instances of
189
those nodes.
190

    
191
-}
192
computeBadItems :: Node.List -> Instance.List ->
193
                   ([Node.Node], [Instance.Instance])
194
computeBadItems nl il =
195
  let bad_nodes = verifyN1 $ getOnline nl
196
      bad_instances = map (`Container.find` il) .
197
                      sort . nub $
198
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
199
  in
200
    (bad_nodes, bad_instances)
201

    
202
-- | Extracts the node pairs for an instance. This can fail if the
203
-- instance is single-homed. FIXME: this needs to be improved,
204
-- together with the general enhancement for handling non-DRBD moves.
205
instanceNodes :: Node.List -> Instance.Instance ->
206
                 (Ndx, Ndx, Node.Node, Node.Node)
207
instanceNodes nl inst =
208
  let old_pdx = Instance.pNode inst
209
      old_sdx = Instance.sNode inst
210
      old_p = Container.find old_pdx nl
211
      old_s = Container.find old_sdx nl
212
  in (old_pdx, old_sdx, old_p, old_s)
213

    
214
-- | Zero-initializer for the CStats type.
215
emptyCStats :: CStats
216
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
217

    
218
-- | Update stats with data from a new node.
219
updateCStats :: CStats -> Node.Node -> CStats
220
updateCStats cs node =
221
  let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
222
               csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
223
               csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
224
               csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
225
               csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
226
               csVcpu = x_vcpu, csNcpu = x_ncpu,
227
               csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
228
             }
229
        = cs
230
      inc_amem = Node.fMem node - Node.rMem node
231
      inc_amem' = if inc_amem > 0 then inc_amem else 0
232
      inc_adsk = Node.availDisk node
233
      inc_imem = truncate (Node.tMem node) - Node.nMem node
234
                 - Node.xMem node - Node.fMem node
235
      inc_icpu = Node.uCpu node
236
      inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
237
      inc_vcpu = Node.hiCpu node
238
      inc_acpu = Node.availCpu node
239
      inc_ncpu = fromIntegral (Node.uCpu node) /
240
                 iPolicyVcpuRatio (Node.iPolicy node)
241
  in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
242
        , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
243
        , csAmem = x_amem + fromIntegral inc_amem'
244
        , csAdsk = x_adsk + fromIntegral inc_adsk
245
        , csAcpu = x_acpu + fromIntegral inc_acpu
246
        , csMmem = max x_mmem (fromIntegral inc_amem')
247
        , csMdsk = max x_mdsk (fromIntegral inc_adsk)
248
        , csMcpu = max x_mcpu (fromIntegral inc_acpu)
249
        , csImem = x_imem + fromIntegral inc_imem
250
        , csIdsk = x_idsk + fromIntegral inc_idsk
251
        , csIcpu = x_icpu + fromIntegral inc_icpu
252
        , csTmem = x_tmem + Node.tMem node
253
        , csTdsk = x_tdsk + Node.tDsk node
254
        , csTcpu = x_tcpu + Node.tCpu node
255
        , csVcpu = x_vcpu + fromIntegral inc_vcpu
256
        , csNcpu = x_ncpu + inc_ncpu
257
        , csXmem = x_xmem + fromIntegral (Node.xMem node)
258
        , csNmem = x_nmem + fromIntegral (Node.nMem node)
259
        , csNinst = x_ninst + length (Node.pList node)
260
        }
261

    
262
-- | Compute the total free disk and memory in the cluster.
263
totalResources :: Node.List -> CStats
264
totalResources nl =
265
  let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
266
  in cs { csScore = compCV nl }
267

    
268
-- | Compute the delta between two cluster state.
269
--
270
-- This is used when doing allocations, to understand better the
271
-- available cluster resources. The return value is a triple of the
272
-- current used values, the delta that was still allocated, and what
273
-- was left unallocated.
274
computeAllocationDelta :: CStats -> CStats -> AllocStats
275
computeAllocationDelta cini cfin =
276
  let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu,
277
              csNcpu = i_ncpu } = cini
278
      CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
279
              csTmem = t_mem, csTdsk = t_dsk, csVcpu = f_vcpu,
280
              csNcpu = f_ncpu, csTcpu = f_tcpu } = cfin
281
      rini = AllocInfo { allocInfoVCpus = fromIntegral i_icpu
282
                       , allocInfoNCpus = i_ncpu
283
                       , allocInfoMem   = fromIntegral i_imem
284
                       , allocInfoDisk  = fromIntegral i_idsk
285
                       }
286
      rfin = AllocInfo { allocInfoVCpus = fromIntegral (f_icpu - i_icpu)
287
                       , allocInfoNCpus = f_ncpu - i_ncpu
288
                       , allocInfoMem   = fromIntegral (f_imem - i_imem)
289
                       , allocInfoDisk  = fromIntegral (f_idsk - i_idsk)
290
                       }
291
      runa = AllocInfo { allocInfoVCpus = fromIntegral (f_vcpu - f_icpu)
292
                       , allocInfoNCpus = f_tcpu - f_ncpu
293
                       , allocInfoMem   = truncate t_mem - fromIntegral f_imem
294
                       , allocInfoDisk  = truncate t_dsk - fromIntegral f_idsk
295
                       }
296
  in (rini, rfin, runa)
297

    
298
-- | The names and weights of the individual elements in the CV list.
299
detailedCVInfo :: [(Double, String)]
300
detailedCVInfo = [ (1,  "free_mem_cv")
301
                 , (1,  "free_disk_cv")
302
                 , (1,  "n1_cnt")
303
                 , (1,  "reserved_mem_cv")
304
                 , (4,  "offline_all_cnt")
305
                 , (16, "offline_pri_cnt")
306
                 , (1,  "vcpu_ratio_cv")
307
                 , (1,  "cpu_load_cv")
308
                 , (1,  "mem_load_cv")
309
                 , (1,  "disk_load_cv")
310
                 , (1,  "net_load_cv")
311
                 , (2,  "pri_tags_score")
312
                 , (1,  "spindles_cv")
313
                 ]
314

    
315
-- | Holds the weights used by 'compCVNodes' for each metric.
316
detailedCVWeights :: [Double]
317
detailedCVWeights = map fst detailedCVInfo
318

    
319
-- | Compute the mem and disk covariance.
320
compDetailedCV :: [Node.Node] -> [Double]
321
compDetailedCV all_nodes =
322
  let (offline, nodes) = partition Node.offline all_nodes
323
      mem_l = map Node.pMem nodes
324
      dsk_l = map Node.pDsk nodes
325
      -- metric: memory covariance
326
      mem_cv = stdDev mem_l
327
      -- metric: disk covariance
328
      dsk_cv = stdDev dsk_l
329
      -- metric: count of instances living on N1 failing nodes
330
      n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
331
                                                 length (Node.pList n)) .
332
                 filter Node.failN1 $ nodes :: Double
333
      res_l = map Node.pRem nodes
334
      -- metric: reserved memory covariance
335
      res_cv = stdDev res_l
336
      -- offline instances metrics
337
      offline_ipri = sum . map (length . Node.pList) $ offline
338
      offline_isec = sum . map (length . Node.sList) $ offline
339
      -- metric: count of instances on offline nodes
340
      off_score = fromIntegral (offline_ipri + offline_isec)::Double
341
      -- metric: count of primary instances on offline nodes (this
342
      -- helps with evacuation/failover of primary instances on
343
      -- 2-node clusters with one node offline)
344
      off_pri_score = fromIntegral offline_ipri::Double
345
      cpu_l = map Node.pCpu nodes
346
      -- metric: covariance of vcpu/pcpu ratio
347
      cpu_cv = stdDev cpu_l
348
      -- metrics: covariance of cpu, memory, disk and network load
349
      (c_load, m_load, d_load, n_load) =
350
        unzip4 $ map (\n ->
351
                      let DynUtil c1 m1 d1 n1 = Node.utilLoad n
352
                          DynUtil c2 m2 d2 n2 = Node.utilPool n
353
                      in (c1/c2, m1/m2, d1/d2, n1/n2)) nodes
354
      -- metric: conflicting instance count
355
      pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
356
      pri_tags_score = fromIntegral pri_tags_inst::Double
357
      -- metric: spindles %
358
      spindles_cv = map (\n -> Node.instSpindles n / Node.hiSpindles n) nodes
359
  in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
360
     , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
361
     , pri_tags_score, stdDev spindles_cv ]
362

    
363
-- | Compute the /total/ variance.
364
compCVNodes :: [Node.Node] -> Double
365
compCVNodes = sum . zipWith (*) detailedCVWeights . compDetailedCV
366

    
367
-- | Wrapper over 'compCVNodes' for callers that have a 'Node.List'.
368
compCV :: Node.List -> Double
369
compCV = compCVNodes . Container.elems
370

    
371
-- | Compute online nodes from a 'Node.List'.
372
getOnline :: Node.List -> [Node.Node]
373
getOnline = filter (not . Node.offline) . Container.elems
374

    
375
-- * Balancing functions
376

    
377
-- | Compute best table. Note that the ordering of the arguments is important.
378
compareTables :: Table -> Table -> Table
379
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
380
  if a_cv > b_cv then b else a
381

    
382
-- | Applies an instance move to a given node list and instance.
383
applyMove :: Node.List -> Instance.Instance
384
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
385
-- Failover (f)
386
applyMove nl inst Failover =
387
  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
388
      int_p = Node.removePri old_p inst
389
      int_s = Node.removeSec old_s inst
390
      new_nl = do -- Maybe monad
391
        new_p <- Node.addPriEx (Node.offline old_p) int_s inst
392
        new_s <- Node.addSec int_p inst old_sdx
393
        let new_inst = Instance.setBoth inst old_sdx old_pdx
394
        return (Container.addTwo old_pdx new_s old_sdx new_p nl,
395
                new_inst, old_sdx, old_pdx)
396
  in new_nl
397

    
398
-- Failover to any (fa)
399
applyMove nl inst (FailoverToAny new_pdx) = do
400
  let (old_pdx, old_sdx, old_pnode, _) = instanceNodes nl inst
401
      new_pnode = Container.find new_pdx nl
402
      force_failover = Node.offline old_pnode
403
  new_pnode' <- Node.addPriEx force_failover new_pnode inst
404
  let old_pnode' = Node.removePri old_pnode inst
405
      inst' = Instance.setPri inst new_pdx
406
      nl' = Container.addTwo old_pdx old_pnode' new_pdx new_pnode' nl
407
  return (nl', inst', new_pdx, old_sdx)
408

    
409
-- Replace the primary (f:, r:np, f)
410
applyMove nl inst (ReplacePrimary new_pdx) =
411
  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
412
      tgt_n = Container.find new_pdx nl
413
      int_p = Node.removePri old_p inst
414
      int_s = Node.removeSec old_s inst
415
      force_p = Node.offline old_p
416
      new_nl = do -- Maybe monad
417
                  -- check that the current secondary can host the instance
418
                  -- during the migration
419
        tmp_s <- Node.addPriEx force_p int_s inst
420
        let tmp_s' = Node.removePri tmp_s inst
421
        new_p <- Node.addPriEx force_p tgt_n inst
422
        new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
423
        let new_inst = Instance.setPri inst new_pdx
424
        return (Container.add new_pdx new_p $
425
                Container.addTwo old_pdx int_p old_sdx new_s nl,
426
                new_inst, new_pdx, old_sdx)
427
  in new_nl
428

    
429
-- Replace the secondary (r:ns)
430
applyMove nl inst (ReplaceSecondary new_sdx) =
431
  let old_pdx = Instance.pNode inst
432
      old_sdx = Instance.sNode inst
433
      old_s = Container.find old_sdx nl
434
      tgt_n = Container.find new_sdx nl
435
      int_s = Node.removeSec old_s inst
436
      force_s = Node.offline old_s
437
      new_inst = Instance.setSec inst new_sdx
438
      new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
439
               \new_s -> return (Container.addTwo new_sdx
440
                                 new_s old_sdx int_s nl,
441
                                 new_inst, old_pdx, new_sdx)
442
  in new_nl
443

    
444
-- Replace the secondary and failover (r:np, f)
445
applyMove nl inst (ReplaceAndFailover new_pdx) =
446
  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
447
      tgt_n = Container.find new_pdx nl
448
      int_p = Node.removePri old_p inst
449
      int_s = Node.removeSec old_s inst
450
      force_s = Node.offline old_s
451
      new_nl = do -- Maybe monad
452
        new_p <- Node.addPri tgt_n inst
453
        new_s <- Node.addSecEx force_s int_p inst new_pdx
454
        let new_inst = Instance.setBoth inst new_pdx old_pdx
455
        return (Container.add new_pdx new_p $
456
                Container.addTwo old_pdx new_s old_sdx int_s nl,
457
                new_inst, new_pdx, old_pdx)
458
  in new_nl
459

    
460
-- Failver and replace the secondary (f, r:ns)
461
applyMove nl inst (FailoverAndReplace new_sdx) =
462
  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
463
      tgt_n = Container.find new_sdx nl
464
      int_p = Node.removePri old_p inst
465
      int_s = Node.removeSec old_s inst
466
      force_p = Node.offline old_p
467
      new_nl = do -- Maybe monad
468
        new_p <- Node.addPriEx force_p int_s inst
469
        new_s <- Node.addSecEx force_p tgt_n inst old_sdx
470
        let new_inst = Instance.setBoth inst old_sdx new_sdx
471
        return (Container.add new_sdx new_s $
472
                Container.addTwo old_sdx new_p old_pdx int_p nl,
473
                new_inst, old_sdx, new_sdx)
474
  in new_nl
475

    
476
-- | Tries to allocate an instance on one given node.
477
allocateOnSingle :: Node.List -> Instance.Instance -> Ndx
478
                 -> OpResult Node.AllocElement
479
allocateOnSingle nl inst new_pdx =
480
  let p = Container.find new_pdx nl
481
      new_inst = Instance.setBoth inst new_pdx Node.noSecondary
482
  in do
483
    Instance.instMatchesPolicy inst (Node.iPolicy p)
484
    new_p <- Node.addPri p inst
485
    let new_nl = Container.add new_pdx new_p nl
486
        new_score = compCV new_nl
487
    return (new_nl, new_inst, [new_p], new_score)
488

    
489
-- | Tries to allocate an instance on a given pair of nodes.
490
allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx
491
               -> OpResult Node.AllocElement
492
allocateOnPair nl inst new_pdx new_sdx =
493
  let tgt_p = Container.find new_pdx nl
494
      tgt_s = Container.find new_sdx nl
495
  in do
496
    Instance.instMatchesPolicy inst (Node.iPolicy tgt_p)
497
    new_p <- Node.addPri tgt_p inst
498
    new_s <- Node.addSec tgt_s inst new_pdx
499
    let new_inst = Instance.setBoth inst new_pdx new_sdx
500
        new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
501
    return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
502

    
503
-- | Tries to perform an instance move and returns the best table
504
-- between the original one and the new one.
505
checkSingleStep :: Table -- ^ The original table
506
                -> Instance.Instance -- ^ The instance to move
507
                -> Table -- ^ The current best table
508
                -> IMove -- ^ The move to apply
509
                -> Table -- ^ The final best table
510
checkSingleStep ini_tbl target cur_tbl move =
511
  let Table ini_nl ini_il _ ini_plc = ini_tbl
512
      tmp_resu = applyMove ini_nl target move
513
  in case tmp_resu of
514
       OpFail _ -> cur_tbl
515
       OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
516
         let tgt_idx = Instance.idx target
517
             upd_cvar = compCV upd_nl
518
             upd_il = Container.add tgt_idx new_inst ini_il
519
             upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
520
             upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
521
         in compareTables cur_tbl upd_tbl
522

    
523
-- | Given the status of the current secondary as a valid new node and
524
-- the current candidate target node, generate the possible moves for
525
-- a instance.
526
possibleMoves :: MirrorType -- ^ The mirroring type of the instance
527
              -> Bool       -- ^ Whether the secondary node is a valid new node
528
              -> Bool       -- ^ Whether we can change the primary node
529
              -> Ndx        -- ^ Target node candidate
530
              -> [IMove]    -- ^ List of valid result moves
531

    
532
possibleMoves MirrorNone _ _ _ = []
533

    
534
possibleMoves MirrorExternal _ False _ = []
535

    
536
possibleMoves MirrorExternal _ True tdx =
537
  [ FailoverToAny tdx ]
538

    
539
possibleMoves MirrorInternal _ False tdx =
540
  [ ReplaceSecondary tdx ]
541

    
542
possibleMoves MirrorInternal True True tdx =
543
  [ ReplaceSecondary tdx
544
  , ReplaceAndFailover tdx
545
  , ReplacePrimary tdx
546
  , FailoverAndReplace tdx
547
  ]
548

    
549
possibleMoves MirrorInternal False True tdx =
550
  [ ReplaceSecondary tdx
551
  , ReplaceAndFailover tdx
552
  ]
553

    
554
-- | Compute the best move for a given instance.
555
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
556
                  -> Bool              -- ^ Whether disk moves are allowed
557
                  -> Bool              -- ^ Whether instance moves are allowed
558
                  -> Table             -- ^ Original table
559
                  -> Instance.Instance -- ^ Instance to move
560
                  -> Table             -- ^ Best new table for this instance
561
checkInstanceMove nodes_idx disk_moves inst_moves ini_tbl target =
562
  let opdx = Instance.pNode target
563
      osdx = Instance.sNode target
564
      bad_nodes = [opdx, osdx]
565
      nodes = filter (`notElem` bad_nodes) nodes_idx
566
      mir_type = Instance.mirrorType target
567
      use_secondary = elem osdx nodes_idx && inst_moves
568
      aft_failover = if mir_type == MirrorInternal && use_secondary
569
                       -- if drbd and allowed to failover
570
                       then checkSingleStep ini_tbl target ini_tbl Failover
571
                       else ini_tbl
572
      all_moves =
573
        if disk_moves
574
          then concatMap (possibleMoves mir_type use_secondary inst_moves)
575
               nodes
576
          else []
577
    in
578
      -- iterate over the possible nodes for this instance
579
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
580

    
581
-- | Compute the best next move.
582
checkMove :: [Ndx]               -- ^ Allowed target node indices
583
          -> Bool                -- ^ Whether disk moves are allowed
584
          -> Bool                -- ^ Whether instance moves are allowed
585
          -> Table               -- ^ The current solution
586
          -> [Instance.Instance] -- ^ List of instances still to move
587
          -> Table               -- ^ The new solution
588
checkMove nodes_idx disk_moves inst_moves ini_tbl victims =
589
  let Table _ _ _ ini_plc = ini_tbl
590
      -- we're using rwhnf from the Control.Parallel.Strategies
591
      -- package; we don't need to use rnf as that would force too
592
      -- much evaluation in single-threaded cases, and in
593
      -- multi-threaded case the weak head normal form is enough to
594
      -- spark the evaluation
595
      tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves
596
                             inst_moves ini_tbl)
597
               victims
598
      -- iterate over all instances, computing the best move
599
      best_tbl = foldl' compareTables ini_tbl tables
600
      Table _ _ _ best_plc = best_tbl
601
  in if length best_plc == length ini_plc
602
       then ini_tbl -- no advancement
603
       else best_tbl
604

    
605
-- | Check if we are allowed to go deeper in the balancing.
606
doNextBalance :: Table     -- ^ The starting table
607
              -> Int       -- ^ Remaining length
608
              -> Score     -- ^ Score at which to stop
609
              -> Bool      -- ^ The resulting table and commands
610
doNextBalance ini_tbl max_rounds min_score =
611
  let Table _ _ ini_cv ini_plc = ini_tbl
612
      ini_plc_len = length ini_plc
613
  in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
614

    
615
-- | Run a balance move.
616
tryBalance :: Table       -- ^ The starting table
617
           -> Bool        -- ^ Allow disk moves
618
           -> Bool        -- ^ Allow instance moves
619
           -> Bool        -- ^ Only evacuate moves
620
           -> Score       -- ^ Min gain threshold
621
           -> Score       -- ^ Min gain
622
           -> Maybe Table -- ^ The resulting table and commands
623
tryBalance ini_tbl disk_moves inst_moves evac_mode mg_limit min_gain =
624
    let Table ini_nl ini_il ini_cv _ = ini_tbl
625
        all_inst = Container.elems ini_il
626
        all_nodes = Container.elems ini_nl
627
        (offline_nodes, online_nodes) = partition Node.offline all_nodes
628
        all_inst' = if evac_mode
629
                      then let bad_nodes = map Node.idx offline_nodes
630
                           in filter (any (`elem` bad_nodes) .
631
                                          Instance.allNodes) all_inst
632
                      else all_inst
633
        reloc_inst = filter (\i -> Instance.movable i &&
634
                                   Instance.autoBalance i) all_inst'
635
        node_idx = map Node.idx online_nodes
636
        fin_tbl = checkMove node_idx disk_moves inst_moves ini_tbl reloc_inst
637
        (Table _ _ fin_cv _) = fin_tbl
638
    in
639
      if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
640
      then Just fin_tbl -- this round made success, return the new table
641
      else Nothing
642

    
643
-- * Allocation functions
644

    
645
-- | Build failure stats out of a list of failures.
646
collapseFailures :: [FailMode] -> FailStats
647
collapseFailures flst =
648
    map (\k -> (k, foldl' (\a e -> if e == k then a + 1 else a) 0 flst))
649
            [minBound..maxBound]
650

    
651
-- | Compares two Maybe AllocElement and chooses the besst score.
652
bestAllocElement :: Maybe Node.AllocElement
653
                 -> Maybe Node.AllocElement
654
                 -> Maybe Node.AllocElement
655
bestAllocElement a Nothing = a
656
bestAllocElement Nothing b = b
657
bestAllocElement a@(Just (_, _, _, ascore)) b@(Just (_, _, _, bscore)) =
658
  if ascore < bscore then a else b
659

    
660
-- | Update current Allocation solution and failure stats with new
661
-- elements.
662
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
663
concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
664

    
665
concatAllocs as (OpGood ns) =
666
  let -- Choose the old or new solution, based on the cluster score
667
    cntok = asAllocs as
668
    osols = asSolution as
669
    nsols = bestAllocElement osols (Just ns)
670
    nsuc = cntok + 1
671
    -- Note: we force evaluation of nsols here in order to keep the
672
    -- memory profile low - we know that we will need nsols for sure
673
    -- in the next cycle, so we force evaluation of nsols, since the
674
    -- foldl' in the caller will only evaluate the tuple, but not the
675
    -- elements of the tuple
676
  in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolution = nsols }
677

    
678
-- | Sums two 'AllocSolution' structures.
679
sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
680
sumAllocs (AllocSolution aFails aAllocs aSols aLog)
681
          (AllocSolution bFails bAllocs bSols bLog) =
682
  -- note: we add b first, since usually it will be smaller; when
683
  -- fold'ing, a will grow and grow whereas b is the per-group
684
  -- result, hence smaller
685
  let nFails  = bFails ++ aFails
686
      nAllocs = aAllocs + bAllocs
687
      nSols   = bestAllocElement aSols bSols
688
      nLog    = bLog ++ aLog
689
  in AllocSolution nFails nAllocs nSols nLog
690

    
691
-- | Given a solution, generates a reasonable description for it.
692
describeSolution :: AllocSolution -> String
693
describeSolution as =
694
  let fcnt = asFailures as
695
      sols = asSolution as
696
      freasons =
697
        intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
698
        filter ((> 0) . snd) . collapseFailures $ fcnt
699
  in case sols of
700
     Nothing -> "No valid allocation solutions, failure reasons: " ++
701
                (if null fcnt then "unknown reasons" else freasons)
702
     Just (_, _, nodes, cv) ->
703
         printf ("score: %.8f, successes %d, failures %d (%s)" ++
704
                 " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
705
               (intercalate "/" . map Node.name $ nodes)
706

    
707
-- | Annotates a solution with the appropriate string.
708
annotateSolution :: AllocSolution -> AllocSolution
709
annotateSolution as = as { asLog = describeSolution as : asLog as }
710

    
711
-- | Reverses an evacuation solution.
712
--
713
-- Rationale: we always concat the results to the top of the lists, so
714
-- for proper jobset execution, we should reverse all lists.
715
reverseEvacSolution :: EvacSolution -> EvacSolution
716
reverseEvacSolution (EvacSolution f m o) =
717
  EvacSolution (reverse f) (reverse m) (reverse o)
718

    
719
-- | Generate the valid node allocation singles or pairs for a new instance.
720
genAllocNodes :: Group.List        -- ^ Group list
721
              -> Node.List         -- ^ The node map
722
              -> Int               -- ^ The number of nodes required
723
              -> Bool              -- ^ Whether to drop or not
724
                                   -- unallocable nodes
725
              -> Result AllocNodes -- ^ The (monadic) result
726
genAllocNodes gl nl count drop_unalloc =
727
  let filter_fn = if drop_unalloc
728
                    then filter (Group.isAllocable .
729
                                 flip Container.find gl . Node.group)
730
                    else id
731
      all_nodes = filter_fn $ getOnline nl
732
      all_pairs = [(Node.idx p,
733
                    [Node.idx s | s <- all_nodes,
734
                                       Node.idx p /= Node.idx s,
735
                                       Node.group p == Node.group s]) |
736
                   p <- all_nodes]
737
  in case count of
738
       1 -> Ok (Left (map Node.idx all_nodes))
739
       2 -> Ok (Right (filter (not . null . snd) all_pairs))
740
       _ -> Bad "Unsupported number of nodes, only one or two  supported"
741

    
742
-- | Try to allocate an instance on the cluster.
743
tryAlloc :: (Monad m) =>
744
            Node.List         -- ^ The node list
745
         -> Instance.List     -- ^ The instance list
746
         -> Instance.Instance -- ^ The instance to allocate
747
         -> AllocNodes        -- ^ The allocation targets
748
         -> m AllocSolution   -- ^ Possible solution list
749
tryAlloc _  _ _    (Right []) = fail "Not enough online nodes"
750
tryAlloc nl _ inst (Right ok_pairs) =
751
  let psols = parMap rwhnf (\(p, ss) ->
752
                              foldl' (\cstate ->
753
                                        concatAllocs cstate .
754
                                        allocateOnPair nl inst p)
755
                              emptyAllocSolution ss) ok_pairs
756
      sols = foldl' sumAllocs emptyAllocSolution psols
757
  in return $ annotateSolution sols
758

    
759
tryAlloc _  _ _    (Left []) = fail "No online nodes"
760
tryAlloc nl _ inst (Left all_nodes) =
761
  let sols = foldl' (\cstate ->
762
                       concatAllocs cstate . allocateOnSingle nl inst
763
                    ) emptyAllocSolution all_nodes
764
  in return $ annotateSolution sols
765

    
766
-- | Given a group/result, describe it as a nice (list of) messages.
767
solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
768
solutionDescription gl (groupId, result) =
769
  case result of
770
    Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution)
771
    Bad message -> [printf "Group %s: error %s" gname message]
772
  where grp = Container.find groupId gl
773
        gname = Group.name grp
774
        pol = allocPolicyToRaw (Group.allocPolicy grp)
775

    
776
-- | From a list of possibly bad and possibly empty solutions, filter
777
-- only the groups with a valid result. Note that the result will be
778
-- reversed compared to the original list.
779
filterMGResults :: Group.List
780
                -> [(Gdx, Result AllocSolution)]
781
                -> [(Gdx, AllocSolution)]
782
filterMGResults gl = foldl' fn []
783
  where unallocable = not . Group.isAllocable . flip Container.find gl
784
        fn accu (gdx, rasol) =
785
          case rasol of
786
            Bad _ -> accu
787
            Ok sol | isNothing (asSolution sol) -> accu
788
                   | unallocable gdx -> accu
789
                   | otherwise -> (gdx, sol):accu
790

    
791
-- | Sort multigroup results based on policy and score.
792
sortMGResults :: Group.List
793
             -> [(Gdx, AllocSolution)]
794
             -> [(Gdx, AllocSolution)]
795
sortMGResults gl sols =
796
  let extractScore (_, _, _, x) = x
797
      solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
798
                             (extractScore . fromJust . asSolution) sol)
799
  in sortBy (comparing solScore) sols
800

    
801
-- | Finds the best group for an instance on a multi-group cluster.
802
--
803
-- Only solutions in @preferred@ and @last_resort@ groups will be
804
-- accepted as valid, and additionally if the allowed groups parameter
805
-- is not null then allocation will only be run for those group
806
-- indices.
807
findBestAllocGroup :: Group.List           -- ^ The group list
808
                   -> Node.List            -- ^ The node list
809
                   -> Instance.List        -- ^ The instance list
810
                   -> Maybe [Gdx]          -- ^ The allowed groups
811
                   -> Instance.Instance    -- ^ The instance to allocate
812
                   -> Int                  -- ^ Required number of nodes
813
                   -> Result (Gdx, AllocSolution, [String])
814
findBestAllocGroup mggl mgnl mgil allowed_gdxs inst cnt =
815
  let groups = splitCluster mgnl mgil
816
      groups' = maybe groups (\gs -> filter ((`elem` gs) . fst) groups)
817
                allowed_gdxs
818
      sols = map (\(gid, (nl, il)) ->
819
                   (gid, genAllocNodes mggl nl cnt False >>=
820
                       tryAlloc nl il inst))
821
             groups'::[(Gdx, Result AllocSolution)]
822
      all_msgs = concatMap (solutionDescription mggl) sols
823
      goodSols = filterMGResults mggl sols
824
      sortedSols = sortMGResults mggl goodSols
825
  in if null sortedSols
826
       then if null groups'
827
              then Bad $ "no groups for evacuation: allowed groups was" ++
828
                     show allowed_gdxs ++ ", all groups: " ++
829
                     show (map fst groups)
830
              else Bad $ intercalate ", " all_msgs
831
       else let (final_group, final_sol) = head sortedSols
832
            in return (final_group, final_sol, all_msgs)
833

    
834
-- | Try to allocate an instance on a multi-group cluster.
835
tryMGAlloc :: Group.List           -- ^ The group list
836
           -> Node.List            -- ^ The node list
837
           -> Instance.List        -- ^ The instance list
838
           -> Instance.Instance    -- ^ The instance to allocate
839
           -> Int                  -- ^ Required number of nodes
840
           -> Result AllocSolution -- ^ Possible solution list
841
tryMGAlloc mggl mgnl mgil inst cnt = do
842
  (best_group, solution, all_msgs) <-
843
      findBestAllocGroup mggl mgnl mgil Nothing inst cnt
844
  let group_name = Group.name $ Container.find best_group mggl
845
      selmsg = "Selected group: " ++ group_name
846
  return $ solution { asLog = selmsg:all_msgs }
847

    
848
-- | Function which fails if the requested mode is change secondary.
849
--
850
-- This is useful since except DRBD, no other disk template can
851
-- execute change secondary; thus, we can just call this function
852
-- instead of always checking for secondary mode. After the call to
853
-- this function, whatever mode we have is just a primary change.
854
failOnSecondaryChange :: (Monad m) => EvacMode -> DiskTemplate -> m ()
855
failOnSecondaryChange ChangeSecondary dt =
856
  fail $ "Instances with disk template '" ++ diskTemplateToRaw dt ++
857
         "' can't execute change secondary"
858
failOnSecondaryChange _ _ = return ()
859

    
860
-- | Run evacuation for a single instance.
861
--
862
-- /Note:/ this function should correctly execute both intra-group
863
-- evacuations (in all modes) and inter-group evacuations (in the
864
-- 'ChangeAll' mode). Of course, this requires that the correct list
865
-- of target nodes is passed.
866
nodeEvacInstance :: Node.List         -- ^ The node list (cluster-wide)
867
                 -> Instance.List     -- ^ Instance list (cluster-wide)
868
                 -> EvacMode          -- ^ The evacuation mode
869
                 -> Instance.Instance -- ^ The instance to be evacuated
870
                 -> Gdx               -- ^ The group we're targetting
871
                 -> [Ndx]             -- ^ The list of available nodes
872
                                      -- for allocation
873
                 -> Result (Node.List, Instance.List, [OpCodes.OpCode])
874
nodeEvacInstance nl il mode inst@(Instance.Instance
875
                                  {Instance.diskTemplate = dt@DTDiskless})
876
                 gdx avail_nodes =
877
                   failOnSecondaryChange mode dt >>
878
                   evacOneNodeOnly nl il inst gdx avail_nodes
879

    
880
nodeEvacInstance _ _ _ (Instance.Instance
881
                        {Instance.diskTemplate = DTPlain}) _ _ =
882
                  fail "Instances of type plain cannot be relocated"
883

    
884
nodeEvacInstance _ _ _ (Instance.Instance
885
                        {Instance.diskTemplate = DTFile}) _ _ =
886
                  fail "Instances of type file cannot be relocated"
887

    
888
nodeEvacInstance nl il mode inst@(Instance.Instance
889
                                  {Instance.diskTemplate = dt@DTSharedFile})
890
                 gdx avail_nodes =
891
                   failOnSecondaryChange mode dt >>
892
                   evacOneNodeOnly nl il inst gdx avail_nodes
893

    
894
nodeEvacInstance nl il mode inst@(Instance.Instance
895
                                  {Instance.diskTemplate = dt@DTBlock})
896
                 gdx avail_nodes =
897
                   failOnSecondaryChange mode dt >>
898
                   evacOneNodeOnly nl il inst gdx avail_nodes
899

    
900
nodeEvacInstance nl il mode inst@(Instance.Instance
901
                                  {Instance.diskTemplate = dt@DTRbd})
902
                 gdx avail_nodes =
903
                   failOnSecondaryChange mode dt >>
904
                   evacOneNodeOnly nl il inst gdx avail_nodes
905

    
906
nodeEvacInstance nl il ChangePrimary
907
                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
908
                 _ _ =
909
  do
910
    (nl', inst', _, _) <- opToResult $ applyMove nl inst Failover
911
    let idx = Instance.idx inst
912
        il' = Container.add idx inst' il
913
        ops = iMoveToJob nl' il' idx Failover
914
    return (nl', il', ops)
915

    
916
nodeEvacInstance nl il ChangeSecondary
917
                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
918
                 gdx avail_nodes =
919
  evacOneNodeOnly nl il inst gdx avail_nodes
920

    
921
-- The algorithm for ChangeAll is as follows:
922
--
923
-- * generate all (primary, secondary) node pairs for the target groups
924
-- * for each pair, execute the needed moves (r:s, f, r:s) and compute
925
--   the final node list state and group score
926
-- * select the best choice via a foldl that uses the same Either
927
--   String solution as the ChangeSecondary mode
928
nodeEvacInstance nl il ChangeAll
929
                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
930
                 gdx avail_nodes =
931
  do
932
    let no_nodes = Left "no nodes available"
933
        node_pairs = [(p,s) | p <- avail_nodes, s <- avail_nodes, p /= s]
934
    (nl', il', ops, _) <-
935
        annotateResult "Can't find any good nodes for relocation" $
936
        eitherToResult $
937
        foldl'
938
        (\accu nodes -> case evacDrbdAllInner nl il inst gdx nodes of
939
                          Bad msg ->
940
                              case accu of
941
                                Right _ -> accu
942
                                -- we don't need more details (which
943
                                -- nodes, etc.) as we only selected
944
                                -- this group if we can allocate on
945
                                -- it, hence failures will not
946
                                -- propagate out of this fold loop
947
                                Left _ -> Left $ "Allocation failed: " ++ msg
948
                          Ok result@(_, _, _, new_cv) ->
949
                              let new_accu = Right result in
950
                              case accu of
951
                                Left _ -> new_accu
952
                                Right (_, _, _, old_cv) ->
953
                                    if old_cv < new_cv
954
                                    then accu
955
                                    else new_accu
956
        ) no_nodes node_pairs
957

    
958
    return (nl', il', ops)
959

    
960
-- | Generic function for changing one node of an instance.
961
--
962
-- This is similar to 'nodeEvacInstance' but will be used in a few of
963
-- its sub-patterns. It folds the inner function 'evacOneNodeInner'
964
-- over the list of available nodes, which results in the best choice
965
-- for relocation.
966
evacOneNodeOnly :: Node.List         -- ^ The node list (cluster-wide)
967
                -> Instance.List     -- ^ Instance list (cluster-wide)
968
                -> Instance.Instance -- ^ The instance to be evacuated
969
                -> Gdx               -- ^ The group we're targetting
970
                -> [Ndx]             -- ^ The list of available nodes
971
                                      -- for allocation
972
                -> Result (Node.List, Instance.List, [OpCodes.OpCode])
973
evacOneNodeOnly nl il inst gdx avail_nodes = do
974
  op_fn <- case Instance.mirrorType inst of
975
             MirrorNone -> Bad "Can't relocate/evacuate non-mirrored instances"
976
             MirrorInternal -> Ok ReplaceSecondary
977
             MirrorExternal -> Ok FailoverToAny
978
  (nl', inst', _, ndx) <- annotateResult "Can't find any good node" $
979
                          eitherToResult $
980
                          foldl' (evacOneNodeInner nl inst gdx op_fn)
981
                          (Left "no nodes available") avail_nodes
982
  let idx = Instance.idx inst
983
      il' = Container.add idx inst' il
984
      ops = iMoveToJob nl' il' idx (op_fn ndx)
985
  return (nl', il', ops)
986

    
987
-- | Inner fold function for changing one node of an instance.
988
--
989
-- Depending on the instance disk template, this will either change
990
-- the secondary (for DRBD) or the primary node (for shared
991
-- storage). However, the operation is generic otherwise.
992
--
993
-- The running solution is either a @Left String@, which means we
994
-- don't have yet a working solution, or a @Right (...)@, which
995
-- represents a valid solution; it holds the modified node list, the
996
-- modified instance (after evacuation), the score of that solution,
997
-- and the new secondary node index.
998
evacOneNodeInner :: Node.List         -- ^ Cluster node list
999
                 -> Instance.Instance -- ^ Instance being evacuated
1000
                 -> Gdx               -- ^ The group index of the instance
1001
                 -> (Ndx -> IMove)    -- ^ Operation constructor
1002
                 -> EvacInnerState    -- ^ Current best solution
1003
                 -> Ndx               -- ^ Node we're evaluating as target
1004
                 -> EvacInnerState    -- ^ New best solution
1005
evacOneNodeInner nl inst gdx op_fn accu ndx =
1006
  case applyMove nl inst (op_fn ndx) of
1007
    OpFail fm -> let fail_msg = "Node " ++ Container.nameOf nl ndx ++
1008
                                " failed: " ++ show fm
1009
                 in either (const $ Left fail_msg) (const accu) accu
1010
    OpGood (nl', inst', _, _) ->
1011
      let nodes = Container.elems nl'
1012
          -- The fromJust below is ugly (it can fail nastily), but
1013
          -- at this point we should have any internal mismatches,
1014
          -- and adding a monad here would be quite involved
1015
          grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
1016
          new_cv = compCVNodes grpnodes
1017
          new_accu = Right (nl', inst', new_cv, ndx)
1018
      in case accu of
1019
           Left _ -> new_accu
1020
           Right (_, _, old_cv, _) ->
1021
             if old_cv < new_cv
1022
               then accu
1023
               else new_accu
1024

    
1025
-- | Compute result of changing all nodes of a DRBD instance.
1026
--
1027
-- Given the target primary and secondary node (which might be in a
1028
-- different group or not), this function will 'execute' all the
1029
-- required steps and assuming all operations succceed, will return
1030
-- the modified node and instance lists, the opcodes needed for this
1031
-- and the new group score.
1032
evacDrbdAllInner :: Node.List         -- ^ Cluster node list
1033
                 -> Instance.List     -- ^ Cluster instance list
1034
                 -> Instance.Instance -- ^ The instance to be moved
1035
                 -> Gdx               -- ^ The target group index
1036
                                      -- (which can differ from the
1037
                                      -- current group of the
1038
                                      -- instance)
1039
                 -> (Ndx, Ndx)        -- ^ Tuple of new
1040
                                      -- primary\/secondary nodes
1041
                 -> Result (Node.List, Instance.List, [OpCodes.OpCode], Score)
1042
evacDrbdAllInner nl il inst gdx (t_pdx, t_sdx) = do
1043
  let primary = Container.find (Instance.pNode inst) nl
1044
      idx = Instance.idx inst
1045
  -- if the primary is offline, then we first failover
1046
  (nl1, inst1, ops1) <-
1047
    if Node.offline primary
1048
      then do
1049
        (nl', inst', _, _) <-
1050
          annotateResult "Failing over to the secondary" $
1051
          opToResult $ applyMove nl inst Failover
1052
        return (nl', inst', [Failover])
1053
      else return (nl, inst, [])
1054
  let (o1, o2, o3) = (ReplaceSecondary t_pdx,
1055
                      Failover,
1056
                      ReplaceSecondary t_sdx)
1057
  -- we now need to execute a replace secondary to the future
1058
  -- primary node
1059
  (nl2, inst2, _, _) <-
1060
    annotateResult "Changing secondary to new primary" $
1061
    opToResult $
1062
    applyMove nl1 inst1 o1
1063
  let ops2 = o1:ops1
1064
  -- we now execute another failover, the primary stays fixed now
1065
  (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
1066
                        opToResult $ applyMove nl2 inst2 o2
1067
  let ops3 = o2:ops2
1068
  -- and finally another replace secondary, to the final secondary
1069
  (nl4, inst4, _, _) <-
1070
    annotateResult "Changing secondary to final secondary" $
1071
    opToResult $
1072
    applyMove nl3 inst3 o3
1073
  let ops4 = o3:ops3
1074
      il' = Container.add idx inst4 il
1075
      ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
1076
  let nodes = Container.elems nl4
1077
      -- The fromJust below is ugly (it can fail nastily), but
1078
      -- at this point we should have any internal mismatches,
1079
      -- and adding a monad here would be quite involved
1080
      grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
1081
      new_cv = compCVNodes grpnodes
1082
  return (nl4, il', ops, new_cv)
1083

    
1084
-- | Computes the nodes in a given group which are available for
1085
-- allocation.
1086
availableGroupNodes :: [(Gdx, [Ndx])] -- ^ Group index/node index assoc list
1087
                    -> IntSet.IntSet  -- ^ Nodes that are excluded
1088
                    -> Gdx            -- ^ The group for which we
1089
                                      -- query the nodes
1090
                    -> Result [Ndx]   -- ^ List of available node indices
1091
availableGroupNodes group_nodes excl_ndx gdx = do
1092
  local_nodes <- maybe (Bad $ "Can't find group with index " ++ show gdx)
1093
                 Ok (lookup gdx group_nodes)
1094
  let avail_nodes = filter (not . flip IntSet.member excl_ndx) local_nodes
1095
  return avail_nodes
1096

    
1097
-- | Updates the evac solution with the results of an instance
1098
-- evacuation.
1099
updateEvacSolution :: (Node.List, Instance.List, EvacSolution)
1100
                   -> Idx
1101
                   -> Result (Node.List, Instance.List, [OpCodes.OpCode])
1102
                   -> (Node.List, Instance.List, EvacSolution)
1103
updateEvacSolution (nl, il, es) idx (Bad msg) =
1104
  (nl, il, es { esFailed = (idx, msg):esFailed es})
1105
updateEvacSolution (_, _, es) idx (Ok (nl, il, opcodes)) =
1106
  (nl, il, es { esMoved = new_elem:esMoved es
1107
              , esOpCodes = opcodes:esOpCodes es })
1108
    where inst = Container.find idx il
1109
          new_elem = (idx,
1110
                      instancePriGroup nl inst,
1111
                      Instance.allNodes inst)
1112

    
1113
-- | Node-evacuation IAllocator mode main function.
1114
tryNodeEvac :: Group.List    -- ^ The cluster groups
1115
            -> Node.List     -- ^ The node list (cluster-wide, not per group)
1116
            -> Instance.List -- ^ Instance list (cluster-wide)
1117
            -> EvacMode      -- ^ The evacuation mode
1118
            -> [Idx]         -- ^ List of instance (indices) to be evacuated
1119
            -> Result (Node.List, Instance.List, EvacSolution)
1120
tryNodeEvac _ ini_nl ini_il mode idxs =
1121
  let evac_ndx = nodesToEvacuate ini_il mode idxs
1122
      offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
1123
      excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline
1124
      group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
1125
                                           (Container.elems nl))) $
1126
                  splitCluster ini_nl ini_il
1127
      (fin_nl, fin_il, esol) =
1128
        foldl' (\state@(nl, il, _) inst ->
1129
                  let gdx = instancePriGroup nl inst
1130
                      pdx = Instance.pNode inst in
1131
                  updateEvacSolution state (Instance.idx inst) $
1132
                  availableGroupNodes group_ndx
1133
                    (IntSet.insert pdx excl_ndx) gdx >>=
1134
                      nodeEvacInstance nl il mode inst gdx
1135
               )
1136
        (ini_nl, ini_il, emptyEvacSolution)
1137
        (map (`Container.find` ini_il) idxs)
1138
  in return (fin_nl, fin_il, reverseEvacSolution esol)
1139

    
1140
-- | Change-group IAllocator mode main function.
1141
--
1142
-- This is very similar to 'tryNodeEvac', the only difference is that
1143
-- we don't choose as target group the current instance group, but
1144
-- instead:
1145
--
1146
--   1. at the start of the function, we compute which are the target
1147
--   groups; either no groups were passed in, in which case we choose
1148
--   all groups out of which we don't evacuate instance, or there were
1149
--   some groups passed, in which case we use those
1150
--
1151
--   2. for each instance, we use 'findBestAllocGroup' to choose the
1152
--   best group to hold the instance, and then we do what
1153
--   'tryNodeEvac' does, except for this group instead of the current
1154
--   instance group.
1155
--
1156
-- Note that the correct behaviour of this function relies on the
1157
-- function 'nodeEvacInstance' to be able to do correctly both
1158
-- intra-group and inter-group moves when passed the 'ChangeAll' mode.
1159
tryChangeGroup :: Group.List    -- ^ The cluster groups
1160
               -> Node.List     -- ^ The node list (cluster-wide)
1161
               -> Instance.List -- ^ Instance list (cluster-wide)
1162
               -> [Gdx]         -- ^ Target groups; if empty, any
1163
                                -- groups not being evacuated
1164
               -> [Idx]         -- ^ List of instance (indices) to be evacuated
1165
               -> Result (Node.List, Instance.List, EvacSolution)
1166
tryChangeGroup gl ini_nl ini_il gdxs idxs =
1167
  let evac_gdxs = nub $ map (instancePriGroup ini_nl .
1168
                             flip Container.find ini_il) idxs
1169
      target_gdxs = (if null gdxs
1170
                       then Container.keys gl
1171
                       else gdxs) \\ evac_gdxs
1172
      offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
1173
      excl_ndx = foldl' (flip IntSet.insert) IntSet.empty offline
1174
      group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
1175
                                           (Container.elems nl))) $
1176
                  splitCluster ini_nl ini_il
1177
      (fin_nl, fin_il, esol) =
1178
        foldl' (\state@(nl, il, _) inst ->
1179
                  let solution = do
1180
                        let ncnt = Instance.requiredNodes $
1181
                                   Instance.diskTemplate inst
1182
                        (gdx, _, _) <- findBestAllocGroup gl nl il
1183
                                       (Just target_gdxs) inst ncnt
1184
                        av_nodes <- availableGroupNodes group_ndx
1185
                                    excl_ndx gdx
1186
                        nodeEvacInstance nl il ChangeAll inst gdx av_nodes
1187
                  in updateEvacSolution state (Instance.idx inst) solution
1188
               )
1189
        (ini_nl, ini_il, emptyEvacSolution)
1190
        (map (`Container.find` ini_il) idxs)
1191
  in return (fin_nl, fin_il, reverseEvacSolution esol)
1192

    
1193
-- | Standard-sized allocation method.
1194
--
1195
-- This places instances of the same size on the cluster until we're
1196
-- out of space. The result will be a list of identically-sized
1197
-- instances.
1198
iterateAlloc :: AllocMethod
1199
iterateAlloc nl il limit newinst allocnodes ixes cstats =
1200
  let depth = length ixes
1201
      newname = printf "new-%d" depth::String
1202
      newidx = Container.size il
1203
      newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
1204
      newlimit = fmap (flip (-) 1) limit
1205
  in case tryAlloc nl il newi2 allocnodes of
1206
       Bad s -> Bad s
1207
       Ok (AllocSolution { asFailures = errs, asSolution = sols3 }) ->
1208
         let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in
1209
         case sols3 of
1210
           Nothing -> newsol
1211
           Just (xnl, xi, _, _) ->
1212
             if limit == Just 0
1213
               then newsol
1214
               else iterateAlloc xnl (Container.add newidx xi il)
1215
                      newlimit newinst allocnodes (xi:ixes)
1216
                      (totalResources xnl:cstats)
1217

    
1218
-- | Tiered allocation method.
1219
--
1220
-- This places instances on the cluster, and decreases the spec until
1221
-- we can allocate again. The result will be a list of decreasing
1222
-- instance specs.
1223
tieredAlloc :: AllocMethod
1224
tieredAlloc nl il limit newinst allocnodes ixes cstats =
1225
  case iterateAlloc nl il limit newinst allocnodes ixes cstats of
1226
    Bad s -> Bad s
1227
    Ok (errs, nl', il', ixes', cstats') ->
1228
      let newsol = Ok (errs, nl', il', ixes', cstats')
1229
          ixes_cnt = length ixes'
1230
          (stop, newlimit) = case limit of
1231
                               Nothing -> (False, Nothing)
1232
                               Just n -> (n <= ixes_cnt,
1233
                                            Just (n - ixes_cnt)) in
1234
      if stop then newsol else
1235
          case Instance.shrinkByType newinst . fst . last $
1236
               sortBy (comparing snd) errs of
1237
            Bad _ -> newsol
1238
            Ok newinst' -> tieredAlloc nl' il' newlimit
1239
                           newinst' allocnodes ixes' cstats'
1240

    
1241
-- * Formatting functions
1242

    
1243
-- | Given the original and final nodes, computes the relocation description.
1244
computeMoves :: Instance.Instance -- ^ The instance to be moved
1245
             -> String -- ^ The instance name
1246
             -> IMove  -- ^ The move being performed
1247
             -> String -- ^ New primary
1248
             -> String -- ^ New secondary
1249
             -> (String, [String])
1250
                -- ^ Tuple of moves and commands list; moves is containing
1251
                -- either @/f/@ for failover or @/r:name/@ for replace
1252
                -- secondary, while the command list holds gnt-instance
1253
                -- commands (without that prefix), e.g \"@failover instance1@\"
1254
computeMoves i inam mv c d =
1255
  case mv of
1256
    Failover -> ("f", [mig])
1257
    FailoverToAny _ -> (printf "fa:%s" c, [mig_any])
1258
    FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
1259
    ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
1260
    ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
1261
    ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
1262
  where morf = if Instance.isRunning i then "migrate" else "failover"
1263
        mig = printf "%s -f %s" morf inam::String
1264
        mig_any = printf "%s -f -n %s %s" morf c inam::String
1265
        rep n = printf "replace-disks -n %s %s" n inam::String
1266

    
1267
-- | Converts a placement to string format.
1268
printSolutionLine :: Node.List     -- ^ The node list
1269
                  -> Instance.List -- ^ The instance list
1270
                  -> Int           -- ^ Maximum node name length
1271
                  -> Int           -- ^ Maximum instance name length
1272
                  -> Placement     -- ^ The current placement
1273
                  -> Int           -- ^ The index of the placement in
1274
                                   -- the solution
1275
                  -> (String, [String])
1276
printSolutionLine nl il nmlen imlen plc pos =
1277
  let pmlen = (2*nmlen + 1)
1278
      (i, p, s, mv, c) = plc
1279
      old_sec = Instance.sNode inst
1280
      inst = Container.find i il
1281
      inam = Instance.alias inst
1282
      npri = Node.alias $ Container.find p nl
1283
      nsec = Node.alias $ Container.find s nl
1284
      opri = Node.alias $ Container.find (Instance.pNode inst) nl
1285
      osec = Node.alias $ Container.find old_sec nl
1286
      (moves, cmds) =  computeMoves inst inam mv npri nsec
1287
      -- FIXME: this should check instead/also the disk template
1288
      ostr = if old_sec == Node.noSecondary
1289
               then printf "%s" opri::String
1290
               else printf "%s:%s" opri osec::String
1291
      nstr = if s == Node.noSecondary
1292
               then printf "%s" npri::String
1293
               else printf "%s:%s" npri nsec::String
1294
  in (printf "  %3d. %-*s %-*s => %-*s %12.8f a=%s"
1295
      pos imlen inam pmlen ostr pmlen nstr c moves,
1296
      cmds)
1297

    
1298
-- | Return the instance and involved nodes in an instance move.
1299
--
1300
-- Note that the output list length can vary, and is not required nor
1301
-- guaranteed to be of any specific length.
1302
involvedNodes :: Instance.List -- ^ Instance list, used for retrieving
1303
                               -- the instance from its index; note
1304
                               -- that this /must/ be the original
1305
                               -- instance list, so that we can
1306
                               -- retrieve the old nodes
1307
              -> Placement     -- ^ The placement we're investigating,
1308
                               -- containing the new nodes and
1309
                               -- instance index
1310
              -> [Ndx]         -- ^ Resulting list of node indices
1311
involvedNodes il plc =
1312
  let (i, np, ns, _, _) = plc
1313
      inst = Container.find i il
1314
  in nub $ [np, ns] ++ Instance.allNodes inst
1315

    
1316
-- | Inner function for splitJobs, that either appends the next job to
1317
-- the current jobset, or starts a new jobset.
1318
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
1319
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
1320
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
1321
  | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
1322
  | otherwise = ([n]:cjs, ndx)
1323

    
1324
-- | Break a list of moves into independent groups. Note that this
1325
-- will reverse the order of jobs.
1326
splitJobs :: [MoveJob] -> [JobSet]
1327
splitJobs = fst . foldl mergeJobs ([], [])
1328

    
1329
-- | Given a list of commands, prefix them with @gnt-instance@ and
1330
-- also beautify the display a little.
1331
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
1332
formatJob jsn jsl (sn, (_, _, _, cmds)) =
1333
  let out =
1334
        printf "  echo job %d/%d" jsn sn:
1335
        printf "  check":
1336
        map ("  gnt-instance " ++) cmds
1337
  in if sn == 1
1338
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
1339
       else out
1340

    
1341
-- | Given a list of commands, prefix them with @gnt-instance@ and
1342
-- also beautify the display a little.
1343
formatCmds :: [JobSet] -> String
1344
formatCmds =
1345
  unlines .
1346
  concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
1347
                           (zip [1..] js)) .
1348
  zip [1..]
1349

    
1350
-- | Print the node list.
1351
printNodes :: Node.List -> [String] -> String
1352
printNodes nl fs =
1353
  let fields = case fs of
1354
                 [] -> Node.defaultFields
1355
                 "+":rest -> Node.defaultFields ++ rest
1356
                 _ -> fs
1357
      snl = sortBy (comparing Node.idx) (Container.elems nl)
1358
      (header, isnum) = unzip $ map Node.showHeader fields
1359
  in printTable "" header (map (Node.list fields) snl) isnum
1360

    
1361
-- | Print the instance list.
1362
printInsts :: Node.List -> Instance.List -> String
1363
printInsts nl il =
1364
  let sil = sortBy (comparing Instance.idx) (Container.elems il)
1365
      helper inst = [ if Instance.isRunning inst then "R" else " "
1366
                    , Instance.name inst
1367
                    , Container.nameOf nl (Instance.pNode inst)
1368
                    , let sdx = Instance.sNode inst
1369
                      in if sdx == Node.noSecondary
1370
                           then  ""
1371
                           else Container.nameOf nl sdx
1372
                    , if Instance.autoBalance inst then "Y" else "N"
1373
                    , printf "%3d" $ Instance.vcpus inst
1374
                    , printf "%5d" $ Instance.mem inst
1375
                    , printf "%5d" $ Instance.dsk inst `div` 1024
1376
                    , printf "%5.3f" lC
1377
                    , printf "%5.3f" lM
1378
                    , printf "%5.3f" lD
1379
                    , printf "%5.3f" lN
1380
                    ]
1381
          where DynUtil lC lM lD lN = Instance.util inst
1382
      header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
1383
               , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
1384
      isnum = False:False:False:False:False:repeat True
1385
  in printTable "" header (map helper sil) isnum
1386

    
1387
-- | Shows statistics for a given node list.
1388
printStats :: String -> Node.List -> String
1389
printStats lp nl =
1390
  let dcvs = compDetailedCV $ Container.elems nl
1391
      (weights, names) = unzip detailedCVInfo
1392
      hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
1393
      header = [ "Field", "Value", "Weight" ]
1394
      formatted = map (\(w, h, val) ->
1395
                         [ h
1396
                         , printf "%.8f" val
1397
                         , printf "x%.2f" w
1398
                         ]) hd
1399
  in printTable lp header formatted $ False:repeat True
1400

    
1401
-- | Convert a placement into a list of OpCodes (basically a job).
1402
iMoveToJob :: Node.List        -- ^ The node list; only used for node
1403
                               -- names, so any version is good
1404
                               -- (before or after the operation)
1405
           -> Instance.List    -- ^ The instance list; also used for
1406
                               -- names only
1407
           -> Idx              -- ^ The index of the instance being
1408
                               -- moved
1409
           -> IMove            -- ^ The actual move to be described
1410
           -> [OpCodes.OpCode] -- ^ The list of opcodes equivalent to
1411
                               -- the given move
1412
iMoveToJob nl il idx move =
1413
  let inst = Container.find idx il
1414
      iname = Instance.name inst
1415
      lookNode  = Just . Container.nameOf nl
1416
      opF = OpCodes.OpInstanceMigrate iname True False True Nothing
1417
      opFA n = OpCodes.OpInstanceMigrate iname True False True (lookNode n)
1418
      opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
1419
              OpCodes.ReplaceNewSecondary [] Nothing
1420
  in case move of
1421
       Failover -> [ opF ]
1422
       FailoverToAny np -> [ opFA np ]
1423
       ReplacePrimary np -> [ opF, opR np, opF ]
1424
       ReplaceSecondary ns -> [ opR ns ]
1425
       ReplaceAndFailover np -> [ opR np, opF ]
1426
       FailoverAndReplace ns -> [ opF, opR ns ]
1427

    
1428
-- * Node group functions
1429

    
1430
-- | Computes the group of an instance.
1431
instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
1432
instanceGroup nl i =
1433
  let sidx = Instance.sNode i
1434
      pnode = Container.find (Instance.pNode i) nl
1435
      snode = if sidx == Node.noSecondary
1436
              then pnode
1437
              else Container.find sidx nl
1438
      pgroup = Node.group pnode
1439
      sgroup = Node.group snode
1440
  in if pgroup /= sgroup
1441
       then fail ("Instance placed accross two node groups, primary " ++
1442
                  show pgroup ++ ", secondary " ++ show sgroup)
1443
       else return pgroup
1444

    
1445
-- | Computes the group of an instance per the primary node.
1446
instancePriGroup :: Node.List -> Instance.Instance -> Gdx
1447
instancePriGroup nl i =
1448
  let pnode = Container.find (Instance.pNode i) nl
1449
  in  Node.group pnode
1450

    
1451
-- | Compute the list of badly allocated instances (split across node
1452
-- groups).
1453
findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
1454
findSplitInstances nl =
1455
  filter (not . isOk . instanceGroup nl) . Container.elems
1456

    
1457
-- | Splits a cluster into the component node groups.
1458
splitCluster :: Node.List -> Instance.List ->
1459
                [(Gdx, (Node.List, Instance.List))]
1460
splitCluster nl il =
1461
  let ngroups = Node.computeGroups (Container.elems nl)
1462
  in map (\(guuid, nodes) ->
1463
           let nidxs = map Node.idx nodes
1464
               nodes' = zip nidxs nodes
1465
               instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
1466
           in (guuid, (Container.fromList nodes', instances))) ngroups
1467

    
1468
-- | Compute the list of nodes that are to be evacuated, given a list
1469
-- of instances and an evacuation mode.
1470
nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list
1471
                -> EvacMode      -- ^ The evacuation mode we're using
1472
                -> [Idx]         -- ^ List of instance indices being evacuated
1473
                -> IntSet.IntSet -- ^ Set of node indices
1474
nodesToEvacuate il mode =
1475
  IntSet.delete Node.noSecondary .
1476
  foldl' (\ns idx ->
1477
            let i = Container.find idx il
1478
                pdx = Instance.pNode i
1479
                sdx = Instance.sNode i
1480
                dt = Instance.diskTemplate i
1481
                withSecondary = case dt of
1482
                                  DTDrbd8 -> IntSet.insert sdx ns
1483
                                  _ -> ns
1484
            in case mode of
1485
                 ChangePrimary   -> IntSet.insert pdx ns
1486
                 ChangeSecondary -> withSecondary
1487
                 ChangeAll       -> IntSet.insert pdx withSecondary
1488
         ) IntSet.empty