Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Cluster.hs @ c85abf30

History | View | Annotate | Download (65.8 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the /Main/ module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
  (
31
    -- * Types
32
    AllocSolution(..)
33
  , EvacSolution(..)
34
  , Table(..)
35
  , CStats(..)
36
  , AllocResult
37
  , AllocMethod
38
  , AllocSolutionList
39
  -- * Generic functions
40
  , totalResources
41
  , computeAllocationDelta
42
  -- * First phase functions
43
  , computeBadItems
44
  -- * Second phase functions
45
  , printSolutionLine
46
  , formatCmds
47
  , involvedNodes
48
  , splitJobs
49
  -- * Display functions
50
  , printNodes
51
  , printInsts
52
  -- * Balacing functions
53
  , checkMove
54
  , doNextBalance
55
  , tryBalance
56
  , compCV
57
  , compCVNodes
58
  , compDetailedCV
59
  , printStats
60
  , iMoveToJob
61
  -- * IAllocator functions
62
  , genAllocNodes
63
  , tryAlloc
64
  , tryMGAlloc
65
  , tryNodeEvac
66
  , tryChangeGroup
67
  , collapseFailures
68
  , allocList
69
  -- * Allocation functions
70
  , iterateAlloc
71
  , tieredAlloc
72
  -- * Node group functions
73
  , instanceGroup
74
  , findSplitInstances
75
  , splitCluster
76
  ) where
77

    
78
import qualified Data.IntSet as IntSet
79
import Data.List
80
import Data.Maybe (fromJust, isNothing)
81
import Data.Ord (comparing)
82
import Text.Printf (printf)
83

    
84
import qualified Ganeti.HTools.Container as Container
85
import qualified Ganeti.HTools.Instance as Instance
86
import qualified Ganeti.HTools.Node as Node
87
import qualified Ganeti.HTools.Group as Group
88
import Ganeti.HTools.Types
89
import Ganeti.HTools.Utils
90
import Ganeti.HTools.Compat
91
import qualified Ganeti.OpCodes as OpCodes
92

    
93
-- * Types
94

    
95
-- | Allocation\/relocation solution.
96
data AllocSolution = AllocSolution
97
  { asFailures :: [FailMode]              -- ^ Failure counts
98
  , asAllocs   :: Int                     -- ^ Good allocation count
99
  , asSolution :: Maybe Node.AllocElement -- ^ The actual allocation result
100
  , asLog      :: [String]                -- ^ Informational messages
101
  }
102

    
103
-- | Node evacuation/group change iallocator result type. This result
104
-- type consists of actual opcodes (a restricted subset) that are
105
-- transmitted back to Ganeti.
106
data EvacSolution = EvacSolution
107
  { esMoved   :: [(Idx, Gdx, [Ndx])]  -- ^ Instances moved successfully
108
  , esFailed  :: [(Idx, String)]      -- ^ Instances which were not
109
                                      -- relocated
110
  , esOpCodes :: [[OpCodes.OpCode]]   -- ^ List of jobs
111
  } deriving (Show)
112

    
113
-- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
114
type AllocResult = (FailStats, Node.List, Instance.List,
115
                    [Instance.Instance], [CStats])
116

    
117
-- | Type alias for easier handling.
118
type AllocSolutionList = [(Instance.Instance, AllocSolution)]
119

    
120
-- | A type denoting the valid allocation mode/pairs.
121
--
122
-- For a one-node allocation, this will be a @Left ['Ndx']@, whereas
123
-- for a two-node allocation, this will be a @Right [('Ndx',
124
-- ['Ndx'])]@. In the latter case, the list is basically an
125
-- association list, grouped by primary node and holding the potential
126
-- secondary nodes in the sub-list.
127
type AllocNodes = Either [Ndx] [(Ndx, [Ndx])]
128

    
129
-- | The empty solution we start with when computing allocations.
130
emptyAllocSolution :: AllocSolution
131
emptyAllocSolution = AllocSolution { asFailures = [], asAllocs = 0
132
                                   , asSolution = Nothing, asLog = [] }
133

    
134
-- | The empty evac solution.
135
emptyEvacSolution :: EvacSolution
136
emptyEvacSolution = EvacSolution { esMoved = []
137
                                 , esFailed = []
138
                                 , esOpCodes = []
139
                                 }
140

    
141
-- | The complete state for the balancing solution.
142
data Table = Table Node.List Instance.List Score [Placement]
143
             deriving (Show, Read)
144

    
145
-- | Cluster statistics data type.
146
data CStats = CStats
147
  { csFmem :: Integer -- ^ Cluster free mem
148
  , csFdsk :: Integer -- ^ Cluster free disk
149
  , csAmem :: Integer -- ^ Cluster allocatable mem
150
  , csAdsk :: Integer -- ^ Cluster allocatable disk
151
  , csAcpu :: Integer -- ^ Cluster allocatable cpus
152
  , csMmem :: Integer -- ^ Max node allocatable mem
153
  , csMdsk :: Integer -- ^ Max node allocatable disk
154
  , csMcpu :: Integer -- ^ Max node allocatable cpu
155
  , csImem :: Integer -- ^ Instance used mem
156
  , csIdsk :: Integer -- ^ Instance used disk
157
  , csIcpu :: Integer -- ^ Instance used cpu
158
  , csTmem :: Double  -- ^ Cluster total mem
159
  , csTdsk :: Double  -- ^ Cluster total disk
160
  , csTcpu :: Double  -- ^ Cluster total cpus
161
  , csVcpu :: Integer -- ^ Cluster total virtual cpus
162
  , csNcpu :: Double  -- ^ Equivalent to 'csIcpu' but in terms of
163
                      -- physical CPUs, i.e. normalised used phys CPUs
164
  , csXmem :: Integer -- ^ Unnacounted for mem
165
  , csNmem :: Integer -- ^ Node own memory
166
  , csScore :: Score  -- ^ The cluster score
167
  , csNinst :: Int    -- ^ The total number of instances
168
  } deriving (Show, Read)
169

    
170
-- | A simple type for allocation functions.
171
type AllocMethod =  Node.List           -- ^ Node list
172
                 -> Instance.List       -- ^ Instance list
173
                 -> Maybe Int           -- ^ Optional allocation limit
174
                 -> Instance.Instance   -- ^ Instance spec for allocation
175
                 -> AllocNodes          -- ^ Which nodes we should allocate on
176
                 -> [Instance.Instance] -- ^ Allocated instances
177
                 -> [CStats]            -- ^ Running cluster stats
178
                 -> Result AllocResult  -- ^ Allocation result
179

    
180
-- | A simple type for the running solution of evacuations.
181
type EvacInnerState =
182
  Either String (Node.List, Instance.Instance, Score, Ndx)
183

    
184
-- * Utility functions
185

    
186
-- | Verifies the N+1 status and return the affected nodes.
187
verifyN1 :: [Node.Node] -> [Node.Node]
188
verifyN1 = filter Node.failN1
189

    
190
{-| Computes the pair of bad nodes and instances.
191

    
192
The bad node list is computed via a simple 'verifyN1' check, and the
193
bad instance list is the list of primary and secondary instances of
194
those nodes.
195

    
196
-}
197
computeBadItems :: Node.List -> Instance.List ->
198
                   ([Node.Node], [Instance.Instance])
199
computeBadItems nl il =
200
  let bad_nodes = verifyN1 $ getOnline nl
201
      bad_instances = map (`Container.find` il) .
202
                      sort . nub $
203
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
204
  in
205
    (bad_nodes, bad_instances)
206

    
207
-- | Extracts the node pairs for an instance. This can fail if the
208
-- instance is single-homed. FIXME: this needs to be improved,
209
-- together with the general enhancement for handling non-DRBD moves.
210
instanceNodes :: Node.List -> Instance.Instance ->
211
                 (Ndx, Ndx, Node.Node, Node.Node)
212
instanceNodes nl inst =
213
  let old_pdx = Instance.pNode inst
214
      old_sdx = Instance.sNode inst
215
      old_p = Container.find old_pdx nl
216
      old_s = Container.find old_sdx nl
217
  in (old_pdx, old_sdx, old_p, old_s)
218

    
219
-- | Zero-initializer for the CStats type.
220
emptyCStats :: CStats
221
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
222

    
223
-- | Update stats with data from a new node.
224
updateCStats :: CStats -> Node.Node -> CStats
225
updateCStats cs node =
226
  let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
227
               csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
228
               csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
229
               csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
230
               csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
231
               csVcpu = x_vcpu, csNcpu = x_ncpu,
232
               csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
233
             }
234
        = cs
235
      inc_amem = Node.fMem node - Node.rMem node
236
      inc_amem' = if inc_amem > 0 then inc_amem else 0
237
      inc_adsk = Node.availDisk node
238
      inc_imem = truncate (Node.tMem node) - Node.nMem node
239
                 - Node.xMem node - Node.fMem node
240
      inc_icpu = Node.uCpu node
241
      inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
242
      inc_vcpu = Node.hiCpu node
243
      inc_acpu = Node.availCpu node
244
      inc_ncpu = fromIntegral (Node.uCpu node) /
245
                 iPolicyVcpuRatio (Node.iPolicy node)
246
  in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
247
        , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
248
        , csAmem = x_amem + fromIntegral inc_amem'
249
        , csAdsk = x_adsk + fromIntegral inc_adsk
250
        , csAcpu = x_acpu + fromIntegral inc_acpu
251
        , csMmem = max x_mmem (fromIntegral inc_amem')
252
        , csMdsk = max x_mdsk (fromIntegral inc_adsk)
253
        , csMcpu = max x_mcpu (fromIntegral inc_acpu)
254
        , csImem = x_imem + fromIntegral inc_imem
255
        , csIdsk = x_idsk + fromIntegral inc_idsk
256
        , csIcpu = x_icpu + fromIntegral inc_icpu
257
        , csTmem = x_tmem + Node.tMem node
258
        , csTdsk = x_tdsk + Node.tDsk node
259
        , csTcpu = x_tcpu + Node.tCpu node
260
        , csVcpu = x_vcpu + fromIntegral inc_vcpu
261
        , csNcpu = x_ncpu + inc_ncpu
262
        , csXmem = x_xmem + fromIntegral (Node.xMem node)
263
        , csNmem = x_nmem + fromIntegral (Node.nMem node)
264
        , csNinst = x_ninst + length (Node.pList node)
265
        }
266

    
267
-- | Compute the total free disk and memory in the cluster.
268
totalResources :: Node.List -> CStats
269
totalResources nl =
270
  let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
271
  in cs { csScore = compCV nl }
272

    
273
-- | Compute the delta between two cluster state.
274
--
275
-- This is used when doing allocations, to understand better the
276
-- available cluster resources. The return value is a triple of the
277
-- current used values, the delta that was still allocated, and what
278
-- was left unallocated.
279
computeAllocationDelta :: CStats -> CStats -> AllocStats
280
computeAllocationDelta cini cfin =
281
  let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu,
282
              csNcpu = i_ncpu } = cini
283
      CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
284
              csTmem = t_mem, csTdsk = t_dsk, csVcpu = f_vcpu,
285
              csNcpu = f_ncpu, csTcpu = f_tcpu } = cfin
286
      rini = AllocInfo { allocInfoVCpus = fromIntegral i_icpu
287
                       , allocInfoNCpus = i_ncpu
288
                       , allocInfoMem   = fromIntegral i_imem
289
                       , allocInfoDisk  = fromIntegral i_idsk
290
                       }
291
      rfin = AllocInfo { allocInfoVCpus = fromIntegral (f_icpu - i_icpu)
292
                       , allocInfoNCpus = f_ncpu - i_ncpu
293
                       , allocInfoMem   = fromIntegral (f_imem - i_imem)
294
                       , allocInfoDisk  = fromIntegral (f_idsk - i_idsk)
295
                       }
296
      runa = AllocInfo { allocInfoVCpus = fromIntegral (f_vcpu - f_icpu)
297
                       , allocInfoNCpus = f_tcpu - f_ncpu
298
                       , allocInfoMem   = truncate t_mem - fromIntegral f_imem
299
                       , allocInfoDisk  = truncate t_dsk - fromIntegral f_idsk
300
                       }
301
  in (rini, rfin, runa)
302

    
303
-- | The names and weights of the individual elements in the CV list.
304
detailedCVInfo :: [(Double, String)]
305
detailedCVInfo = [ (1,  "free_mem_cv")
306
                 , (1,  "free_disk_cv")
307
                 , (1,  "n1_cnt")
308
                 , (1,  "reserved_mem_cv")
309
                 , (4,  "offline_all_cnt")
310
                 , (16, "offline_pri_cnt")
311
                 , (1,  "vcpu_ratio_cv")
312
                 , (1,  "cpu_load_cv")
313
                 , (1,  "mem_load_cv")
314
                 , (1,  "disk_load_cv")
315
                 , (1,  "net_load_cv")
316
                 , (2,  "pri_tags_score")
317
                 , (1,  "spindles_cv")
318
                 ]
319

    
320
-- | Holds the weights used by 'compCVNodes' for each metric.
321
detailedCVWeights :: [Double]
322
detailedCVWeights = map fst detailedCVInfo
323

    
324
-- | Compute the mem and disk covariance.
325
compDetailedCV :: [Node.Node] -> [Double]
326
compDetailedCV all_nodes =
327
  let (offline, nodes) = partition Node.offline all_nodes
328
      mem_l = map Node.pMem nodes
329
      dsk_l = map Node.pDsk nodes
330
      -- metric: memory covariance
331
      mem_cv = stdDev mem_l
332
      -- metric: disk covariance
333
      dsk_cv = stdDev dsk_l
334
      -- metric: count of instances living on N1 failing nodes
335
      n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
336
                                                 length (Node.pList n)) .
337
                 filter Node.failN1 $ nodes :: Double
338
      res_l = map Node.pRem nodes
339
      -- metric: reserved memory covariance
340
      res_cv = stdDev res_l
341
      -- offline instances metrics
342
      offline_ipri = sum . map (length . Node.pList) $ offline
343
      offline_isec = sum . map (length . Node.sList) $ offline
344
      -- metric: count of instances on offline nodes
345
      off_score = fromIntegral (offline_ipri + offline_isec)::Double
346
      -- metric: count of primary instances on offline nodes (this
347
      -- helps with evacuation/failover of primary instances on
348
      -- 2-node clusters with one node offline)
349
      off_pri_score = fromIntegral offline_ipri::Double
350
      cpu_l = map Node.pCpu nodes
351
      -- metric: covariance of vcpu/pcpu ratio
352
      cpu_cv = stdDev cpu_l
353
      -- metrics: covariance of cpu, memory, disk and network load
354
      (c_load, m_load, d_load, n_load) =
355
        unzip4 $ map (\n ->
356
                      let DynUtil c1 m1 d1 n1 = Node.utilLoad n
357
                          DynUtil c2 m2 d2 n2 = Node.utilPool n
358
                      in (c1/c2, m1/m2, d1/d2, n1/n2)) nodes
359
      -- metric: conflicting instance count
360
      pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
361
      pri_tags_score = fromIntegral pri_tags_inst::Double
362
      -- metric: spindles %
363
      spindles_cv = map (\n -> Node.instSpindles n / Node.hiSpindles n) nodes
364
  in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
365
     , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
366
     , pri_tags_score, stdDev spindles_cv ]
367

    
368
-- | Compute the /total/ variance.
369
compCVNodes :: [Node.Node] -> Double
370
compCVNodes = sum . zipWith (*) detailedCVWeights . compDetailedCV
371

    
372
-- | Wrapper over 'compCVNodes' for callers that have a 'Node.List'.
373
compCV :: Node.List -> Double
374
compCV = compCVNodes . Container.elems
375

    
376
-- | Compute online nodes from a 'Node.List'.
377
getOnline :: Node.List -> [Node.Node]
378
getOnline = filter (not . Node.offline) . Container.elems
379

    
380
-- * Balancing functions
381

    
382
-- | Compute best table. Note that the ordering of the arguments is important.
383
compareTables :: Table -> Table -> Table
384
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
385
  if a_cv > b_cv then b else a
386

    
387
-- | Applies an instance move to a given node list and instance.
388
applyMove :: Node.List -> Instance.Instance
389
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
390
-- Failover (f)
391
applyMove nl inst Failover =
392
  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
393
      int_p = Node.removePri old_p inst
394
      int_s = Node.removeSec old_s inst
395
      new_nl = do -- Maybe monad
396
        new_p <- Node.addPriEx (Node.offline old_p) int_s inst
397
        new_s <- Node.addSec int_p inst old_sdx
398
        let new_inst = Instance.setBoth inst old_sdx old_pdx
399
        return (Container.addTwo old_pdx new_s old_sdx new_p nl,
400
                new_inst, old_sdx, old_pdx)
401
  in new_nl
402

    
403
-- Failover to any (fa)
404
applyMove nl inst (FailoverToAny new_pdx) = do
405
  let (old_pdx, old_sdx, old_pnode, _) = instanceNodes nl inst
406
      new_pnode = Container.find new_pdx nl
407
      force_failover = Node.offline old_pnode
408
  new_pnode' <- Node.addPriEx force_failover new_pnode inst
409
  let old_pnode' = Node.removePri old_pnode inst
410
      inst' = Instance.setPri inst new_pdx
411
      nl' = Container.addTwo old_pdx old_pnode' new_pdx new_pnode' nl
412
  return (nl', inst', new_pdx, old_sdx)
413

    
414
-- Replace the primary (f:, r:np, f)
415
applyMove nl inst (ReplacePrimary new_pdx) =
416
  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
417
      tgt_n = Container.find new_pdx nl
418
      int_p = Node.removePri old_p inst
419
      int_s = Node.removeSec old_s inst
420
      force_p = Node.offline old_p
421
      new_nl = do -- Maybe monad
422
                  -- check that the current secondary can host the instance
423
                  -- during the migration
424
        tmp_s <- Node.addPriEx force_p int_s inst
425
        let tmp_s' = Node.removePri tmp_s inst
426
        new_p <- Node.addPriEx force_p tgt_n inst
427
        new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
428
        let new_inst = Instance.setPri inst new_pdx
429
        return (Container.add new_pdx new_p $
430
                Container.addTwo old_pdx int_p old_sdx new_s nl,
431
                new_inst, new_pdx, old_sdx)
432
  in new_nl
433

    
434
-- Replace the secondary (r:ns)
435
applyMove nl inst (ReplaceSecondary new_sdx) =
436
  let old_pdx = Instance.pNode inst
437
      old_sdx = Instance.sNode inst
438
      old_s = Container.find old_sdx nl
439
      tgt_n = Container.find new_sdx nl
440
      int_s = Node.removeSec old_s inst
441
      force_s = Node.offline old_s
442
      new_inst = Instance.setSec inst new_sdx
443
      new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
444
               \new_s -> return (Container.addTwo new_sdx
445
                                 new_s old_sdx int_s nl,
446
                                 new_inst, old_pdx, new_sdx)
447
  in new_nl
448

    
449
-- Replace the secondary and failover (r:np, f)
450
applyMove nl inst (ReplaceAndFailover new_pdx) =
451
  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
452
      tgt_n = Container.find new_pdx nl
453
      int_p = Node.removePri old_p inst
454
      int_s = Node.removeSec old_s inst
455
      force_s = Node.offline old_s
456
      new_nl = do -- Maybe monad
457
        new_p <- Node.addPri tgt_n inst
458
        new_s <- Node.addSecEx force_s int_p inst new_pdx
459
        let new_inst = Instance.setBoth inst new_pdx old_pdx
460
        return (Container.add new_pdx new_p $
461
                Container.addTwo old_pdx new_s old_sdx int_s nl,
462
                new_inst, new_pdx, old_pdx)
463
  in new_nl
464

    
465
-- Failver and replace the secondary (f, r:ns)
466
applyMove nl inst (FailoverAndReplace new_sdx) =
467
  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
468
      tgt_n = Container.find new_sdx nl
469
      int_p = Node.removePri old_p inst
470
      int_s = Node.removeSec old_s inst
471
      force_p = Node.offline old_p
472
      new_nl = do -- Maybe monad
473
        new_p <- Node.addPriEx force_p int_s inst
474
        new_s <- Node.addSecEx force_p tgt_n inst old_sdx
475
        let new_inst = Instance.setBoth inst old_sdx new_sdx
476
        return (Container.add new_sdx new_s $
477
                Container.addTwo old_sdx new_p old_pdx int_p nl,
478
                new_inst, old_sdx, new_sdx)
479
  in new_nl
480

    
481
-- | Tries to allocate an instance on one given node.
482
allocateOnSingle :: Node.List -> Instance.Instance -> Ndx
483
                 -> OpResult Node.AllocElement
484
allocateOnSingle nl inst new_pdx =
485
  let p = Container.find new_pdx nl
486
      new_inst = Instance.setBoth inst new_pdx Node.noSecondary
487
  in do
488
    Instance.instMatchesPolicy inst (Node.iPolicy p)
489
    new_p <- Node.addPri p inst
490
    let new_nl = Container.add new_pdx new_p nl
491
        new_score = compCV nl
492
    return (new_nl, new_inst, [new_p], new_score)
493

    
494
-- | Tries to allocate an instance on a given pair of nodes.
495
allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx
496
               -> OpResult Node.AllocElement
497
allocateOnPair nl inst new_pdx new_sdx =
498
  let tgt_p = Container.find new_pdx nl
499
      tgt_s = Container.find new_sdx nl
500
  in do
501
    Instance.instMatchesPolicy inst (Node.iPolicy tgt_p)
502
    new_p <- Node.addPri tgt_p inst
503
    new_s <- Node.addSec tgt_s inst new_pdx
504
    let new_inst = Instance.setBoth inst new_pdx new_sdx
505
        new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
506
    return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
507

    
508
-- | Tries to perform an instance move and returns the best table
509
-- between the original one and the new one.
510
checkSingleStep :: Table -- ^ The original table
511
                -> Instance.Instance -- ^ The instance to move
512
                -> Table -- ^ The current best table
513
                -> IMove -- ^ The move to apply
514
                -> Table -- ^ The final best table
515
checkSingleStep ini_tbl target cur_tbl move =
516
  let Table ini_nl ini_il _ ini_plc = ini_tbl
517
      tmp_resu = applyMove ini_nl target move
518
  in case tmp_resu of
519
       OpFail _ -> cur_tbl
520
       OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
521
         let tgt_idx = Instance.idx target
522
             upd_cvar = compCV upd_nl
523
             upd_il = Container.add tgt_idx new_inst ini_il
524
             upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
525
             upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
526
         in compareTables cur_tbl upd_tbl
527

    
528
-- | Given the status of the current secondary as a valid new node and
529
-- the current candidate target node, generate the possible moves for
530
-- a instance.
531
possibleMoves :: MirrorType -- ^ The mirroring type of the instance
532
              -> Bool       -- ^ Whether the secondary node is a valid new node
533
              -> Bool       -- ^ Whether we can change the primary node
534
              -> Ndx        -- ^ Target node candidate
535
              -> [IMove]    -- ^ List of valid result moves
536

    
537
possibleMoves MirrorNone _ _ _ = []
538

    
539
possibleMoves MirrorExternal _ False _ = []
540

    
541
possibleMoves MirrorExternal _ True tdx =
542
  [ FailoverToAny tdx ]
543

    
544
possibleMoves MirrorInternal _ False tdx =
545
  [ ReplaceSecondary tdx ]
546

    
547
possibleMoves MirrorInternal True True tdx =
548
  [ ReplaceSecondary tdx
549
  , ReplaceAndFailover tdx
550
  , ReplacePrimary tdx
551
  , FailoverAndReplace tdx
552
  ]
553

    
554
possibleMoves MirrorInternal False True tdx =
555
  [ ReplaceSecondary tdx
556
  , ReplaceAndFailover tdx
557
  ]
558

    
559
-- | Compute the best move for a given instance.
560
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
561
                  -> Bool              -- ^ Whether disk moves are allowed
562
                  -> Bool              -- ^ Whether instance moves are allowed
563
                  -> Table             -- ^ Original table
564
                  -> Instance.Instance -- ^ Instance to move
565
                  -> Table             -- ^ Best new table for this instance
566
checkInstanceMove nodes_idx disk_moves inst_moves ini_tbl target =
567
  let opdx = Instance.pNode target
568
      osdx = Instance.sNode target
569
      bad_nodes = [opdx, osdx]
570
      nodes = filter (`notElem` bad_nodes) nodes_idx
571
      mir_type = Instance.mirrorType target
572
      use_secondary = elem osdx nodes_idx && inst_moves
573
      aft_failover = if mir_type == MirrorInternal && use_secondary
574
                       -- if drbd and allowed to failover
575
                       then checkSingleStep ini_tbl target ini_tbl Failover
576
                       else ini_tbl
577
      all_moves =
578
        if disk_moves
579
          then concatMap (possibleMoves mir_type use_secondary inst_moves)
580
               nodes
581
          else []
582
    in
583
      -- iterate over the possible nodes for this instance
584
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
585

    
586
-- | Compute the best next move.
587
checkMove :: [Ndx]               -- ^ Allowed target node indices
588
          -> Bool                -- ^ Whether disk moves are allowed
589
          -> Bool                -- ^ Whether instance moves are allowed
590
          -> Table               -- ^ The current solution
591
          -> [Instance.Instance] -- ^ List of instances still to move
592
          -> Table               -- ^ The new solution
593
checkMove nodes_idx disk_moves inst_moves ini_tbl victims =
594
  let Table _ _ _ ini_plc = ini_tbl
595
      -- we're using rwhnf from the Control.Parallel.Strategies
596
      -- package; we don't need to use rnf as that would force too
597
      -- much evaluation in single-threaded cases, and in
598
      -- multi-threaded case the weak head normal form is enough to
599
      -- spark the evaluation
600
      tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves
601
                             inst_moves ini_tbl)
602
               victims
603
      -- iterate over all instances, computing the best move
604
      best_tbl = foldl' compareTables ini_tbl tables
605
      Table _ _ _ best_plc = best_tbl
606
  in if length best_plc == length ini_plc
607
       then ini_tbl -- no advancement
608
       else best_tbl
609

    
610
-- | Check if we are allowed to go deeper in the balancing.
611
doNextBalance :: Table     -- ^ The starting table
612
              -> Int       -- ^ Remaining length
613
              -> Score     -- ^ Score at which to stop
614
              -> Bool      -- ^ The resulting table and commands
615
doNextBalance ini_tbl max_rounds min_score =
616
  let Table _ _ ini_cv ini_plc = ini_tbl
617
      ini_plc_len = length ini_plc
618
  in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
619

    
620
-- | Run a balance move.
621
tryBalance :: Table       -- ^ The starting table
622
           -> Bool        -- ^ Allow disk moves
623
           -> Bool        -- ^ Allow instance moves
624
           -> Bool        -- ^ Only evacuate moves
625
           -> Score       -- ^ Min gain threshold
626
           -> Score       -- ^ Min gain
627
           -> Maybe Table -- ^ The resulting table and commands
628
tryBalance ini_tbl disk_moves inst_moves evac_mode mg_limit min_gain =
629
    let Table ini_nl ini_il ini_cv _ = ini_tbl
630
        all_inst = Container.elems ini_il
631
        all_nodes = Container.elems ini_nl
632
        (offline_nodes, online_nodes) = partition Node.offline all_nodes
633
        all_inst' = if evac_mode
634
                      then let bad_nodes = map Node.idx offline_nodes
635
                           in filter (any (`elem` bad_nodes) .
636
                                          Instance.allNodes) all_inst
637
                      else all_inst
638
        reloc_inst = filter Instance.movable all_inst'
639
        node_idx = map Node.idx online_nodes
640
        fin_tbl = checkMove node_idx disk_moves inst_moves ini_tbl reloc_inst
641
        (Table _ _ fin_cv _) = fin_tbl
642
    in
643
      if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
644
      then Just fin_tbl -- this round made success, return the new table
645
      else Nothing
646

    
647
-- * Allocation functions
648

    
649
-- | Build failure stats out of a list of failures.
650
collapseFailures :: [FailMode] -> FailStats
651
collapseFailures flst =
652
    map (\k -> (k, foldl' (\a e -> if e == k then a + 1 else a) 0 flst))
653
            [minBound..maxBound]
654

    
655
-- | Compares two Maybe AllocElement and chooses the besst score.
656
bestAllocElement :: Maybe Node.AllocElement
657
                 -> Maybe Node.AllocElement
658
                 -> Maybe Node.AllocElement
659
bestAllocElement a Nothing = a
660
bestAllocElement Nothing b = b
661
bestAllocElement a@(Just (_, _, _, ascore)) b@(Just (_, _, _, bscore)) =
662
  if ascore < bscore then a else b
663

    
664
-- | Update current Allocation solution and failure stats with new
665
-- elements.
666
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
667
concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
668

    
669
concatAllocs as (OpGood ns) =
670
  let -- Choose the old or new solution, based on the cluster score
671
    cntok = asAllocs as
672
    osols = asSolution as
673
    nsols = bestAllocElement osols (Just ns)
674
    nsuc = cntok + 1
675
    -- Note: we force evaluation of nsols here in order to keep the
676
    -- memory profile low - we know that we will need nsols for sure
677
    -- in the next cycle, so we force evaluation of nsols, since the
678
    -- foldl' in the caller will only evaluate the tuple, but not the
679
    -- elements of the tuple
680
  in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolution = nsols }
681

    
682
-- | Sums two 'AllocSolution' structures.
683
sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
684
sumAllocs (AllocSolution aFails aAllocs aSols aLog)
685
          (AllocSolution bFails bAllocs bSols bLog) =
686
  -- note: we add b first, since usually it will be smaller; when
687
  -- fold'ing, a will grow and grow whereas b is the per-group
688
  -- result, hence smaller
689
  let nFails  = bFails ++ aFails
690
      nAllocs = aAllocs + bAllocs
691
      nSols   = bestAllocElement aSols bSols
692
      nLog    = bLog ++ aLog
693
  in AllocSolution nFails nAllocs nSols nLog
694

    
695
-- | Given a solution, generates a reasonable description for it.
696
describeSolution :: AllocSolution -> String
697
describeSolution as =
698
  let fcnt = asFailures as
699
      sols = asSolution as
700
      freasons =
701
        intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
702
        filter ((> 0) . snd) . collapseFailures $ fcnt
703
  in case sols of
704
     Nothing -> "No valid allocation solutions, failure reasons: " ++
705
                (if null fcnt then "unknown reasons" else freasons)
706
     Just (_, _, nodes, cv) ->
707
         printf ("score: %.8f, successes %d, failures %d (%s)" ++
708
                 " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
709
               (intercalate "/" . map Node.name $ nodes)
710

    
711
-- | Annotates a solution with the appropriate string.
712
annotateSolution :: AllocSolution -> AllocSolution
713
annotateSolution as = as { asLog = describeSolution as : asLog as }
714

    
715
-- | Reverses an evacuation solution.
716
--
717
-- Rationale: we always concat the results to the top of the lists, so
718
-- for proper jobset execution, we should reverse all lists.
719
reverseEvacSolution :: EvacSolution -> EvacSolution
720
reverseEvacSolution (EvacSolution f m o) =
721
  EvacSolution (reverse f) (reverse m) (reverse o)
722

    
723
-- | Generate the valid node allocation singles or pairs for a new instance.
724
genAllocNodes :: Group.List        -- ^ Group list
725
              -> Node.List         -- ^ The node map
726
              -> Int               -- ^ The number of nodes required
727
              -> Bool              -- ^ Whether to drop or not
728
                                   -- unallocable nodes
729
              -> Result AllocNodes -- ^ The (monadic) result
730
genAllocNodes gl nl count drop_unalloc =
731
  let filter_fn = if drop_unalloc
732
                    then filter (Group.isAllocable .
733
                                 flip Container.find gl . Node.group)
734
                    else id
735
      all_nodes = filter_fn $ getOnline nl
736
      all_pairs = [(Node.idx p,
737
                    [Node.idx s | s <- all_nodes,
738
                                       Node.idx p /= Node.idx s,
739
                                       Node.group p == Node.group s]) |
740
                   p <- all_nodes]
741
  in case count of
742
       1 -> Ok (Left (map Node.idx all_nodes))
743
       2 -> Ok (Right (filter (not . null . snd) all_pairs))
744
       _ -> Bad "Unsupported number of nodes, only one or two  supported"
745

    
746
-- | Try to allocate an instance on the cluster.
747
tryAlloc :: (Monad m) =>
748
            Node.List         -- ^ The node list
749
         -> Instance.List     -- ^ The instance list
750
         -> Instance.Instance -- ^ The instance to allocate
751
         -> AllocNodes        -- ^ The allocation targets
752
         -> m AllocSolution   -- ^ Possible solution list
753
tryAlloc _  _ _    (Right []) = fail "Not enough online nodes"
754
tryAlloc nl _ inst (Right ok_pairs) =
755
  let psols = parMap rwhnf (\(p, ss) ->
756
                              foldl' (\cstate ->
757
                                        concatAllocs cstate .
758
                                        allocateOnPair nl inst p)
759
                              emptyAllocSolution ss) ok_pairs
760
      sols = foldl' sumAllocs emptyAllocSolution psols
761
  in return $ annotateSolution sols
762

    
763
tryAlloc _  _ _    (Left []) = fail "No online nodes"
764
tryAlloc nl _ inst (Left all_nodes) =
765
  let sols = foldl' (\cstate ->
766
                       concatAllocs cstate . allocateOnSingle nl inst
767
                    ) emptyAllocSolution all_nodes
768
  in return $ annotateSolution sols
769

    
770
-- | Given a group/result, describe it as a nice (list of) messages.
771
solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
772
solutionDescription gl (groupId, result) =
773
  case result of
774
    Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution)
775
    Bad message -> [printf "Group %s: error %s" gname message]
776
  where grp = Container.find groupId gl
777
        gname = Group.name grp
778
        pol = allocPolicyToRaw (Group.allocPolicy grp)
779

    
780
-- | From a list of possibly bad and possibly empty solutions, filter
781
-- only the groups with a valid result. Note that the result will be
782
-- reversed compared to the original list.
783
filterMGResults :: Group.List
784
                -> [(Gdx, Result AllocSolution)]
785
                -> [(Gdx, AllocSolution)]
786
filterMGResults gl = foldl' fn []
787
  where unallocable = not . Group.isAllocable . flip Container.find gl
788
        fn accu (gdx, rasol) =
789
          case rasol of
790
            Bad _ -> accu
791
            Ok sol | isNothing (asSolution sol) -> accu
792
                   | unallocable gdx -> accu
793
                   | otherwise -> (gdx, sol):accu
794

    
795
-- | Sort multigroup results based on policy and score.
796
sortMGResults :: Group.List
797
             -> [(Gdx, AllocSolution)]
798
             -> [(Gdx, AllocSolution)]
799
sortMGResults gl sols =
800
  let extractScore (_, _, _, x) = x
801
      solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
802
                             (extractScore . fromJust . asSolution) sol)
803
  in sortBy (comparing solScore) sols
804

    
805
-- | Finds the best group for an instance on a multi-group cluster.
806
--
807
-- Only solutions in @preferred@ and @last_resort@ groups will be
808
-- accepted as valid, and additionally if the allowed groups parameter
809
-- is not null then allocation will only be run for those group
810
-- indices.
811
findBestAllocGroup :: Group.List           -- ^ The group list
812
                   -> Node.List            -- ^ The node list
813
                   -> Instance.List        -- ^ The instance list
814
                   -> Maybe [Gdx]          -- ^ The allowed groups
815
                   -> Instance.Instance    -- ^ The instance to allocate
816
                   -> Int                  -- ^ Required number of nodes
817
                   -> Result (Gdx, AllocSolution, [String])
818
findBestAllocGroup mggl mgnl mgil allowed_gdxs inst cnt =
819
  let groups = splitCluster mgnl mgil
820
      groups' = maybe groups (\gs -> filter ((`elem` gs) . fst) groups)
821
                allowed_gdxs
822
      sols = map (\(gid, (nl, il)) ->
823
                   (gid, genAllocNodes mggl nl cnt False >>=
824
                       tryAlloc nl il inst))
825
             groups'::[(Gdx, Result AllocSolution)]
826
      all_msgs = concatMap (solutionDescription mggl) sols
827
      goodSols = filterMGResults mggl sols
828
      sortedSols = sortMGResults mggl goodSols
829
  in if null sortedSols
830
       then if null groups'
831
              then Bad $ "no groups for evacuation: allowed groups was" ++
832
                     show allowed_gdxs ++ ", all groups: " ++
833
                     show (map fst groups)
834
              else Bad $ intercalate ", " all_msgs
835
       else let (final_group, final_sol) = head sortedSols
836
            in return (final_group, final_sol, all_msgs)
837

    
838
-- | Try to allocate an instance on a multi-group cluster.
839
tryMGAlloc :: Group.List           -- ^ The group list
840
           -> Node.List            -- ^ The node list
841
           -> Instance.List        -- ^ The instance list
842
           -> Instance.Instance    -- ^ The instance to allocate
843
           -> Int                  -- ^ Required number of nodes
844
           -> Result AllocSolution -- ^ Possible solution list
845
tryMGAlloc mggl mgnl mgil inst cnt = do
846
  (best_group, solution, all_msgs) <-
847
      findBestAllocGroup mggl mgnl mgil Nothing inst cnt
848
  let group_name = Group.name $ Container.find best_group mggl
849
      selmsg = "Selected group: " ++ group_name
850
  return $ solution { asLog = selmsg:all_msgs }
851

    
852
-- | Calculate the new instance list after allocation solution.
853
updateIl :: Instance.List           -- ^ The original instance list
854
         -> Maybe Node.AllocElement -- ^ The result of the allocation attempt
855
         -> Instance.List           -- ^ The updated instance list
856
updateIl il Nothing = il
857
updateIl il (Just (_, xi, _, _)) = Container.add (Container.size il) xi il
858

    
859
-- | Extract the the new node list from the allocation solution.
860
extractNl :: Node.List               -- ^ The original node list
861
          -> Maybe Node.AllocElement -- ^ The result of the allocation attempt
862
          -> Node.List               -- ^ The new node list
863
extractNl nl Nothing = nl
864
extractNl _ (Just (xnl, _, _, _)) = xnl
865

    
866
-- | Try to allocate a list of instances on a multi-group cluster.
867
allocList :: Group.List                  -- ^ The group list
868
          -> Node.List                   -- ^ The node list
869
          -> Instance.List               -- ^ The instance list
870
          -> [(Instance.Instance, Int)]  -- ^ The instance to allocate
871
          -> AllocSolutionList           -- ^ Possible solution list
872
          -> Result (Node.List, Instance.List,
873
                     AllocSolutionList)  -- ^ The final solution list
874
allocList _  nl il [] result = Ok (nl, il, result)
875
allocList gl nl il ((xi, xicnt):xies) result = do
876
  ares <- tryMGAlloc gl nl il xi xicnt
877
  let sol = asSolution ares
878
      nl' = extractNl nl sol
879
      il' = updateIl il sol
880
  allocList gl nl' il' xies ((xi, ares):result)
881

    
882
-- | Function which fails if the requested mode is change secondary.
883
--
884
-- This is useful since except DRBD, no other disk template can
885
-- execute change secondary; thus, we can just call this function
886
-- instead of always checking for secondary mode. After the call to
887
-- this function, whatever mode we have is just a primary change.
888
failOnSecondaryChange :: (Monad m) => EvacMode -> DiskTemplate -> m ()
889
failOnSecondaryChange ChangeSecondary dt =
890
  fail $ "Instances with disk template '" ++ diskTemplateToRaw dt ++
891
         "' can't execute change secondary"
892
failOnSecondaryChange _ _ = return ()
893

    
894
-- | Run evacuation for a single instance.
895
--
896
-- /Note:/ this function should correctly execute both intra-group
897
-- evacuations (in all modes) and inter-group evacuations (in the
898
-- 'ChangeAll' mode). Of course, this requires that the correct list
899
-- of target nodes is passed.
900
nodeEvacInstance :: Node.List         -- ^ The node list (cluster-wide)
901
                 -> Instance.List     -- ^ Instance list (cluster-wide)
902
                 -> EvacMode          -- ^ The evacuation mode
903
                 -> Instance.Instance -- ^ The instance to be evacuated
904
                 -> Gdx               -- ^ The group we're targetting
905
                 -> [Ndx]             -- ^ The list of available nodes
906
                                      -- for allocation
907
                 -> Result (Node.List, Instance.List, [OpCodes.OpCode])
908
nodeEvacInstance nl il mode inst@(Instance.Instance
909
                                  {Instance.diskTemplate = dt@DTDiskless})
910
                 gdx avail_nodes =
911
                   failOnSecondaryChange mode dt >>
912
                   evacOneNodeOnly nl il inst gdx avail_nodes
913

    
914
nodeEvacInstance _ _ _ (Instance.Instance
915
                        {Instance.diskTemplate = DTPlain}) _ _ =
916
                  fail "Instances of type plain cannot be relocated"
917

    
918
nodeEvacInstance _ _ _ (Instance.Instance
919
                        {Instance.diskTemplate = DTFile}) _ _ =
920
                  fail "Instances of type file cannot be relocated"
921

    
922
nodeEvacInstance nl il mode inst@(Instance.Instance
923
                                  {Instance.diskTemplate = dt@DTSharedFile})
924
                 gdx avail_nodes =
925
                   failOnSecondaryChange mode dt >>
926
                   evacOneNodeOnly nl il inst gdx avail_nodes
927

    
928
nodeEvacInstance nl il mode inst@(Instance.Instance
929
                                  {Instance.diskTemplate = dt@DTBlock})
930
                 gdx avail_nodes =
931
                   failOnSecondaryChange mode dt >>
932
                   evacOneNodeOnly nl il inst gdx avail_nodes
933

    
934
nodeEvacInstance nl il mode inst@(Instance.Instance
935
                                  {Instance.diskTemplate = dt@DTRbd})
936
                 gdx avail_nodes =
937
                   failOnSecondaryChange mode dt >>
938
                   evacOneNodeOnly nl il inst gdx avail_nodes
939

    
940
nodeEvacInstance nl il ChangePrimary
941
                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
942
                 _ _ =
943
  do
944
    (nl', inst', _, _) <- opToResult $ applyMove nl inst Failover
945
    let idx = Instance.idx inst
946
        il' = Container.add idx inst' il
947
        ops = iMoveToJob nl' il' idx Failover
948
    return (nl', il', ops)
949

    
950
nodeEvacInstance nl il ChangeSecondary
951
                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
952
                 gdx avail_nodes =
953
  evacOneNodeOnly nl il inst gdx avail_nodes
954

    
955
-- The algorithm for ChangeAll is as follows:
956
--
957
-- * generate all (primary, secondary) node pairs for the target groups
958
-- * for each pair, execute the needed moves (r:s, f, r:s) and compute
959
--   the final node list state and group score
960
-- * select the best choice via a foldl that uses the same Either
961
--   String solution as the ChangeSecondary mode
962
nodeEvacInstance nl il ChangeAll
963
                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
964
                 gdx avail_nodes =
965
  do
966
    let no_nodes = Left "no nodes available"
967
        node_pairs = [(p,s) | p <- avail_nodes, s <- avail_nodes, p /= s]
968
    (nl', il', ops, _) <-
969
        annotateResult "Can't find any good nodes for relocation" .
970
        eitherToResult $
971
        foldl'
972
        (\accu nodes -> case evacDrbdAllInner nl il inst gdx nodes of
973
                          Bad msg ->
974
                              case accu of
975
                                Right _ -> accu
976
                                -- we don't need more details (which
977
                                -- nodes, etc.) as we only selected
978
                                -- this group if we can allocate on
979
                                -- it, hence failures will not
980
                                -- propagate out of this fold loop
981
                                Left _ -> Left $ "Allocation failed: " ++ msg
982
                          Ok result@(_, _, _, new_cv) ->
983
                              let new_accu = Right result in
984
                              case accu of
985
                                Left _ -> new_accu
986
                                Right (_, _, _, old_cv) ->
987
                                    if old_cv < new_cv
988
                                    then accu
989
                                    else new_accu
990
        ) no_nodes node_pairs
991

    
992
    return (nl', il', ops)
993

    
994
-- | Generic function for changing one node of an instance.
995
--
996
-- This is similar to 'nodeEvacInstance' but will be used in a few of
997
-- its sub-patterns. It folds the inner function 'evacOneNodeInner'
998
-- over the list of available nodes, which results in the best choice
999
-- for relocation.
1000
evacOneNodeOnly :: Node.List         -- ^ The node list (cluster-wide)
1001
                -> Instance.List     -- ^ Instance list (cluster-wide)
1002
                -> Instance.Instance -- ^ The instance to be evacuated
1003
                -> Gdx               -- ^ The group we're targetting
1004
                -> [Ndx]             -- ^ The list of available nodes
1005
                                      -- for allocation
1006
                -> Result (Node.List, Instance.List, [OpCodes.OpCode])
1007
evacOneNodeOnly nl il inst gdx avail_nodes = do
1008
  op_fn <- case Instance.mirrorType inst of
1009
             MirrorNone -> Bad "Can't relocate/evacuate non-mirrored instances"
1010
             MirrorInternal -> Ok ReplaceSecondary
1011
             MirrorExternal -> Ok FailoverToAny
1012
  (nl', inst', _, ndx) <- annotateResult "Can't find any good node" .
1013
                          eitherToResult $
1014
                          foldl' (evacOneNodeInner nl inst gdx op_fn)
1015
                          (Left "no nodes available") avail_nodes
1016
  let idx = Instance.idx inst
1017
      il' = Container.add idx inst' il
1018
      ops = iMoveToJob nl' il' idx (op_fn ndx)
1019
  return (nl', il', ops)
1020

    
1021
-- | Inner fold function for changing one node of an instance.
1022
--
1023
-- Depending on the instance disk template, this will either change
1024
-- the secondary (for DRBD) or the primary node (for shared
1025
-- storage). However, the operation is generic otherwise.
1026
--
1027
-- The running solution is either a @Left String@, which means we
1028
-- don't have yet a working solution, or a @Right (...)@, which
1029
-- represents a valid solution; it holds the modified node list, the
1030
-- modified instance (after evacuation), the score of that solution,
1031
-- and the new secondary node index.
1032
evacOneNodeInner :: Node.List         -- ^ Cluster node list
1033
                 -> Instance.Instance -- ^ Instance being evacuated
1034
                 -> Gdx               -- ^ The group index of the instance
1035
                 -> (Ndx -> IMove)    -- ^ Operation constructor
1036
                 -> EvacInnerState    -- ^ Current best solution
1037
                 -> Ndx               -- ^ Node we're evaluating as target
1038
                 -> EvacInnerState    -- ^ New best solution
1039
evacOneNodeInner nl inst gdx op_fn accu ndx =
1040
  case applyMove nl inst (op_fn ndx) of
1041
    OpFail fm -> let fail_msg = "Node " ++ Container.nameOf nl ndx ++
1042
                                " failed: " ++ show fm
1043
                 in either (const $ Left fail_msg) (const accu) accu
1044
    OpGood (nl', inst', _, _) ->
1045
      let nodes = Container.elems nl'
1046
          -- The fromJust below is ugly (it can fail nastily), but
1047
          -- at this point we should have any internal mismatches,
1048
          -- and adding a monad here would be quite involved
1049
          grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
1050
          new_cv = compCVNodes grpnodes
1051
          new_accu = Right (nl', inst', new_cv, ndx)
1052
      in case accu of
1053
           Left _ -> new_accu
1054
           Right (_, _, old_cv, _) ->
1055
             if old_cv < new_cv
1056
               then accu
1057
               else new_accu
1058

    
1059
-- | Compute result of changing all nodes of a DRBD instance.
1060
--
1061
-- Given the target primary and secondary node (which might be in a
1062
-- different group or not), this function will 'execute' all the
1063
-- required steps and assuming all operations succceed, will return
1064
-- the modified node and instance lists, the opcodes needed for this
1065
-- and the new group score.
1066
evacDrbdAllInner :: Node.List         -- ^ Cluster node list
1067
                 -> Instance.List     -- ^ Cluster instance list
1068
                 -> Instance.Instance -- ^ The instance to be moved
1069
                 -> Gdx               -- ^ The target group index
1070
                                      -- (which can differ from the
1071
                                      -- current group of the
1072
                                      -- instance)
1073
                 -> (Ndx, Ndx)        -- ^ Tuple of new
1074
                                      -- primary\/secondary nodes
1075
                 -> Result (Node.List, Instance.List, [OpCodes.OpCode], Score)
1076
evacDrbdAllInner nl il inst gdx (t_pdx, t_sdx) = do
1077
  let primary = Container.find (Instance.pNode inst) nl
1078
      idx = Instance.idx inst
1079
  -- if the primary is offline, then we first failover
1080
  (nl1, inst1, ops1) <-
1081
    if Node.offline primary
1082
      then do
1083
        (nl', inst', _, _) <-
1084
          annotateResult "Failing over to the secondary" .
1085
          opToResult $ applyMove nl inst Failover
1086
        return (nl', inst', [Failover])
1087
      else return (nl, inst, [])
1088
  let (o1, o2, o3) = (ReplaceSecondary t_pdx,
1089
                      Failover,
1090
                      ReplaceSecondary t_sdx)
1091
  -- we now need to execute a replace secondary to the future
1092
  -- primary node
1093
  (nl2, inst2, _, _) <-
1094
    annotateResult "Changing secondary to new primary" .
1095
    opToResult $
1096
    applyMove nl1 inst1 o1
1097
  let ops2 = o1:ops1
1098
  -- we now execute another failover, the primary stays fixed now
1099
  (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" .
1100
                        opToResult $ applyMove nl2 inst2 o2
1101
  let ops3 = o2:ops2
1102
  -- and finally another replace secondary, to the final secondary
1103
  (nl4, inst4, _, _) <-
1104
    annotateResult "Changing secondary to final secondary" .
1105
    opToResult $
1106
    applyMove nl3 inst3 o3
1107
  let ops4 = o3:ops3
1108
      il' = Container.add idx inst4 il
1109
      ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
1110
  let nodes = Container.elems nl4
1111
      -- The fromJust below is ugly (it can fail nastily), but
1112
      -- at this point we should have any internal mismatches,
1113
      -- and adding a monad here would be quite involved
1114
      grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
1115
      new_cv = compCVNodes grpnodes
1116
  return (nl4, il', ops, new_cv)
1117

    
1118
-- | Computes the nodes in a given group which are available for
1119
-- allocation.
1120
availableGroupNodes :: [(Gdx, [Ndx])] -- ^ Group index/node index assoc list
1121
                    -> IntSet.IntSet  -- ^ Nodes that are excluded
1122
                    -> Gdx            -- ^ The group for which we
1123
                                      -- query the nodes
1124
                    -> Result [Ndx]   -- ^ List of available node indices
1125
availableGroupNodes group_nodes excl_ndx gdx = do
1126
  local_nodes <- maybe (Bad $ "Can't find group with index " ++ show gdx)
1127
                 Ok (lookup gdx group_nodes)
1128
  let avail_nodes = filter (not . flip IntSet.member excl_ndx) local_nodes
1129
  return avail_nodes
1130

    
1131
-- | Updates the evac solution with the results of an instance
1132
-- evacuation.
1133
updateEvacSolution :: (Node.List, Instance.List, EvacSolution)
1134
                   -> Idx
1135
                   -> Result (Node.List, Instance.List, [OpCodes.OpCode])
1136
                   -> (Node.List, Instance.List, EvacSolution)
1137
updateEvacSolution (nl, il, es) idx (Bad msg) =
1138
  (nl, il, es { esFailed = (idx, msg):esFailed es})
1139
updateEvacSolution (_, _, es) idx (Ok (nl, il, opcodes)) =
1140
  (nl, il, es { esMoved = new_elem:esMoved es
1141
              , esOpCodes = opcodes:esOpCodes es })
1142
    where inst = Container.find idx il
1143
          new_elem = (idx,
1144
                      instancePriGroup nl inst,
1145
                      Instance.allNodes inst)
1146

    
1147
-- | Node-evacuation IAllocator mode main function.
1148
tryNodeEvac :: Group.List    -- ^ The cluster groups
1149
            -> Node.List     -- ^ The node list (cluster-wide, not per group)
1150
            -> Instance.List -- ^ Instance list (cluster-wide)
1151
            -> EvacMode      -- ^ The evacuation mode
1152
            -> [Idx]         -- ^ List of instance (indices) to be evacuated
1153
            -> Result (Node.List, Instance.List, EvacSolution)
1154
tryNodeEvac _ ini_nl ini_il mode idxs =
1155
  let evac_ndx = nodesToEvacuate ini_il mode idxs
1156
      offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
1157
      excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline
1158
      group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
1159
                                           (Container.elems nl))) $
1160
                  splitCluster ini_nl ini_il
1161
      (fin_nl, fin_il, esol) =
1162
        foldl' (\state@(nl, il, _) inst ->
1163
                  let gdx = instancePriGroup nl inst
1164
                      pdx = Instance.pNode inst in
1165
                  updateEvacSolution state (Instance.idx inst) $
1166
                  availableGroupNodes group_ndx
1167
                    (IntSet.insert pdx excl_ndx) gdx >>=
1168
                      nodeEvacInstance nl il mode inst gdx
1169
               )
1170
        (ini_nl, ini_il, emptyEvacSolution)
1171
        (map (`Container.find` ini_il) idxs)
1172
  in return (fin_nl, fin_il, reverseEvacSolution esol)
1173

    
1174
-- | Change-group IAllocator mode main function.
1175
--
1176
-- This is very similar to 'tryNodeEvac', the only difference is that
1177
-- we don't choose as target group the current instance group, but
1178
-- instead:
1179
--
1180
--   1. at the start of the function, we compute which are the target
1181
--   groups; either no groups were passed in, in which case we choose
1182
--   all groups out of which we don't evacuate instance, or there were
1183
--   some groups passed, in which case we use those
1184
--
1185
--   2. for each instance, we use 'findBestAllocGroup' to choose the
1186
--   best group to hold the instance, and then we do what
1187
--   'tryNodeEvac' does, except for this group instead of the current
1188
--   instance group.
1189
--
1190
-- Note that the correct behaviour of this function relies on the
1191
-- function 'nodeEvacInstance' to be able to do correctly both
1192
-- intra-group and inter-group moves when passed the 'ChangeAll' mode.
1193
tryChangeGroup :: Group.List    -- ^ The cluster groups
1194
               -> Node.List     -- ^ The node list (cluster-wide)
1195
               -> Instance.List -- ^ Instance list (cluster-wide)
1196
               -> [Gdx]         -- ^ Target groups; if empty, any
1197
                                -- groups not being evacuated
1198
               -> [Idx]         -- ^ List of instance (indices) to be evacuated
1199
               -> Result (Node.List, Instance.List, EvacSolution)
1200
tryChangeGroup gl ini_nl ini_il gdxs idxs =
1201
  let evac_gdxs = nub $ map (instancePriGroup ini_nl .
1202
                             flip Container.find ini_il) idxs
1203
      target_gdxs = (if null gdxs
1204
                       then Container.keys gl
1205
                       else gdxs) \\ evac_gdxs
1206
      offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
1207
      excl_ndx = foldl' (flip IntSet.insert) IntSet.empty offline
1208
      group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
1209
                                           (Container.elems nl))) $
1210
                  splitCluster ini_nl ini_il
1211
      (fin_nl, fin_il, esol) =
1212
        foldl' (\state@(nl, il, _) inst ->
1213
                  let solution = do
1214
                        let ncnt = Instance.requiredNodes $
1215
                                   Instance.diskTemplate inst
1216
                        (gdx, _, _) <- findBestAllocGroup gl nl il
1217
                                       (Just target_gdxs) inst ncnt
1218
                        av_nodes <- availableGroupNodes group_ndx
1219
                                    excl_ndx gdx
1220
                        nodeEvacInstance nl il ChangeAll inst gdx av_nodes
1221
                  in updateEvacSolution state (Instance.idx inst) solution
1222
               )
1223
        (ini_nl, ini_il, emptyEvacSolution)
1224
        (map (`Container.find` ini_il) idxs)
1225
  in return (fin_nl, fin_il, reverseEvacSolution esol)
1226

    
1227
-- | Standard-sized allocation method.
1228
--
1229
-- This places instances of the same size on the cluster until we're
1230
-- out of space. The result will be a list of identically-sized
1231
-- instances.
1232
iterateAlloc :: AllocMethod
1233
iterateAlloc nl il limit newinst allocnodes ixes cstats =
1234
  let depth = length ixes
1235
      newname = printf "new-%d" depth::String
1236
      newidx = Container.size il
1237
      newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
1238
      newlimit = fmap (flip (-) 1) limit
1239
  in case tryAlloc nl il newi2 allocnodes of
1240
       Bad s -> Bad s
1241
       Ok (AllocSolution { asFailures = errs, asSolution = sols3 }) ->
1242
         let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in
1243
         case sols3 of
1244
           Nothing -> newsol
1245
           Just (xnl, xi, _, _) ->
1246
             if limit == Just 0
1247
               then newsol
1248
               else iterateAlloc xnl (Container.add newidx xi il)
1249
                      newlimit newinst allocnodes (xi:ixes)
1250
                      (totalResources xnl:cstats)
1251

    
1252
-- | Tiered allocation method.
1253
--
1254
-- This places instances on the cluster, and decreases the spec until
1255
-- we can allocate again. The result will be a list of decreasing
1256
-- instance specs.
1257
tieredAlloc :: AllocMethod
1258
tieredAlloc nl il limit newinst allocnodes ixes cstats =
1259
  case iterateAlloc nl il limit newinst allocnodes ixes cstats of
1260
    Bad s -> Bad s
1261
    Ok (errs, nl', il', ixes', cstats') ->
1262
      let newsol = Ok (errs, nl', il', ixes', cstats')
1263
          ixes_cnt = length ixes'
1264
          (stop, newlimit) = case limit of
1265
                               Nothing -> (False, Nothing)
1266
                               Just n -> (n <= ixes_cnt,
1267
                                            Just (n - ixes_cnt)) in
1268
      if stop then newsol else
1269
          case Instance.shrinkByType newinst . fst . last $
1270
               sortBy (comparing snd) errs of
1271
            Bad _ -> newsol
1272
            Ok newinst' -> tieredAlloc nl' il' newlimit
1273
                           newinst' allocnodes ixes' cstats'
1274

    
1275
-- * Formatting functions
1276

    
1277
-- | Given the original and final nodes, computes the relocation description.
1278
computeMoves :: Instance.Instance -- ^ The instance to be moved
1279
             -> String -- ^ The instance name
1280
             -> IMove  -- ^ The move being performed
1281
             -> String -- ^ New primary
1282
             -> String -- ^ New secondary
1283
             -> (String, [String])
1284
                -- ^ Tuple of moves and commands list; moves is containing
1285
                -- either @/f/@ for failover or @/r:name/@ for replace
1286
                -- secondary, while the command list holds gnt-instance
1287
                -- commands (without that prefix), e.g \"@failover instance1@\"
1288
computeMoves i inam mv c d =
1289
  case mv of
1290
    Failover -> ("f", [mig])
1291
    FailoverToAny _ -> (printf "fa:%s" c, [mig_any])
1292
    FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
1293
    ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
1294
    ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
1295
    ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
1296
  where morf = if Instance.isRunning i then "migrate" else "failover"
1297
        mig = printf "%s -f %s" morf inam::String
1298
        mig_any = printf "%s -f -n %s %s" morf c inam::String
1299
        rep n = printf "replace-disks -n %s %s" n inam::String
1300

    
1301
-- | Converts a placement to string format.
1302
printSolutionLine :: Node.List     -- ^ The node list
1303
                  -> Instance.List -- ^ The instance list
1304
                  -> Int           -- ^ Maximum node name length
1305
                  -> Int           -- ^ Maximum instance name length
1306
                  -> Placement     -- ^ The current placement
1307
                  -> Int           -- ^ The index of the placement in
1308
                                   -- the solution
1309
                  -> (String, [String])
1310
printSolutionLine nl il nmlen imlen plc pos =
1311
  let pmlen = (2*nmlen + 1)
1312
      (i, p, s, mv, c) = plc
1313
      old_sec = Instance.sNode inst
1314
      inst = Container.find i il
1315
      inam = Instance.alias inst
1316
      npri = Node.alias $ Container.find p nl
1317
      nsec = Node.alias $ Container.find s nl
1318
      opri = Node.alias $ Container.find (Instance.pNode inst) nl
1319
      osec = Node.alias $ Container.find old_sec nl
1320
      (moves, cmds) =  computeMoves inst inam mv npri nsec
1321
      -- FIXME: this should check instead/also the disk template
1322
      ostr = if old_sec == Node.noSecondary
1323
               then printf "%s" opri::String
1324
               else printf "%s:%s" opri osec::String
1325
      nstr = if s == Node.noSecondary
1326
               then printf "%s" npri::String
1327
               else printf "%s:%s" npri nsec::String
1328
  in (printf "  %3d. %-*s %-*s => %-*s %12.8f a=%s"
1329
      pos imlen inam pmlen ostr pmlen nstr c moves,
1330
      cmds)
1331

    
1332
-- | Return the instance and involved nodes in an instance move.
1333
--
1334
-- Note that the output list length can vary, and is not required nor
1335
-- guaranteed to be of any specific length.
1336
involvedNodes :: Instance.List -- ^ Instance list, used for retrieving
1337
                               -- the instance from its index; note
1338
                               -- that this /must/ be the original
1339
                               -- instance list, so that we can
1340
                               -- retrieve the old nodes
1341
              -> Placement     -- ^ The placement we're investigating,
1342
                               -- containing the new nodes and
1343
                               -- instance index
1344
              -> [Ndx]         -- ^ Resulting list of node indices
1345
involvedNodes il plc =
1346
  let (i, np, ns, _, _) = plc
1347
      inst = Container.find i il
1348
  in nub $ [np, ns] ++ Instance.allNodes inst
1349

    
1350
-- | Inner function for splitJobs, that either appends the next job to
1351
-- the current jobset, or starts a new jobset.
1352
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
1353
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
1354
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
1355
  | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
1356
  | otherwise = ([n]:cjs, ndx)
1357

    
1358
-- | Break a list of moves into independent groups. Note that this
1359
-- will reverse the order of jobs.
1360
splitJobs :: [MoveJob] -> [JobSet]
1361
splitJobs = fst . foldl mergeJobs ([], [])
1362

    
1363
-- | Given a list of commands, prefix them with @gnt-instance@ and
1364
-- also beautify the display a little.
1365
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
1366
formatJob jsn jsl (sn, (_, _, _, cmds)) =
1367
  let out =
1368
        printf "  echo job %d/%d" jsn sn:
1369
        printf "  check":
1370
        map ("  gnt-instance " ++) cmds
1371
  in if sn == 1
1372
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
1373
       else out
1374

    
1375
-- | Given a list of commands, prefix them with @gnt-instance@ and
1376
-- also beautify the display a little.
1377
formatCmds :: [JobSet] -> String
1378
formatCmds =
1379
  unlines .
1380
  concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
1381
                           (zip [1..] js)) .
1382
  zip [1..]
1383

    
1384
-- | Print the node list.
1385
printNodes :: Node.List -> [String] -> String
1386
printNodes nl fs =
1387
  let fields = case fs of
1388
                 [] -> Node.defaultFields
1389
                 "+":rest -> Node.defaultFields ++ rest
1390
                 _ -> fs
1391
      snl = sortBy (comparing Node.idx) (Container.elems nl)
1392
      (header, isnum) = unzip $ map Node.showHeader fields
1393
  in printTable "" header (map (Node.list fields) snl) isnum
1394

    
1395
-- | Print the instance list.
1396
printInsts :: Node.List -> Instance.List -> String
1397
printInsts nl il =
1398
  let sil = sortBy (comparing Instance.idx) (Container.elems il)
1399
      helper inst = [ if Instance.isRunning inst then "R" else " "
1400
                    , Instance.name inst
1401
                    , Container.nameOf nl (Instance.pNode inst)
1402
                    , let sdx = Instance.sNode inst
1403
                      in if sdx == Node.noSecondary
1404
                           then  ""
1405
                           else Container.nameOf nl sdx
1406
                    , if Instance.autoBalance inst then "Y" else "N"
1407
                    , printf "%3d" $ Instance.vcpus inst
1408
                    , printf "%5d" $ Instance.mem inst
1409
                    , printf "%5d" $ Instance.dsk inst `div` 1024
1410
                    , printf "%5.3f" lC
1411
                    , printf "%5.3f" lM
1412
                    , printf "%5.3f" lD
1413
                    , printf "%5.3f" lN
1414
                    ]
1415
          where DynUtil lC lM lD lN = Instance.util inst
1416
      header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
1417
               , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
1418
      isnum = False:False:False:False:False:repeat True
1419
  in printTable "" header (map helper sil) isnum
1420

    
1421
-- | Shows statistics for a given node list.
1422
printStats :: String -> Node.List -> String
1423
printStats lp nl =
1424
  let dcvs = compDetailedCV $ Container.elems nl
1425
      (weights, names) = unzip detailedCVInfo
1426
      hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
1427
      header = [ "Field", "Value", "Weight" ]
1428
      formatted = map (\(w, h, val) ->
1429
                         [ h
1430
                         , printf "%.8f" val
1431
                         , printf "x%.2f" w
1432
                         ]) hd
1433
  in printTable lp header formatted $ False:repeat True
1434

    
1435
-- | Convert a placement into a list of OpCodes (basically a job).
1436
iMoveToJob :: Node.List        -- ^ The node list; only used for node
1437
                               -- names, so any version is good
1438
                               -- (before or after the operation)
1439
           -> Instance.List    -- ^ The instance list; also used for
1440
                               -- names only
1441
           -> Idx              -- ^ The index of the instance being
1442
                               -- moved
1443
           -> IMove            -- ^ The actual move to be described
1444
           -> [OpCodes.OpCode] -- ^ The list of opcodes equivalent to
1445
                               -- the given move
1446
iMoveToJob nl il idx move =
1447
  let inst = Container.find idx il
1448
      iname = Instance.name inst
1449
      lookNode  = Just . Container.nameOf nl
1450
      opF = OpCodes.OpInstanceMigrate iname True False True Nothing
1451
      opFA n = OpCodes.OpInstanceMigrate iname True False True (lookNode n)
1452
      opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
1453
              OpCodes.ReplaceNewSecondary [] Nothing
1454
  in case move of
1455
       Failover -> [ opF ]
1456
       FailoverToAny np -> [ opFA np ]
1457
       ReplacePrimary np -> [ opF, opR np, opF ]
1458
       ReplaceSecondary ns -> [ opR ns ]
1459
       ReplaceAndFailover np -> [ opR np, opF ]
1460
       FailoverAndReplace ns -> [ opF, opR ns ]
1461

    
1462
-- * Node group functions
1463

    
1464
-- | Computes the group of an instance.
1465
instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
1466
instanceGroup nl i =
1467
  let sidx = Instance.sNode i
1468
      pnode = Container.find (Instance.pNode i) nl
1469
      snode = if sidx == Node.noSecondary
1470
              then pnode
1471
              else Container.find sidx nl
1472
      pgroup = Node.group pnode
1473
      sgroup = Node.group snode
1474
  in if pgroup /= sgroup
1475
       then fail ("Instance placed accross two node groups, primary " ++
1476
                  show pgroup ++ ", secondary " ++ show sgroup)
1477
       else return pgroup
1478

    
1479
-- | Computes the group of an instance per the primary node.
1480
instancePriGroup :: Node.List -> Instance.Instance -> Gdx
1481
instancePriGroup nl i =
1482
  let pnode = Container.find (Instance.pNode i) nl
1483
  in  Node.group pnode
1484

    
1485
-- | Compute the list of badly allocated instances (split across node
1486
-- groups).
1487
findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
1488
findSplitInstances nl =
1489
  filter (not . isOk . instanceGroup nl) . Container.elems
1490

    
1491
-- | Splits a cluster into the component node groups.
1492
splitCluster :: Node.List -> Instance.List ->
1493
                [(Gdx, (Node.List, Instance.List))]
1494
splitCluster nl il =
1495
  let ngroups = Node.computeGroups (Container.elems nl)
1496
  in map (\(guuid, nodes) ->
1497
           let nidxs = map Node.idx nodes
1498
               nodes' = zip nidxs nodes
1499
               instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
1500
           in (guuid, (Container.fromList nodes', instances))) ngroups
1501

    
1502
-- | Compute the list of nodes that are to be evacuated, given a list
1503
-- of instances and an evacuation mode.
1504
nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list
1505
                -> EvacMode      -- ^ The evacuation mode we're using
1506
                -> [Idx]         -- ^ List of instance indices being evacuated
1507
                -> IntSet.IntSet -- ^ Set of node indices
1508
nodesToEvacuate il mode =
1509
  IntSet.delete Node.noSecondary .
1510
  foldl' (\ns idx ->
1511
            let i = Container.find idx il
1512
                pdx = Instance.pNode i
1513
                sdx = Instance.sNode i
1514
                dt = Instance.diskTemplate i
1515
                withSecondary = case dt of
1516
                                  DTDrbd8 -> IntSet.insert sdx ns
1517
                                  _ -> ns
1518
            in case mode of
1519
                 ChangePrimary   -> IntSet.insert pdx ns
1520
                 ChangeSecondary -> withSecondary
1521
                 ChangeAll       -> IntSet.insert pdx withSecondary
1522
         ) IntSet.empty