Statistics
| Branch: | Tag: | Revision:

root / Ganeti / HTools / Cluster.hs @ d6c76bd5

History | View | Annotate | Download (44.5 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the "Main" module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009, 2010, 2011 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
    (
31
     -- * Types
32
      AllocSolution(..)
33
    , Table(..)
34
    , CStats(..)
35
    , AllocStats
36
    -- * Generic functions
37
    , totalResources
38
    , computeAllocationDelta
39
    -- * First phase functions
40
    , computeBadItems
41
    -- * Second phase functions
42
    , printSolutionLine
43
    , formatCmds
44
    , involvedNodes
45
    , splitJobs
46
    -- * Display functions
47
    , printNodes
48
    , printInsts
49
    -- * Balacing functions
50
    , checkMove
51
    , doNextBalance
52
    , tryBalance
53
    , compCV
54
    , compDetailedCV
55
    , printStats
56
    , iMoveToJob
57
    -- * IAllocator functions
58
    , tryAlloc
59
    , tryMGAlloc
60
    , tryReloc
61
    , tryMGReloc
62
    , tryEvac
63
    , tryMGEvac
64
    , collapseFailures
65
    -- * Allocation functions
66
    , iterateAlloc
67
    , tieredAlloc
68
    , tieredSpecMap
69
     -- * Node group functions
70
    , instanceGroup
71
    , findSplitInstances
72
    , splitCluster
73
    ) where
74

    
75
import Data.Function (on)
76
import Data.List
77
import Data.Ord (comparing)
78
import Text.Printf (printf)
79
import Control.Monad
80
import Control.Parallel.Strategies
81

    
82
import qualified Ganeti.HTools.Container as Container
83
import qualified Ganeti.HTools.Instance as Instance
84
import qualified Ganeti.HTools.Node as Node
85
import qualified Ganeti.HTools.Group as Group
86
import Ganeti.HTools.Types
87
import Ganeti.HTools.Utils
88
import qualified Ganeti.OpCodes as OpCodes
89

    
90
-- * Types
91

    
92
-- | Allocation\/relocation solution.
93
data AllocSolution = AllocSolution
94
  { asFailures  :: [FailMode]          -- ^ Failure counts
95
  , asAllocs    :: Int                 -- ^ Good allocation count
96
  , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
97
                                       -- of the list depends on the
98
                                       -- allocation/relocation mode
99
  , asLog       :: [String]            -- ^ A list of informational messages
100
  }
101

    
102
-- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
103
type AllocResult = (FailStats, Node.List, Instance.List,
104
                    [Instance.Instance], [CStats])
105

    
106
-- | The empty solution we start with when computing allocations
107
emptySolution :: AllocSolution
108
emptySolution = AllocSolution { asFailures = [], asAllocs = 0
109
                              , asSolutions = [], asLog = [] }
110

    
111
-- | The complete state for the balancing solution
112
data Table = Table Node.List Instance.List Score [Placement]
113
             deriving (Show, Read)
114

    
115
data CStats = CStats { csFmem :: Int    -- ^ Cluster free mem
116
                     , csFdsk :: Int    -- ^ Cluster free disk
117
                     , csAmem :: Int    -- ^ Cluster allocatable mem
118
                     , csAdsk :: Int    -- ^ Cluster allocatable disk
119
                     , csAcpu :: Int    -- ^ Cluster allocatable cpus
120
                     , csMmem :: Int    -- ^ Max node allocatable mem
121
                     , csMdsk :: Int    -- ^ Max node allocatable disk
122
                     , csMcpu :: Int    -- ^ Max node allocatable cpu
123
                     , csImem :: Int    -- ^ Instance used mem
124
                     , csIdsk :: Int    -- ^ Instance used disk
125
                     , csIcpu :: Int    -- ^ Instance used cpu
126
                     , csTmem :: Double -- ^ Cluster total mem
127
                     , csTdsk :: Double -- ^ Cluster total disk
128
                     , csTcpu :: Double -- ^ Cluster total cpus
129
                     , csVcpu :: Int    -- ^ Cluster virtual cpus (if
130
                                        -- node pCpu has been set,
131
                                        -- otherwise -1)
132
                     , csXmem :: Int    -- ^ Unnacounted for mem
133
                     , csNmem :: Int    -- ^ Node own memory
134
                     , csScore :: Score -- ^ The cluster score
135
                     , csNinst :: Int   -- ^ The total number of instances
136
                     }
137
            deriving (Show, Read)
138

    
139
-- | Currently used, possibly to allocate, unallocable
140
type AllocStats = (RSpec, RSpec, RSpec)
141

    
142
-- * Utility functions
143

    
144
-- | Verifies the N+1 status and return the affected nodes.
145
verifyN1 :: [Node.Node] -> [Node.Node]
146
verifyN1 = filter Node.failN1
147

    
148
{-| Computes the pair of bad nodes and instances.
149

    
150
The bad node list is computed via a simple 'verifyN1' check, and the
151
bad instance list is the list of primary and secondary instances of
152
those nodes.
153

    
154
-}
155
computeBadItems :: Node.List -> Instance.List ->
156
                   ([Node.Node], [Instance.Instance])
157
computeBadItems nl il =
158
  let bad_nodes = verifyN1 $ getOnline nl
159
      bad_instances = map (`Container.find` il) .
160
                      sort . nub $
161
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
162
  in
163
    (bad_nodes, bad_instances)
164

    
165
-- | Zero-initializer for the CStats type
166
emptyCStats :: CStats
167
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
168

    
169
-- | Update stats with data from a new node
170
updateCStats :: CStats -> Node.Node -> CStats
171
updateCStats cs node =
172
    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
173
                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
174
                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
175
                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
176
                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
177
                 csVcpu = x_vcpu,
178
                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
179
               }
180
            = cs
181
        inc_amem = Node.fMem node - Node.rMem node
182
        inc_amem' = if inc_amem > 0 then inc_amem else 0
183
        inc_adsk = Node.availDisk node
184
        inc_imem = truncate (Node.tMem node) - Node.nMem node
185
                   - Node.xMem node - Node.fMem node
186
        inc_icpu = Node.uCpu node
187
        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
188
        inc_vcpu = Node.hiCpu node
189
        inc_acpu = Node.availCpu node
190

    
191
    in cs { csFmem = x_fmem + Node.fMem node
192
          , csFdsk = x_fdsk + Node.fDsk node
193
          , csAmem = x_amem + inc_amem'
194
          , csAdsk = x_adsk + inc_adsk
195
          , csAcpu = x_acpu + inc_acpu
196
          , csMmem = max x_mmem inc_amem'
197
          , csMdsk = max x_mdsk inc_adsk
198
          , csMcpu = max x_mcpu inc_acpu
199
          , csImem = x_imem + inc_imem
200
          , csIdsk = x_idsk + inc_idsk
201
          , csIcpu = x_icpu + inc_icpu
202
          , csTmem = x_tmem + Node.tMem node
203
          , csTdsk = x_tdsk + Node.tDsk node
204
          , csTcpu = x_tcpu + Node.tCpu node
205
          , csVcpu = x_vcpu + inc_vcpu
206
          , csXmem = x_xmem + Node.xMem node
207
          , csNmem = x_nmem + Node.nMem node
208
          , csNinst = x_ninst + length (Node.pList node)
209
          }
210

    
211
-- | Compute the total free disk and memory in the cluster.
212
totalResources :: Node.List -> CStats
213
totalResources nl =
214
    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
215
    in cs { csScore = compCV nl }
216

    
217
-- | Compute the delta between two cluster state.
218
--
219
-- This is used when doing allocations, to understand better the
220
-- available cluster resources. The return value is a triple of the
221
-- current used values, the delta that was still allocated, and what
222
-- was left unallocated.
223
computeAllocationDelta :: CStats -> CStats -> AllocStats
224
computeAllocationDelta cini cfin =
225
    let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
226
        CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
227
                csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
228
        rini = RSpec i_icpu i_imem i_idsk
229
        rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
230
        un_cpu = v_cpu - f_icpu
231
        runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
232
    in (rini, rfin, runa)
233

    
234
-- | The names and weights of the individual elements in the CV list
235
detailedCVInfo :: [(Double, String)]
236
detailedCVInfo = [ (1,  "free_mem_cv")
237
                 , (1,  "free_disk_cv")
238
                 , (1,  "n1_cnt")
239
                 , (1,  "reserved_mem_cv")
240
                 , (4,  "offline_all_cnt")
241
                 , (16, "offline_pri_cnt")
242
                 , (1,  "vcpu_ratio_cv")
243
                 , (1,  "cpu_load_cv")
244
                 , (1,  "mem_load_cv")
245
                 , (1,  "disk_load_cv")
246
                 , (1,  "net_load_cv")
247
                 , (2,  "pri_tags_score")
248
                 ]
249

    
250
detailedCVWeights :: [Double]
251
detailedCVWeights = map fst detailedCVInfo
252

    
253
-- | Compute the mem and disk covariance.
254
compDetailedCV :: Node.List -> [Double]
255
compDetailedCV nl =
256
    let
257
        all_nodes = Container.elems nl
258
        (offline, nodes) = partition Node.offline all_nodes
259
        mem_l = map Node.pMem nodes
260
        dsk_l = map Node.pDsk nodes
261
        -- metric: memory covariance
262
        mem_cv = stdDev mem_l
263
        -- metric: disk covariance
264
        dsk_cv = stdDev dsk_l
265
        -- metric: count of instances living on N1 failing nodes
266
        n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
267
                                                   length (Node.pList n)) .
268
                   filter Node.failN1 $ nodes :: Double
269
        res_l = map Node.pRem nodes
270
        -- metric: reserved memory covariance
271
        res_cv = stdDev res_l
272
        -- offline instances metrics
273
        offline_ipri = sum . map (length . Node.pList) $ offline
274
        offline_isec = sum . map (length . Node.sList) $ offline
275
        -- metric: count of instances on offline nodes
276
        off_score = fromIntegral (offline_ipri + offline_isec)::Double
277
        -- metric: count of primary instances on offline nodes (this
278
        -- helps with evacuation/failover of primary instances on
279
        -- 2-node clusters with one node offline)
280
        off_pri_score = fromIntegral offline_ipri::Double
281
        cpu_l = map Node.pCpu nodes
282
        -- metric: covariance of vcpu/pcpu ratio
283
        cpu_cv = stdDev cpu_l
284
        -- metrics: covariance of cpu, memory, disk and network load
285
        (c_load, m_load, d_load, n_load) = unzip4 $
286
            map (\n ->
287
                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
288
                         DynUtil c2 m2 d2 n2 = Node.utilPool n
289
                     in (c1/c2, m1/m2, d1/d2, n1/n2)
290
                ) nodes
291
        -- metric: conflicting instance count
292
        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
293
        pri_tags_score = fromIntegral pri_tags_inst::Double
294
    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
295
       , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
296
       , pri_tags_score ]
297

    
298
-- | Compute the /total/ variance.
299
compCV :: Node.List -> Double
300
compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
301

    
302
-- | Compute online nodes from a Node.List
303
getOnline :: Node.List -> [Node.Node]
304
getOnline = filter (not . Node.offline) . Container.elems
305

    
306
-- * hbal functions
307

    
308
-- | Compute best table. Note that the ordering of the arguments is important.
309
compareTables :: Table -> Table -> Table
310
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
311
    if a_cv > b_cv then b else a
312

    
313
-- | Applies an instance move to a given node list and instance.
314
applyMove :: Node.List -> Instance.Instance
315
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
316
-- Failover (f)
317
applyMove nl inst Failover =
318
    let old_pdx = Instance.pNode inst
319
        old_sdx = Instance.sNode inst
320
        old_p = Container.find old_pdx nl
321
        old_s = Container.find old_sdx nl
322
        int_p = Node.removePri old_p inst
323
        int_s = Node.removeSec old_s inst
324
        force_p = Node.offline old_p
325
        new_nl = do -- Maybe monad
326
          new_p <- Node.addPriEx force_p int_s inst
327
          new_s <- Node.addSec int_p inst old_sdx
328
          let new_inst = Instance.setBoth inst old_sdx old_pdx
329
          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
330
                  new_inst, old_sdx, old_pdx)
331
    in new_nl
332

    
333
-- Replace the primary (f:, r:np, f)
334
applyMove nl inst (ReplacePrimary new_pdx) =
335
    let old_pdx = Instance.pNode inst
336
        old_sdx = Instance.sNode inst
337
        old_p = Container.find old_pdx nl
338
        old_s = Container.find old_sdx nl
339
        tgt_n = Container.find new_pdx nl
340
        int_p = Node.removePri old_p inst
341
        int_s = Node.removeSec old_s inst
342
        force_p = Node.offline old_p
343
        new_nl = do -- Maybe monad
344
          -- check that the current secondary can host the instance
345
          -- during the migration
346
          tmp_s <- Node.addPriEx force_p int_s inst
347
          let tmp_s' = Node.removePri tmp_s inst
348
          new_p <- Node.addPriEx force_p tgt_n inst
349
          new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
350
          let new_inst = Instance.setPri inst new_pdx
351
          return (Container.add new_pdx new_p $
352
                  Container.addTwo old_pdx int_p old_sdx new_s nl,
353
                  new_inst, new_pdx, old_sdx)
354
    in new_nl
355

    
356
-- Replace the secondary (r:ns)
357
applyMove nl inst (ReplaceSecondary new_sdx) =
358
    let old_pdx = Instance.pNode inst
359
        old_sdx = Instance.sNode inst
360
        old_s = Container.find old_sdx nl
361
        tgt_n = Container.find new_sdx nl
362
        int_s = Node.removeSec old_s inst
363
        force_s = Node.offline old_s
364
        new_inst = Instance.setSec inst new_sdx
365
        new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
366
                 \new_s -> return (Container.addTwo new_sdx
367
                                   new_s old_sdx int_s nl,
368
                                   new_inst, old_pdx, new_sdx)
369
    in new_nl
370

    
371
-- Replace the secondary and failover (r:np, f)
372
applyMove nl inst (ReplaceAndFailover new_pdx) =
373
    let old_pdx = Instance.pNode inst
374
        old_sdx = Instance.sNode inst
375
        old_p = Container.find old_pdx nl
376
        old_s = Container.find old_sdx nl
377
        tgt_n = Container.find new_pdx nl
378
        int_p = Node.removePri old_p inst
379
        int_s = Node.removeSec old_s inst
380
        force_s = Node.offline old_s
381
        new_nl = do -- Maybe monad
382
          new_p <- Node.addPri tgt_n inst
383
          new_s <- Node.addSecEx force_s int_p inst new_pdx
384
          let new_inst = Instance.setBoth inst new_pdx old_pdx
385
          return (Container.add new_pdx new_p $
386
                  Container.addTwo old_pdx new_s old_sdx int_s nl,
387
                  new_inst, new_pdx, old_pdx)
388
    in new_nl
389

    
390
-- Failver and replace the secondary (f, r:ns)
391
applyMove nl inst (FailoverAndReplace new_sdx) =
392
    let old_pdx = Instance.pNode inst
393
        old_sdx = Instance.sNode inst
394
        old_p = Container.find old_pdx nl
395
        old_s = Container.find old_sdx nl
396
        tgt_n = Container.find new_sdx nl
397
        int_p = Node.removePri old_p inst
398
        int_s = Node.removeSec old_s inst
399
        force_p = Node.offline old_p
400
        new_nl = do -- Maybe monad
401
          new_p <- Node.addPriEx force_p int_s inst
402
          new_s <- Node.addSecEx force_p tgt_n inst old_sdx
403
          let new_inst = Instance.setBoth inst old_sdx new_sdx
404
          return (Container.add new_sdx new_s $
405
                  Container.addTwo old_sdx new_p old_pdx int_p nl,
406
                  new_inst, old_sdx, new_sdx)
407
    in new_nl
408

    
409
-- | Tries to allocate an instance on one given node.
410
allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
411
                 -> OpResult Node.AllocElement
412
allocateOnSingle nl inst p =
413
    let new_pdx = Node.idx p
414
        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
415
    in  Node.addPri p inst >>= \new_p -> do
416
      let new_nl = Container.add new_pdx new_p nl
417
          new_score = compCV nl
418
      return (new_nl, new_inst, [new_p], new_score)
419

    
420
-- | Tries to allocate an instance on a given pair of nodes.
421
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
422
               -> OpResult Node.AllocElement
423
allocateOnPair nl inst tgt_p tgt_s =
424
    let new_pdx = Node.idx tgt_p
425
        new_sdx = Node.idx tgt_s
426
    in do
427
      new_p <- Node.addPri tgt_p inst
428
      new_s <- Node.addSec tgt_s inst new_pdx
429
      let new_inst = Instance.setBoth inst new_pdx new_sdx
430
          new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
431
      return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
432

    
433
-- | Tries to perform an instance move and returns the best table
434
-- between the original one and the new one.
435
checkSingleStep :: Table -- ^ The original table
436
                -> Instance.Instance -- ^ The instance to move
437
                -> Table -- ^ The current best table
438
                -> IMove -- ^ The move to apply
439
                -> Table -- ^ The final best table
440
checkSingleStep ini_tbl target cur_tbl move =
441
    let
442
        Table ini_nl ini_il _ ini_plc = ini_tbl
443
        tmp_resu = applyMove ini_nl target move
444
    in
445
      case tmp_resu of
446
        OpFail _ -> cur_tbl
447
        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
448
            let tgt_idx = Instance.idx target
449
                upd_cvar = compCV upd_nl
450
                upd_il = Container.add tgt_idx new_inst ini_il
451
                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
452
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
453
            in
454
              compareTables cur_tbl upd_tbl
455

    
456
-- | Given the status of the current secondary as a valid new node and
457
-- the current candidate target node, generate the possible moves for
458
-- a instance.
459
possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
460
              -> Ndx       -- ^ Target node candidate
461
              -> [IMove]   -- ^ List of valid result moves
462
possibleMoves True tdx =
463
    [ReplaceSecondary tdx,
464
     ReplaceAndFailover tdx,
465
     ReplacePrimary tdx,
466
     FailoverAndReplace tdx]
467

    
468
possibleMoves False tdx =
469
    [ReplaceSecondary tdx,
470
     ReplaceAndFailover tdx]
471

    
472
-- | Compute the best move for a given instance.
473
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
474
                  -> Bool              -- ^ Whether disk moves are allowed
475
                  -> Table             -- ^ Original table
476
                  -> Instance.Instance -- ^ Instance to move
477
                  -> Table             -- ^ Best new table for this instance
478
checkInstanceMove nodes_idx disk_moves ini_tbl target =
479
    let
480
        opdx = Instance.pNode target
481
        osdx = Instance.sNode target
482
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
483
        use_secondary = elem osdx nodes_idx
484
        aft_failover = if use_secondary -- if allowed to failover
485
                       then checkSingleStep ini_tbl target ini_tbl Failover
486
                       else ini_tbl
487
        all_moves = if disk_moves
488
                    then concatMap (possibleMoves use_secondary) nodes
489
                    else []
490
    in
491
      -- iterate over the possible nodes for this instance
492
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
493

    
494
-- | Compute the best next move.
495
checkMove :: [Ndx]               -- ^ Allowed target node indices
496
          -> Bool                -- ^ Whether disk moves are allowed
497
          -> Table               -- ^ The current solution
498
          -> [Instance.Instance] -- ^ List of instances still to move
499
          -> Table               -- ^ The new solution
500
checkMove nodes_idx disk_moves ini_tbl victims =
501
    let Table _ _ _ ini_plc = ini_tbl
502
        -- we're using rwhnf from the Control.Parallel.Strategies
503
        -- package; we don't need to use rnf as that would force too
504
        -- much evaluation in single-threaded cases, and in
505
        -- multi-threaded case the weak head normal form is enough to
506
        -- spark the evaluation
507
        tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves ini_tbl)
508
                 victims
509
        -- iterate over all instances, computing the best move
510
        best_tbl =
511
            foldl'
512
            (\ step_tbl new_tbl -> compareTables step_tbl new_tbl)
513
            ini_tbl tables
514
        Table _ _ _ best_plc = best_tbl
515
    in if length best_plc == length ini_plc
516
       then ini_tbl -- no advancement
517
       else best_tbl
518

    
519
-- | Check if we are allowed to go deeper in the balancing
520
doNextBalance :: Table     -- ^ The starting table
521
              -> Int       -- ^ Remaining length
522
              -> Score     -- ^ Score at which to stop
523
              -> Bool      -- ^ The resulting table and commands
524
doNextBalance ini_tbl max_rounds min_score =
525
    let Table _ _ ini_cv ini_plc = ini_tbl
526
        ini_plc_len = length ini_plc
527
    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
528

    
529
-- | Run a balance move
530
tryBalance :: Table       -- ^ The starting table
531
           -> Bool        -- ^ Allow disk moves
532
           -> Bool        -- ^ Only evacuate moves
533
           -> Score       -- ^ Min gain threshold
534
           -> Score       -- ^ Min gain
535
           -> Maybe Table -- ^ The resulting table and commands
536
tryBalance ini_tbl disk_moves evac_mode mg_limit min_gain =
537
    let Table ini_nl ini_il ini_cv _ = ini_tbl
538
        all_inst = Container.elems ini_il
539
        all_inst' = if evac_mode
540
                    then let bad_nodes = map Node.idx . filter Node.offline $
541
                                         Container.elems ini_nl
542
                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
543
                                          Instance.pNode e `elem` bad_nodes)
544
                            all_inst
545
                    else all_inst
546
        reloc_inst = filter Instance.movable all_inst'
547
        node_idx = map Node.idx . filter (not . Node.offline) $
548
                   Container.elems ini_nl
549
        fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
550
        (Table _ _ fin_cv _) = fin_tbl
551
    in
552
      if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
553
      then Just fin_tbl -- this round made success, return the new table
554
      else Nothing
555

    
556
-- * Allocation functions
557

    
558
-- | Build failure stats out of a list of failures
559
collapseFailures :: [FailMode] -> FailStats
560
collapseFailures flst =
561
    map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
562

    
563
-- | Update current Allocation solution and failure stats with new
564
-- elements
565
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
566
concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
567

    
568
concatAllocs as (OpGood ns@(_, _, _, nscore)) =
569
    let -- Choose the old or new solution, based on the cluster score
570
        cntok = asAllocs as
571
        osols = asSolutions as
572
        nsols = case osols of
573
                  [] -> [ns]
574
                  (_, _, _, oscore):[] ->
575
                      if oscore < nscore
576
                      then osols
577
                      else [ns]
578
                  -- FIXME: here we simply concat to lists with more
579
                  -- than one element; we should instead abort, since
580
                  -- this is not a valid usage of this function
581
                  xs -> ns:xs
582
        nsuc = cntok + 1
583
    -- Note: we force evaluation of nsols here in order to keep the
584
    -- memory profile low - we know that we will need nsols for sure
585
    -- in the next cycle, so we force evaluation of nsols, since the
586
    -- foldl' in the caller will only evaluate the tuple, but not the
587
    -- elements of the tuple
588
    in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
589

    
590
-- | Sums two allocation solutions (e.g. for two separate node groups).
591
sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
592
sumAllocs (AllocSolution af aa as al) (AllocSolution bf ba bs bl) =
593
    AllocSolution (af ++ bf) (aa + ba) (as ++ bs) (al ++ bl)
594

    
595
-- | Given a solution, generates a reasonable description for it
596
describeSolution :: AllocSolution -> String
597
describeSolution as =
598
  let fcnt = asFailures as
599
      sols = asSolutions as
600
      freasons =
601
        intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
602
        filter ((> 0) . snd) . collapseFailures $ fcnt
603
  in if null sols
604
     then "No valid allocation solutions, failure reasons: " ++
605
          (if null fcnt
606
           then "unknown reasons"
607
           else freasons)
608
     else let (_, _, nodes, cv) = head sols
609
          in printf ("score: %.8f, successes %d, failures %d (%s)" ++
610
                     " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
611
             (intercalate "/" . map Node.name $ nodes)
612

    
613
-- | Annotates a solution with the appropriate string
614
annotateSolution :: AllocSolution -> AllocSolution
615
annotateSolution as = as { asLog = describeSolution as : asLog as }
616

    
617
-- | Try to allocate an instance on the cluster.
618
tryAlloc :: (Monad m) =>
619
            Node.List         -- ^ The node list
620
         -> Instance.List     -- ^ The instance list
621
         -> Instance.Instance -- ^ The instance to allocate
622
         -> Int               -- ^ Required number of nodes
623
         -> m AllocSolution   -- ^ Possible solution list
624
tryAlloc nl _ inst 2 =
625
    let all_nodes = getOnline nl
626
        all_pairs = liftM2 (,) all_nodes all_nodes
627
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y &&
628
                                      Node.group x == Node.group y) all_pairs
629
        sols = foldl' (\cstate (p, s) ->
630
                           concatAllocs cstate $ allocateOnPair nl inst p s
631
                      ) emptySolution ok_pairs
632

    
633
    in if null ok_pairs -- means we have just one node
634
       then fail "Not enough online nodes"
635
       else return $ annotateSolution sols
636

    
637
tryAlloc nl _ inst 1 =
638
    let all_nodes = getOnline nl
639
        sols = foldl' (\cstate ->
640
                           concatAllocs cstate . allocateOnSingle nl inst
641
                      ) emptySolution all_nodes
642
    in if null all_nodes
643
       then fail "No online nodes"
644
       else return $ annotateSolution sols
645

    
646
tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
647
                             \destinations required (" ++ show reqn ++
648
                                               "), only two supported"
649

    
650
-- | Given a group/result, describe it as a nice (list of) messages
651
solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
652
solutionDescription gl (groupId, result) =
653
  case result of
654
    Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution)
655
    Bad message -> [printf "Group %s: error %s" gname message]
656
  where grp = Container.find groupId gl
657
        gname = Group.name grp
658
        pol = apolToString (Group.allocPolicy grp)
659

    
660
-- | From a list of possibly bad and possibly empty solutions, filter
661
-- only the groups with a valid result
662
filterMGResults :: Group.List
663
                -> [(Gdx, Result AllocSolution)]
664
                -> [(Gdx, AllocSolution)]
665
filterMGResults gl=
666
  filter ((/= AllocUnallocable) . Group.allocPolicy .
667
             flip Container.find gl . fst) .
668
  filter (not . null . asSolutions . snd) .
669
  map (\(y, Ok x) -> (y, x)) .
670
  filter (isOk . snd)
671

    
672
-- | Sort multigroup results based on policy and score
673
sortMGResults :: Group.List
674
             -> [(Gdx, AllocSolution)]
675
             -> [(Gdx, AllocSolution)]
676
sortMGResults gl sols =
677
    let extractScore = \(_, _, _, x) -> x
678
        solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
679
                               (extractScore . head . asSolutions) sol)
680
    in sortBy (comparing solScore) sols
681

    
682
-- | Try to allocate an instance on a multi-group cluster.
683
tryMGAlloc :: Group.List           -- ^ The group list
684
           -> Node.List            -- ^ The node list
685
           -> Instance.List        -- ^ The instance list
686
           -> Instance.Instance    -- ^ The instance to allocate
687
           -> Int                  -- ^ Required number of nodes
688
           -> Result AllocSolution -- ^ Possible solution list
689
tryMGAlloc mggl mgnl mgil inst cnt =
690
  let groups = splitCluster mgnl mgil
691
      -- TODO: currently we consider all groups preferred
692
      sols = map (\(gid, (nl, il)) ->
693
                   (gid, tryAlloc nl il inst cnt)) groups::
694
        [(Gdx, Result AllocSolution)]
695
      all_msgs = concatMap (solutionDescription mggl) sols
696
      goodSols = filterMGResults mggl sols
697
      sortedSols = sortMGResults mggl goodSols
698
  in if null sortedSols
699
     then Bad $ intercalate ", " all_msgs
700
     else let (final_group, final_sol) = head sortedSols
701
              final_name = Group.name $ Container.find final_group mggl
702
              selmsg = "Selected group: " ++  final_name
703
          in Ok $ final_sol { asLog = selmsg:all_msgs }
704

    
705
-- | Try to relocate an instance on the cluster.
706
tryReloc :: (Monad m) =>
707
            Node.List       -- ^ The node list
708
         -> Instance.List   -- ^ The instance list
709
         -> Idx             -- ^ The index of the instance to move
710
         -> Int             -- ^ The number of nodes required
711
         -> [Ndx]           -- ^ Nodes which should not be used
712
         -> m AllocSolution -- ^ Solution list
713
tryReloc nl il xid 1 ex_idx =
714
    let all_nodes = getOnline nl
715
        inst = Container.find xid il
716
        ex_idx' = Instance.pNode inst:ex_idx
717
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
718
        valid_idxes = map Node.idx valid_nodes
719
        sols1 = foldl' (\cstate x ->
720
                            let em = do
721
                                  (mnl, i, _, _) <-
722
                                      applyMove nl inst (ReplaceSecondary x)
723
                                  return (mnl, i, [Container.find x mnl],
724
                                          compCV mnl)
725
                            in concatAllocs cstate em
726
                       ) emptySolution valid_idxes
727
    in return sols1
728

    
729
tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
730
                                \destinations required (" ++ show reqn ++
731
                                                  "), only one supported"
732

    
733
tryMGReloc :: (Monad m) =>
734
              Group.List      -- ^ The group list
735
           -> Node.List       -- ^ The node list
736
           -> Instance.List   -- ^ The instance list
737
           -> Idx             -- ^ The index of the instance to move
738
           -> Int             -- ^ The number of nodes required
739
           -> [Ndx]           -- ^ Nodes which should not be used
740
           -> m AllocSolution -- ^ Solution list
741
tryMGReloc _ mgnl mgil xid ncount ex_ndx = do
742
  let groups = splitCluster mgnl mgil
743
      -- TODO: we only relocate inside the group for now
744
      inst = Container.find xid mgil
745
  (nl, il) <- case lookup (instancePriGroup mgnl inst) groups of
746
                Nothing -> fail $ "Cannot find group for instance " ++
747
                           Instance.name inst
748
                Just v -> return v
749
  tryReloc nl il xid ncount ex_ndx
750

    
751
-- | Change an instance's secondary node
752
evacInstance :: (Monad m) =>
753
                [Ndx]                      -- ^ Excluded nodes
754
             -> Instance.List              -- ^ The current instance list
755
             -> (Node.List, AllocSolution) -- ^ The current state
756
             -> Idx                        -- ^ The instance to evacuate
757
             -> m (Node.List, AllocSolution)
758
evacInstance ex_ndx il (nl, old_as) idx = do
759
  -- FIXME: hardcoded one node here
760

    
761
  -- Longer explanation: evacuation is currently hardcoded to DRBD
762
  -- instances (which have one secondary); hence, even if the
763
  -- IAllocator protocol can request N nodes for an instance, and all
764
  -- the message parsing/loading pass this, this implementation only
765
  -- supports one; this situation needs to be revisited if we ever
766
  -- support more than one secondary, or if we change the storage
767
  -- model
768
  new_as <- tryReloc nl il idx 1 ex_ndx
769
  case asSolutions new_as of
770
    -- an individual relocation succeeded, we kind of compose the data
771
    -- from the two solutions
772
    csol@(nl', _, _, _):_ ->
773
        return (nl', new_as { asSolutions = csol:asSolutions old_as })
774
    -- this relocation failed, so we fail the entire evac
775
    _ -> fail $ "Can't evacuate instance " ++
776
         Instance.name (Container.find idx il) ++
777
             ": " ++ describeSolution new_as
778

    
779
-- | Try to evacuate a list of nodes.
780
tryEvac :: (Monad m) =>
781
            Node.List       -- ^ The node list
782
         -> Instance.List   -- ^ The instance list
783
         -> [Idx]           -- ^ Instances to be evacuated
784
         -> [Ndx]           -- ^ Restricted nodes (the ones being evacuated)
785
         -> m AllocSolution -- ^ Solution list
786
tryEvac nl il idxs ex_ndx = do
787
  (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptySolution) idxs
788
  return sol
789

    
790
-- | Multi-group evacuation of a list of nodes.
791
tryMGEvac :: (Monad m) =>
792
             Group.List -- ^ The group list
793
          -> Node.List       -- ^ The node list
794
          -> Instance.List   -- ^ The instance list
795
          -> [Ndx]           -- ^ Nodes to be evacuated
796
          -> m AllocSolution -- ^ Solution list
797
tryMGEvac _ nl il ex_ndx =
798
    let ex_nodes = map (`Container.find` nl) ex_ndx
799
        all_insts = nub . concatMap Node.sList $ ex_nodes
800
        gni = splitCluster nl il
801
        -- we run the instance index list through a couple of maps to
802
        -- get finally to a structure of the type [(group index,
803
        -- [instance indices])]
804
        all_insts' = map (\idx ->
805
                              (instancePriGroup nl (Container.find idx il),
806
                               idx)) all_insts
807
        all_insts'' = groupBy ((==) `on` fst) all_insts'
808
        all_insts3 = map (\xs -> let (gdxs, idxs) = unzip xs
809
                                 in (head gdxs, idxs)) all_insts''
810
    in do
811
      -- that done, we now add the per-group nl/il to the tuple
812
      all_insts4 <-
813
          mapM (\(gdx, idxs) -> do
814
                  case lookup gdx gni of
815
                    Nothing -> fail $ "Can't find group index " ++ show gdx
816
                    Just (gnl, gil) -> return (gdx, gnl, gil, idxs))
817
          all_insts3
818
      results <- mapM (\(_, gnl, gil, idxs) -> tryEvac gnl gil idxs ex_ndx)
819
                 all_insts4
820
      let sol = foldl' (\orig_sol group_sol ->
821
                        sumAllocs orig_sol group_sol) emptySolution results
822
      return $ annotateSolution sol
823

    
824
-- | Recursively place instances on the cluster until we're out of space
825
iterateAlloc :: Node.List
826
             -> Instance.List
827
             -> Instance.Instance
828
             -> Int
829
             -> [Instance.Instance]
830
             -> [CStats]
831
             -> Result AllocResult
832
iterateAlloc nl il newinst nreq ixes cstats =
833
      let depth = length ixes
834
          newname = printf "new-%d" depth::String
835
          newidx = length (Container.elems il) + depth
836
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
837
      in case tryAlloc nl il newi2 nreq of
838
           Bad s -> Bad s
839
           Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
840
               case sols3 of
841
                 [] -> Ok (collapseFailures errs, nl, il, ixes, cstats)
842
                 (xnl, xi, _, _):[] ->
843
                     iterateAlloc xnl (Container.add newidx xi il)
844
                                  newinst nreq (xi:ixes)
845
                                  (totalResources xnl:cstats)
846
                 _ -> Bad "Internal error: multiple solutions for single\
847
                          \ allocation"
848

    
849
-- | The core of the tiered allocation mode
850
tieredAlloc :: Node.List
851
            -> Instance.List
852
            -> Instance.Instance
853
            -> Int
854
            -> [Instance.Instance]
855
            -> [CStats]
856
            -> Result AllocResult
857
tieredAlloc nl il newinst nreq ixes cstats =
858
    case iterateAlloc nl il newinst nreq ixes cstats of
859
      Bad s -> Bad s
860
      Ok (errs, nl', il', ixes', cstats') ->
861
          case Instance.shrinkByType newinst . fst . last $
862
               sortBy (comparing snd) errs of
863
            Bad _ -> Ok (errs, nl', il', ixes', cstats')
864
            Ok newinst' ->
865
                tieredAlloc nl' il' newinst' nreq ixes' cstats'
866

    
867
-- | Compute the tiered spec string description from a list of
868
-- allocated instances.
869
tieredSpecMap :: [Instance.Instance]
870
              -> [String]
871
tieredSpecMap trl_ixes =
872
    let fin_trl_ixes = reverse trl_ixes
873
        ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
874
        spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
875
                   ix_byspec
876
    in  map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
877
                             (rspecDsk spec) (rspecCpu spec) cnt) spec_map
878

    
879
-- * Formatting functions
880

    
881
-- | Given the original and final nodes, computes the relocation description.
882
computeMoves :: Instance.Instance -- ^ The instance to be moved
883
             -> String -- ^ The instance name
884
             -> IMove  -- ^ The move being performed
885
             -> String -- ^ New primary
886
             -> String -- ^ New secondary
887
             -> (String, [String])
888
                -- ^ Tuple of moves and commands list; moves is containing
889
                -- either @/f/@ for failover or @/r:name/@ for replace
890
                -- secondary, while the command list holds gnt-instance
891
                -- commands (without that prefix), e.g \"@failover instance1@\"
892
computeMoves i inam mv c d =
893
    case mv of
894
      Failover -> ("f", [mig])
895
      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
896
      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
897
      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
898
      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
899
    where morf = if Instance.running i then "migrate" else "failover"
900
          mig = printf "%s -f %s" morf inam::String
901
          rep n = printf "replace-disks -n %s %s" n inam
902

    
903
-- | Converts a placement to string format.
904
printSolutionLine :: Node.List     -- ^ The node list
905
                  -> Instance.List -- ^ The instance list
906
                  -> Int           -- ^ Maximum node name length
907
                  -> Int           -- ^ Maximum instance name length
908
                  -> Placement     -- ^ The current placement
909
                  -> Int           -- ^ The index of the placement in
910
                                   -- the solution
911
                  -> (String, [String])
912
printSolutionLine nl il nmlen imlen plc pos =
913
    let
914
        pmlen = (2*nmlen + 1)
915
        (i, p, s, mv, c) = plc
916
        inst = Container.find i il
917
        inam = Instance.alias inst
918
        npri = Node.alias $ Container.find p nl
919
        nsec = Node.alias $ Container.find s nl
920
        opri = Node.alias $ Container.find (Instance.pNode inst) nl
921
        osec = Node.alias $ Container.find (Instance.sNode inst) nl
922
        (moves, cmds) =  computeMoves inst inam mv npri nsec
923
        ostr = printf "%s:%s" opri osec::String
924
        nstr = printf "%s:%s" npri nsec::String
925
    in
926
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
927
       pos imlen inam pmlen ostr
928
       pmlen nstr c moves,
929
       cmds)
930

    
931
-- | Return the instance and involved nodes in an instance move.
932
involvedNodes :: Instance.List -> Placement -> [Ndx]
933
involvedNodes il plc =
934
    let (i, np, ns, _, _) = plc
935
        inst = Container.find i il
936
        op = Instance.pNode inst
937
        os = Instance.sNode inst
938
    in nub [np, ns, op, os]
939

    
940
-- | Inner function for splitJobs, that either appends the next job to
941
-- the current jobset, or starts a new jobset.
942
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
943
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
944
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
945
    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
946
    | otherwise = ([n]:cjs, ndx)
947

    
948
-- | Break a list of moves into independent groups. Note that this
949
-- will reverse the order of jobs.
950
splitJobs :: [MoveJob] -> [JobSet]
951
splitJobs = fst . foldl mergeJobs ([], [])
952

    
953
-- | Given a list of commands, prefix them with @gnt-instance@ and
954
-- also beautify the display a little.
955
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
956
formatJob jsn jsl (sn, (_, _, _, cmds)) =
957
    let out =
958
            printf "  echo job %d/%d" jsn sn:
959
            printf "  check":
960
            map ("  gnt-instance " ++) cmds
961
    in if sn == 1
962
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
963
       else out
964

    
965
-- | Given a list of commands, prefix them with @gnt-instance@ and
966
-- also beautify the display a little.
967
formatCmds :: [JobSet] -> String
968
formatCmds =
969
    unlines .
970
    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
971
                             (zip [1..] js)) .
972
    zip [1..]
973

    
974
-- | Print the node list.
975
printNodes :: Node.List -> [String] -> String
976
printNodes nl fs =
977
    let fields = case fs of
978
          [] -> Node.defaultFields
979
          "+":rest -> Node.defaultFields ++ rest
980
          _ -> fs
981
        snl = sortBy (comparing Node.idx) (Container.elems nl)
982
        (header, isnum) = unzip $ map Node.showHeader fields
983
    in unlines . map ((:) ' ' .  intercalate " ") $
984
       formatTable (header:map (Node.list fields) snl) isnum
985

    
986
-- | Print the instance list.
987
printInsts :: Node.List -> Instance.List -> String
988
printInsts nl il =
989
    let sil = sortBy (comparing Instance.idx) (Container.elems il)
990
        helper inst = [ if Instance.running inst then "R" else " "
991
                      , Instance.name inst
992
                      , Container.nameOf nl (Instance.pNode inst)
993
                      , let sdx = Instance.sNode inst
994
                        in if sdx == Node.noSecondary
995
                           then  ""
996
                           else Container.nameOf nl sdx
997
                      , printf "%3d" $ Instance.vcpus inst
998
                      , printf "%5d" $ Instance.mem inst
999
                      , printf "%5d" $ Instance.dsk inst `div` 1024
1000
                      , printf "%5.3f" lC
1001
                      , printf "%5.3f" lM
1002
                      , printf "%5.3f" lD
1003
                      , printf "%5.3f" lN
1004
                      ]
1005
            where DynUtil lC lM lD lN = Instance.util inst
1006
        header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
1007
                 , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
1008
        isnum = False:False:False:False:repeat True
1009
    in unlines . map ((:) ' ' . intercalate " ") $
1010
       formatTable (header:map helper sil) isnum
1011

    
1012
-- | Shows statistics for a given node list.
1013
printStats :: Node.List -> String
1014
printStats nl =
1015
    let dcvs = compDetailedCV nl
1016
        (weights, names) = unzip detailedCVInfo
1017
        hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
1018
        formatted = map (\(w, header, val) ->
1019
                             printf "%s=%.8f(x%.2f)" header val w::String) hd
1020
    in intercalate ", " formatted
1021

    
1022
-- | Convert a placement into a list of OpCodes (basically a job).
1023
iMoveToJob :: Node.List -> Instance.List
1024
          -> Idx -> IMove -> [OpCodes.OpCode]
1025
iMoveToJob nl il idx move =
1026
    let inst = Container.find idx il
1027
        iname = Instance.name inst
1028
        lookNode  = Just . Container.nameOf nl
1029
        opF = if Instance.running inst
1030
              then OpCodes.OpMigrateInstance iname True False
1031
              else OpCodes.OpFailoverInstance iname False
1032
        opR n = OpCodes.OpReplaceDisks iname (lookNode n)
1033
                OpCodes.ReplaceNewSecondary [] Nothing
1034
    in case move of
1035
         Failover -> [ opF ]
1036
         ReplacePrimary np -> [ opF, opR np, opF ]
1037
         ReplaceSecondary ns -> [ opR ns ]
1038
         ReplaceAndFailover np -> [ opR np, opF ]
1039
         FailoverAndReplace ns -> [ opF, opR ns ]
1040

    
1041
-- * Node group functions
1042

    
1043
-- | Computes the group of an instance
1044
instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
1045
instanceGroup nl i =
1046
  let sidx = Instance.sNode i
1047
      pnode = Container.find (Instance.pNode i) nl
1048
      snode = if sidx == Node.noSecondary
1049
              then pnode
1050
              else Container.find sidx nl
1051
      pgroup = Node.group pnode
1052
      sgroup = Node.group snode
1053
  in if pgroup /= sgroup
1054
     then fail ("Instance placed accross two node groups, primary " ++
1055
                show pgroup ++ ", secondary " ++ show sgroup)
1056
     else return pgroup
1057

    
1058
-- | Computes the group of an instance per the primary node
1059
instancePriGroup :: Node.List -> Instance.Instance -> Gdx
1060
instancePriGroup nl i =
1061
  let pnode = Container.find (Instance.pNode i) nl
1062
  in  Node.group pnode
1063

    
1064
-- | Compute the list of badly allocated instances (split across node
1065
-- groups)
1066
findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
1067
findSplitInstances nl il =
1068
  filter (not . isOk . instanceGroup nl) (Container.elems il)
1069

    
1070
-- | Splits a cluster into the component node groups
1071
splitCluster :: Node.List -> Instance.List ->
1072
                [(Gdx, (Node.List, Instance.List))]
1073
splitCluster nl il =
1074
  let ngroups = Node.computeGroups (Container.elems nl)
1075
  in map (\(guuid, nodes) ->
1076
           let nidxs = map Node.idx nodes
1077
               nodes' = zip nidxs nodes
1078
               instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
1079
           in (guuid, (Container.fromList nodes', instances))) ngroups