Statistics
| Branch: | Tag: | Revision:

root / Ganeti / HTools / Cluster.hs @ 0d66ea67

History | View | Annotate | Download (45.2 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the "Main" module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009, 2010, 2011 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
    (
31
     -- * Types
32
      AllocSolution(..)
33
    , Table(..)
34
    , CStats(..)
35
    , AllocStats
36
    -- * Generic functions
37
    , totalResources
38
    , computeAllocationDelta
39
    -- * First phase functions
40
    , computeBadItems
41
    -- * Second phase functions
42
    , printSolutionLine
43
    , formatCmds
44
    , involvedNodes
45
    , splitJobs
46
    -- * Display functions
47
    , printNodes
48
    , printInsts
49
    -- * Balacing functions
50
    , checkMove
51
    , doNextBalance
52
    , tryBalance
53
    , compCV
54
    , compDetailedCV
55
    , printStats
56
    , iMoveToJob
57
    -- * IAllocator functions
58
    , genAllocNodes
59
    , tryAlloc
60
    , tryMGAlloc
61
    , tryReloc
62
    , tryMGReloc
63
    , tryEvac
64
    , tryMGEvac
65
    , collapseFailures
66
    -- * Allocation functions
67
    , iterateAlloc
68
    , tieredAlloc
69
    , tieredSpecMap
70
     -- * Node group functions
71
    , instanceGroup
72
    , findSplitInstances
73
    , splitCluster
74
    ) where
75

    
76
import Data.Function (on)
77
import Data.List
78
import Data.Ord (comparing)
79
import Text.Printf (printf)
80
import Control.Monad
81
import Control.Parallel.Strategies
82

    
83
import qualified Ganeti.HTools.Container as Container
84
import qualified Ganeti.HTools.Instance as Instance
85
import qualified Ganeti.HTools.Node as Node
86
import qualified Ganeti.HTools.Group as Group
87
import Ganeti.HTools.Types
88
import Ganeti.HTools.Utils
89
import qualified Ganeti.OpCodes as OpCodes
90

    
91
-- * Types
92

    
93
-- | Allocation\/relocation solution.
94
data AllocSolution = AllocSolution
95
  { asFailures  :: [FailMode]          -- ^ Failure counts
96
  , asAllocs    :: Int                 -- ^ Good allocation count
97
  , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
98
                                       -- of the list depends on the
99
                                       -- allocation/relocation mode
100
  , asLog       :: [String]            -- ^ A list of informational messages
101
  }
102

    
103
-- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
104
type AllocResult = (FailStats, Node.List, Instance.List,
105
                    [Instance.Instance], [CStats])
106

    
107

    
108
-- | A type denoting the valid allocation mode/pairs.
109
-- For a one-node allocation, this will be a @Left ['Node.Node']@,
110
-- whereas for a two-node allocation, this will be a @Right
111
-- [('Node.Node', 'Node.Node')]@.
112
type AllocNodes = Either [Ndx] [(Ndx, Ndx)]
113

    
114
-- | The empty solution we start with when computing allocations
115
emptySolution :: AllocSolution
116
emptySolution = AllocSolution { asFailures = [], asAllocs = 0
117
                              , asSolutions = [], asLog = [] }
118

    
119
-- | The complete state for the balancing solution
120
data Table = Table Node.List Instance.List Score [Placement]
121
             deriving (Show, Read)
122

    
123
data CStats = CStats { csFmem :: Int    -- ^ Cluster free mem
124
                     , csFdsk :: Int    -- ^ Cluster free disk
125
                     , csAmem :: Int    -- ^ Cluster allocatable mem
126
                     , csAdsk :: Int    -- ^ Cluster allocatable disk
127
                     , csAcpu :: Int    -- ^ Cluster allocatable cpus
128
                     , csMmem :: Int    -- ^ Max node allocatable mem
129
                     , csMdsk :: Int    -- ^ Max node allocatable disk
130
                     , csMcpu :: Int    -- ^ Max node allocatable cpu
131
                     , csImem :: Int    -- ^ Instance used mem
132
                     , csIdsk :: Int    -- ^ Instance used disk
133
                     , csIcpu :: Int    -- ^ Instance used cpu
134
                     , csTmem :: Double -- ^ Cluster total mem
135
                     , csTdsk :: Double -- ^ Cluster total disk
136
                     , csTcpu :: Double -- ^ Cluster total cpus
137
                     , csVcpu :: Int    -- ^ Cluster virtual cpus (if
138
                                        -- node pCpu has been set,
139
                                        -- otherwise -1)
140
                     , csXmem :: Int    -- ^ Unnacounted for mem
141
                     , csNmem :: Int    -- ^ Node own memory
142
                     , csScore :: Score -- ^ The cluster score
143
                     , csNinst :: Int   -- ^ The total number of instances
144
                     }
145
            deriving (Show, Read)
146

    
147
-- | Currently used, possibly to allocate, unallocable
148
type AllocStats = (RSpec, RSpec, RSpec)
149

    
150
-- * Utility functions
151

    
152
-- | Verifies the N+1 status and return the affected nodes.
153
verifyN1 :: [Node.Node] -> [Node.Node]
154
verifyN1 = filter Node.failN1
155

    
156
{-| Computes the pair of bad nodes and instances.
157

    
158
The bad node list is computed via a simple 'verifyN1' check, and the
159
bad instance list is the list of primary and secondary instances of
160
those nodes.
161

    
162
-}
163
computeBadItems :: Node.List -> Instance.List ->
164
                   ([Node.Node], [Instance.Instance])
165
computeBadItems nl il =
166
  let bad_nodes = verifyN1 $ getOnline nl
167
      bad_instances = map (`Container.find` il) .
168
                      sort . nub $
169
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
170
  in
171
    (bad_nodes, bad_instances)
172

    
173
-- | Zero-initializer for the CStats type
174
emptyCStats :: CStats
175
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
176

    
177
-- | Update stats with data from a new node
178
updateCStats :: CStats -> Node.Node -> CStats
179
updateCStats cs node =
180
    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
181
                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
182
                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
183
                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
184
                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
185
                 csVcpu = x_vcpu,
186
                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
187
               }
188
            = cs
189
        inc_amem = Node.fMem node - Node.rMem node
190
        inc_amem' = if inc_amem > 0 then inc_amem else 0
191
        inc_adsk = Node.availDisk node
192
        inc_imem = truncate (Node.tMem node) - Node.nMem node
193
                   - Node.xMem node - Node.fMem node
194
        inc_icpu = Node.uCpu node
195
        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
196
        inc_vcpu = Node.hiCpu node
197
        inc_acpu = Node.availCpu node
198

    
199
    in cs { csFmem = x_fmem + Node.fMem node
200
          , csFdsk = x_fdsk + Node.fDsk node
201
          , csAmem = x_amem + inc_amem'
202
          , csAdsk = x_adsk + inc_adsk
203
          , csAcpu = x_acpu + inc_acpu
204
          , csMmem = max x_mmem inc_amem'
205
          , csMdsk = max x_mdsk inc_adsk
206
          , csMcpu = max x_mcpu inc_acpu
207
          , csImem = x_imem + inc_imem
208
          , csIdsk = x_idsk + inc_idsk
209
          , csIcpu = x_icpu + inc_icpu
210
          , csTmem = x_tmem + Node.tMem node
211
          , csTdsk = x_tdsk + Node.tDsk node
212
          , csTcpu = x_tcpu + Node.tCpu node
213
          , csVcpu = x_vcpu + inc_vcpu
214
          , csXmem = x_xmem + Node.xMem node
215
          , csNmem = x_nmem + Node.nMem node
216
          , csNinst = x_ninst + length (Node.pList node)
217
          }
218

    
219
-- | Compute the total free disk and memory in the cluster.
220
totalResources :: Node.List -> CStats
221
totalResources nl =
222
    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
223
    in cs { csScore = compCV nl }
224

    
225
-- | Compute the delta between two cluster state.
226
--
227
-- This is used when doing allocations, to understand better the
228
-- available cluster resources. The return value is a triple of the
229
-- current used values, the delta that was still allocated, and what
230
-- was left unallocated.
231
computeAllocationDelta :: CStats -> CStats -> AllocStats
232
computeAllocationDelta cini cfin =
233
    let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
234
        CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
235
                csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
236
        rini = RSpec i_icpu i_imem i_idsk
237
        rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
238
        un_cpu = v_cpu - f_icpu
239
        runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
240
    in (rini, rfin, runa)
241

    
242
-- | The names and weights of the individual elements in the CV list
243
detailedCVInfo :: [(Double, String)]
244
detailedCVInfo = [ (1,  "free_mem_cv")
245
                 , (1,  "free_disk_cv")
246
                 , (1,  "n1_cnt")
247
                 , (1,  "reserved_mem_cv")
248
                 , (4,  "offline_all_cnt")
249
                 , (16, "offline_pri_cnt")
250
                 , (1,  "vcpu_ratio_cv")
251
                 , (1,  "cpu_load_cv")
252
                 , (1,  "mem_load_cv")
253
                 , (1,  "disk_load_cv")
254
                 , (1,  "net_load_cv")
255
                 , (2,  "pri_tags_score")
256
                 ]
257

    
258
detailedCVWeights :: [Double]
259
detailedCVWeights = map fst detailedCVInfo
260

    
261
-- | Compute the mem and disk covariance.
262
compDetailedCV :: Node.List -> [Double]
263
compDetailedCV nl =
264
    let
265
        all_nodes = Container.elems nl
266
        (offline, nodes) = partition Node.offline all_nodes
267
        mem_l = map Node.pMem nodes
268
        dsk_l = map Node.pDsk nodes
269
        -- metric: memory covariance
270
        mem_cv = stdDev mem_l
271
        -- metric: disk covariance
272
        dsk_cv = stdDev dsk_l
273
        -- metric: count of instances living on N1 failing nodes
274
        n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
275
                                                   length (Node.pList n)) .
276
                   filter Node.failN1 $ nodes :: Double
277
        res_l = map Node.pRem nodes
278
        -- metric: reserved memory covariance
279
        res_cv = stdDev res_l
280
        -- offline instances metrics
281
        offline_ipri = sum . map (length . Node.pList) $ offline
282
        offline_isec = sum . map (length . Node.sList) $ offline
283
        -- metric: count of instances on offline nodes
284
        off_score = fromIntegral (offline_ipri + offline_isec)::Double
285
        -- metric: count of primary instances on offline nodes (this
286
        -- helps with evacuation/failover of primary instances on
287
        -- 2-node clusters with one node offline)
288
        off_pri_score = fromIntegral offline_ipri::Double
289
        cpu_l = map Node.pCpu nodes
290
        -- metric: covariance of vcpu/pcpu ratio
291
        cpu_cv = stdDev cpu_l
292
        -- metrics: covariance of cpu, memory, disk and network load
293
        (c_load, m_load, d_load, n_load) = unzip4 $
294
            map (\n ->
295
                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
296
                         DynUtil c2 m2 d2 n2 = Node.utilPool n
297
                     in (c1/c2, m1/m2, d1/d2, n1/n2)
298
                ) nodes
299
        -- metric: conflicting instance count
300
        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
301
        pri_tags_score = fromIntegral pri_tags_inst::Double
302
    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
303
       , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
304
       , pri_tags_score ]
305

    
306
-- | Compute the /total/ variance.
307
compCV :: Node.List -> Double
308
compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
309

    
310
-- | Compute online nodes from a Node.List
311
getOnline :: Node.List -> [Node.Node]
312
getOnline = filter (not . Node.offline) . Container.elems
313

    
314
-- * hbal functions
315

    
316
-- | Compute best table. Note that the ordering of the arguments is important.
317
compareTables :: Table -> Table -> Table
318
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
319
    if a_cv > b_cv then b else a
320

    
321
-- | Applies an instance move to a given node list and instance.
322
applyMove :: Node.List -> Instance.Instance
323
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
324
-- Failover (f)
325
applyMove nl inst Failover =
326
    let old_pdx = Instance.pNode inst
327
        old_sdx = Instance.sNode inst
328
        old_p = Container.find old_pdx nl
329
        old_s = Container.find old_sdx nl
330
        int_p = Node.removePri old_p inst
331
        int_s = Node.removeSec old_s inst
332
        force_p = Node.offline old_p
333
        new_nl = do -- Maybe monad
334
          new_p <- Node.addPriEx force_p int_s inst
335
          new_s <- Node.addSec int_p inst old_sdx
336
          let new_inst = Instance.setBoth inst old_sdx old_pdx
337
          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
338
                  new_inst, old_sdx, old_pdx)
339
    in new_nl
340

    
341
-- Replace the primary (f:, r:np, f)
342
applyMove nl inst (ReplacePrimary new_pdx) =
343
    let old_pdx = Instance.pNode inst
344
        old_sdx = Instance.sNode inst
345
        old_p = Container.find old_pdx nl
346
        old_s = Container.find old_sdx nl
347
        tgt_n = Container.find new_pdx nl
348
        int_p = Node.removePri old_p inst
349
        int_s = Node.removeSec old_s inst
350
        force_p = Node.offline old_p
351
        new_nl = do -- Maybe monad
352
          -- check that the current secondary can host the instance
353
          -- during the migration
354
          tmp_s <- Node.addPriEx force_p int_s inst
355
          let tmp_s' = Node.removePri tmp_s inst
356
          new_p <- Node.addPriEx force_p tgt_n inst
357
          new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
358
          let new_inst = Instance.setPri inst new_pdx
359
          return (Container.add new_pdx new_p $
360
                  Container.addTwo old_pdx int_p old_sdx new_s nl,
361
                  new_inst, new_pdx, old_sdx)
362
    in new_nl
363

    
364
-- Replace the secondary (r:ns)
365
applyMove nl inst (ReplaceSecondary new_sdx) =
366
    let old_pdx = Instance.pNode inst
367
        old_sdx = Instance.sNode inst
368
        old_s = Container.find old_sdx nl
369
        tgt_n = Container.find new_sdx nl
370
        int_s = Node.removeSec old_s inst
371
        force_s = Node.offline old_s
372
        new_inst = Instance.setSec inst new_sdx
373
        new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
374
                 \new_s -> return (Container.addTwo new_sdx
375
                                   new_s old_sdx int_s nl,
376
                                   new_inst, old_pdx, new_sdx)
377
    in new_nl
378

    
379
-- Replace the secondary and failover (r:np, f)
380
applyMove nl inst (ReplaceAndFailover new_pdx) =
381
    let old_pdx = Instance.pNode inst
382
        old_sdx = Instance.sNode inst
383
        old_p = Container.find old_pdx nl
384
        old_s = Container.find old_sdx nl
385
        tgt_n = Container.find new_pdx nl
386
        int_p = Node.removePri old_p inst
387
        int_s = Node.removeSec old_s inst
388
        force_s = Node.offline old_s
389
        new_nl = do -- Maybe monad
390
          new_p <- Node.addPri tgt_n inst
391
          new_s <- Node.addSecEx force_s int_p inst new_pdx
392
          let new_inst = Instance.setBoth inst new_pdx old_pdx
393
          return (Container.add new_pdx new_p $
394
                  Container.addTwo old_pdx new_s old_sdx int_s nl,
395
                  new_inst, new_pdx, old_pdx)
396
    in new_nl
397

    
398
-- Failver and replace the secondary (f, r:ns)
399
applyMove nl inst (FailoverAndReplace new_sdx) =
400
    let old_pdx = Instance.pNode inst
401
        old_sdx = Instance.sNode inst
402
        old_p = Container.find old_pdx nl
403
        old_s = Container.find old_sdx nl
404
        tgt_n = Container.find new_sdx nl
405
        int_p = Node.removePri old_p inst
406
        int_s = Node.removeSec old_s inst
407
        force_p = Node.offline old_p
408
        new_nl = do -- Maybe monad
409
          new_p <- Node.addPriEx force_p int_s inst
410
          new_s <- Node.addSecEx force_p tgt_n inst old_sdx
411
          let new_inst = Instance.setBoth inst old_sdx new_sdx
412
          return (Container.add new_sdx new_s $
413
                  Container.addTwo old_sdx new_p old_pdx int_p nl,
414
                  new_inst, old_sdx, new_sdx)
415
    in new_nl
416

    
417
-- | Tries to allocate an instance on one given node.
418
allocateOnSingle :: Node.List -> Instance.Instance -> Ndx
419
                 -> OpResult Node.AllocElement
420
allocateOnSingle nl inst new_pdx =
421
    let p = Container.find new_pdx nl
422
        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
423
    in  Node.addPri p inst >>= \new_p -> do
424
      let new_nl = Container.add new_pdx new_p nl
425
          new_score = compCV nl
426
      return (new_nl, new_inst, [new_p], new_score)
427

    
428
-- | Tries to allocate an instance on a given pair of nodes.
429
allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx
430
               -> OpResult Node.AllocElement
431
allocateOnPair nl inst new_pdx new_sdx =
432
    let tgt_p = Container.find new_pdx nl
433
        tgt_s = Container.find new_sdx nl
434
    in do
435
      new_p <- Node.addPri tgt_p inst
436
      new_s <- Node.addSec tgt_s inst new_pdx
437
      let new_inst = Instance.setBoth inst new_pdx new_sdx
438
          new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
439
      return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
440

    
441
-- | Tries to perform an instance move and returns the best table
442
-- between the original one and the new one.
443
checkSingleStep :: Table -- ^ The original table
444
                -> Instance.Instance -- ^ The instance to move
445
                -> Table -- ^ The current best table
446
                -> IMove -- ^ The move to apply
447
                -> Table -- ^ The final best table
448
checkSingleStep ini_tbl target cur_tbl move =
449
    let
450
        Table ini_nl ini_il _ ini_plc = ini_tbl
451
        tmp_resu = applyMove ini_nl target move
452
    in
453
      case tmp_resu of
454
        OpFail _ -> cur_tbl
455
        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
456
            let tgt_idx = Instance.idx target
457
                upd_cvar = compCV upd_nl
458
                upd_il = Container.add tgt_idx new_inst ini_il
459
                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
460
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
461
            in
462
              compareTables cur_tbl upd_tbl
463

    
464
-- | Given the status of the current secondary as a valid new node and
465
-- the current candidate target node, generate the possible moves for
466
-- a instance.
467
possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
468
              -> Ndx       -- ^ Target node candidate
469
              -> [IMove]   -- ^ List of valid result moves
470
possibleMoves True tdx =
471
    [ReplaceSecondary tdx,
472
     ReplaceAndFailover tdx,
473
     ReplacePrimary tdx,
474
     FailoverAndReplace tdx]
475

    
476
possibleMoves False tdx =
477
    [ReplaceSecondary tdx,
478
     ReplaceAndFailover tdx]
479

    
480
-- | Compute the best move for a given instance.
481
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
482
                  -> Bool              -- ^ Whether disk moves are allowed
483
                  -> Table             -- ^ Original table
484
                  -> Instance.Instance -- ^ Instance to move
485
                  -> Table             -- ^ Best new table for this instance
486
checkInstanceMove nodes_idx disk_moves ini_tbl target =
487
    let
488
        opdx = Instance.pNode target
489
        osdx = Instance.sNode target
490
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
491
        use_secondary = elem osdx nodes_idx
492
        aft_failover = if use_secondary -- if allowed to failover
493
                       then checkSingleStep ini_tbl target ini_tbl Failover
494
                       else ini_tbl
495
        all_moves = if disk_moves
496
                    then concatMap (possibleMoves use_secondary) nodes
497
                    else []
498
    in
499
      -- iterate over the possible nodes for this instance
500
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
501

    
502
-- | Compute the best next move.
503
checkMove :: [Ndx]               -- ^ Allowed target node indices
504
          -> Bool                -- ^ Whether disk moves are allowed
505
          -> Table               -- ^ The current solution
506
          -> [Instance.Instance] -- ^ List of instances still to move
507
          -> Table               -- ^ The new solution
508
checkMove nodes_idx disk_moves ini_tbl victims =
509
    let Table _ _ _ ini_plc = ini_tbl
510
        -- we're using rwhnf from the Control.Parallel.Strategies
511
        -- package; we don't need to use rnf as that would force too
512
        -- much evaluation in single-threaded cases, and in
513
        -- multi-threaded case the weak head normal form is enough to
514
        -- spark the evaluation
515
        tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves ini_tbl)
516
                 victims
517
        -- iterate over all instances, computing the best move
518
        best_tbl =
519
            foldl'
520
            (\ step_tbl new_tbl -> compareTables step_tbl new_tbl)
521
            ini_tbl tables
522
        Table _ _ _ best_plc = best_tbl
523
    in if length best_plc == length ini_plc
524
       then ini_tbl -- no advancement
525
       else best_tbl
526

    
527
-- | Check if we are allowed to go deeper in the balancing
528
doNextBalance :: Table     -- ^ The starting table
529
              -> Int       -- ^ Remaining length
530
              -> Score     -- ^ Score at which to stop
531
              -> Bool      -- ^ The resulting table and commands
532
doNextBalance ini_tbl max_rounds min_score =
533
    let Table _ _ ini_cv ini_plc = ini_tbl
534
        ini_plc_len = length ini_plc
535
    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
536

    
537
-- | Run a balance move
538
tryBalance :: Table       -- ^ The starting table
539
           -> Bool        -- ^ Allow disk moves
540
           -> Bool        -- ^ Only evacuate moves
541
           -> Score       -- ^ Min gain threshold
542
           -> Score       -- ^ Min gain
543
           -> Maybe Table -- ^ The resulting table and commands
544
tryBalance ini_tbl disk_moves evac_mode mg_limit min_gain =
545
    let Table ini_nl ini_il ini_cv _ = ini_tbl
546
        all_inst = Container.elems ini_il
547
        all_inst' = if evac_mode
548
                    then let bad_nodes = map Node.idx . filter Node.offline $
549
                                         Container.elems ini_nl
550
                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
551
                                          Instance.pNode e `elem` bad_nodes)
552
                            all_inst
553
                    else all_inst
554
        reloc_inst = filter Instance.movable all_inst'
555
        node_idx = map Node.idx . filter (not . Node.offline) $
556
                   Container.elems ini_nl
557
        fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
558
        (Table _ _ fin_cv _) = fin_tbl
559
    in
560
      if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
561
      then Just fin_tbl -- this round made success, return the new table
562
      else Nothing
563

    
564
-- * Allocation functions
565

    
566
-- | Build failure stats out of a list of failures
567
collapseFailures :: [FailMode] -> FailStats
568
collapseFailures flst =
569
    map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
570

    
571
-- | Update current Allocation solution and failure stats with new
572
-- elements
573
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
574
concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
575

    
576
concatAllocs as (OpGood ns@(_, _, _, nscore)) =
577
    let -- Choose the old or new solution, based on the cluster score
578
        cntok = asAllocs as
579
        osols = asSolutions as
580
        nsols = case osols of
581
                  [] -> [ns]
582
                  (_, _, _, oscore):[] ->
583
                      if oscore < nscore
584
                      then osols
585
                      else [ns]
586
                  -- FIXME: here we simply concat to lists with more
587
                  -- than one element; we should instead abort, since
588
                  -- this is not a valid usage of this function
589
                  xs -> ns:xs
590
        nsuc = cntok + 1
591
    -- Note: we force evaluation of nsols here in order to keep the
592
    -- memory profile low - we know that we will need nsols for sure
593
    -- in the next cycle, so we force evaluation of nsols, since the
594
    -- foldl' in the caller will only evaluate the tuple, but not the
595
    -- elements of the tuple
596
    in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
597

    
598
-- | Sums two allocation solutions (e.g. for two separate node groups).
599
sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
600
sumAllocs (AllocSolution af aa as al) (AllocSolution bf ba bs bl) =
601
    AllocSolution (af ++ bf) (aa + ba) (as ++ bs) (al ++ bl)
602

    
603
-- | Given a solution, generates a reasonable description for it
604
describeSolution :: AllocSolution -> String
605
describeSolution as =
606
  let fcnt = asFailures as
607
      sols = asSolutions as
608
      freasons =
609
        intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
610
        filter ((> 0) . snd) . collapseFailures $ fcnt
611
  in if null sols
612
     then "No valid allocation solutions, failure reasons: " ++
613
          (if null fcnt
614
           then "unknown reasons"
615
           else freasons)
616
     else let (_, _, nodes, cv) = head sols
617
          in printf ("score: %.8f, successes %d, failures %d (%s)" ++
618
                     " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
619
             (intercalate "/" . map Node.name $ nodes)
620

    
621
-- | Annotates a solution with the appropriate string
622
annotateSolution :: AllocSolution -> AllocSolution
623
annotateSolution as = as { asLog = describeSolution as : asLog as }
624

    
625
-- | Generate the valid node allocation singles or pairs for a new instance.
626
genAllocNodes :: Node.List         -- ^ The node map
627
              -> Int               -- ^ The number of nodes required
628
              -> Result AllocNodes -- ^ The (monadic) result
629
genAllocNodes nl count =
630
    let all_nodes = getOnline nl
631
        all_pairs = liftM2 (,) all_nodes all_nodes
632
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y &&
633
                                      Node.group x == Node.group y) all_pairs
634
    in case count of
635
         1 -> Ok (Left (map Node.idx all_nodes))
636
         2 -> Ok (Right (map (\(p, s) -> (Node.idx p, Node.idx s)) ok_pairs))
637
         _ -> Bad "Unsupported number of nodes, only one or two  supported"
638

    
639
-- | Try to allocate an instance on the cluster.
640
tryAlloc :: (Monad m) =>
641
            Node.List         -- ^ The node list
642
         -> Instance.List     -- ^ The instance list
643
         -> Instance.Instance -- ^ The instance to allocate
644
         -> AllocNodes        -- ^ The allocation targets
645
         -> m AllocSolution   -- ^ Possible solution list
646
tryAlloc nl _ inst (Right ok_pairs) =
647
    let sols = foldl' (\cstate (p, s) ->
648
                           concatAllocs cstate $ allocateOnPair nl inst p s
649
                      ) emptySolution ok_pairs
650

    
651
    in if null ok_pairs -- means we have just one node
652
       then fail "Not enough online nodes"
653
       else return $ annotateSolution sols
654

    
655
tryAlloc nl _ inst (Left all_nodes) =
656
    let sols = foldl' (\cstate ->
657
                           concatAllocs cstate . allocateOnSingle nl inst
658
                      ) emptySolution all_nodes
659
    in if null all_nodes
660
       then fail "No online nodes"
661
       else return $ annotateSolution sols
662

    
663
-- | Given a group/result, describe it as a nice (list of) messages
664
solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
665
solutionDescription gl (groupId, result) =
666
  case result of
667
    Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution)
668
    Bad message -> [printf "Group %s: error %s" gname message]
669
  where grp = Container.find groupId gl
670
        gname = Group.name grp
671
        pol = apolToString (Group.allocPolicy grp)
672

    
673
-- | From a list of possibly bad and possibly empty solutions, filter
674
-- only the groups with a valid result
675
filterMGResults :: Group.List
676
                -> [(Gdx, Result AllocSolution)]
677
                -> [(Gdx, AllocSolution)]
678
filterMGResults gl=
679
  filter ((/= AllocUnallocable) . Group.allocPolicy .
680
             flip Container.find gl . fst) .
681
  filter (not . null . asSolutions . snd) .
682
  map (\(y, Ok x) -> (y, x)) .
683
  filter (isOk . snd)
684

    
685
-- | Sort multigroup results based on policy and score
686
sortMGResults :: Group.List
687
             -> [(Gdx, AllocSolution)]
688
             -> [(Gdx, AllocSolution)]
689
sortMGResults gl sols =
690
    let extractScore = \(_, _, _, x) -> x
691
        solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
692
                               (extractScore . head . asSolutions) sol)
693
    in sortBy (comparing solScore) sols
694

    
695
-- | Try to allocate an instance on a multi-group cluster.
696
tryMGAlloc :: Group.List           -- ^ The group list
697
           -> Node.List            -- ^ The node list
698
           -> Instance.List        -- ^ The instance list
699
           -> Instance.Instance    -- ^ The instance to allocate
700
           -> Int                  -- ^ Required number of nodes
701
           -> Result AllocSolution -- ^ Possible solution list
702
tryMGAlloc mggl mgnl mgil inst cnt =
703
  let groups = splitCluster mgnl mgil
704
      -- TODO: currently we consider all groups preferred
705
      sols = map (\(gid, (nl, il)) ->
706
                   (gid, genAllocNodes nl cnt >>= tryAlloc nl il inst))
707
             groups::[(Gdx, Result AllocSolution)]
708
      all_msgs = concatMap (solutionDescription mggl) sols
709
      goodSols = filterMGResults mggl sols
710
      sortedSols = sortMGResults mggl goodSols
711
  in if null sortedSols
712
     then Bad $ intercalate ", " all_msgs
713
     else let (final_group, final_sol) = head sortedSols
714
              final_name = Group.name $ Container.find final_group mggl
715
              selmsg = "Selected group: " ++  final_name
716
          in Ok $ final_sol { asLog = selmsg:all_msgs }
717

    
718
-- | Try to relocate an instance on the cluster.
719
tryReloc :: (Monad m) =>
720
            Node.List       -- ^ The node list
721
         -> Instance.List   -- ^ The instance list
722
         -> Idx             -- ^ The index of the instance to move
723
         -> Int             -- ^ The number of nodes required
724
         -> [Ndx]           -- ^ Nodes which should not be used
725
         -> m AllocSolution -- ^ Solution list
726
tryReloc nl il xid 1 ex_idx =
727
    let all_nodes = getOnline nl
728
        inst = Container.find xid il
729
        ex_idx' = Instance.pNode inst:ex_idx
730
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
731
        valid_idxes = map Node.idx valid_nodes
732
        sols1 = foldl' (\cstate x ->
733
                            let em = do
734
                                  (mnl, i, _, _) <-
735
                                      applyMove nl inst (ReplaceSecondary x)
736
                                  return (mnl, i, [Container.find x mnl],
737
                                          compCV mnl)
738
                            in concatAllocs cstate em
739
                       ) emptySolution valid_idxes
740
    in return sols1
741

    
742
tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
743
                                \destinations required (" ++ show reqn ++
744
                                                  "), only one supported"
745

    
746
tryMGReloc :: (Monad m) =>
747
              Group.List      -- ^ The group list
748
           -> Node.List       -- ^ The node list
749
           -> Instance.List   -- ^ The instance list
750
           -> Idx             -- ^ The index of the instance to move
751
           -> Int             -- ^ The number of nodes required
752
           -> [Ndx]           -- ^ Nodes which should not be used
753
           -> m AllocSolution -- ^ Solution list
754
tryMGReloc _ mgnl mgil xid ncount ex_ndx = do
755
  let groups = splitCluster mgnl mgil
756
      -- TODO: we only relocate inside the group for now
757
      inst = Container.find xid mgil
758
  (nl, il) <- case lookup (instancePriGroup mgnl inst) groups of
759
                Nothing -> fail $ "Cannot find group for instance " ++
760
                           Instance.name inst
761
                Just v -> return v
762
  tryReloc nl il xid ncount ex_ndx
763

    
764
-- | Change an instance's secondary node
765
evacInstance :: (Monad m) =>
766
                [Ndx]                      -- ^ Excluded nodes
767
             -> Instance.List              -- ^ The current instance list
768
             -> (Node.List, AllocSolution) -- ^ The current state
769
             -> Idx                        -- ^ The instance to evacuate
770
             -> m (Node.List, AllocSolution)
771
evacInstance ex_ndx il (nl, old_as) idx = do
772
  -- FIXME: hardcoded one node here
773

    
774
  -- Longer explanation: evacuation is currently hardcoded to DRBD
775
  -- instances (which have one secondary); hence, even if the
776
  -- IAllocator protocol can request N nodes for an instance, and all
777
  -- the message parsing/loading pass this, this implementation only
778
  -- supports one; this situation needs to be revisited if we ever
779
  -- support more than one secondary, or if we change the storage
780
  -- model
781
  new_as <- tryReloc nl il idx 1 ex_ndx
782
  case asSolutions new_as of
783
    -- an individual relocation succeeded, we kind of compose the data
784
    -- from the two solutions
785
    csol@(nl', _, _, _):_ ->
786
        return (nl', new_as { asSolutions = csol:asSolutions old_as })
787
    -- this relocation failed, so we fail the entire evac
788
    _ -> fail $ "Can't evacuate instance " ++
789
         Instance.name (Container.find idx il) ++
790
             ": " ++ describeSolution new_as
791

    
792
-- | Try to evacuate a list of nodes.
793
tryEvac :: (Monad m) =>
794
            Node.List       -- ^ The node list
795
         -> Instance.List   -- ^ The instance list
796
         -> [Idx]           -- ^ Instances to be evacuated
797
         -> [Ndx]           -- ^ Restricted nodes (the ones being evacuated)
798
         -> m AllocSolution -- ^ Solution list
799
tryEvac nl il idxs ex_ndx = do
800
  (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptySolution) idxs
801
  return sol
802

    
803
-- | Multi-group evacuation of a list of nodes.
804
tryMGEvac :: (Monad m) =>
805
             Group.List -- ^ The group list
806
          -> Node.List       -- ^ The node list
807
          -> Instance.List   -- ^ The instance list
808
          -> [Ndx]           -- ^ Nodes to be evacuated
809
          -> m AllocSolution -- ^ Solution list
810
tryMGEvac _ nl il ex_ndx =
811
    let ex_nodes = map (`Container.find` nl) ex_ndx
812
        all_insts = nub . concatMap Node.sList $ ex_nodes
813
        gni = splitCluster nl il
814
        -- we run the instance index list through a couple of maps to
815
        -- get finally to a structure of the type [(group index,
816
        -- [instance indices])]
817
        all_insts' = map (\idx ->
818
                              (instancePriGroup nl (Container.find idx il),
819
                               idx)) all_insts
820
        all_insts'' = groupBy ((==) `on` fst) all_insts'
821
        all_insts3 = map (\xs -> let (gdxs, idxs) = unzip xs
822
                                 in (head gdxs, idxs)) all_insts''
823
    in do
824
      -- that done, we now add the per-group nl/il to the tuple
825
      all_insts4 <-
826
          mapM (\(gdx, idxs) -> do
827
                  case lookup gdx gni of
828
                    Nothing -> fail $ "Can't find group index " ++ show gdx
829
                    Just (gnl, gil) -> return (gdx, gnl, gil, idxs))
830
          all_insts3
831
      results <- mapM (\(_, gnl, gil, idxs) -> tryEvac gnl gil idxs ex_ndx)
832
                 all_insts4
833
      let sol = foldl' (\orig_sol group_sol ->
834
                        sumAllocs orig_sol group_sol) emptySolution results
835
      return $ annotateSolution sol
836

    
837
-- | Recursively place instances on the cluster until we're out of space
838
iterateAlloc :: Node.List
839
             -> Instance.List
840
             -> Instance.Instance
841
             -> Int
842
             -> [Instance.Instance]
843
             -> [CStats]
844
             -> Result AllocResult
845
iterateAlloc nl il newinst nreq ixes cstats =
846
      let depth = length ixes
847
          newname = printf "new-%d" depth::String
848
          newidx = length (Container.elems il) + depth
849
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
850
      in case genAllocNodes nl nreq >>= tryAlloc nl il newi2 of
851
           Bad s -> Bad s
852
           Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
853
               case sols3 of
854
                 [] -> Ok (collapseFailures errs, nl, il, ixes, cstats)
855
                 (xnl, xi, _, _):[] ->
856
                     iterateAlloc xnl (Container.add newidx xi il)
857
                                  newinst nreq (xi:ixes)
858
                                  (totalResources xnl:cstats)
859
                 _ -> Bad "Internal error: multiple solutions for single\
860
                          \ allocation"
861

    
862
-- | The core of the tiered allocation mode
863
tieredAlloc :: Node.List
864
            -> Instance.List
865
            -> Instance.Instance
866
            -> Int
867
            -> [Instance.Instance]
868
            -> [CStats]
869
            -> Result AllocResult
870
tieredAlloc nl il newinst nreq ixes cstats =
871
    case iterateAlloc nl il newinst nreq ixes cstats of
872
      Bad s -> Bad s
873
      Ok (errs, nl', il', ixes', cstats') ->
874
          case Instance.shrinkByType newinst . fst . last $
875
               sortBy (comparing snd) errs of
876
            Bad _ -> Ok (errs, nl', il', ixes', cstats')
877
            Ok newinst' ->
878
                tieredAlloc nl' il' newinst' nreq ixes' cstats'
879

    
880
-- | Compute the tiered spec string description from a list of
881
-- allocated instances.
882
tieredSpecMap :: [Instance.Instance]
883
              -> [String]
884
tieredSpecMap trl_ixes =
885
    let fin_trl_ixes = reverse trl_ixes
886
        ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
887
        spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
888
                   ix_byspec
889
    in  map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
890
                             (rspecDsk spec) (rspecCpu spec) cnt) spec_map
891

    
892
-- * Formatting functions
893

    
894
-- | Given the original and final nodes, computes the relocation description.
895
computeMoves :: Instance.Instance -- ^ The instance to be moved
896
             -> String -- ^ The instance name
897
             -> IMove  -- ^ The move being performed
898
             -> String -- ^ New primary
899
             -> String -- ^ New secondary
900
             -> (String, [String])
901
                -- ^ Tuple of moves and commands list; moves is containing
902
                -- either @/f/@ for failover or @/r:name/@ for replace
903
                -- secondary, while the command list holds gnt-instance
904
                -- commands (without that prefix), e.g \"@failover instance1@\"
905
computeMoves i inam mv c d =
906
    case mv of
907
      Failover -> ("f", [mig])
908
      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
909
      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
910
      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
911
      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
912
    where morf = if Instance.running i then "migrate" else "failover"
913
          mig = printf "%s -f %s" morf inam::String
914
          rep n = printf "replace-disks -n %s %s" n inam
915

    
916
-- | Converts a placement to string format.
917
printSolutionLine :: Node.List     -- ^ The node list
918
                  -> Instance.List -- ^ The instance list
919
                  -> Int           -- ^ Maximum node name length
920
                  -> Int           -- ^ Maximum instance name length
921
                  -> Placement     -- ^ The current placement
922
                  -> Int           -- ^ The index of the placement in
923
                                   -- the solution
924
                  -> (String, [String])
925
printSolutionLine nl il nmlen imlen plc pos =
926
    let
927
        pmlen = (2*nmlen + 1)
928
        (i, p, s, mv, c) = plc
929
        inst = Container.find i il
930
        inam = Instance.alias inst
931
        npri = Node.alias $ Container.find p nl
932
        nsec = Node.alias $ Container.find s nl
933
        opri = Node.alias $ Container.find (Instance.pNode inst) nl
934
        osec = Node.alias $ Container.find (Instance.sNode inst) nl
935
        (moves, cmds) =  computeMoves inst inam mv npri nsec
936
        ostr = printf "%s:%s" opri osec::String
937
        nstr = printf "%s:%s" npri nsec::String
938
    in
939
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
940
       pos imlen inam pmlen ostr
941
       pmlen nstr c moves,
942
       cmds)
943

    
944
-- | Return the instance and involved nodes in an instance move.
945
involvedNodes :: Instance.List -> Placement -> [Ndx]
946
involvedNodes il plc =
947
    let (i, np, ns, _, _) = plc
948
        inst = Container.find i il
949
        op = Instance.pNode inst
950
        os = Instance.sNode inst
951
    in nub [np, ns, op, os]
952

    
953
-- | Inner function for splitJobs, that either appends the next job to
954
-- the current jobset, or starts a new jobset.
955
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
956
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
957
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
958
    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
959
    | otherwise = ([n]:cjs, ndx)
960

    
961
-- | Break a list of moves into independent groups. Note that this
962
-- will reverse the order of jobs.
963
splitJobs :: [MoveJob] -> [JobSet]
964
splitJobs = fst . foldl mergeJobs ([], [])
965

    
966
-- | Given a list of commands, prefix them with @gnt-instance@ and
967
-- also beautify the display a little.
968
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
969
formatJob jsn jsl (sn, (_, _, _, cmds)) =
970
    let out =
971
            printf "  echo job %d/%d" jsn sn:
972
            printf "  check":
973
            map ("  gnt-instance " ++) cmds
974
    in if sn == 1
975
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
976
       else out
977

    
978
-- | Given a list of commands, prefix them with @gnt-instance@ and
979
-- also beautify the display a little.
980
formatCmds :: [JobSet] -> String
981
formatCmds =
982
    unlines .
983
    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
984
                             (zip [1..] js)) .
985
    zip [1..]
986

    
987
-- | Print the node list.
988
printNodes :: Node.List -> [String] -> String
989
printNodes nl fs =
990
    let fields = case fs of
991
          [] -> Node.defaultFields
992
          "+":rest -> Node.defaultFields ++ rest
993
          _ -> fs
994
        snl = sortBy (comparing Node.idx) (Container.elems nl)
995
        (header, isnum) = unzip $ map Node.showHeader fields
996
    in unlines . map ((:) ' ' .  intercalate " ") $
997
       formatTable (header:map (Node.list fields) snl) isnum
998

    
999
-- | Print the instance list.
1000
printInsts :: Node.List -> Instance.List -> String
1001
printInsts nl il =
1002
    let sil = sortBy (comparing Instance.idx) (Container.elems il)
1003
        helper inst = [ if Instance.running inst then "R" else " "
1004
                      , Instance.name inst
1005
                      , Container.nameOf nl (Instance.pNode inst)
1006
                      , let sdx = Instance.sNode inst
1007
                        in if sdx == Node.noSecondary
1008
                           then  ""
1009
                           else Container.nameOf nl sdx
1010
                      , printf "%3d" $ Instance.vcpus inst
1011
                      , printf "%5d" $ Instance.mem inst
1012
                      , printf "%5d" $ Instance.dsk inst `div` 1024
1013
                      , printf "%5.3f" lC
1014
                      , printf "%5.3f" lM
1015
                      , printf "%5.3f" lD
1016
                      , printf "%5.3f" lN
1017
                      ]
1018
            where DynUtil lC lM lD lN = Instance.util inst
1019
        header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
1020
                 , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
1021
        isnum = False:False:False:False:repeat True
1022
    in unlines . map ((:) ' ' . intercalate " ") $
1023
       formatTable (header:map helper sil) isnum
1024

    
1025
-- | Shows statistics for a given node list.
1026
printStats :: Node.List -> String
1027
printStats nl =
1028
    let dcvs = compDetailedCV nl
1029
        (weights, names) = unzip detailedCVInfo
1030
        hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
1031
        formatted = map (\(w, header, val) ->
1032
                             printf "%s=%.8f(x%.2f)" header val w::String) hd
1033
    in intercalate ", " formatted
1034

    
1035
-- | Convert a placement into a list of OpCodes (basically a job).
1036
iMoveToJob :: Node.List -> Instance.List
1037
          -> Idx -> IMove -> [OpCodes.OpCode]
1038
iMoveToJob nl il idx move =
1039
    let inst = Container.find idx il
1040
        iname = Instance.name inst
1041
        lookNode  = Just . Container.nameOf nl
1042
        opF = if Instance.running inst
1043
              then OpCodes.OpMigrateInstance iname True False
1044
              else OpCodes.OpFailoverInstance iname False
1045
        opR n = OpCodes.OpReplaceDisks iname (lookNode n)
1046
                OpCodes.ReplaceNewSecondary [] Nothing
1047
    in case move of
1048
         Failover -> [ opF ]
1049
         ReplacePrimary np -> [ opF, opR np, opF ]
1050
         ReplaceSecondary ns -> [ opR ns ]
1051
         ReplaceAndFailover np -> [ opR np, opF ]
1052
         FailoverAndReplace ns -> [ opF, opR ns ]
1053

    
1054
-- * Node group functions
1055

    
1056
-- | Computes the group of an instance
1057
instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
1058
instanceGroup nl i =
1059
  let sidx = Instance.sNode i
1060
      pnode = Container.find (Instance.pNode i) nl
1061
      snode = if sidx == Node.noSecondary
1062
              then pnode
1063
              else Container.find sidx nl
1064
      pgroup = Node.group pnode
1065
      sgroup = Node.group snode
1066
  in if pgroup /= sgroup
1067
     then fail ("Instance placed accross two node groups, primary " ++
1068
                show pgroup ++ ", secondary " ++ show sgroup)
1069
     else return pgroup
1070

    
1071
-- | Computes the group of an instance per the primary node
1072
instancePriGroup :: Node.List -> Instance.Instance -> Gdx
1073
instancePriGroup nl i =
1074
  let pnode = Container.find (Instance.pNode i) nl
1075
  in  Node.group pnode
1076

    
1077
-- | Compute the list of badly allocated instances (split across node
1078
-- groups)
1079
findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
1080
findSplitInstances nl il =
1081
  filter (not . isOk . instanceGroup nl) (Container.elems il)
1082

    
1083
-- | Splits a cluster into the component node groups
1084
splitCluster :: Node.List -> Instance.List ->
1085
                [(Gdx, (Node.List, Instance.List))]
1086
splitCluster nl il =
1087
  let ngroups = Node.computeGroups (Container.elems nl)
1088
  in map (\(guuid, nodes) ->
1089
           let nidxs = map Node.idx nodes
1090
               nodes' = zip nidxs nodes
1091
               instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
1092
           in (guuid, (Container.fromList nodes', instances))) ngroups