Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Cluster.hs @ 63a78055

History | View | Annotate | Download (47.3 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the /Main/ module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009, 2010, 2011 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
    (
31
     -- * Types
32
      AllocSolution(..)
33
    , Table(..)
34
    , CStats(..)
35
    , AllocStats
36
    -- * Generic functions
37
    , totalResources
38
    , computeAllocationDelta
39
    -- * First phase functions
40
    , computeBadItems
41
    -- * Second phase functions
42
    , printSolutionLine
43
    , formatCmds
44
    , involvedNodes
45
    , splitJobs
46
    -- * Display functions
47
    , printNodes
48
    , printInsts
49
    -- * Balacing functions
50
    , checkMove
51
    , doNextBalance
52
    , tryBalance
53
    , compCV
54
    , compDetailedCV
55
    , printStats
56
    , iMoveToJob
57
    -- * IAllocator functions
58
    , genAllocNodes
59
    , tryAlloc
60
    , tryMGAlloc
61
    , tryReloc
62
    , tryMGReloc
63
    , tryEvac
64
    , tryMGEvac
65
    , collapseFailures
66
    -- * Allocation functions
67
    , iterateAlloc
68
    , tieredAlloc
69
    , tieredSpecMap
70
     -- * Node group functions
71
    , instanceGroup
72
    , findSplitInstances
73
    , splitCluster
74
    ) where
75

    
76
import Data.Function (on)
77
import qualified Data.IntSet as IntSet
78
import Data.List
79
import Data.Ord (comparing)
80
import Text.Printf (printf)
81
import Control.Monad
82
import Control.Parallel.Strategies
83

    
84
import qualified Ganeti.HTools.Container as Container
85
import qualified Ganeti.HTools.Instance as Instance
86
import qualified Ganeti.HTools.Node as Node
87
import qualified Ganeti.HTools.Group as Group
88
import Ganeti.HTools.Types
89
import Ganeti.HTools.Utils
90
import qualified Ganeti.OpCodes as OpCodes
91

    
92
-- * Types
93

    
94
-- | Allocation\/relocation solution.
95
data AllocSolution = AllocSolution
96
  { asFailures  :: [FailMode]          -- ^ Failure counts
97
  , asAllocs    :: Int                 -- ^ Good allocation count
98
  , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
99
                                       -- of the list depends on the
100
                                       -- allocation/relocation mode
101
  , asLog       :: [String]            -- ^ A list of informational messages
102
  }
103

    
104
-- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
105
type AllocResult = (FailStats, Node.List, Instance.List,
106
                    [Instance.Instance], [CStats])
107

    
108

    
109
-- | A type denoting the valid allocation mode/pairs.
110
--
111
-- For a one-node allocation, this will be a @Left ['Node.Node']@,
112
-- whereas for a two-node allocation, this will be a @Right
113
-- [('Node.Node', 'Node.Node')]@.
114
type AllocNodes = Either [Ndx] [(Ndx, Ndx)]
115

    
116
-- | The empty solution we start with when computing allocations.
117
emptyAllocSolution :: AllocSolution
118
emptyAllocSolution = AllocSolution { asFailures = [], asAllocs = 0
119
                                   , asSolutions = [], asLog = [] }
120

    
121
-- | The complete state for the balancing solution.
122
data Table = Table Node.List Instance.List Score [Placement]
123
             deriving (Show, Read)
124

    
125
data CStats = CStats { csFmem :: Integer -- ^ Cluster free mem
126
                     , csFdsk :: Integer -- ^ Cluster free disk
127
                     , csAmem :: Integer -- ^ Cluster allocatable mem
128
                     , csAdsk :: Integer -- ^ Cluster allocatable disk
129
                     , csAcpu :: Integer -- ^ Cluster allocatable cpus
130
                     , csMmem :: Integer -- ^ Max node allocatable mem
131
                     , csMdsk :: Integer -- ^ Max node allocatable disk
132
                     , csMcpu :: Integer -- ^ Max node allocatable cpu
133
                     , csImem :: Integer -- ^ Instance used mem
134
                     , csIdsk :: Integer -- ^ Instance used disk
135
                     , csIcpu :: Integer -- ^ Instance used cpu
136
                     , csTmem :: Double  -- ^ Cluster total mem
137
                     , csTdsk :: Double  -- ^ Cluster total disk
138
                     , csTcpu :: Double  -- ^ Cluster total cpus
139
                     , csVcpu :: Integer -- ^ Cluster virtual cpus (if
140
                                         -- node pCpu has been set,
141
                                         -- otherwise -1)
142
                     , csXmem :: Integer -- ^ Unnacounted for mem
143
                     , csNmem :: Integer -- ^ Node own memory
144
                     , csScore :: Score  -- ^ The cluster score
145
                     , csNinst :: Int    -- ^ The total number of instances
146
                     }
147
            deriving (Show, Read)
148

    
149
-- | Currently used, possibly to allocate, unallocable.
150
type AllocStats = (RSpec, RSpec, RSpec)
151

    
152
-- * Utility functions
153

    
154
-- | Verifies the N+1 status and return the affected nodes.
155
verifyN1 :: [Node.Node] -> [Node.Node]
156
verifyN1 = filter Node.failN1
157

    
158
{-| Computes the pair of bad nodes and instances.
159

    
160
The bad node list is computed via a simple 'verifyN1' check, and the
161
bad instance list is the list of primary and secondary instances of
162
those nodes.
163

    
164
-}
165
computeBadItems :: Node.List -> Instance.List ->
166
                   ([Node.Node], [Instance.Instance])
167
computeBadItems nl il =
168
  let bad_nodes = verifyN1 $ getOnline nl
169
      bad_instances = map (`Container.find` il) .
170
                      sort . nub $
171
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
172
  in
173
    (bad_nodes, bad_instances)
174

    
175
-- | Zero-initializer for the CStats type.
176
emptyCStats :: CStats
177
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
178

    
179
-- | Update stats with data from a new node.
180
updateCStats :: CStats -> Node.Node -> CStats
181
updateCStats cs node =
182
    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
183
                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
184
                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
185
                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
186
                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
187
                 csVcpu = x_vcpu,
188
                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
189
               }
190
            = cs
191
        inc_amem = Node.fMem node - Node.rMem node
192
        inc_amem' = if inc_amem > 0 then inc_amem else 0
193
        inc_adsk = Node.availDisk node
194
        inc_imem = truncate (Node.tMem node) - Node.nMem node
195
                   - Node.xMem node - Node.fMem node
196
        inc_icpu = Node.uCpu node
197
        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
198
        inc_vcpu = Node.hiCpu node
199
        inc_acpu = Node.availCpu node
200

    
201
    in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
202
          , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
203
          , csAmem = x_amem + fromIntegral inc_amem'
204
          , csAdsk = x_adsk + fromIntegral inc_adsk
205
          , csAcpu = x_acpu + fromIntegral inc_acpu
206
          , csMmem = max x_mmem (fromIntegral inc_amem')
207
          , csMdsk = max x_mdsk (fromIntegral inc_adsk)
208
          , csMcpu = max x_mcpu (fromIntegral inc_acpu)
209
          , csImem = x_imem + fromIntegral inc_imem
210
          , csIdsk = x_idsk + fromIntegral inc_idsk
211
          , csIcpu = x_icpu + fromIntegral inc_icpu
212
          , csTmem = x_tmem + Node.tMem node
213
          , csTdsk = x_tdsk + Node.tDsk node
214
          , csTcpu = x_tcpu + Node.tCpu node
215
          , csVcpu = x_vcpu + fromIntegral inc_vcpu
216
          , csXmem = x_xmem + fromIntegral (Node.xMem node)
217
          , csNmem = x_nmem + fromIntegral (Node.nMem node)
218
          , csNinst = x_ninst + length (Node.pList node)
219
          }
220

    
221
-- | Compute the total free disk and memory in the cluster.
222
totalResources :: Node.List -> CStats
223
totalResources nl =
224
    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
225
    in cs { csScore = compCV nl }
226

    
227
-- | Compute the delta between two cluster state.
228
--
229
-- This is used when doing allocations, to understand better the
230
-- available cluster resources. The return value is a triple of the
231
-- current used values, the delta that was still allocated, and what
232
-- was left unallocated.
233
computeAllocationDelta :: CStats -> CStats -> AllocStats
234
computeAllocationDelta cini cfin =
235
    let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
236
        CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
237
                csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
238
        rini = RSpec (fromIntegral i_icpu) (fromIntegral i_imem)
239
               (fromIntegral i_idsk)
240
        rfin = RSpec (fromIntegral (f_icpu - i_icpu))
241
               (fromIntegral (f_imem - i_imem))
242
               (fromIntegral (f_idsk - i_idsk))
243
        un_cpu = fromIntegral (v_cpu - f_icpu)::Int
244
        runa = RSpec un_cpu (truncate t_mem - fromIntegral f_imem)
245
               (truncate t_dsk - fromIntegral f_idsk)
246
    in (rini, rfin, runa)
247

    
248
-- | The names and weights of the individual elements in the CV list.
249
detailedCVInfo :: [(Double, String)]
250
detailedCVInfo = [ (1,  "free_mem_cv")
251
                 , (1,  "free_disk_cv")
252
                 , (1,  "n1_cnt")
253
                 , (1,  "reserved_mem_cv")
254
                 , (4,  "offline_all_cnt")
255
                 , (16, "offline_pri_cnt")
256
                 , (1,  "vcpu_ratio_cv")
257
                 , (1,  "cpu_load_cv")
258
                 , (1,  "mem_load_cv")
259
                 , (1,  "disk_load_cv")
260
                 , (1,  "net_load_cv")
261
                 , (2,  "pri_tags_score")
262
                 ]
263

    
264
detailedCVWeights :: [Double]
265
detailedCVWeights = map fst detailedCVInfo
266

    
267
-- | Compute the mem and disk covariance.
268
compDetailedCV :: Node.List -> [Double]
269
compDetailedCV nl =
270
    let
271
        all_nodes = Container.elems nl
272
        (offline, nodes) = partition Node.offline all_nodes
273
        mem_l = map Node.pMem nodes
274
        dsk_l = map Node.pDsk nodes
275
        -- metric: memory covariance
276
        mem_cv = stdDev mem_l
277
        -- metric: disk covariance
278
        dsk_cv = stdDev dsk_l
279
        -- metric: count of instances living on N1 failing nodes
280
        n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
281
                                                   length (Node.pList n)) .
282
                   filter Node.failN1 $ nodes :: Double
283
        res_l = map Node.pRem nodes
284
        -- metric: reserved memory covariance
285
        res_cv = stdDev res_l
286
        -- offline instances metrics
287
        offline_ipri = sum . map (length . Node.pList) $ offline
288
        offline_isec = sum . map (length . Node.sList) $ offline
289
        -- metric: count of instances on offline nodes
290
        off_score = fromIntegral (offline_ipri + offline_isec)::Double
291
        -- metric: count of primary instances on offline nodes (this
292
        -- helps with evacuation/failover of primary instances on
293
        -- 2-node clusters with one node offline)
294
        off_pri_score = fromIntegral offline_ipri::Double
295
        cpu_l = map Node.pCpu nodes
296
        -- metric: covariance of vcpu/pcpu ratio
297
        cpu_cv = stdDev cpu_l
298
        -- metrics: covariance of cpu, memory, disk and network load
299
        (c_load, m_load, d_load, n_load) = unzip4 $
300
            map (\n ->
301
                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
302
                         DynUtil c2 m2 d2 n2 = Node.utilPool n
303
                     in (c1/c2, m1/m2, d1/d2, n1/n2)
304
                ) nodes
305
        -- metric: conflicting instance count
306
        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
307
        pri_tags_score = fromIntegral pri_tags_inst::Double
308
    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
309
       , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
310
       , pri_tags_score ]
311

    
312
-- | Compute the /total/ variance.
313
compCV :: Node.List -> Double
314
compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
315

    
316
-- | Compute online nodes from a 'Node.List'.
317
getOnline :: Node.List -> [Node.Node]
318
getOnline = filter (not . Node.offline) . Container.elems
319

    
320
-- * Balancing functions
321

    
322
-- | Compute best table. Note that the ordering of the arguments is important.
323
compareTables :: Table -> Table -> Table
324
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
325
    if a_cv > b_cv then b else a
326

    
327
-- | Applies an instance move to a given node list and instance.
328
applyMove :: Node.List -> Instance.Instance
329
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
330
-- Failover (f)
331
applyMove nl inst Failover =
332
    let old_pdx = Instance.pNode inst
333
        old_sdx = Instance.sNode inst
334
        old_p = Container.find old_pdx nl
335
        old_s = Container.find old_sdx nl
336
        int_p = Node.removePri old_p inst
337
        int_s = Node.removeSec old_s inst
338
        force_p = Node.offline old_p
339
        new_nl = do -- Maybe monad
340
          new_p <- Node.addPriEx force_p int_s inst
341
          new_s <- Node.addSec int_p inst old_sdx
342
          let new_inst = Instance.setBoth inst old_sdx old_pdx
343
          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
344
                  new_inst, old_sdx, old_pdx)
345
    in new_nl
346

    
347
-- Replace the primary (f:, r:np, f)
348
applyMove nl inst (ReplacePrimary new_pdx) =
349
    let old_pdx = Instance.pNode inst
350
        old_sdx = Instance.sNode inst
351
        old_p = Container.find old_pdx nl
352
        old_s = Container.find old_sdx nl
353
        tgt_n = Container.find new_pdx nl
354
        int_p = Node.removePri old_p inst
355
        int_s = Node.removeSec old_s inst
356
        force_p = Node.offline old_p
357
        new_nl = do -- Maybe monad
358
          -- check that the current secondary can host the instance
359
          -- during the migration
360
          tmp_s <- Node.addPriEx force_p int_s inst
361
          let tmp_s' = Node.removePri tmp_s inst
362
          new_p <- Node.addPriEx force_p tgt_n inst
363
          new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
364
          let new_inst = Instance.setPri inst new_pdx
365
          return (Container.add new_pdx new_p $
366
                  Container.addTwo old_pdx int_p old_sdx new_s nl,
367
                  new_inst, new_pdx, old_sdx)
368
    in new_nl
369

    
370
-- Replace the secondary (r:ns)
371
applyMove nl inst (ReplaceSecondary new_sdx) =
372
    let old_pdx = Instance.pNode inst
373
        old_sdx = Instance.sNode inst
374
        old_s = Container.find old_sdx nl
375
        tgt_n = Container.find new_sdx nl
376
        int_s = Node.removeSec old_s inst
377
        force_s = Node.offline old_s
378
        new_inst = Instance.setSec inst new_sdx
379
        new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
380
                 \new_s -> return (Container.addTwo new_sdx
381
                                   new_s old_sdx int_s nl,
382
                                   new_inst, old_pdx, new_sdx)
383
    in new_nl
384

    
385
-- Replace the secondary and failover (r:np, f)
386
applyMove nl inst (ReplaceAndFailover new_pdx) =
387
    let old_pdx = Instance.pNode inst
388
        old_sdx = Instance.sNode inst
389
        old_p = Container.find old_pdx nl
390
        old_s = Container.find old_sdx nl
391
        tgt_n = Container.find new_pdx nl
392
        int_p = Node.removePri old_p inst
393
        int_s = Node.removeSec old_s inst
394
        force_s = Node.offline old_s
395
        new_nl = do -- Maybe monad
396
          new_p <- Node.addPri tgt_n inst
397
          new_s <- Node.addSecEx force_s int_p inst new_pdx
398
          let new_inst = Instance.setBoth inst new_pdx old_pdx
399
          return (Container.add new_pdx new_p $
400
                  Container.addTwo old_pdx new_s old_sdx int_s nl,
401
                  new_inst, new_pdx, old_pdx)
402
    in new_nl
403

    
404
-- Failver and replace the secondary (f, r:ns)
405
applyMove nl inst (FailoverAndReplace new_sdx) =
406
    let old_pdx = Instance.pNode inst
407
        old_sdx = Instance.sNode inst
408
        old_p = Container.find old_pdx nl
409
        old_s = Container.find old_sdx nl
410
        tgt_n = Container.find new_sdx nl
411
        int_p = Node.removePri old_p inst
412
        int_s = Node.removeSec old_s inst
413
        force_p = Node.offline old_p
414
        new_nl = do -- Maybe monad
415
          new_p <- Node.addPriEx force_p int_s inst
416
          new_s <- Node.addSecEx force_p tgt_n inst old_sdx
417
          let new_inst = Instance.setBoth inst old_sdx new_sdx
418
          return (Container.add new_sdx new_s $
419
                  Container.addTwo old_sdx new_p old_pdx int_p nl,
420
                  new_inst, old_sdx, new_sdx)
421
    in new_nl
422

    
423
-- | Tries to allocate an instance on one given node.
424
allocateOnSingle :: Node.List -> Instance.Instance -> Ndx
425
                 -> OpResult Node.AllocElement
426
allocateOnSingle nl inst new_pdx =
427
    let p = Container.find new_pdx nl
428
        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
429
    in  Node.addPri p inst >>= \new_p -> do
430
      let new_nl = Container.add new_pdx new_p nl
431
          new_score = compCV nl
432
      return (new_nl, new_inst, [new_p], new_score)
433

    
434
-- | Tries to allocate an instance on a given pair of nodes.
435
allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx
436
               -> OpResult Node.AllocElement
437
allocateOnPair nl inst new_pdx new_sdx =
438
    let tgt_p = Container.find new_pdx nl
439
        tgt_s = Container.find new_sdx nl
440
    in do
441
      new_p <- Node.addPri tgt_p inst
442
      new_s <- Node.addSec tgt_s inst new_pdx
443
      let new_inst = Instance.setBoth inst new_pdx new_sdx
444
          new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
445
      return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
446

    
447
-- | Tries to perform an instance move and returns the best table
448
-- between the original one and the new one.
449
checkSingleStep :: Table -- ^ The original table
450
                -> Instance.Instance -- ^ The instance to move
451
                -> Table -- ^ The current best table
452
                -> IMove -- ^ The move to apply
453
                -> Table -- ^ The final best table
454
checkSingleStep ini_tbl target cur_tbl move =
455
    let
456
        Table ini_nl ini_il _ ini_plc = ini_tbl
457
        tmp_resu = applyMove ini_nl target move
458
    in
459
      case tmp_resu of
460
        OpFail _ -> cur_tbl
461
        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
462
            let tgt_idx = Instance.idx target
463
                upd_cvar = compCV upd_nl
464
                upd_il = Container.add tgt_idx new_inst ini_il
465
                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
466
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
467
            in
468
              compareTables cur_tbl upd_tbl
469

    
470
-- | Given the status of the current secondary as a valid new node and
471
-- the current candidate target node, generate the possible moves for
472
-- a instance.
473
possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
474
              -> Bool      -- ^ Whether we can change the primary node
475
              -> Ndx       -- ^ Target node candidate
476
              -> [IMove]   -- ^ List of valid result moves
477

    
478
possibleMoves _ False tdx =
479
    [ReplaceSecondary tdx]
480

    
481
possibleMoves True True tdx =
482
    [ReplaceSecondary tdx,
483
     ReplaceAndFailover tdx,
484
     ReplacePrimary tdx,
485
     FailoverAndReplace tdx]
486

    
487
possibleMoves False True tdx =
488
    [ReplaceSecondary tdx,
489
     ReplaceAndFailover tdx]
490

    
491
-- | Compute the best move for a given instance.
492
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
493
                  -> Bool              -- ^ Whether disk moves are allowed
494
                  -> Bool              -- ^ Whether instance moves are allowed
495
                  -> Table             -- ^ Original table
496
                  -> Instance.Instance -- ^ Instance to move
497
                  -> Table             -- ^ Best new table for this instance
498
checkInstanceMove nodes_idx disk_moves inst_moves ini_tbl target =
499
    let
500
        opdx = Instance.pNode target
501
        osdx = Instance.sNode target
502
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
503
        use_secondary = elem osdx nodes_idx && inst_moves
504
        aft_failover = if use_secondary -- if allowed to failover
505
                       then checkSingleStep ini_tbl target ini_tbl Failover
506
                       else ini_tbl
507
        all_moves = if disk_moves
508
                    then concatMap
509
                         (possibleMoves use_secondary inst_moves) nodes
510
                    else []
511
    in
512
      -- iterate over the possible nodes for this instance
513
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
514

    
515
-- | Compute the best next move.
516
checkMove :: [Ndx]               -- ^ Allowed target node indices
517
          -> Bool                -- ^ Whether disk moves are allowed
518
          -> Bool                -- ^ Whether instance moves are allowed
519
          -> Table               -- ^ The current solution
520
          -> [Instance.Instance] -- ^ List of instances still to move
521
          -> Table               -- ^ The new solution
522
checkMove nodes_idx disk_moves inst_moves ini_tbl victims =
523
    let Table _ _ _ ini_plc = ini_tbl
524
        -- we're using rwhnf from the Control.Parallel.Strategies
525
        -- package; we don't need to use rnf as that would force too
526
        -- much evaluation in single-threaded cases, and in
527
        -- multi-threaded case the weak head normal form is enough to
528
        -- spark the evaluation
529
        tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves
530
                               inst_moves ini_tbl)
531
                 victims
532
        -- iterate over all instances, computing the best move
533
        best_tbl = foldl' compareTables ini_tbl tables
534
        Table _ _ _ best_plc = best_tbl
535
    in if length best_plc == length ini_plc
536
       then ini_tbl -- no advancement
537
       else best_tbl
538

    
539
-- | Check if we are allowed to go deeper in the balancing.
540
doNextBalance :: Table     -- ^ The starting table
541
              -> Int       -- ^ Remaining length
542
              -> Score     -- ^ Score at which to stop
543
              -> Bool      -- ^ The resulting table and commands
544
doNextBalance ini_tbl max_rounds min_score =
545
    let Table _ _ ini_cv ini_plc = ini_tbl
546
        ini_plc_len = length ini_plc
547
    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
548

    
549
-- | Run a balance move.
550
tryBalance :: Table       -- ^ The starting table
551
           -> Bool        -- ^ Allow disk moves
552
           -> Bool        -- ^ Allow instance moves
553
           -> Bool        -- ^ Only evacuate moves
554
           -> Score       -- ^ Min gain threshold
555
           -> Score       -- ^ Min gain
556
           -> Maybe Table -- ^ The resulting table and commands
557
tryBalance ini_tbl disk_moves inst_moves evac_mode mg_limit min_gain =
558
    let Table ini_nl ini_il ini_cv _ = ini_tbl
559
        all_inst = Container.elems ini_il
560
        all_inst' = if evac_mode
561
                    then let bad_nodes = map Node.idx . filter Node.offline $
562
                                         Container.elems ini_nl
563
                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
564
                                          Instance.pNode e `elem` bad_nodes)
565
                            all_inst
566
                    else all_inst
567
        reloc_inst = filter Instance.movable all_inst'
568
        node_idx = map Node.idx . filter (not . Node.offline) $
569
                   Container.elems ini_nl
570
        fin_tbl = checkMove node_idx disk_moves inst_moves ini_tbl reloc_inst
571
        (Table _ _ fin_cv _) = fin_tbl
572
    in
573
      if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
574
      then Just fin_tbl -- this round made success, return the new table
575
      else Nothing
576

    
577
-- * Allocation functions
578

    
579
-- | Build failure stats out of a list of failures.
580
collapseFailures :: [FailMode] -> FailStats
581
collapseFailures flst =
582
    map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
583

    
584
-- | Update current Allocation solution and failure stats with new
585
-- elements.
586
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
587
concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
588

    
589
concatAllocs as (OpGood ns@(_, _, _, nscore)) =
590
    let -- Choose the old or new solution, based on the cluster score
591
        cntok = asAllocs as
592
        osols = asSolutions as
593
        nsols = case osols of
594
                  [] -> [ns]
595
                  (_, _, _, oscore):[] ->
596
                      if oscore < nscore
597
                      then osols
598
                      else [ns]
599
                  -- FIXME: here we simply concat to lists with more
600
                  -- than one element; we should instead abort, since
601
                  -- this is not a valid usage of this function
602
                  xs -> ns:xs
603
        nsuc = cntok + 1
604
    -- Note: we force evaluation of nsols here in order to keep the
605
    -- memory profile low - we know that we will need nsols for sure
606
    -- in the next cycle, so we force evaluation of nsols, since the
607
    -- foldl' in the caller will only evaluate the tuple, but not the
608
    -- elements of the tuple
609
    in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
610

    
611
-- | Sums two allocation solutions (e.g. for two separate node groups).
612
sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
613
sumAllocs (AllocSolution af aa as al) (AllocSolution bf ba bs bl) =
614
    AllocSolution (af ++ bf) (aa + ba) (as ++ bs) (al ++ bl)
615

    
616
-- | Given a solution, generates a reasonable description for it.
617
describeSolution :: AllocSolution -> String
618
describeSolution as =
619
  let fcnt = asFailures as
620
      sols = asSolutions as
621
      freasons =
622
        intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
623
        filter ((> 0) . snd) . collapseFailures $ fcnt
624
  in if null sols
625
     then "No valid allocation solutions, failure reasons: " ++
626
          (if null fcnt
627
           then "unknown reasons"
628
           else freasons)
629
     else let (_, _, nodes, cv) = head sols
630
          in printf ("score: %.8f, successes %d, failures %d (%s)" ++
631
                     " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
632
             (intercalate "/" . map Node.name $ nodes)
633

    
634
-- | Annotates a solution with the appropriate string.
635
annotateSolution :: AllocSolution -> AllocSolution
636
annotateSolution as = as { asLog = describeSolution as : asLog as }
637

    
638
-- | Generate the valid node allocation singles or pairs for a new instance.
639
genAllocNodes :: Group.List        -- ^ Group list
640
              -> Node.List         -- ^ The node map
641
              -> Int               -- ^ The number of nodes required
642
              -> Bool              -- ^ Whether to drop or not
643
                                   -- unallocable nodes
644
              -> Result AllocNodes -- ^ The (monadic) result
645
genAllocNodes gl nl count drop_unalloc =
646
    let filter_fn = if drop_unalloc
647
                    then filter (Group.isAllocable .
648
                                 flip Container.find gl . Node.group)
649
                    else id
650
        all_nodes = filter_fn $ getOnline nl
651
        all_pairs = liftM2 (,) all_nodes all_nodes
652
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y &&
653
                                      Node.group x == Node.group y) all_pairs
654
    in case count of
655
         1 -> Ok (Left (map Node.idx all_nodes))
656
         2 -> Ok (Right (map (\(p, s) -> (Node.idx p, Node.idx s)) ok_pairs))
657
         _ -> Bad "Unsupported number of nodes, only one or two  supported"
658

    
659
-- | Try to allocate an instance on the cluster.
660
tryAlloc :: (Monad m) =>
661
            Node.List         -- ^ The node list
662
         -> Instance.List     -- ^ The instance list
663
         -> Instance.Instance -- ^ The instance to allocate
664
         -> AllocNodes        -- ^ The allocation targets
665
         -> m AllocSolution   -- ^ Possible solution list
666
tryAlloc nl _ inst (Right ok_pairs) =
667
    let sols = foldl' (\cstate (p, s) ->
668
                           concatAllocs cstate $ allocateOnPair nl inst p s
669
                      ) emptyAllocSolution ok_pairs
670

    
671
    in if null ok_pairs -- means we have just one node
672
       then fail "Not enough online nodes"
673
       else return $ annotateSolution sols
674

    
675
tryAlloc nl _ inst (Left all_nodes) =
676
    let sols = foldl' (\cstate ->
677
                           concatAllocs cstate . allocateOnSingle nl inst
678
                      ) emptyAllocSolution all_nodes
679
    in if null all_nodes
680
       then fail "No online nodes"
681
       else return $ annotateSolution sols
682

    
683
-- | Given a group/result, describe it as a nice (list of) messages.
684
solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
685
solutionDescription gl (groupId, result) =
686
  case result of
687
    Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution)
688
    Bad message -> [printf "Group %s: error %s" gname message]
689
  where grp = Container.find groupId gl
690
        gname = Group.name grp
691
        pol = apolToString (Group.allocPolicy grp)
692

    
693
-- | From a list of possibly bad and possibly empty solutions, filter
694
-- only the groups with a valid result. Note that the result will be
695
-- reversed compared to the original list.
696
filterMGResults :: Group.List
697
                -> [(Gdx, Result AllocSolution)]
698
                -> [(Gdx, AllocSolution)]
699
filterMGResults gl = foldl' fn []
700
    where unallocable = not . Group.isAllocable . flip Container.find gl
701
          fn accu (gdx, rasol) =
702
              case rasol of
703
                Bad _ -> accu
704
                Ok sol | null (asSolutions sol) -> accu
705
                       | unallocable gdx -> accu
706
                       | otherwise -> (gdx, sol):accu
707

    
708
-- | Sort multigroup results based on policy and score.
709
sortMGResults :: Group.List
710
             -> [(Gdx, AllocSolution)]
711
             -> [(Gdx, AllocSolution)]
712
sortMGResults gl sols =
713
    let extractScore (_, _, _, x) = x
714
        solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
715
                               (extractScore . head . asSolutions) sol)
716
    in sortBy (comparing solScore) sols
717

    
718
-- | Try to allocate an instance on a multi-group cluster.
719
tryMGAlloc :: Group.List           -- ^ The group list
720
           -> Node.List            -- ^ The node list
721
           -> Instance.List        -- ^ The instance list
722
           -> Instance.Instance    -- ^ The instance to allocate
723
           -> Int                  -- ^ Required number of nodes
724
           -> Result AllocSolution -- ^ Possible solution list
725
tryMGAlloc mggl mgnl mgil inst cnt =
726
  let groups = splitCluster mgnl mgil
727
      sols = map (\(gid, (nl, il)) ->
728
                   (gid, genAllocNodes mggl nl cnt False >>=
729
                       tryAlloc nl il inst))
730
             groups::[(Gdx, Result AllocSolution)]
731
      all_msgs = concatMap (solutionDescription mggl) sols
732
      goodSols = filterMGResults mggl sols
733
      sortedSols = sortMGResults mggl goodSols
734
  in if null sortedSols
735
     then Bad $ intercalate ", " all_msgs
736
     else let (final_group, final_sol) = head sortedSols
737
              final_name = Group.name $ Container.find final_group mggl
738
              selmsg = "Selected group: " ++  final_name
739
          in Ok $ final_sol { asLog = selmsg:all_msgs }
740

    
741
-- | Try to relocate an instance on the cluster.
742
tryReloc :: (Monad m) =>
743
            Node.List       -- ^ The node list
744
         -> Instance.List   -- ^ The instance list
745
         -> Idx             -- ^ The index of the instance to move
746
         -> Int             -- ^ The number of nodes required
747
         -> [Ndx]           -- ^ Nodes which should not be used
748
         -> m AllocSolution -- ^ Solution list
749
tryReloc nl il xid 1 ex_idx =
750
    let all_nodes = getOnline nl
751
        inst = Container.find xid il
752
        ex_idx' = Instance.pNode inst:ex_idx
753
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
754
        valid_idxes = map Node.idx valid_nodes
755
        sols1 = foldl' (\cstate x ->
756
                            let em = do
757
                                  (mnl, i, _, _) <-
758
                                      applyMove nl inst (ReplaceSecondary x)
759
                                  return (mnl, i, [Container.find x mnl],
760
                                          compCV mnl)
761
                            in concatAllocs cstate em
762
                       ) emptyAllocSolution valid_idxes
763
    in return sols1
764

    
765
tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
766
                                \destinations required (" ++ show reqn ++
767
                                                  "), only one supported"
768

    
769
tryMGReloc :: (Monad m) =>
770
              Group.List      -- ^ The group list
771
           -> Node.List       -- ^ The node list
772
           -> Instance.List   -- ^ The instance list
773
           -> Idx             -- ^ The index of the instance to move
774
           -> Int             -- ^ The number of nodes required
775
           -> [Ndx]           -- ^ Nodes which should not be used
776
           -> m AllocSolution -- ^ Solution list
777
tryMGReloc _ mgnl mgil xid ncount ex_ndx = do
778
  let groups = splitCluster mgnl mgil
779
      -- TODO: we only relocate inside the group for now
780
      inst = Container.find xid mgil
781
  (nl, il) <- case lookup (instancePriGroup mgnl inst) groups of
782
                Nothing -> fail $ "Cannot find group for instance " ++
783
                           Instance.name inst
784
                Just v -> return v
785
  tryReloc nl il xid ncount ex_ndx
786

    
787
-- | Change an instance's secondary node.
788
evacInstance :: (Monad m) =>
789
                [Ndx]                      -- ^ Excluded nodes
790
             -> Instance.List              -- ^ The current instance list
791
             -> (Node.List, AllocSolution) -- ^ The current state
792
             -> Idx                        -- ^ The instance to evacuate
793
             -> m (Node.List, AllocSolution)
794
evacInstance ex_ndx il (nl, old_as) idx = do
795
  -- FIXME: hardcoded one node here
796

    
797
  -- Longer explanation: evacuation is currently hardcoded to DRBD
798
  -- instances (which have one secondary); hence, even if the
799
  -- IAllocator protocol can request N nodes for an instance, and all
800
  -- the message parsing/loading pass this, this implementation only
801
  -- supports one; this situation needs to be revisited if we ever
802
  -- support more than one secondary, or if we change the storage
803
  -- model
804
  new_as <- tryReloc nl il idx 1 ex_ndx
805
  case asSolutions new_as of
806
    -- an individual relocation succeeded, we kind of compose the data
807
    -- from the two solutions
808
    csol@(nl', _, _, _):_ ->
809
        return (nl', new_as { asSolutions = csol:asSolutions old_as })
810
    -- this relocation failed, so we fail the entire evac
811
    _ -> fail $ "Can't evacuate instance " ++
812
         Instance.name (Container.find idx il) ++
813
             ": " ++ describeSolution new_as
814

    
815
-- | Try to evacuate a list of nodes.
816
tryEvac :: (Monad m) =>
817
            Node.List       -- ^ The node list
818
         -> Instance.List   -- ^ The instance list
819
         -> [Idx]           -- ^ Instances to be evacuated
820
         -> [Ndx]           -- ^ Restricted nodes (the ones being evacuated)
821
         -> m AllocSolution -- ^ Solution list
822
tryEvac nl il idxs ex_ndx = do
823
  (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptyAllocSolution) idxs
824
  return sol
825

    
826
-- | Multi-group evacuation of a list of nodes.
827
tryMGEvac :: (Monad m) =>
828
             Group.List -- ^ The group list
829
          -> Node.List       -- ^ The node list
830
          -> Instance.List   -- ^ The instance list
831
          -> [Ndx]           -- ^ Nodes to be evacuated
832
          -> m AllocSolution -- ^ Solution list
833
tryMGEvac _ nl il ex_ndx =
834
    let ex_nodes = map (`Container.find` nl) ex_ndx
835
        all_insts = nub . concatMap Node.sList $ ex_nodes
836
        all_insts' = associateIdxs all_insts $ splitCluster nl il
837
    in do
838
      results <- mapM (\(_, (gnl, gil, idxs)) -> tryEvac gnl gil idxs ex_ndx)
839
                 all_insts'
840
      let sol = foldl' sumAllocs emptyAllocSolution results
841
      return $ annotateSolution sol
842

    
843
-- | Recursively place instances on the cluster until we're out of space.
844
iterateAlloc :: Node.List
845
             -> Instance.List
846
             -> Instance.Instance
847
             -> AllocNodes
848
             -> [Instance.Instance]
849
             -> [CStats]
850
             -> Result AllocResult
851
iterateAlloc nl il newinst allocnodes ixes cstats =
852
      let depth = length ixes
853
          newname = printf "new-%d" depth::String
854
          newidx = length (Container.elems il) + depth
855
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
856
      in case tryAlloc nl il newi2 allocnodes of
857
           Bad s -> Bad s
858
           Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
859
               case sols3 of
860
                 [] -> Ok (collapseFailures errs, nl, il, ixes, cstats)
861
                 (xnl, xi, _, _):[] ->
862
                     iterateAlloc xnl (Container.add newidx xi il)
863
                                  newinst allocnodes (xi:ixes)
864
                                  (totalResources xnl:cstats)
865
                 _ -> Bad "Internal error: multiple solutions for single\
866
                          \ allocation"
867

    
868
-- | The core of the tiered allocation mode.
869
tieredAlloc :: Node.List
870
            -> Instance.List
871
            -> Instance.Instance
872
            -> AllocNodes
873
            -> [Instance.Instance]
874
            -> [CStats]
875
            -> Result AllocResult
876
tieredAlloc nl il newinst allocnodes ixes cstats =
877
    case iterateAlloc nl il newinst allocnodes ixes cstats of
878
      Bad s -> Bad s
879
      Ok (errs, nl', il', ixes', cstats') ->
880
          case Instance.shrinkByType newinst . fst . last $
881
               sortBy (comparing snd) errs of
882
            Bad _ -> Ok (errs, nl', il', ixes', cstats')
883
            Ok newinst' ->
884
                tieredAlloc nl' il' newinst' allocnodes ixes' cstats'
885

    
886
-- | Compute the tiered spec string description from a list of
887
-- allocated instances.
888
tieredSpecMap :: [Instance.Instance]
889
              -> [String]
890
tieredSpecMap trl_ixes =
891
    let fin_trl_ixes = reverse trl_ixes
892
        ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
893
        spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
894
                   ix_byspec
895
    in  map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
896
                             (rspecDsk spec) (rspecCpu spec) cnt) spec_map
897

    
898
-- * Formatting functions
899

    
900
-- | Given the original and final nodes, computes the relocation description.
901
computeMoves :: Instance.Instance -- ^ The instance to be moved
902
             -> String -- ^ The instance name
903
             -> IMove  -- ^ The move being performed
904
             -> String -- ^ New primary
905
             -> String -- ^ New secondary
906
             -> (String, [String])
907
                -- ^ Tuple of moves and commands list; moves is containing
908
                -- either @/f/@ for failover or @/r:name/@ for replace
909
                -- secondary, while the command list holds gnt-instance
910
                -- commands (without that prefix), e.g \"@failover instance1@\"
911
computeMoves i inam mv c d =
912
    case mv of
913
      Failover -> ("f", [mig])
914
      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
915
      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
916
      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
917
      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
918
    where morf = if Instance.running i then "migrate" else "failover"
919
          mig = printf "%s -f %s" morf inam::String
920
          rep n = printf "replace-disks -n %s %s" n inam
921

    
922
-- | Converts a placement to string format.
923
printSolutionLine :: Node.List     -- ^ The node list
924
                  -> Instance.List -- ^ The instance list
925
                  -> Int           -- ^ Maximum node name length
926
                  -> Int           -- ^ Maximum instance name length
927
                  -> Placement     -- ^ The current placement
928
                  -> Int           -- ^ The index of the placement in
929
                                   -- the solution
930
                  -> (String, [String])
931
printSolutionLine nl il nmlen imlen plc pos =
932
    let
933
        pmlen = (2*nmlen + 1)
934
        (i, p, s, mv, c) = plc
935
        inst = Container.find i il
936
        inam = Instance.alias inst
937
        npri = Node.alias $ Container.find p nl
938
        nsec = Node.alias $ Container.find s nl
939
        opri = Node.alias $ Container.find (Instance.pNode inst) nl
940
        osec = Node.alias $ Container.find (Instance.sNode inst) nl
941
        (moves, cmds) =  computeMoves inst inam mv npri nsec
942
        ostr = printf "%s:%s" opri osec::String
943
        nstr = printf "%s:%s" npri nsec::String
944
    in
945
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
946
       pos imlen inam pmlen ostr
947
       pmlen nstr c moves,
948
       cmds)
949

    
950
-- | Return the instance and involved nodes in an instance move.
951
involvedNodes :: Instance.List -> Placement -> [Ndx]
952
involvedNodes il plc =
953
    let (i, np, ns, _, _) = plc
954
        inst = Container.find i il
955
        op = Instance.pNode inst
956
        os = Instance.sNode inst
957
    in nub [np, ns, op, os]
958

    
959
-- | Inner function for splitJobs, that either appends the next job to
960
-- the current jobset, or starts a new jobset.
961
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
962
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
963
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
964
    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
965
    | otherwise = ([n]:cjs, ndx)
966

    
967
-- | Break a list of moves into independent groups. Note that this
968
-- will reverse the order of jobs.
969
splitJobs :: [MoveJob] -> [JobSet]
970
splitJobs = fst . foldl mergeJobs ([], [])
971

    
972
-- | Given a list of commands, prefix them with @gnt-instance@ and
973
-- also beautify the display a little.
974
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
975
formatJob jsn jsl (sn, (_, _, _, cmds)) =
976
    let out =
977
            printf "  echo job %d/%d" jsn sn:
978
            printf "  check":
979
            map ("  gnt-instance " ++) cmds
980
    in if sn == 1
981
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
982
       else out
983

    
984
-- | Given a list of commands, prefix them with @gnt-instance@ and
985
-- also beautify the display a little.
986
formatCmds :: [JobSet] -> String
987
formatCmds =
988
    unlines .
989
    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
990
                             (zip [1..] js)) .
991
    zip [1..]
992

    
993
-- | Print the node list.
994
printNodes :: Node.List -> [String] -> String
995
printNodes nl fs =
996
    let fields = case fs of
997
          [] -> Node.defaultFields
998
          "+":rest -> Node.defaultFields ++ rest
999
          _ -> fs
1000
        snl = sortBy (comparing Node.idx) (Container.elems nl)
1001
        (header, isnum) = unzip $ map Node.showHeader fields
1002
    in unlines . map ((:) ' ' .  intercalate " ") $
1003
       formatTable (header:map (Node.list fields) snl) isnum
1004

    
1005
-- | Print the instance list.
1006
printInsts :: Node.List -> Instance.List -> String
1007
printInsts nl il =
1008
    let sil = sortBy (comparing Instance.idx) (Container.elems il)
1009
        helper inst = [ if Instance.running inst then "R" else " "
1010
                      , Instance.name inst
1011
                      , Container.nameOf nl (Instance.pNode inst)
1012
                      , let sdx = Instance.sNode inst
1013
                        in if sdx == Node.noSecondary
1014
                           then  ""
1015
                           else Container.nameOf nl sdx
1016
                      , if Instance.autoBalance inst then "Y" else "N"
1017
                      , printf "%3d" $ Instance.vcpus inst
1018
                      , printf "%5d" $ Instance.mem inst
1019
                      , printf "%5d" $ Instance.dsk inst `div` 1024
1020
                      , printf "%5.3f" lC
1021
                      , printf "%5.3f" lM
1022
                      , printf "%5.3f" lD
1023
                      , printf "%5.3f" lN
1024
                      ]
1025
            where DynUtil lC lM lD lN = Instance.util inst
1026
        header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
1027
                 , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
1028
        isnum = False:False:False:False:False:repeat True
1029
    in unlines . map ((:) ' ' . intercalate " ") $
1030
       formatTable (header:map helper sil) isnum
1031

    
1032
-- | Shows statistics for a given node list.
1033
printStats :: Node.List -> String
1034
printStats nl =
1035
    let dcvs = compDetailedCV nl
1036
        (weights, names) = unzip detailedCVInfo
1037
        hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
1038
        formatted = map (\(w, header, val) ->
1039
                             printf "%s=%.8f(x%.2f)" header val w::String) hd
1040
    in intercalate ", " formatted
1041

    
1042
-- | Convert a placement into a list of OpCodes (basically a job).
1043
iMoveToJob :: Node.List -> Instance.List
1044
          -> Idx -> IMove -> [OpCodes.OpCode]
1045
iMoveToJob nl il idx move =
1046
    let inst = Container.find idx il
1047
        iname = Instance.name inst
1048
        lookNode  = Just . Container.nameOf nl
1049
        opF = OpCodes.OpInstanceMigrate iname True False True
1050
        opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
1051
                OpCodes.ReplaceNewSecondary [] Nothing
1052
    in case move of
1053
         Failover -> [ opF ]
1054
         ReplacePrimary np -> [ opF, opR np, opF ]
1055
         ReplaceSecondary ns -> [ opR ns ]
1056
         ReplaceAndFailover np -> [ opR np, opF ]
1057
         FailoverAndReplace ns -> [ opF, opR ns ]
1058

    
1059
-- * Node group functions
1060

    
1061
-- | Computes the group of an instance.
1062
instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
1063
instanceGroup nl i =
1064
  let sidx = Instance.sNode i
1065
      pnode = Container.find (Instance.pNode i) nl
1066
      snode = if sidx == Node.noSecondary
1067
              then pnode
1068
              else Container.find sidx nl
1069
      pgroup = Node.group pnode
1070
      sgroup = Node.group snode
1071
  in if pgroup /= sgroup
1072
     then fail ("Instance placed accross two node groups, primary " ++
1073
                show pgroup ++ ", secondary " ++ show sgroup)
1074
     else return pgroup
1075

    
1076
-- | Computes the group of an instance per the primary node.
1077
instancePriGroup :: Node.List -> Instance.Instance -> Gdx
1078
instancePriGroup nl i =
1079
  let pnode = Container.find (Instance.pNode i) nl
1080
  in  Node.group pnode
1081

    
1082
-- | Compute the list of badly allocated instances (split across node
1083
-- groups).
1084
findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
1085
findSplitInstances nl =
1086
  filter (not . isOk . instanceGroup nl) . Container.elems
1087

    
1088
-- | Splits a cluster into the component node groups.
1089
splitCluster :: Node.List -> Instance.List ->
1090
                [(Gdx, (Node.List, Instance.List))]
1091
splitCluster nl il =
1092
  let ngroups = Node.computeGroups (Container.elems nl)
1093
  in map (\(guuid, nodes) ->
1094
           let nidxs = map Node.idx nodes
1095
               nodes' = zip nidxs nodes
1096
               instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
1097
           in (guuid, (Container.fromList nodes', instances))) ngroups
1098

    
1099
-- | Split a global instance index map into per-group, and associate
1100
-- it with the group/node/instance lists.
1101
associateIdxs :: [Idx] -- ^ Instance indices to be split/associated
1102
              -> [(Gdx, (Node.List, Instance.List))]        -- ^ Input groups
1103
              -> [(Gdx, (Node.List, Instance.List, [Idx]))] -- ^ Result
1104
associateIdxs idxs =
1105
    map (\(gdx, (nl, il)) ->
1106
             (gdx, (nl, il, filter (`Container.member` il) idxs)))
1107

    
1108
-- | Compute the list of nodes that are to be evacuated, given a list
1109
-- of instances and an evacuation mode.
1110
nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list
1111
                -> EvacMode      -- ^ The evacuation mode we're using
1112
                -> [Idx]         -- ^ List of instance indices being evacuated
1113
                -> IntSet.IntSet -- ^ Set of node indices
1114
nodesToEvacuate il mode =
1115
    IntSet.delete Node.noSecondary .
1116
    foldl' (\ns idx ->
1117
                let i = Container.find idx il
1118
                    pdx = Instance.pNode i
1119
                    sdx = Instance.sNode i
1120
                    dt = Instance.diskTemplate i
1121
                    withSecondary = case dt of
1122
                                      DTDrbd8 -> IntSet.insert sdx ns
1123
                                      _ -> ns
1124
                in case mode of
1125
                     ChangePrimary   -> IntSet.insert pdx ns
1126
                     ChangeSecondary -> withSecondary
1127
                     ChangeAll       -> IntSet.insert pdx withSecondary
1128
           ) IntSet.empty