Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Cluster.hs @ 1f4ae205

History | View | Annotate | Download (46.1 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the /Main/ module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009, 2010, 2011 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
    (
31
     -- * Types
32
      AllocSolution(..)
33
    , Table(..)
34
    , CStats(..)
35
    , AllocStats
36
    -- * Generic functions
37
    , totalResources
38
    , computeAllocationDelta
39
    -- * First phase functions
40
    , computeBadItems
41
    -- * Second phase functions
42
    , printSolutionLine
43
    , formatCmds
44
    , involvedNodes
45
    , splitJobs
46
    -- * Display functions
47
    , printNodes
48
    , printInsts
49
    -- * Balacing functions
50
    , checkMove
51
    , doNextBalance
52
    , tryBalance
53
    , compCV
54
    , compDetailedCV
55
    , printStats
56
    , iMoveToJob
57
    -- * IAllocator functions
58
    , genAllocNodes
59
    , tryAlloc
60
    , tryMGAlloc
61
    , tryReloc
62
    , tryMGReloc
63
    , tryEvac
64
    , tryMGEvac
65
    , collapseFailures
66
    -- * Allocation functions
67
    , iterateAlloc
68
    , tieredAlloc
69
    , tieredSpecMap
70
     -- * Node group functions
71
    , instanceGroup
72
    , findSplitInstances
73
    , splitCluster
74
    ) where
75

    
76
import Data.Function (on)
77
import Data.List
78
import Data.Ord (comparing)
79
import Text.Printf (printf)
80
import Control.Monad
81
import Control.Parallel.Strategies
82

    
83
import qualified Ganeti.HTools.Container as Container
84
import qualified Ganeti.HTools.Instance as Instance
85
import qualified Ganeti.HTools.Node as Node
86
import qualified Ganeti.HTools.Group as Group
87
import Ganeti.HTools.Types
88
import Ganeti.HTools.Utils
89
import qualified Ganeti.OpCodes as OpCodes
90

    
91
-- * Types
92

    
93
-- | Allocation\/relocation solution.
94
data AllocSolution = AllocSolution
95
  { asFailures  :: [FailMode]          -- ^ Failure counts
96
  , asAllocs    :: Int                 -- ^ Good allocation count
97
  , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
98
                                       -- of the list depends on the
99
                                       -- allocation/relocation mode
100
  , asLog       :: [String]            -- ^ A list of informational messages
101
  }
102

    
103
-- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
104
type AllocResult = (FailStats, Node.List, Instance.List,
105
                    [Instance.Instance], [CStats])
106

    
107

    
108
-- | A type denoting the valid allocation mode/pairs.
109
--
110
-- For a one-node allocation, this will be a @Left ['Node.Node']@,
111
-- whereas for a two-node allocation, this will be a @Right
112
-- [('Node.Node', 'Node.Node')]@.
113
type AllocNodes = Either [Ndx] [(Ndx, Ndx)]
114

    
115
-- | The empty solution we start with when computing allocations.
116
emptySolution :: AllocSolution
117
emptySolution = AllocSolution { asFailures = [], asAllocs = 0
118
                              , asSolutions = [], asLog = [] }
119

    
120
-- | The complete state for the balancing solution.
121
data Table = Table Node.List Instance.List Score [Placement]
122
             deriving (Show, Read)
123

    
124
data CStats = CStats { csFmem :: Integer -- ^ Cluster free mem
125
                     , csFdsk :: Integer -- ^ Cluster free disk
126
                     , csAmem :: Integer -- ^ Cluster allocatable mem
127
                     , csAdsk :: Integer -- ^ Cluster allocatable disk
128
                     , csAcpu :: Integer -- ^ Cluster allocatable cpus
129
                     , csMmem :: Integer -- ^ Max node allocatable mem
130
                     , csMdsk :: Integer -- ^ Max node allocatable disk
131
                     , csMcpu :: Integer -- ^ Max node allocatable cpu
132
                     , csImem :: Integer -- ^ Instance used mem
133
                     , csIdsk :: Integer -- ^ Instance used disk
134
                     , csIcpu :: Integer -- ^ Instance used cpu
135
                     , csTmem :: Double  -- ^ Cluster total mem
136
                     , csTdsk :: Double  -- ^ Cluster total disk
137
                     , csTcpu :: Double  -- ^ Cluster total cpus
138
                     , csVcpu :: Integer -- ^ Cluster virtual cpus (if
139
                                         -- node pCpu has been set,
140
                                         -- otherwise -1)
141
                     , csXmem :: Integer -- ^ Unnacounted for mem
142
                     , csNmem :: Integer -- ^ Node own memory
143
                     , csScore :: Score  -- ^ The cluster score
144
                     , csNinst :: Int    -- ^ The total number of instances
145
                     }
146
            deriving (Show, Read)
147

    
148
-- | Currently used, possibly to allocate, unallocable.
149
type AllocStats = (RSpec, RSpec, RSpec)
150

    
151
-- * Utility functions
152

    
153
-- | Verifies the N+1 status and return the affected nodes.
154
verifyN1 :: [Node.Node] -> [Node.Node]
155
verifyN1 = filter Node.failN1
156

    
157
{-| Computes the pair of bad nodes and instances.
158

    
159
The bad node list is computed via a simple 'verifyN1' check, and the
160
bad instance list is the list of primary and secondary instances of
161
those nodes.
162

    
163
-}
164
computeBadItems :: Node.List -> Instance.List ->
165
                   ([Node.Node], [Instance.Instance])
166
computeBadItems nl il =
167
  let bad_nodes = verifyN1 $ getOnline nl
168
      bad_instances = map (`Container.find` il) .
169
                      sort . nub $
170
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
171
  in
172
    (bad_nodes, bad_instances)
173

    
174
-- | Zero-initializer for the CStats type.
175
emptyCStats :: CStats
176
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
177

    
178
-- | Update stats with data from a new node.
179
updateCStats :: CStats -> Node.Node -> CStats
180
updateCStats cs node =
181
    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
182
                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
183
                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
184
                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
185
                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
186
                 csVcpu = x_vcpu,
187
                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
188
               }
189
            = cs
190
        inc_amem = Node.fMem node - Node.rMem node
191
        inc_amem' = if inc_amem > 0 then inc_amem else 0
192
        inc_adsk = Node.availDisk node
193
        inc_imem = truncate (Node.tMem node) - Node.nMem node
194
                   - Node.xMem node - Node.fMem node
195
        inc_icpu = Node.uCpu node
196
        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
197
        inc_vcpu = Node.hiCpu node
198
        inc_acpu = Node.availCpu node
199

    
200
    in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
201
          , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
202
          , csAmem = x_amem + fromIntegral inc_amem'
203
          , csAdsk = x_adsk + fromIntegral inc_adsk
204
          , csAcpu = x_acpu + fromIntegral inc_acpu
205
          , csMmem = max x_mmem (fromIntegral inc_amem')
206
          , csMdsk = max x_mdsk (fromIntegral inc_adsk)
207
          , csMcpu = max x_mcpu (fromIntegral inc_acpu)
208
          , csImem = x_imem + fromIntegral inc_imem
209
          , csIdsk = x_idsk + fromIntegral inc_idsk
210
          , csIcpu = x_icpu + fromIntegral inc_icpu
211
          , csTmem = x_tmem + Node.tMem node
212
          , csTdsk = x_tdsk + Node.tDsk node
213
          , csTcpu = x_tcpu + Node.tCpu node
214
          , csVcpu = x_vcpu + fromIntegral inc_vcpu
215
          , csXmem = x_xmem + fromIntegral (Node.xMem node)
216
          , csNmem = x_nmem + fromIntegral (Node.nMem node)
217
          , csNinst = x_ninst + length (Node.pList node)
218
          }
219

    
220
-- | Compute the total free disk and memory in the cluster.
221
totalResources :: Node.List -> CStats
222
totalResources nl =
223
    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
224
    in cs { csScore = compCV nl }
225

    
226
-- | Compute the delta between two cluster state.
227
--
228
-- This is used when doing allocations, to understand better the
229
-- available cluster resources. The return value is a triple of the
230
-- current used values, the delta that was still allocated, and what
231
-- was left unallocated.
232
computeAllocationDelta :: CStats -> CStats -> AllocStats
233
computeAllocationDelta cini cfin =
234
    let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
235
        CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
236
                csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
237
        rini = RSpec (fromIntegral i_icpu) (fromIntegral i_imem)
238
               (fromIntegral i_idsk)
239
        rfin = RSpec (fromIntegral (f_icpu - i_icpu))
240
               (fromIntegral (f_imem - i_imem))
241
               (fromIntegral (f_idsk - i_idsk))
242
        un_cpu = fromIntegral (v_cpu - f_icpu)::Int
243
        runa = RSpec un_cpu (truncate t_mem - fromIntegral f_imem)
244
               (truncate t_dsk - fromIntegral f_idsk)
245
    in (rini, rfin, runa)
246

    
247
-- | The names and weights of the individual elements in the CV list.
248
detailedCVInfo :: [(Double, String)]
249
detailedCVInfo = [ (1,  "free_mem_cv")
250
                 , (1,  "free_disk_cv")
251
                 , (1,  "n1_cnt")
252
                 , (1,  "reserved_mem_cv")
253
                 , (4,  "offline_all_cnt")
254
                 , (16, "offline_pri_cnt")
255
                 , (1,  "vcpu_ratio_cv")
256
                 , (1,  "cpu_load_cv")
257
                 , (1,  "mem_load_cv")
258
                 , (1,  "disk_load_cv")
259
                 , (1,  "net_load_cv")
260
                 , (2,  "pri_tags_score")
261
                 ]
262

    
263
detailedCVWeights :: [Double]
264
detailedCVWeights = map fst detailedCVInfo
265

    
266
-- | Compute the mem and disk covariance.
267
compDetailedCV :: Node.List -> [Double]
268
compDetailedCV nl =
269
    let
270
        all_nodes = Container.elems nl
271
        (offline, nodes) = partition Node.offline all_nodes
272
        mem_l = map Node.pMem nodes
273
        dsk_l = map Node.pDsk nodes
274
        -- metric: memory covariance
275
        mem_cv = stdDev mem_l
276
        -- metric: disk covariance
277
        dsk_cv = stdDev dsk_l
278
        -- metric: count of instances living on N1 failing nodes
279
        n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
280
                                                   length (Node.pList n)) .
281
                   filter Node.failN1 $ nodes :: Double
282
        res_l = map Node.pRem nodes
283
        -- metric: reserved memory covariance
284
        res_cv = stdDev res_l
285
        -- offline instances metrics
286
        offline_ipri = sum . map (length . Node.pList) $ offline
287
        offline_isec = sum . map (length . Node.sList) $ offline
288
        -- metric: count of instances on offline nodes
289
        off_score = fromIntegral (offline_ipri + offline_isec)::Double
290
        -- metric: count of primary instances on offline nodes (this
291
        -- helps with evacuation/failover of primary instances on
292
        -- 2-node clusters with one node offline)
293
        off_pri_score = fromIntegral offline_ipri::Double
294
        cpu_l = map Node.pCpu nodes
295
        -- metric: covariance of vcpu/pcpu ratio
296
        cpu_cv = stdDev cpu_l
297
        -- metrics: covariance of cpu, memory, disk and network load
298
        (c_load, m_load, d_load, n_load) = unzip4 $
299
            map (\n ->
300
                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
301
                         DynUtil c2 m2 d2 n2 = Node.utilPool n
302
                     in (c1/c2, m1/m2, d1/d2, n1/n2)
303
                ) nodes
304
        -- metric: conflicting instance count
305
        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
306
        pri_tags_score = fromIntegral pri_tags_inst::Double
307
    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
308
       , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
309
       , pri_tags_score ]
310

    
311
-- | Compute the /total/ variance.
312
compCV :: Node.List -> Double
313
compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
314

    
315
-- | Compute online nodes from a 'Node.List'.
316
getOnline :: Node.List -> [Node.Node]
317
getOnline = filter (not . Node.offline) . Container.elems
318

    
319
-- * Balancing functions
320

    
321
-- | Compute best table. Note that the ordering of the arguments is important.
322
compareTables :: Table -> Table -> Table
323
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
324
    if a_cv > b_cv then b else a
325

    
326
-- | Applies an instance move to a given node list and instance.
327
applyMove :: Node.List -> Instance.Instance
328
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
329
-- Failover (f)
330
applyMove nl inst Failover =
331
    let old_pdx = Instance.pNode inst
332
        old_sdx = Instance.sNode inst
333
        old_p = Container.find old_pdx nl
334
        old_s = Container.find old_sdx nl
335
        int_p = Node.removePri old_p inst
336
        int_s = Node.removeSec old_s inst
337
        force_p = Node.offline old_p
338
        new_nl = do -- Maybe monad
339
          new_p <- Node.addPriEx force_p int_s inst
340
          new_s <- Node.addSec int_p inst old_sdx
341
          let new_inst = Instance.setBoth inst old_sdx old_pdx
342
          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
343
                  new_inst, old_sdx, old_pdx)
344
    in new_nl
345

    
346
-- Replace the primary (f:, r:np, f)
347
applyMove nl inst (ReplacePrimary new_pdx) =
348
    let old_pdx = Instance.pNode inst
349
        old_sdx = Instance.sNode inst
350
        old_p = Container.find old_pdx nl
351
        old_s = Container.find old_sdx nl
352
        tgt_n = Container.find new_pdx nl
353
        int_p = Node.removePri old_p inst
354
        int_s = Node.removeSec old_s inst
355
        force_p = Node.offline old_p
356
        new_nl = do -- Maybe monad
357
          -- check that the current secondary can host the instance
358
          -- during the migration
359
          tmp_s <- Node.addPriEx force_p int_s inst
360
          let tmp_s' = Node.removePri tmp_s inst
361
          new_p <- Node.addPriEx force_p tgt_n inst
362
          new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
363
          let new_inst = Instance.setPri inst new_pdx
364
          return (Container.add new_pdx new_p $
365
                  Container.addTwo old_pdx int_p old_sdx new_s nl,
366
                  new_inst, new_pdx, old_sdx)
367
    in new_nl
368

    
369
-- Replace the secondary (r:ns)
370
applyMove nl inst (ReplaceSecondary new_sdx) =
371
    let old_pdx = Instance.pNode inst
372
        old_sdx = Instance.sNode inst
373
        old_s = Container.find old_sdx nl
374
        tgt_n = Container.find new_sdx nl
375
        int_s = Node.removeSec old_s inst
376
        force_s = Node.offline old_s
377
        new_inst = Instance.setSec inst new_sdx
378
        new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
379
                 \new_s -> return (Container.addTwo new_sdx
380
                                   new_s old_sdx int_s nl,
381
                                   new_inst, old_pdx, new_sdx)
382
    in new_nl
383

    
384
-- Replace the secondary and failover (r:np, f)
385
applyMove nl inst (ReplaceAndFailover new_pdx) =
386
    let old_pdx = Instance.pNode inst
387
        old_sdx = Instance.sNode inst
388
        old_p = Container.find old_pdx nl
389
        old_s = Container.find old_sdx nl
390
        tgt_n = Container.find new_pdx nl
391
        int_p = Node.removePri old_p inst
392
        int_s = Node.removeSec old_s inst
393
        force_s = Node.offline old_s
394
        new_nl = do -- Maybe monad
395
          new_p <- Node.addPri tgt_n inst
396
          new_s <- Node.addSecEx force_s int_p inst new_pdx
397
          let new_inst = Instance.setBoth inst new_pdx old_pdx
398
          return (Container.add new_pdx new_p $
399
                  Container.addTwo old_pdx new_s old_sdx int_s nl,
400
                  new_inst, new_pdx, old_pdx)
401
    in new_nl
402

    
403
-- Failver and replace the secondary (f, r:ns)
404
applyMove nl inst (FailoverAndReplace new_sdx) =
405
    let old_pdx = Instance.pNode inst
406
        old_sdx = Instance.sNode inst
407
        old_p = Container.find old_pdx nl
408
        old_s = Container.find old_sdx nl
409
        tgt_n = Container.find new_sdx nl
410
        int_p = Node.removePri old_p inst
411
        int_s = Node.removeSec old_s inst
412
        force_p = Node.offline old_p
413
        new_nl = do -- Maybe monad
414
          new_p <- Node.addPriEx force_p int_s inst
415
          new_s <- Node.addSecEx force_p tgt_n inst old_sdx
416
          let new_inst = Instance.setBoth inst old_sdx new_sdx
417
          return (Container.add new_sdx new_s $
418
                  Container.addTwo old_sdx new_p old_pdx int_p nl,
419
                  new_inst, old_sdx, new_sdx)
420
    in new_nl
421

    
422
-- | Tries to allocate an instance on one given node.
423
allocateOnSingle :: Node.List -> Instance.Instance -> Ndx
424
                 -> OpResult Node.AllocElement
425
allocateOnSingle nl inst new_pdx =
426
    let p = Container.find new_pdx nl
427
        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
428
    in  Node.addPri p inst >>= \new_p -> do
429
      let new_nl = Container.add new_pdx new_p nl
430
          new_score = compCV nl
431
      return (new_nl, new_inst, [new_p], new_score)
432

    
433
-- | Tries to allocate an instance on a given pair of nodes.
434
allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx
435
               -> OpResult Node.AllocElement
436
allocateOnPair nl inst new_pdx new_sdx =
437
    let tgt_p = Container.find new_pdx nl
438
        tgt_s = Container.find new_sdx nl
439
    in do
440
      new_p <- Node.addPri tgt_p inst
441
      new_s <- Node.addSec tgt_s inst new_pdx
442
      let new_inst = Instance.setBoth inst new_pdx new_sdx
443
          new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
444
      return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
445

    
446
-- | Tries to perform an instance move and returns the best table
447
-- between the original one and the new one.
448
checkSingleStep :: Table -- ^ The original table
449
                -> Instance.Instance -- ^ The instance to move
450
                -> Table -- ^ The current best table
451
                -> IMove -- ^ The move to apply
452
                -> Table -- ^ The final best table
453
checkSingleStep ini_tbl target cur_tbl move =
454
    let
455
        Table ini_nl ini_il _ ini_plc = ini_tbl
456
        tmp_resu = applyMove ini_nl target move
457
    in
458
      case tmp_resu of
459
        OpFail _ -> cur_tbl
460
        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
461
            let tgt_idx = Instance.idx target
462
                upd_cvar = compCV upd_nl
463
                upd_il = Container.add tgt_idx new_inst ini_il
464
                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
465
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
466
            in
467
              compareTables cur_tbl upd_tbl
468

    
469
-- | Given the status of the current secondary as a valid new node and
470
-- the current candidate target node, generate the possible moves for
471
-- a instance.
472
possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
473
              -> Bool      -- ^ Whether we can change the primary node
474
              -> Ndx       -- ^ Target node candidate
475
              -> [IMove]   -- ^ List of valid result moves
476

    
477
possibleMoves _ False tdx =
478
    [ReplaceSecondary tdx]
479

    
480
possibleMoves True True tdx =
481
    [ReplaceSecondary tdx,
482
     ReplaceAndFailover tdx,
483
     ReplacePrimary tdx,
484
     FailoverAndReplace tdx]
485

    
486
possibleMoves False True tdx =
487
    [ReplaceSecondary tdx,
488
     ReplaceAndFailover tdx]
489

    
490
-- | Compute the best move for a given instance.
491
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
492
                  -> Bool              -- ^ Whether disk moves are allowed
493
                  -> Bool              -- ^ Whether instance moves are allowed
494
                  -> Table             -- ^ Original table
495
                  -> Instance.Instance -- ^ Instance to move
496
                  -> Table             -- ^ Best new table for this instance
497
checkInstanceMove nodes_idx disk_moves inst_moves ini_tbl target =
498
    let
499
        opdx = Instance.pNode target
500
        osdx = Instance.sNode target
501
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
502
        use_secondary = elem osdx nodes_idx && inst_moves
503
        aft_failover = if use_secondary -- if allowed to failover
504
                       then checkSingleStep ini_tbl target ini_tbl Failover
505
                       else ini_tbl
506
        all_moves = if disk_moves
507
                    then concatMap
508
                         (possibleMoves use_secondary inst_moves) nodes
509
                    else []
510
    in
511
      -- iterate over the possible nodes for this instance
512
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
513

    
514
-- | Compute the best next move.
515
checkMove :: [Ndx]               -- ^ Allowed target node indices
516
          -> Bool                -- ^ Whether disk moves are allowed
517
          -> Bool                -- ^ Whether instance moves are allowed
518
          -> Table               -- ^ The current solution
519
          -> [Instance.Instance] -- ^ List of instances still to move
520
          -> Table               -- ^ The new solution
521
checkMove nodes_idx disk_moves inst_moves ini_tbl victims =
522
    let Table _ _ _ ini_plc = ini_tbl
523
        -- we're using rwhnf from the Control.Parallel.Strategies
524
        -- package; we don't need to use rnf as that would force too
525
        -- much evaluation in single-threaded cases, and in
526
        -- multi-threaded case the weak head normal form is enough to
527
        -- spark the evaluation
528
        tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves
529
                               inst_moves ini_tbl)
530
                 victims
531
        -- iterate over all instances, computing the best move
532
        best_tbl = foldl' compareTables ini_tbl tables
533
        Table _ _ _ best_plc = best_tbl
534
    in if length best_plc == length ini_plc
535
       then ini_tbl -- no advancement
536
       else best_tbl
537

    
538
-- | Check if we are allowed to go deeper in the balancing.
539
doNextBalance :: Table     -- ^ The starting table
540
              -> Int       -- ^ Remaining length
541
              -> Score     -- ^ Score at which to stop
542
              -> Bool      -- ^ The resulting table and commands
543
doNextBalance ini_tbl max_rounds min_score =
544
    let Table _ _ ini_cv ini_plc = ini_tbl
545
        ini_plc_len = length ini_plc
546
    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
547

    
548
-- | Run a balance move.
549
tryBalance :: Table       -- ^ The starting table
550
           -> Bool        -- ^ Allow disk moves
551
           -> Bool        -- ^ Allow instance moves
552
           -> Bool        -- ^ Only evacuate moves
553
           -> Score       -- ^ Min gain threshold
554
           -> Score       -- ^ Min gain
555
           -> Maybe Table -- ^ The resulting table and commands
556
tryBalance ini_tbl disk_moves inst_moves evac_mode mg_limit min_gain =
557
    let Table ini_nl ini_il ini_cv _ = ini_tbl
558
        all_inst = Container.elems ini_il
559
        all_inst' = if evac_mode
560
                    then let bad_nodes = map Node.idx . filter Node.offline $
561
                                         Container.elems ini_nl
562
                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
563
                                          Instance.pNode e `elem` bad_nodes)
564
                            all_inst
565
                    else all_inst
566
        reloc_inst = filter Instance.movable all_inst'
567
        node_idx = map Node.idx . filter (not . Node.offline) $
568
                   Container.elems ini_nl
569
        fin_tbl = checkMove node_idx disk_moves inst_moves ini_tbl reloc_inst
570
        (Table _ _ fin_cv _) = fin_tbl
571
    in
572
      if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
573
      then Just fin_tbl -- this round made success, return the new table
574
      else Nothing
575

    
576
-- * Allocation functions
577

    
578
-- | Build failure stats out of a list of failures.
579
collapseFailures :: [FailMode] -> FailStats
580
collapseFailures flst =
581
    map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
582

    
583
-- | Update current Allocation solution and failure stats with new
584
-- elements.
585
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
586
concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
587

    
588
concatAllocs as (OpGood ns@(_, _, _, nscore)) =
589
    let -- Choose the old or new solution, based on the cluster score
590
        cntok = asAllocs as
591
        osols = asSolutions as
592
        nsols = case osols of
593
                  [] -> [ns]
594
                  (_, _, _, oscore):[] ->
595
                      if oscore < nscore
596
                      then osols
597
                      else [ns]
598
                  -- FIXME: here we simply concat to lists with more
599
                  -- than one element; we should instead abort, since
600
                  -- this is not a valid usage of this function
601
                  xs -> ns:xs
602
        nsuc = cntok + 1
603
    -- Note: we force evaluation of nsols here in order to keep the
604
    -- memory profile low - we know that we will need nsols for sure
605
    -- in the next cycle, so we force evaluation of nsols, since the
606
    -- foldl' in the caller will only evaluate the tuple, but not the
607
    -- elements of the tuple
608
    in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
609

    
610
-- | Sums two allocation solutions (e.g. for two separate node groups).
611
sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
612
sumAllocs (AllocSolution af aa as al) (AllocSolution bf ba bs bl) =
613
    AllocSolution (af ++ bf) (aa + ba) (as ++ bs) (al ++ bl)
614

    
615
-- | Given a solution, generates a reasonable description for it.
616
describeSolution :: AllocSolution -> String
617
describeSolution as =
618
  let fcnt = asFailures as
619
      sols = asSolutions as
620
      freasons =
621
        intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
622
        filter ((> 0) . snd) . collapseFailures $ fcnt
623
  in if null sols
624
     then "No valid allocation solutions, failure reasons: " ++
625
          (if null fcnt
626
           then "unknown reasons"
627
           else freasons)
628
     else let (_, _, nodes, cv) = head sols
629
          in printf ("score: %.8f, successes %d, failures %d (%s)" ++
630
                     " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
631
             (intercalate "/" . map Node.name $ nodes)
632

    
633
-- | Annotates a solution with the appropriate string.
634
annotateSolution :: AllocSolution -> AllocSolution
635
annotateSolution as = as { asLog = describeSolution as : asLog as }
636

    
637
-- | Generate the valid node allocation singles or pairs for a new instance.
638
genAllocNodes :: Group.List        -- ^ Group list
639
              -> Node.List         -- ^ The node map
640
              -> Int               -- ^ The number of nodes required
641
              -> Bool              -- ^ Whether to drop or not
642
                                   -- unallocable nodes
643
              -> Result AllocNodes -- ^ The (monadic) result
644
genAllocNodes gl nl count drop_unalloc =
645
    let filter_fn = if drop_unalloc
646
                    then filter (Group.isAllocable .
647
                                 flip Container.find gl . Node.group)
648
                    else id
649
        all_nodes = filter_fn $ getOnline nl
650
        all_pairs = liftM2 (,) all_nodes all_nodes
651
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y &&
652
                                      Node.group x == Node.group y) all_pairs
653
    in case count of
654
         1 -> Ok (Left (map Node.idx all_nodes))
655
         2 -> Ok (Right (map (\(p, s) -> (Node.idx p, Node.idx s)) ok_pairs))
656
         _ -> Bad "Unsupported number of nodes, only one or two  supported"
657

    
658
-- | Try to allocate an instance on the cluster.
659
tryAlloc :: (Monad m) =>
660
            Node.List         -- ^ The node list
661
         -> Instance.List     -- ^ The instance list
662
         -> Instance.Instance -- ^ The instance to allocate
663
         -> AllocNodes        -- ^ The allocation targets
664
         -> m AllocSolution   -- ^ Possible solution list
665
tryAlloc nl _ inst (Right ok_pairs) =
666
    let sols = foldl' (\cstate (p, s) ->
667
                           concatAllocs cstate $ allocateOnPair nl inst p s
668
                      ) emptySolution ok_pairs
669

    
670
    in if null ok_pairs -- means we have just one node
671
       then fail "Not enough online nodes"
672
       else return $ annotateSolution sols
673

    
674
tryAlloc nl _ inst (Left all_nodes) =
675
    let sols = foldl' (\cstate ->
676
                           concatAllocs cstate . allocateOnSingle nl inst
677
                      ) emptySolution all_nodes
678
    in if null all_nodes
679
       then fail "No online nodes"
680
       else return $ annotateSolution sols
681

    
682
-- | Given a group/result, describe it as a nice (list of) messages.
683
solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
684
solutionDescription gl (groupId, result) =
685
  case result of
686
    Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution)
687
    Bad message -> [printf "Group %s: error %s" gname message]
688
  where grp = Container.find groupId gl
689
        gname = Group.name grp
690
        pol = apolToString (Group.allocPolicy grp)
691

    
692
-- | From a list of possibly bad and possibly empty solutions, filter
693
-- only the groups with a valid result. Note that the result will be
694
-- reversed compared to the original list.
695
filterMGResults :: Group.List
696
                -> [(Gdx, Result AllocSolution)]
697
                -> [(Gdx, AllocSolution)]
698
filterMGResults gl = foldl' fn []
699
    where unallocable = not . Group.isAllocable . flip Container.find gl
700
          fn accu (gdx, rasol) =
701
              case rasol of
702
                Bad _ -> accu
703
                Ok sol | null (asSolutions sol) -> accu
704
                       | unallocable gdx -> accu
705
                       | otherwise -> (gdx, sol):accu
706

    
707
-- | Sort multigroup results based on policy and score.
708
sortMGResults :: Group.List
709
             -> [(Gdx, AllocSolution)]
710
             -> [(Gdx, AllocSolution)]
711
sortMGResults gl sols =
712
    let extractScore (_, _, _, x) = x
713
        solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
714
                               (extractScore . head . asSolutions) sol)
715
    in sortBy (comparing solScore) sols
716

    
717
-- | Try to allocate an instance on a multi-group cluster.
718
tryMGAlloc :: Group.List           -- ^ The group list
719
           -> Node.List            -- ^ The node list
720
           -> Instance.List        -- ^ The instance list
721
           -> Instance.Instance    -- ^ The instance to allocate
722
           -> Int                  -- ^ Required number of nodes
723
           -> Result AllocSolution -- ^ Possible solution list
724
tryMGAlloc mggl mgnl mgil inst cnt =
725
  let groups = splitCluster mgnl mgil
726
      sols = map (\(gid, (nl, il)) ->
727
                   (gid, genAllocNodes mggl nl cnt False >>=
728
                       tryAlloc nl il inst))
729
             groups::[(Gdx, Result AllocSolution)]
730
      all_msgs = concatMap (solutionDescription mggl) sols
731
      goodSols = filterMGResults mggl sols
732
      sortedSols = sortMGResults mggl goodSols
733
  in if null sortedSols
734
     then Bad $ intercalate ", " all_msgs
735
     else let (final_group, final_sol) = head sortedSols
736
              final_name = Group.name $ Container.find final_group mggl
737
              selmsg = "Selected group: " ++  final_name
738
          in Ok $ final_sol { asLog = selmsg:all_msgs }
739

    
740
-- | Try to relocate an instance on the cluster.
741
tryReloc :: (Monad m) =>
742
            Node.List       -- ^ The node list
743
         -> Instance.List   -- ^ The instance list
744
         -> Idx             -- ^ The index of the instance to move
745
         -> Int             -- ^ The number of nodes required
746
         -> [Ndx]           -- ^ Nodes which should not be used
747
         -> m AllocSolution -- ^ Solution list
748
tryReloc nl il xid 1 ex_idx =
749
    let all_nodes = getOnline nl
750
        inst = Container.find xid il
751
        ex_idx' = Instance.pNode inst:ex_idx
752
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
753
        valid_idxes = map Node.idx valid_nodes
754
        sols1 = foldl' (\cstate x ->
755
                            let em = do
756
                                  (mnl, i, _, _) <-
757
                                      applyMove nl inst (ReplaceSecondary x)
758
                                  return (mnl, i, [Container.find x mnl],
759
                                          compCV mnl)
760
                            in concatAllocs cstate em
761
                       ) emptySolution valid_idxes
762
    in return sols1
763

    
764
tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
765
                                \destinations required (" ++ show reqn ++
766
                                                  "), only one supported"
767

    
768
tryMGReloc :: (Monad m) =>
769
              Group.List      -- ^ The group list
770
           -> Node.List       -- ^ The node list
771
           -> Instance.List   -- ^ The instance list
772
           -> Idx             -- ^ The index of the instance to move
773
           -> Int             -- ^ The number of nodes required
774
           -> [Ndx]           -- ^ Nodes which should not be used
775
           -> m AllocSolution -- ^ Solution list
776
tryMGReloc _ mgnl mgil xid ncount ex_ndx = do
777
  let groups = splitCluster mgnl mgil
778
      -- TODO: we only relocate inside the group for now
779
      inst = Container.find xid mgil
780
  (nl, il) <- case lookup (instancePriGroup mgnl inst) groups of
781
                Nothing -> fail $ "Cannot find group for instance " ++
782
                           Instance.name inst
783
                Just v -> return v
784
  tryReloc nl il xid ncount ex_ndx
785

    
786
-- | Change an instance's secondary node.
787
evacInstance :: (Monad m) =>
788
                [Ndx]                      -- ^ Excluded nodes
789
             -> Instance.List              -- ^ The current instance list
790
             -> (Node.List, AllocSolution) -- ^ The current state
791
             -> Idx                        -- ^ The instance to evacuate
792
             -> m (Node.List, AllocSolution)
793
evacInstance ex_ndx il (nl, old_as) idx = do
794
  -- FIXME: hardcoded one node here
795

    
796
  -- Longer explanation: evacuation is currently hardcoded to DRBD
797
  -- instances (which have one secondary); hence, even if the
798
  -- IAllocator protocol can request N nodes for an instance, and all
799
  -- the message parsing/loading pass this, this implementation only
800
  -- supports one; this situation needs to be revisited if we ever
801
  -- support more than one secondary, or if we change the storage
802
  -- model
803
  new_as <- tryReloc nl il idx 1 ex_ndx
804
  case asSolutions new_as of
805
    -- an individual relocation succeeded, we kind of compose the data
806
    -- from the two solutions
807
    csol@(nl', _, _, _):_ ->
808
        return (nl', new_as { asSolutions = csol:asSolutions old_as })
809
    -- this relocation failed, so we fail the entire evac
810
    _ -> fail $ "Can't evacuate instance " ++
811
         Instance.name (Container.find idx il) ++
812
             ": " ++ describeSolution new_as
813

    
814
-- | Try to evacuate a list of nodes.
815
tryEvac :: (Monad m) =>
816
            Node.List       -- ^ The node list
817
         -> Instance.List   -- ^ The instance list
818
         -> [Idx]           -- ^ Instances to be evacuated
819
         -> [Ndx]           -- ^ Restricted nodes (the ones being evacuated)
820
         -> m AllocSolution -- ^ Solution list
821
tryEvac nl il idxs ex_ndx = do
822
  (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptySolution) idxs
823
  return sol
824

    
825
-- | Multi-group evacuation of a list of nodes.
826
tryMGEvac :: (Monad m) =>
827
             Group.List -- ^ The group list
828
          -> Node.List       -- ^ The node list
829
          -> Instance.List   -- ^ The instance list
830
          -> [Ndx]           -- ^ Nodes to be evacuated
831
          -> m AllocSolution -- ^ Solution list
832
tryMGEvac _ nl il ex_ndx =
833
    let ex_nodes = map (`Container.find` nl) ex_ndx
834
        all_insts = nub . concatMap Node.sList $ ex_nodes
835
        all_insts' = associateIdxs all_insts $ splitCluster nl il
836
    in do
837
      results <- mapM (\(_, (gnl, gil, idxs)) -> tryEvac gnl gil idxs ex_ndx)
838
                 all_insts'
839
      let sol = foldl' sumAllocs emptySolution results
840
      return $ annotateSolution sol
841

    
842
-- | Recursively place instances on the cluster until we're out of space.
843
iterateAlloc :: Node.List
844
             -> Instance.List
845
             -> Instance.Instance
846
             -> AllocNodes
847
             -> [Instance.Instance]
848
             -> [CStats]
849
             -> Result AllocResult
850
iterateAlloc nl il newinst allocnodes ixes cstats =
851
      let depth = length ixes
852
          newname = printf "new-%d" depth::String
853
          newidx = length (Container.elems il) + depth
854
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
855
      in case tryAlloc nl il newi2 allocnodes of
856
           Bad s -> Bad s
857
           Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
858
               case sols3 of
859
                 [] -> Ok (collapseFailures errs, nl, il, ixes, cstats)
860
                 (xnl, xi, _, _):[] ->
861
                     iterateAlloc xnl (Container.add newidx xi il)
862
                                  newinst allocnodes (xi:ixes)
863
                                  (totalResources xnl:cstats)
864
                 _ -> Bad "Internal error: multiple solutions for single\
865
                          \ allocation"
866

    
867
-- | The core of the tiered allocation mode.
868
tieredAlloc :: Node.List
869
            -> Instance.List
870
            -> Instance.Instance
871
            -> AllocNodes
872
            -> [Instance.Instance]
873
            -> [CStats]
874
            -> Result AllocResult
875
tieredAlloc nl il newinst allocnodes ixes cstats =
876
    case iterateAlloc nl il newinst allocnodes ixes cstats of
877
      Bad s -> Bad s
878
      Ok (errs, nl', il', ixes', cstats') ->
879
          case Instance.shrinkByType newinst . fst . last $
880
               sortBy (comparing snd) errs of
881
            Bad _ -> Ok (errs, nl', il', ixes', cstats')
882
            Ok newinst' ->
883
                tieredAlloc nl' il' newinst' allocnodes ixes' cstats'
884

    
885
-- | Compute the tiered spec string description from a list of
886
-- allocated instances.
887
tieredSpecMap :: [Instance.Instance]
888
              -> [String]
889
tieredSpecMap trl_ixes =
890
    let fin_trl_ixes = reverse trl_ixes
891
        ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
892
        spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
893
                   ix_byspec
894
    in  map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
895
                             (rspecDsk spec) (rspecCpu spec) cnt) spec_map
896

    
897
-- * Formatting functions
898

    
899
-- | Given the original and final nodes, computes the relocation description.
900
computeMoves :: Instance.Instance -- ^ The instance to be moved
901
             -> String -- ^ The instance name
902
             -> IMove  -- ^ The move being performed
903
             -> String -- ^ New primary
904
             -> String -- ^ New secondary
905
             -> (String, [String])
906
                -- ^ Tuple of moves and commands list; moves is containing
907
                -- either @/f/@ for failover or @/r:name/@ for replace
908
                -- secondary, while the command list holds gnt-instance
909
                -- commands (without that prefix), e.g \"@failover instance1@\"
910
computeMoves i inam mv c d =
911
    case mv of
912
      Failover -> ("f", [mig])
913
      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
914
      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
915
      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
916
      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
917
    where morf = if Instance.running i then "migrate" else "failover"
918
          mig = printf "%s -f %s" morf inam::String
919
          rep n = printf "replace-disks -n %s %s" n inam
920

    
921
-- | Converts a placement to string format.
922
printSolutionLine :: Node.List     -- ^ The node list
923
                  -> Instance.List -- ^ The instance list
924
                  -> Int           -- ^ Maximum node name length
925
                  -> Int           -- ^ Maximum instance name length
926
                  -> Placement     -- ^ The current placement
927
                  -> Int           -- ^ The index of the placement in
928
                                   -- the solution
929
                  -> (String, [String])
930
printSolutionLine nl il nmlen imlen plc pos =
931
    let
932
        pmlen = (2*nmlen + 1)
933
        (i, p, s, mv, c) = plc
934
        inst = Container.find i il
935
        inam = Instance.alias inst
936
        npri = Node.alias $ Container.find p nl
937
        nsec = Node.alias $ Container.find s nl
938
        opri = Node.alias $ Container.find (Instance.pNode inst) nl
939
        osec = Node.alias $ Container.find (Instance.sNode inst) nl
940
        (moves, cmds) =  computeMoves inst inam mv npri nsec
941
        ostr = printf "%s:%s" opri osec::String
942
        nstr = printf "%s:%s" npri nsec::String
943
    in
944
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
945
       pos imlen inam pmlen ostr
946
       pmlen nstr c moves,
947
       cmds)
948

    
949
-- | Return the instance and involved nodes in an instance move.
950
involvedNodes :: Instance.List -> Placement -> [Ndx]
951
involvedNodes il plc =
952
    let (i, np, ns, _, _) = plc
953
        inst = Container.find i il
954
        op = Instance.pNode inst
955
        os = Instance.sNode inst
956
    in nub [np, ns, op, os]
957

    
958
-- | Inner function for splitJobs, that either appends the next job to
959
-- the current jobset, or starts a new jobset.
960
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
961
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
962
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
963
    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
964
    | otherwise = ([n]:cjs, ndx)
965

    
966
-- | Break a list of moves into independent groups. Note that this
967
-- will reverse the order of jobs.
968
splitJobs :: [MoveJob] -> [JobSet]
969
splitJobs = fst . foldl mergeJobs ([], [])
970

    
971
-- | Given a list of commands, prefix them with @gnt-instance@ and
972
-- also beautify the display a little.
973
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
974
formatJob jsn jsl (sn, (_, _, _, cmds)) =
975
    let out =
976
            printf "  echo job %d/%d" jsn sn:
977
            printf "  check":
978
            map ("  gnt-instance " ++) cmds
979
    in if sn == 1
980
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
981
       else out
982

    
983
-- | Given a list of commands, prefix them with @gnt-instance@ and
984
-- also beautify the display a little.
985
formatCmds :: [JobSet] -> String
986
formatCmds =
987
    unlines .
988
    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
989
                             (zip [1..] js)) .
990
    zip [1..]
991

    
992
-- | Print the node list.
993
printNodes :: Node.List -> [String] -> String
994
printNodes nl fs =
995
    let fields = case fs of
996
          [] -> Node.defaultFields
997
          "+":rest -> Node.defaultFields ++ rest
998
          _ -> fs
999
        snl = sortBy (comparing Node.idx) (Container.elems nl)
1000
        (header, isnum) = unzip $ map Node.showHeader fields
1001
    in unlines . map ((:) ' ' .  intercalate " ") $
1002
       formatTable (header:map (Node.list fields) snl) isnum
1003

    
1004
-- | Print the instance list.
1005
printInsts :: Node.List -> Instance.List -> String
1006
printInsts nl il =
1007
    let sil = sortBy (comparing Instance.idx) (Container.elems il)
1008
        helper inst = [ if Instance.running inst then "R" else " "
1009
                      , Instance.name inst
1010
                      , Container.nameOf nl (Instance.pNode inst)
1011
                      , let sdx = Instance.sNode inst
1012
                        in if sdx == Node.noSecondary
1013
                           then  ""
1014
                           else Container.nameOf nl sdx
1015
                      , if Instance.autoBalance inst then "Y" else "N"
1016
                      , printf "%3d" $ Instance.vcpus inst
1017
                      , printf "%5d" $ Instance.mem inst
1018
                      , printf "%5d" $ Instance.dsk inst `div` 1024
1019
                      , printf "%5.3f" lC
1020
                      , printf "%5.3f" lM
1021
                      , printf "%5.3f" lD
1022
                      , printf "%5.3f" lN
1023
                      ]
1024
            where DynUtil lC lM lD lN = Instance.util inst
1025
        header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
1026
                 , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
1027
        isnum = False:False:False:False:False:repeat True
1028
    in unlines . map ((:) ' ' . intercalate " ") $
1029
       formatTable (header:map helper sil) isnum
1030

    
1031
-- | Shows statistics for a given node list.
1032
printStats :: Node.List -> String
1033
printStats nl =
1034
    let dcvs = compDetailedCV nl
1035
        (weights, names) = unzip detailedCVInfo
1036
        hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
1037
        formatted = map (\(w, header, val) ->
1038
                             printf "%s=%.8f(x%.2f)" header val w::String) hd
1039
    in intercalate ", " formatted
1040

    
1041
-- | Convert a placement into a list of OpCodes (basically a job).
1042
iMoveToJob :: Node.List -> Instance.List
1043
          -> Idx -> IMove -> [OpCodes.OpCode]
1044
iMoveToJob nl il idx move =
1045
    let inst = Container.find idx il
1046
        iname = Instance.name inst
1047
        lookNode  = Just . Container.nameOf nl
1048
        opF = OpCodes.OpInstanceMigrate iname True False True
1049
        opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
1050
                OpCodes.ReplaceNewSecondary [] Nothing
1051
    in case move of
1052
         Failover -> [ opF ]
1053
         ReplacePrimary np -> [ opF, opR np, opF ]
1054
         ReplaceSecondary ns -> [ opR ns ]
1055
         ReplaceAndFailover np -> [ opR np, opF ]
1056
         FailoverAndReplace ns -> [ opF, opR ns ]
1057

    
1058
-- * Node group functions
1059

    
1060
-- | Computes the group of an instance.
1061
instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
1062
instanceGroup nl i =
1063
  let sidx = Instance.sNode i
1064
      pnode = Container.find (Instance.pNode i) nl
1065
      snode = if sidx == Node.noSecondary
1066
              then pnode
1067
              else Container.find sidx nl
1068
      pgroup = Node.group pnode
1069
      sgroup = Node.group snode
1070
  in if pgroup /= sgroup
1071
     then fail ("Instance placed accross two node groups, primary " ++
1072
                show pgroup ++ ", secondary " ++ show sgroup)
1073
     else return pgroup
1074

    
1075
-- | Computes the group of an instance per the primary node.
1076
instancePriGroup :: Node.List -> Instance.Instance -> Gdx
1077
instancePriGroup nl i =
1078
  let pnode = Container.find (Instance.pNode i) nl
1079
  in  Node.group pnode
1080

    
1081
-- | Compute the list of badly allocated instances (split across node
1082
-- groups).
1083
findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
1084
findSplitInstances nl =
1085
  filter (not . isOk . instanceGroup nl) . Container.elems
1086

    
1087
-- | Splits a cluster into the component node groups.
1088
splitCluster :: Node.List -> Instance.List ->
1089
                [(Gdx, (Node.List, Instance.List))]
1090
splitCluster nl il =
1091
  let ngroups = Node.computeGroups (Container.elems nl)
1092
  in map (\(guuid, nodes) ->
1093
           let nidxs = map Node.idx nodes
1094
               nodes' = zip nidxs nodes
1095
               instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
1096
           in (guuid, (Container.fromList nodes', instances))) ngroups
1097

    
1098
-- | Split a global instance index map into per-group, and associate
1099
-- it with the group/node/instance lists.
1100
associateIdxs :: [Idx] -- ^ Instance indices to be split/associated
1101
              -> [(Gdx, (Node.List, Instance.List))]        -- ^ Input groups
1102
              -> [(Gdx, (Node.List, Instance.List, [Idx]))] -- ^ Result
1103
associateIdxs idxs =
1104
    map (\(gdx, (nl, il)) ->
1105
             (gdx, (nl, il, filter (`Container.member` il) idxs)))