Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Cluster.hs @ 0e09422b

History | View | Annotate | Download (46.5 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the /Main/ module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009, 2010, 2011 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
    (
31
     -- * Types
32
      AllocSolution(..)
33
    , Table(..)
34
    , CStats(..)
35
    , AllocStats
36
    -- * Generic functions
37
    , totalResources
38
    , computeAllocationDelta
39
    -- * First phase functions
40
    , computeBadItems
41
    -- * Second phase functions
42
    , printSolutionLine
43
    , formatCmds
44
    , involvedNodes
45
    , splitJobs
46
    -- * Display functions
47
    , printNodes
48
    , printInsts
49
    -- * Balacing functions
50
    , checkMove
51
    , doNextBalance
52
    , tryBalance
53
    , compCV
54
    , compDetailedCV
55
    , printStats
56
    , iMoveToJob
57
    -- * IAllocator functions
58
    , genAllocNodes
59
    , tryAlloc
60
    , tryMGAlloc
61
    , tryReloc
62
    , tryMGReloc
63
    , tryEvac
64
    , tryMGEvac
65
    , collapseFailures
66
    -- * Allocation functions
67
    , iterateAlloc
68
    , tieredAlloc
69
    , tieredSpecMap
70
     -- * Node group functions
71
    , instanceGroup
72
    , findSplitInstances
73
    , splitCluster
74
    ) where
75

    
76
import Data.Function (on)
77
import Data.List
78
import Data.Ord (comparing)
79
import Text.Printf (printf)
80
import Control.Monad
81
import Control.Parallel.Strategies
82

    
83
import qualified Ganeti.HTools.Container as Container
84
import qualified Ganeti.HTools.Instance as Instance
85
import qualified Ganeti.HTools.Node as Node
86
import qualified Ganeti.HTools.Group as Group
87
import Ganeti.HTools.Types
88
import Ganeti.HTools.Utils
89
import qualified Ganeti.OpCodes as OpCodes
90

    
91
-- * Types
92

    
93
-- | Allocation\/relocation solution.
94
data AllocSolution = AllocSolution
95
  { asFailures  :: [FailMode]          -- ^ Failure counts
96
  , asAllocs    :: Int                 -- ^ Good allocation count
97
  , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
98
                                       -- of the list depends on the
99
                                       -- allocation/relocation mode
100
  , asLog       :: [String]            -- ^ A list of informational messages
101
  }
102

    
103
-- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
104
type AllocResult = (FailStats, Node.List, Instance.List,
105
                    [Instance.Instance], [CStats])
106

    
107

    
108
-- | A type denoting the valid allocation mode/pairs.
109
--
110
-- For a one-node allocation, this will be a @Left ['Node.Node']@,
111
-- whereas for a two-node allocation, this will be a @Right
112
-- [('Node.Node', 'Node.Node')]@.
113
type AllocNodes = Either [Ndx] [(Ndx, Ndx)]
114

    
115
-- | The empty solution we start with when computing allocations.
116
emptySolution :: AllocSolution
117
emptySolution = AllocSolution { asFailures = [], asAllocs = 0
118
                              , asSolutions = [], asLog = [] }
119

    
120
-- | The complete state for the balancing solution.
121
data Table = Table Node.List Instance.List Score [Placement]
122
             deriving (Show, Read)
123

    
124
data CStats = CStats { csFmem :: Integer -- ^ Cluster free mem
125
                     , csFdsk :: Integer -- ^ Cluster free disk
126
                     , csAmem :: Integer -- ^ Cluster allocatable mem
127
                     , csAdsk :: Integer -- ^ Cluster allocatable disk
128
                     , csAcpu :: Integer -- ^ Cluster allocatable cpus
129
                     , csMmem :: Integer -- ^ Max node allocatable mem
130
                     , csMdsk :: Integer -- ^ Max node allocatable disk
131
                     , csMcpu :: Integer -- ^ Max node allocatable cpu
132
                     , csImem :: Integer -- ^ Instance used mem
133
                     , csIdsk :: Integer -- ^ Instance used disk
134
                     , csIcpu :: Integer -- ^ Instance used cpu
135
                     , csTmem :: Double  -- ^ Cluster total mem
136
                     , csTdsk :: Double  -- ^ Cluster total disk
137
                     , csTcpu :: Double  -- ^ Cluster total cpus
138
                     , csVcpu :: Integer -- ^ Cluster virtual cpus (if
139
                                         -- node pCpu has been set,
140
                                         -- otherwise -1)
141
                     , csXmem :: Integer -- ^ Unnacounted for mem
142
                     , csNmem :: Integer -- ^ Node own memory
143
                     , csScore :: Score  -- ^ The cluster score
144
                     , csNinst :: Int    -- ^ The total number of instances
145
                     }
146
            deriving (Show, Read)
147

    
148
-- | Currently used, possibly to allocate, unallocable.
149
type AllocStats = (RSpec, RSpec, RSpec)
150

    
151
-- * Utility functions
152

    
153
-- | Verifies the N+1 status and return the affected nodes.
154
verifyN1 :: [Node.Node] -> [Node.Node]
155
verifyN1 = filter Node.failN1
156

    
157
{-| Computes the pair of bad nodes and instances.
158

    
159
The bad node list is computed via a simple 'verifyN1' check, and the
160
bad instance list is the list of primary and secondary instances of
161
those nodes.
162

    
163
-}
164
computeBadItems :: Node.List -> Instance.List ->
165
                   ([Node.Node], [Instance.Instance])
166
computeBadItems nl il =
167
  let bad_nodes = verifyN1 $ getOnline nl
168
      bad_instances = map (`Container.find` il) .
169
                      sort . nub $
170
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
171
  in
172
    (bad_nodes, bad_instances)
173

    
174
-- | Zero-initializer for the CStats type.
175
emptyCStats :: CStats
176
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
177

    
178
-- | Update stats with data from a new node.
179
updateCStats :: CStats -> Node.Node -> CStats
180
updateCStats cs node =
181
    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
182
                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
183
                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
184
                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
185
                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
186
                 csVcpu = x_vcpu,
187
                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
188
               }
189
            = cs
190
        inc_amem = Node.fMem node - Node.rMem node
191
        inc_amem' = if inc_amem > 0 then inc_amem else 0
192
        inc_adsk = Node.availDisk node
193
        inc_imem = truncate (Node.tMem node) - Node.nMem node
194
                   - Node.xMem node - Node.fMem node
195
        inc_icpu = Node.uCpu node
196
        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
197
        inc_vcpu = Node.hiCpu node
198
        inc_acpu = Node.availCpu node
199

    
200
    in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
201
          , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
202
          , csAmem = x_amem + fromIntegral inc_amem'
203
          , csAdsk = x_adsk + fromIntegral inc_adsk
204
          , csAcpu = x_acpu + fromIntegral inc_acpu
205
          , csMmem = max x_mmem (fromIntegral inc_amem')
206
          , csMdsk = max x_mdsk (fromIntegral inc_adsk)
207
          , csMcpu = max x_mcpu (fromIntegral inc_acpu)
208
          , csImem = x_imem + fromIntegral inc_imem
209
          , csIdsk = x_idsk + fromIntegral inc_idsk
210
          , csIcpu = x_icpu + fromIntegral inc_icpu
211
          , csTmem = x_tmem + Node.tMem node
212
          , csTdsk = x_tdsk + Node.tDsk node
213
          , csTcpu = x_tcpu + Node.tCpu node
214
          , csVcpu = x_vcpu + fromIntegral inc_vcpu
215
          , csXmem = x_xmem + fromIntegral (Node.xMem node)
216
          , csNmem = x_nmem + fromIntegral (Node.nMem node)
217
          , csNinst = x_ninst + length (Node.pList node)
218
          }
219

    
220
-- | Compute the total free disk and memory in the cluster.
221
totalResources :: Node.List -> CStats
222
totalResources nl =
223
    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
224
    in cs { csScore = compCV nl }
225

    
226
-- | Compute the delta between two cluster state.
227
--
228
-- This is used when doing allocations, to understand better the
229
-- available cluster resources. The return value is a triple of the
230
-- current used values, the delta that was still allocated, and what
231
-- was left unallocated.
232
computeAllocationDelta :: CStats -> CStats -> AllocStats
233
computeAllocationDelta cini cfin =
234
    let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
235
        CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
236
                csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
237
        rini = RSpec (fromIntegral i_icpu) (fromIntegral i_imem)
238
               (fromIntegral i_idsk)
239
        rfin = RSpec (fromIntegral (f_icpu - i_icpu))
240
               (fromIntegral (f_imem - i_imem))
241
               (fromIntegral (f_idsk - i_idsk))
242
        un_cpu = fromIntegral (v_cpu - f_icpu)::Int
243
        runa = RSpec un_cpu (truncate t_mem - fromIntegral f_imem)
244
               (truncate t_dsk - fromIntegral f_idsk)
245
    in (rini, rfin, runa)
246

    
247
-- | The names and weights of the individual elements in the CV list.
248
detailedCVInfo :: [(Double, String)]
249
detailedCVInfo = [ (1,  "free_mem_cv")
250
                 , (1,  "free_disk_cv")
251
                 , (1,  "n1_cnt")
252
                 , (1,  "reserved_mem_cv")
253
                 , (4,  "offline_all_cnt")
254
                 , (16, "offline_pri_cnt")
255
                 , (1,  "vcpu_ratio_cv")
256
                 , (1,  "cpu_load_cv")
257
                 , (1,  "mem_load_cv")
258
                 , (1,  "disk_load_cv")
259
                 , (1,  "net_load_cv")
260
                 , (2,  "pri_tags_score")
261
                 ]
262

    
263
detailedCVWeights :: [Double]
264
detailedCVWeights = map fst detailedCVInfo
265

    
266
-- | Compute the mem and disk covariance.
267
compDetailedCV :: Node.List -> [Double]
268
compDetailedCV nl =
269
    let
270
        all_nodes = Container.elems nl
271
        (offline, nodes) = partition Node.offline all_nodes
272
        mem_l = map Node.pMem nodes
273
        dsk_l = map Node.pDsk nodes
274
        -- metric: memory covariance
275
        mem_cv = stdDev mem_l
276
        -- metric: disk covariance
277
        dsk_cv = stdDev dsk_l
278
        -- metric: count of instances living on N1 failing nodes
279
        n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
280
                                                   length (Node.pList n)) .
281
                   filter Node.failN1 $ nodes :: Double
282
        res_l = map Node.pRem nodes
283
        -- metric: reserved memory covariance
284
        res_cv = stdDev res_l
285
        -- offline instances metrics
286
        offline_ipri = sum . map (length . Node.pList) $ offline
287
        offline_isec = sum . map (length . Node.sList) $ offline
288
        -- metric: count of instances on offline nodes
289
        off_score = fromIntegral (offline_ipri + offline_isec)::Double
290
        -- metric: count of primary instances on offline nodes (this
291
        -- helps with evacuation/failover of primary instances on
292
        -- 2-node clusters with one node offline)
293
        off_pri_score = fromIntegral offline_ipri::Double
294
        cpu_l = map Node.pCpu nodes
295
        -- metric: covariance of vcpu/pcpu ratio
296
        cpu_cv = stdDev cpu_l
297
        -- metrics: covariance of cpu, memory, disk and network load
298
        (c_load, m_load, d_load, n_load) = unzip4 $
299
            map (\n ->
300
                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
301
                         DynUtil c2 m2 d2 n2 = Node.utilPool n
302
                     in (c1/c2, m1/m2, d1/d2, n1/n2)
303
                ) nodes
304
        -- metric: conflicting instance count
305
        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
306
        pri_tags_score = fromIntegral pri_tags_inst::Double
307
    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
308
       , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
309
       , pri_tags_score ]
310

    
311
-- | Compute the /total/ variance.
312
compCV :: Node.List -> Double
313
compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
314

    
315
-- | Compute online nodes from a 'Node.List'.
316
getOnline :: Node.List -> [Node.Node]
317
getOnline = filter (not . Node.offline) . Container.elems
318

    
319
-- * Balancing functions
320

    
321
-- | Compute best table. Note that the ordering of the arguments is important.
322
compareTables :: Table -> Table -> Table
323
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
324
    if a_cv > b_cv then b else a
325

    
326
-- | Applies an instance move to a given node list and instance.
327
applyMove :: Node.List -> Instance.Instance
328
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
329
-- Failover (f)
330
applyMove nl inst Failover =
331
    let old_pdx = Instance.pNode inst
332
        old_sdx = Instance.sNode inst
333
        old_p = Container.find old_pdx nl
334
        old_s = Container.find old_sdx nl
335
        int_p = Node.removePri old_p inst
336
        int_s = Node.removeSec old_s inst
337
        force_p = Node.offline old_p
338
        new_nl = do -- Maybe monad
339
          new_p <- Node.addPriEx force_p int_s inst
340
          new_s <- Node.addSec int_p inst old_sdx
341
          let new_inst = Instance.setBoth inst old_sdx old_pdx
342
          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
343
                  new_inst, old_sdx, old_pdx)
344
    in new_nl
345

    
346
-- Replace the primary (f:, r:np, f)
347
applyMove nl inst (ReplacePrimary new_pdx) =
348
    let old_pdx = Instance.pNode inst
349
        old_sdx = Instance.sNode inst
350
        old_p = Container.find old_pdx nl
351
        old_s = Container.find old_sdx nl
352
        tgt_n = Container.find new_pdx nl
353
        int_p = Node.removePri old_p inst
354
        int_s = Node.removeSec old_s inst
355
        force_p = Node.offline old_p
356
        new_nl = do -- Maybe monad
357
          -- check that the current secondary can host the instance
358
          -- during the migration
359
          tmp_s <- Node.addPriEx force_p int_s inst
360
          let tmp_s' = Node.removePri tmp_s inst
361
          new_p <- Node.addPriEx force_p tgt_n inst
362
          new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
363
          let new_inst = Instance.setPri inst new_pdx
364
          return (Container.add new_pdx new_p $
365
                  Container.addTwo old_pdx int_p old_sdx new_s nl,
366
                  new_inst, new_pdx, old_sdx)
367
    in new_nl
368

    
369
-- Replace the secondary (r:ns)
370
applyMove nl inst (ReplaceSecondary new_sdx) =
371
    let old_pdx = Instance.pNode inst
372
        old_sdx = Instance.sNode inst
373
        old_s = Container.find old_sdx nl
374
        tgt_n = Container.find new_sdx nl
375
        int_s = Node.removeSec old_s inst
376
        force_s = Node.offline old_s
377
        new_inst = Instance.setSec inst new_sdx
378
        new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
379
                 \new_s -> return (Container.addTwo new_sdx
380
                                   new_s old_sdx int_s nl,
381
                                   new_inst, old_pdx, new_sdx)
382
    in new_nl
383

    
384
-- Replace the secondary and failover (r:np, f)
385
applyMove nl inst (ReplaceAndFailover new_pdx) =
386
    let old_pdx = Instance.pNode inst
387
        old_sdx = Instance.sNode inst
388
        old_p = Container.find old_pdx nl
389
        old_s = Container.find old_sdx nl
390
        tgt_n = Container.find new_pdx nl
391
        int_p = Node.removePri old_p inst
392
        int_s = Node.removeSec old_s inst
393
        force_s = Node.offline old_s
394
        new_nl = do -- Maybe monad
395
          new_p <- Node.addPri tgt_n inst
396
          new_s <- Node.addSecEx force_s int_p inst new_pdx
397
          let new_inst = Instance.setBoth inst new_pdx old_pdx
398
          return (Container.add new_pdx new_p $
399
                  Container.addTwo old_pdx new_s old_sdx int_s nl,
400
                  new_inst, new_pdx, old_pdx)
401
    in new_nl
402

    
403
-- Failver and replace the secondary (f, r:ns)
404
applyMove nl inst (FailoverAndReplace new_sdx) =
405
    let old_pdx = Instance.pNode inst
406
        old_sdx = Instance.sNode inst
407
        old_p = Container.find old_pdx nl
408
        old_s = Container.find old_sdx nl
409
        tgt_n = Container.find new_sdx nl
410
        int_p = Node.removePri old_p inst
411
        int_s = Node.removeSec old_s inst
412
        force_p = Node.offline old_p
413
        new_nl = do -- Maybe monad
414
          new_p <- Node.addPriEx force_p int_s inst
415
          new_s <- Node.addSecEx force_p tgt_n inst old_sdx
416
          let new_inst = Instance.setBoth inst old_sdx new_sdx
417
          return (Container.add new_sdx new_s $
418
                  Container.addTwo old_sdx new_p old_pdx int_p nl,
419
                  new_inst, old_sdx, new_sdx)
420
    in new_nl
421

    
422
-- | Tries to allocate an instance on one given node.
423
allocateOnSingle :: Node.List -> Instance.Instance -> Ndx
424
                 -> OpResult Node.AllocElement
425
allocateOnSingle nl inst new_pdx =
426
    let p = Container.find new_pdx nl
427
        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
428
    in  Node.addPri p inst >>= \new_p -> do
429
      let new_nl = Container.add new_pdx new_p nl
430
          new_score = compCV nl
431
      return (new_nl, new_inst, [new_p], new_score)
432

    
433
-- | Tries to allocate an instance on a given pair of nodes.
434
allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx
435
               -> OpResult Node.AllocElement
436
allocateOnPair nl inst new_pdx new_sdx =
437
    let tgt_p = Container.find new_pdx nl
438
        tgt_s = Container.find new_sdx nl
439
    in do
440
      new_p <- Node.addPri tgt_p inst
441
      new_s <- Node.addSec tgt_s inst new_pdx
442
      let new_inst = Instance.setBoth inst new_pdx new_sdx
443
          new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
444
      return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
445

    
446
-- | Tries to perform an instance move and returns the best table
447
-- between the original one and the new one.
448
checkSingleStep :: Table -- ^ The original table
449
                -> Instance.Instance -- ^ The instance to move
450
                -> Table -- ^ The current best table
451
                -> IMove -- ^ The move to apply
452
                -> Table -- ^ The final best table
453
checkSingleStep ini_tbl target cur_tbl move =
454
    let
455
        Table ini_nl ini_il _ ini_plc = ini_tbl
456
        tmp_resu = applyMove ini_nl target move
457
    in
458
      case tmp_resu of
459
        OpFail _ -> cur_tbl
460
        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
461
            let tgt_idx = Instance.idx target
462
                upd_cvar = compCV upd_nl
463
                upd_il = Container.add tgt_idx new_inst ini_il
464
                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
465
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
466
            in
467
              compareTables cur_tbl upd_tbl
468

    
469
-- | Given the status of the current secondary as a valid new node and
470
-- the current candidate target node, generate the possible moves for
471
-- a instance.
472
possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
473
              -> Bool      -- ^ Whether we can change the primary node
474
              -> Ndx       -- ^ Target node candidate
475
              -> [IMove]   -- ^ List of valid result moves
476

    
477
possibleMoves _ False tdx =
478
    [ReplaceSecondary tdx]
479

    
480
possibleMoves True True tdx =
481
    [ReplaceSecondary tdx,
482
     ReplaceAndFailover tdx,
483
     ReplacePrimary tdx,
484
     FailoverAndReplace tdx]
485

    
486
possibleMoves False True tdx =
487
    [ReplaceSecondary tdx,
488
     ReplaceAndFailover tdx]
489

    
490
-- | Compute the best move for a given instance.
491
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
492
                  -> Bool              -- ^ Whether disk moves are allowed
493
                  -> Bool              -- ^ Whether instance moves are allowed
494
                  -> Table             -- ^ Original table
495
                  -> Instance.Instance -- ^ Instance to move
496
                  -> Table             -- ^ Best new table for this instance
497
checkInstanceMove nodes_idx disk_moves inst_moves ini_tbl target =
498
    let
499
        opdx = Instance.pNode target
500
        osdx = Instance.sNode target
501
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
502
        use_secondary = elem osdx nodes_idx && inst_moves
503
        aft_failover = if use_secondary -- if allowed to failover
504
                       then checkSingleStep ini_tbl target ini_tbl Failover
505
                       else ini_tbl
506
        all_moves = if disk_moves
507
                    then concatMap
508
                         (possibleMoves use_secondary inst_moves) nodes
509
                    else []
510
    in
511
      -- iterate over the possible nodes for this instance
512
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
513

    
514
-- | Compute the best next move.
515
checkMove :: [Ndx]               -- ^ Allowed target node indices
516
          -> Bool                -- ^ Whether disk moves are allowed
517
          -> Bool                -- ^ Whether instance moves are allowed
518
          -> Table               -- ^ The current solution
519
          -> [Instance.Instance] -- ^ List of instances still to move
520
          -> Table               -- ^ The new solution
521
checkMove nodes_idx disk_moves inst_moves ini_tbl victims =
522
    let Table _ _ _ ini_plc = ini_tbl
523
        -- we're using rwhnf from the Control.Parallel.Strategies
524
        -- package; we don't need to use rnf as that would force too
525
        -- much evaluation in single-threaded cases, and in
526
        -- multi-threaded case the weak head normal form is enough to
527
        -- spark the evaluation
528
        tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves
529
                               inst_moves ini_tbl)
530
                 victims
531
        -- iterate over all instances, computing the best move
532
        best_tbl = foldl' compareTables ini_tbl tables
533
        Table _ _ _ best_plc = best_tbl
534
    in if length best_plc == length ini_plc
535
       then ini_tbl -- no advancement
536
       else best_tbl
537

    
538
-- | Check if we are allowed to go deeper in the balancing.
539
doNextBalance :: Table     -- ^ The starting table
540
              -> Int       -- ^ Remaining length
541
              -> Score     -- ^ Score at which to stop
542
              -> Bool      -- ^ The resulting table and commands
543
doNextBalance ini_tbl max_rounds min_score =
544
    let Table _ _ ini_cv ini_plc = ini_tbl
545
        ini_plc_len = length ini_plc
546
    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
547

    
548
-- | Run a balance move.
549
tryBalance :: Table       -- ^ The starting table
550
           -> Bool        -- ^ Allow disk moves
551
           -> Bool        -- ^ Allow instance moves
552
           -> Bool        -- ^ Only evacuate moves
553
           -> Score       -- ^ Min gain threshold
554
           -> Score       -- ^ Min gain
555
           -> Maybe Table -- ^ The resulting table and commands
556
tryBalance ini_tbl disk_moves inst_moves evac_mode mg_limit min_gain =
557
    let Table ini_nl ini_il ini_cv _ = ini_tbl
558
        all_inst = Container.elems ini_il
559
        all_inst' = if evac_mode
560
                    then let bad_nodes = map Node.idx . filter Node.offline $
561
                                         Container.elems ini_nl
562
                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
563
                                          Instance.pNode e `elem` bad_nodes)
564
                            all_inst
565
                    else all_inst
566
        reloc_inst = filter Instance.movable all_inst'
567
        node_idx = map Node.idx . filter (not . Node.offline) $
568
                   Container.elems ini_nl
569
        fin_tbl = checkMove node_idx disk_moves inst_moves ini_tbl reloc_inst
570
        (Table _ _ fin_cv _) = fin_tbl
571
    in
572
      if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
573
      then Just fin_tbl -- this round made success, return the new table
574
      else Nothing
575

    
576
-- * Allocation functions
577

    
578
-- | Build failure stats out of a list of failures.
579
collapseFailures :: [FailMode] -> FailStats
580
collapseFailures flst =
581
    map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
582

    
583
-- | Update current Allocation solution and failure stats with new
584
-- elements.
585
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
586
concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
587

    
588
concatAllocs as (OpGood ns@(_, _, _, nscore)) =
589
    let -- Choose the old or new solution, based on the cluster score
590
        cntok = asAllocs as
591
        osols = asSolutions as
592
        nsols = case osols of
593
                  [] -> [ns]
594
                  (_, _, _, oscore):[] ->
595
                      if oscore < nscore
596
                      then osols
597
                      else [ns]
598
                  -- FIXME: here we simply concat to lists with more
599
                  -- than one element; we should instead abort, since
600
                  -- this is not a valid usage of this function
601
                  xs -> ns:xs
602
        nsuc = cntok + 1
603
    -- Note: we force evaluation of nsols here in order to keep the
604
    -- memory profile low - we know that we will need nsols for sure
605
    -- in the next cycle, so we force evaluation of nsols, since the
606
    -- foldl' in the caller will only evaluate the tuple, but not the
607
    -- elements of the tuple
608
    in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
609

    
610
-- | Sums two allocation solutions (e.g. for two separate node groups).
611
sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
612
sumAllocs (AllocSolution af aa as al) (AllocSolution bf ba bs bl) =
613
    AllocSolution (af ++ bf) (aa + ba) (as ++ bs) (al ++ bl)
614

    
615
-- | Given a solution, generates a reasonable description for it.
616
describeSolution :: AllocSolution -> String
617
describeSolution as =
618
  let fcnt = asFailures as
619
      sols = asSolutions as
620
      freasons =
621
        intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
622
        filter ((> 0) . snd) . collapseFailures $ fcnt
623
  in if null sols
624
     then "No valid allocation solutions, failure reasons: " ++
625
          (if null fcnt
626
           then "unknown reasons"
627
           else freasons)
628
     else let (_, _, nodes, cv) = head sols
629
          in printf ("score: %.8f, successes %d, failures %d (%s)" ++
630
                     " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
631
             (intercalate "/" . map Node.name $ nodes)
632

    
633
-- | Annotates a solution with the appropriate string.
634
annotateSolution :: AllocSolution -> AllocSolution
635
annotateSolution as = as { asLog = describeSolution as : asLog as }
636

    
637
-- | Generate the valid node allocation singles or pairs for a new instance.
638
genAllocNodes :: Group.List        -- ^ Group list
639
              -> Node.List         -- ^ The node map
640
              -> Int               -- ^ The number of nodes required
641
              -> Bool              -- ^ Whether to drop or not
642
                                   -- unallocable nodes
643
              -> Result AllocNodes -- ^ The (monadic) result
644
genAllocNodes gl nl count drop_unalloc =
645
    let filter_fn = if drop_unalloc
646
                    then filter (Group.isAllocable .
647
                                 flip Container.find gl . Node.group)
648
                    else id
649
        all_nodes = filter_fn $ getOnline nl
650
        all_pairs = liftM2 (,) all_nodes all_nodes
651
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y &&
652
                                      Node.group x == Node.group y) all_pairs
653
    in case count of
654
         1 -> Ok (Left (map Node.idx all_nodes))
655
         2 -> Ok (Right (map (\(p, s) -> (Node.idx p, Node.idx s)) ok_pairs))
656
         _ -> Bad "Unsupported number of nodes, only one or two  supported"
657

    
658
-- | Try to allocate an instance on the cluster.
659
tryAlloc :: (Monad m) =>
660
            Node.List         -- ^ The node list
661
         -> Instance.List     -- ^ The instance list
662
         -> Instance.Instance -- ^ The instance to allocate
663
         -> AllocNodes        -- ^ The allocation targets
664
         -> m AllocSolution   -- ^ Possible solution list
665
tryAlloc nl _ inst (Right ok_pairs) =
666
    let sols = foldl' (\cstate (p, s) ->
667
                           concatAllocs cstate $ allocateOnPair nl inst p s
668
                      ) emptySolution ok_pairs
669

    
670
    in if null ok_pairs -- means we have just one node
671
       then fail "Not enough online nodes"
672
       else return $ annotateSolution sols
673

    
674
tryAlloc nl _ inst (Left all_nodes) =
675
    let sols = foldl' (\cstate ->
676
                           concatAllocs cstate . allocateOnSingle nl inst
677
                      ) emptySolution all_nodes
678
    in if null all_nodes
679
       then fail "No online nodes"
680
       else return $ annotateSolution sols
681

    
682
-- | Given a group/result, describe it as a nice (list of) messages.
683
solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
684
solutionDescription gl (groupId, result) =
685
  case result of
686
    Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution)
687
    Bad message -> [printf "Group %s: error %s" gname message]
688
  where grp = Container.find groupId gl
689
        gname = Group.name grp
690
        pol = apolToString (Group.allocPolicy grp)
691

    
692
-- | From a list of possibly bad and possibly empty solutions, filter
693
-- only the groups with a valid result. Note that the result will be
694
-- reversed compared to the original list.
695
filterMGResults :: Group.List
696
                -> [(Gdx, Result AllocSolution)]
697
                -> [(Gdx, AllocSolution)]
698
filterMGResults gl = foldl' fn []
699
    where unallocable = not . Group.isAllocable . flip Container.find gl
700
          fn accu (gdx, rasol) =
701
              case rasol of
702
                Bad _ -> accu
703
                Ok sol | null (asSolutions sol) -> accu
704
                       | unallocable gdx -> accu
705
                       | otherwise -> (gdx, sol):accu
706

    
707
-- | Sort multigroup results based on policy and score.
708
sortMGResults :: Group.List
709
             -> [(Gdx, AllocSolution)]
710
             -> [(Gdx, AllocSolution)]
711
sortMGResults gl sols =
712
    let extractScore (_, _, _, x) = x
713
        solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
714
                               (extractScore . head . asSolutions) sol)
715
    in sortBy (comparing solScore) sols
716

    
717
-- | Try to allocate an instance on a multi-group cluster.
718
tryMGAlloc :: Group.List           -- ^ The group list
719
           -> Node.List            -- ^ The node list
720
           -> Instance.List        -- ^ The instance list
721
           -> Instance.Instance    -- ^ The instance to allocate
722
           -> Int                  -- ^ Required number of nodes
723
           -> Result AllocSolution -- ^ Possible solution list
724
tryMGAlloc mggl mgnl mgil inst cnt =
725
  let groups = splitCluster mgnl mgil
726
      sols = map (\(gid, (nl, il)) ->
727
                   (gid, genAllocNodes mggl nl cnt False >>=
728
                       tryAlloc nl il inst))
729
             groups::[(Gdx, Result AllocSolution)]
730
      all_msgs = concatMap (solutionDescription mggl) sols
731
      goodSols = filterMGResults mggl sols
732
      sortedSols = sortMGResults mggl goodSols
733
  in if null sortedSols
734
     then Bad $ intercalate ", " all_msgs
735
     else let (final_group, final_sol) = head sortedSols
736
              final_name = Group.name $ Container.find final_group mggl
737
              selmsg = "Selected group: " ++  final_name
738
          in Ok $ final_sol { asLog = selmsg:all_msgs }
739

    
740
-- | Try to relocate an instance on the cluster.
741
tryReloc :: (Monad m) =>
742
            Node.List       -- ^ The node list
743
         -> Instance.List   -- ^ The instance list
744
         -> Idx             -- ^ The index of the instance to move
745
         -> Int             -- ^ The number of nodes required
746
         -> [Ndx]           -- ^ Nodes which should not be used
747
         -> m AllocSolution -- ^ Solution list
748
tryReloc nl il xid 1 ex_idx =
749
    let all_nodes = getOnline nl
750
        inst = Container.find xid il
751
        ex_idx' = Instance.pNode inst:ex_idx
752
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
753
        valid_idxes = map Node.idx valid_nodes
754
        sols1 = foldl' (\cstate x ->
755
                            let em = do
756
                                  (mnl, i, _, _) <-
757
                                      applyMove nl inst (ReplaceSecondary x)
758
                                  return (mnl, i, [Container.find x mnl],
759
                                          compCV mnl)
760
                            in concatAllocs cstate em
761
                       ) emptySolution valid_idxes
762
    in return sols1
763

    
764
tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
765
                                \destinations required (" ++ show reqn ++
766
                                                  "), only one supported"
767

    
768
tryMGReloc :: (Monad m) =>
769
              Group.List      -- ^ The group list
770
           -> Node.List       -- ^ The node list
771
           -> Instance.List   -- ^ The instance list
772
           -> Idx             -- ^ The index of the instance to move
773
           -> Int             -- ^ The number of nodes required
774
           -> [Ndx]           -- ^ Nodes which should not be used
775
           -> m AllocSolution -- ^ Solution list
776
tryMGReloc _ mgnl mgil xid ncount ex_ndx = do
777
  let groups = splitCluster mgnl mgil
778
      -- TODO: we only relocate inside the group for now
779
      inst = Container.find xid mgil
780
  (nl, il) <- case lookup (instancePriGroup mgnl inst) groups of
781
                Nothing -> fail $ "Cannot find group for instance " ++
782
                           Instance.name inst
783
                Just v -> return v
784
  tryReloc nl il xid ncount ex_ndx
785

    
786
-- | Change an instance's secondary node.
787
evacInstance :: (Monad m) =>
788
                [Ndx]                      -- ^ Excluded nodes
789
             -> Instance.List              -- ^ The current instance list
790
             -> (Node.List, AllocSolution) -- ^ The current state
791
             -> Idx                        -- ^ The instance to evacuate
792
             -> m (Node.List, AllocSolution)
793
evacInstance ex_ndx il (nl, old_as) idx = do
794
  -- FIXME: hardcoded one node here
795

    
796
  -- Longer explanation: evacuation is currently hardcoded to DRBD
797
  -- instances (which have one secondary); hence, even if the
798
  -- IAllocator protocol can request N nodes for an instance, and all
799
  -- the message parsing/loading pass this, this implementation only
800
  -- supports one; this situation needs to be revisited if we ever
801
  -- support more than one secondary, or if we change the storage
802
  -- model
803
  new_as <- tryReloc nl il idx 1 ex_ndx
804
  case asSolutions new_as of
805
    -- an individual relocation succeeded, we kind of compose the data
806
    -- from the two solutions
807
    csol@(nl', _, _, _):_ ->
808
        return (nl', new_as { asSolutions = csol:asSolutions old_as })
809
    -- this relocation failed, so we fail the entire evac
810
    _ -> fail $ "Can't evacuate instance " ++
811
         Instance.name (Container.find idx il) ++
812
             ": " ++ describeSolution new_as
813

    
814
-- | Try to evacuate a list of nodes.
815
tryEvac :: (Monad m) =>
816
            Node.List       -- ^ The node list
817
         -> Instance.List   -- ^ The instance list
818
         -> [Idx]           -- ^ Instances to be evacuated
819
         -> [Ndx]           -- ^ Restricted nodes (the ones being evacuated)
820
         -> m AllocSolution -- ^ Solution list
821
tryEvac nl il idxs ex_ndx = do
822
  (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptySolution) idxs
823
  return sol
824

    
825
-- | Multi-group evacuation of a list of nodes.
826
tryMGEvac :: (Monad m) =>
827
             Group.List -- ^ The group list
828
          -> Node.List       -- ^ The node list
829
          -> Instance.List   -- ^ The instance list
830
          -> [Ndx]           -- ^ Nodes to be evacuated
831
          -> m AllocSolution -- ^ Solution list
832
tryMGEvac _ nl il ex_ndx =
833
    let ex_nodes = map (`Container.find` nl) ex_ndx
834
        all_insts = nub . concatMap Node.sList $ ex_nodes
835
        gni = splitCluster nl il
836
        -- we run the instance index list through a couple of maps to
837
        -- get finally to a structure of the type [(group index,
838
        -- [instance indices])]
839
        all_insts' = map (\idx ->
840
                              (instancePriGroup nl (Container.find idx il),
841
                               idx)) all_insts
842
        all_insts'' = groupBy ((==) `on` fst) all_insts'
843
        all_insts3 = map (\xs -> let (gdxs, idxs) = unzip xs
844
                                 in (head gdxs, idxs)) all_insts''
845
    in do
846
      -- that done, we now add the per-group nl/il to the tuple
847
      all_insts4 <-
848
          mapM (\(gdx, idxs) ->
849
                case lookup gdx gni of
850
                    Nothing -> fail $ "Can't find group index " ++ show gdx
851
                    Just (gnl, gil) -> return (gdx, gnl, gil, idxs))
852
          all_insts3
853
      results <- mapM (\(_, gnl, gil, idxs) -> tryEvac gnl gil idxs ex_ndx)
854
                 all_insts4
855
      let sol = foldl' sumAllocs emptySolution results
856
      return $ annotateSolution sol
857

    
858
-- | Recursively place instances on the cluster until we're out of space.
859
iterateAlloc :: Node.List
860
             -> Instance.List
861
             -> Instance.Instance
862
             -> AllocNodes
863
             -> [Instance.Instance]
864
             -> [CStats]
865
             -> Result AllocResult
866
iterateAlloc nl il newinst allocnodes ixes cstats =
867
      let depth = length ixes
868
          newname = printf "new-%d" depth::String
869
          newidx = length (Container.elems il) + depth
870
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
871
      in case tryAlloc nl il newi2 allocnodes of
872
           Bad s -> Bad s
873
           Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
874
               case sols3 of
875
                 [] -> Ok (collapseFailures errs, nl, il, ixes, cstats)
876
                 (xnl, xi, _, _):[] ->
877
                     iterateAlloc xnl (Container.add newidx xi il)
878
                                  newinst allocnodes (xi:ixes)
879
                                  (totalResources xnl:cstats)
880
                 _ -> Bad "Internal error: multiple solutions for single\
881
                          \ allocation"
882

    
883
-- | The core of the tiered allocation mode.
884
tieredAlloc :: Node.List
885
            -> Instance.List
886
            -> Instance.Instance
887
            -> AllocNodes
888
            -> [Instance.Instance]
889
            -> [CStats]
890
            -> Result AllocResult
891
tieredAlloc nl il newinst allocnodes ixes cstats =
892
    case iterateAlloc nl il newinst allocnodes ixes cstats of
893
      Bad s -> Bad s
894
      Ok (errs, nl', il', ixes', cstats') ->
895
          case Instance.shrinkByType newinst . fst . last $
896
               sortBy (comparing snd) errs of
897
            Bad _ -> Ok (errs, nl', il', ixes', cstats')
898
            Ok newinst' ->
899
                tieredAlloc nl' il' newinst' allocnodes ixes' cstats'
900

    
901
-- | Compute the tiered spec string description from a list of
902
-- allocated instances.
903
tieredSpecMap :: [Instance.Instance]
904
              -> [String]
905
tieredSpecMap trl_ixes =
906
    let fin_trl_ixes = reverse trl_ixes
907
        ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
908
        spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
909
                   ix_byspec
910
    in  map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
911
                             (rspecDsk spec) (rspecCpu spec) cnt) spec_map
912

    
913
-- * Formatting functions
914

    
915
-- | Given the original and final nodes, computes the relocation description.
916
computeMoves :: Instance.Instance -- ^ The instance to be moved
917
             -> String -- ^ The instance name
918
             -> IMove  -- ^ The move being performed
919
             -> String -- ^ New primary
920
             -> String -- ^ New secondary
921
             -> (String, [String])
922
                -- ^ Tuple of moves and commands list; moves is containing
923
                -- either @/f/@ for failover or @/r:name/@ for replace
924
                -- secondary, while the command list holds gnt-instance
925
                -- commands (without that prefix), e.g \"@failover instance1@\"
926
computeMoves i inam mv c d =
927
    case mv of
928
      Failover -> ("f", [mig])
929
      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
930
      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
931
      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
932
      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
933
    where morf = if Instance.running i then "migrate" else "failover"
934
          mig = printf "%s -f %s" morf inam::String
935
          rep n = printf "replace-disks -n %s %s" n inam
936

    
937
-- | Converts a placement to string format.
938
printSolutionLine :: Node.List     -- ^ The node list
939
                  -> Instance.List -- ^ The instance list
940
                  -> Int           -- ^ Maximum node name length
941
                  -> Int           -- ^ Maximum instance name length
942
                  -> Placement     -- ^ The current placement
943
                  -> Int           -- ^ The index of the placement in
944
                                   -- the solution
945
                  -> (String, [String])
946
printSolutionLine nl il nmlen imlen plc pos =
947
    let
948
        pmlen = (2*nmlen + 1)
949
        (i, p, s, mv, c) = plc
950
        inst = Container.find i il
951
        inam = Instance.alias inst
952
        npri = Node.alias $ Container.find p nl
953
        nsec = Node.alias $ Container.find s nl
954
        opri = Node.alias $ Container.find (Instance.pNode inst) nl
955
        osec = Node.alias $ Container.find (Instance.sNode inst) nl
956
        (moves, cmds) =  computeMoves inst inam mv npri nsec
957
        ostr = printf "%s:%s" opri osec::String
958
        nstr = printf "%s:%s" npri nsec::String
959
    in
960
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
961
       pos imlen inam pmlen ostr
962
       pmlen nstr c moves,
963
       cmds)
964

    
965
-- | Return the instance and involved nodes in an instance move.
966
involvedNodes :: Instance.List -> Placement -> [Ndx]
967
involvedNodes il plc =
968
    let (i, np, ns, _, _) = plc
969
        inst = Container.find i il
970
        op = Instance.pNode inst
971
        os = Instance.sNode inst
972
    in nub [np, ns, op, os]
973

    
974
-- | Inner function for splitJobs, that either appends the next job to
975
-- the current jobset, or starts a new jobset.
976
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
977
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
978
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
979
    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
980
    | otherwise = ([n]:cjs, ndx)
981

    
982
-- | Break a list of moves into independent groups. Note that this
983
-- will reverse the order of jobs.
984
splitJobs :: [MoveJob] -> [JobSet]
985
splitJobs = fst . foldl mergeJobs ([], [])
986

    
987
-- | Given a list of commands, prefix them with @gnt-instance@ and
988
-- also beautify the display a little.
989
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
990
formatJob jsn jsl (sn, (_, _, _, cmds)) =
991
    let out =
992
            printf "  echo job %d/%d" jsn sn:
993
            printf "  check":
994
            map ("  gnt-instance " ++) cmds
995
    in if sn == 1
996
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
997
       else out
998

    
999
-- | Given a list of commands, prefix them with @gnt-instance@ and
1000
-- also beautify the display a little.
1001
formatCmds :: [JobSet] -> String
1002
formatCmds =
1003
    unlines .
1004
    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
1005
                             (zip [1..] js)) .
1006
    zip [1..]
1007

    
1008
-- | Print the node list.
1009
printNodes :: Node.List -> [String] -> String
1010
printNodes nl fs =
1011
    let fields = case fs of
1012
          [] -> Node.defaultFields
1013
          "+":rest -> Node.defaultFields ++ rest
1014
          _ -> fs
1015
        snl = sortBy (comparing Node.idx) (Container.elems nl)
1016
        (header, isnum) = unzip $ map Node.showHeader fields
1017
    in unlines . map ((:) ' ' .  intercalate " ") $
1018
       formatTable (header:map (Node.list fields) snl) isnum
1019

    
1020
-- | Print the instance list.
1021
printInsts :: Node.List -> Instance.List -> String
1022
printInsts nl il =
1023
    let sil = sortBy (comparing Instance.idx) (Container.elems il)
1024
        helper inst = [ if Instance.running inst then "R" else " "
1025
                      , Instance.name inst
1026
                      , Container.nameOf nl (Instance.pNode inst)
1027
                      , let sdx = Instance.sNode inst
1028
                        in if sdx == Node.noSecondary
1029
                           then  ""
1030
                           else Container.nameOf nl sdx
1031
                      , if Instance.autoBalance inst then "Y" else "N"
1032
                      , printf "%3d" $ Instance.vcpus inst
1033
                      , printf "%5d" $ Instance.mem inst
1034
                      , printf "%5d" $ Instance.dsk inst `div` 1024
1035
                      , printf "%5.3f" lC
1036
                      , printf "%5.3f" lM
1037
                      , printf "%5.3f" lD
1038
                      , printf "%5.3f" lN
1039
                      ]
1040
            where DynUtil lC lM lD lN = Instance.util inst
1041
        header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
1042
                 , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
1043
        isnum = False:False:False:False:False:repeat True
1044
    in unlines . map ((:) ' ' . intercalate " ") $
1045
       formatTable (header:map helper sil) isnum
1046

    
1047
-- | Shows statistics for a given node list.
1048
printStats :: Node.List -> String
1049
printStats nl =
1050
    let dcvs = compDetailedCV nl
1051
        (weights, names) = unzip detailedCVInfo
1052
        hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
1053
        formatted = map (\(w, header, val) ->
1054
                             printf "%s=%.8f(x%.2f)" header val w::String) hd
1055
    in intercalate ", " formatted
1056

    
1057
-- | Convert a placement into a list of OpCodes (basically a job).
1058
iMoveToJob :: Node.List -> Instance.List
1059
          -> Idx -> IMove -> [OpCodes.OpCode]
1060
iMoveToJob nl il idx move =
1061
    let inst = Container.find idx il
1062
        iname = Instance.name inst
1063
        lookNode  = Just . Container.nameOf nl
1064
        opF = OpCodes.OpInstanceMigrate iname True False True
1065
        opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
1066
                OpCodes.ReplaceNewSecondary [] Nothing
1067
    in case move of
1068
         Failover -> [ opF ]
1069
         ReplacePrimary np -> [ opF, opR np, opF ]
1070
         ReplaceSecondary ns -> [ opR ns ]
1071
         ReplaceAndFailover np -> [ opR np, opF ]
1072
         FailoverAndReplace ns -> [ opF, opR ns ]
1073

    
1074
-- * Node group functions
1075

    
1076
-- | Computes the group of an instance.
1077
instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
1078
instanceGroup nl i =
1079
  let sidx = Instance.sNode i
1080
      pnode = Container.find (Instance.pNode i) nl
1081
      snode = if sidx == Node.noSecondary
1082
              then pnode
1083
              else Container.find sidx nl
1084
      pgroup = Node.group pnode
1085
      sgroup = Node.group snode
1086
  in if pgroup /= sgroup
1087
     then fail ("Instance placed accross two node groups, primary " ++
1088
                show pgroup ++ ", secondary " ++ show sgroup)
1089
     else return pgroup
1090

    
1091
-- | Computes the group of an instance per the primary node.
1092
instancePriGroup :: Node.List -> Instance.Instance -> Gdx
1093
instancePriGroup nl i =
1094
  let pnode = Container.find (Instance.pNode i) nl
1095
  in  Node.group pnode
1096

    
1097
-- | Compute the list of badly allocated instances (split across node
1098
-- groups).
1099
findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
1100
findSplitInstances nl =
1101
  filter (not . isOk . instanceGroup nl) . Container.elems
1102

    
1103
-- | Splits a cluster into the component node groups.
1104
splitCluster :: Node.List -> Instance.List ->
1105
                [(Gdx, (Node.List, Instance.List))]
1106
splitCluster nl il =
1107
  let ngroups = Node.computeGroups (Container.elems nl)
1108
  in map (\(guuid, nodes) ->
1109
           let nidxs = map Node.idx nodes
1110
               nodes' = zip nidxs nodes
1111
               instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
1112
           in (guuid, (Container.fromList nodes', instances))) ngroups