root / htools / Ganeti / HTools / Cluster.hs @ 2e5eb96a

{-| Implementation of cluster-wide logic.

This module holds all pure cluster-logic; I\/O related functionality
goes into the "Main" module for the individual binaries.

-}

{-

Copyright (C) 2009, 2010, 2011 Google Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.

-}

module Ganeti.HTools.Cluster
    (
     -- * Types
      AllocSolution(..)
    , Table(..)
    , CStats(..)
    , AllocStats
    -- * Generic functions
    , totalResources
    , computeAllocationDelta
    -- * First phase functions
    , computeBadItems
    -- * Second phase functions
    , printSolutionLine
    , formatCmds
    , involvedNodes
    , splitJobs
    -- * Display functions
    , printNodes
    , printInsts
    -- * Balancing functions
    , checkMove
    , doNextBalance
    , tryBalance
    , compCV
    , compDetailedCV
    , printStats
    , iMoveToJob
    -- * IAllocator functions
    , genAllocNodes
    , tryAlloc
    , tryMGAlloc
    , tryReloc
    , tryMGReloc
    , tryEvac
    , tryMGEvac
    , collapseFailures
    -- * Allocation functions
    , iterateAlloc
    , tieredAlloc
    , tieredSpecMap
    -- * Node group functions
    , instanceGroup
    , findSplitInstances
    , splitCluster
    ) where

import Data.Function (on)
import Data.List
import Data.Ord (comparing)
import Text.Printf (printf)
import Control.Monad
import Control.Parallel.Strategies

import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Instance as Instance
import qualified Ganeti.HTools.Node as Node
import qualified Ganeti.HTools.Group as Group
import Ganeti.HTools.Types
import Ganeti.HTools.Utils
import qualified Ganeti.OpCodes as OpCodes

-- * Types

-- | Allocation\/relocation solution.
data AllocSolution = AllocSolution
  { asFailures  :: [FailMode]          -- ^ Failure counts
  , asAllocs    :: Int                 -- ^ Good allocation count
  , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
                                       -- of the list depends on the
                                       -- allocation/relocation mode
  , asLog       :: [String]            -- ^ A list of informational messages
  }

-- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
type AllocResult = (FailStats, Node.List, Instance.List,
                    [Instance.Instance], [CStats])

-- | A type denoting the valid allocation mode/pairs.
-- For a one-node allocation, this will be a @Left ['Node.Node']@,
-- whereas for a two-node allocation, this will be a @Right
-- [('Node.Node', 'Node.Node')]@.
type AllocNodes = Either [Ndx] [(Ndx, Ndx)]

-- | The empty solution we start with when computing allocations.
emptySolution :: AllocSolution
emptySolution = AllocSolution { asFailures = [], asAllocs = 0
                              , asSolutions = [], asLog = [] }

-- | The complete state for the balancing solution.
data Table = Table Node.List Instance.List Score [Placement]
             deriving (Show, Read)

-- | Cluster-wide statistics.
data CStats = CStats { csFmem :: Int    -- ^ Cluster free mem
                     , csFdsk :: Int    -- ^ Cluster free disk
                     , csAmem :: Int    -- ^ Cluster allocatable mem
                     , csAdsk :: Int    -- ^ Cluster allocatable disk
                     , csAcpu :: Int    -- ^ Cluster allocatable cpus
                     , csMmem :: Int    -- ^ Max node allocatable mem
                     , csMdsk :: Int    -- ^ Max node allocatable disk
                     , csMcpu :: Int    -- ^ Max node allocatable cpu
                     , csImem :: Int    -- ^ Instance used mem
                     , csIdsk :: Int    -- ^ Instance used disk
                     , csIcpu :: Int    -- ^ Instance used cpu
                     , csTmem :: Double -- ^ Cluster total mem
                     , csTdsk :: Double -- ^ Cluster total disk
                     , csTcpu :: Double -- ^ Cluster total cpus
                     , csVcpu :: Int    -- ^ Cluster virtual cpus (if
                                        -- node pCpu has been set,
                                        -- otherwise -1)
                     , csXmem :: Int    -- ^ Unaccounted for mem
                     , csNmem :: Int    -- ^ Node own memory
                     , csScore :: Score -- ^ The cluster score
                     , csNinst :: Int   -- ^ The total number of instances
                     }
            deriving (Show, Read)

-- | Resource usage triple: currently used, newly allocated, and
-- left unallocated (see 'computeAllocationDelta').
type AllocStats = (RSpec, RSpec, RSpec)

-- * Utility functions

-- | Verifies the N+1 status and returns the affected nodes.
verifyN1 :: [Node.Node] -> [Node.Node]
verifyN1 = filter Node.failN1

{-| Computes the pair of bad nodes and instances.

The bad node list is computed via a simple 'verifyN1' check, and the
bad instance list is the list of primary and secondary instances of
those nodes.

-}
computeBadItems :: Node.List -> Instance.List ->
                   ([Node.Node], [Instance.Instance])
computeBadItems nl il =
  let bad_nodes = verifyN1 $ getOnline nl
      bad_instances = map (`Container.find` il) .
                      sort . nub $
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
  in
    (bad_nodes, bad_instances)

-- | Zero-initializer for the CStats type.
emptyCStats :: CStats
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

-- | Update stats with data from a new node.
updateCStats :: CStats -> Node.Node -> CStats
updateCStats cs node =
    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
                 csVcpu = x_vcpu,
                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
               }
            = cs
        inc_amem = Node.fMem node - Node.rMem node
        inc_amem' = if inc_amem > 0 then inc_amem else 0
        inc_adsk = Node.availDisk node
        inc_imem = truncate (Node.tMem node) - Node.nMem node
                   - Node.xMem node - Node.fMem node
        inc_icpu = Node.uCpu node
        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
        inc_vcpu = Node.hiCpu node
        inc_acpu = Node.availCpu node

    in cs { csFmem = x_fmem + Node.fMem node
          , csFdsk = x_fdsk + Node.fDsk node
          , csAmem = x_amem + inc_amem'
          , csAdsk = x_adsk + inc_adsk
          , csAcpu = x_acpu + inc_acpu
          , csMmem = max x_mmem inc_amem'
          , csMdsk = max x_mdsk inc_adsk
          , csMcpu = max x_mcpu inc_acpu
          , csImem = x_imem + inc_imem
          , csIdsk = x_idsk + inc_idsk
          , csIcpu = x_icpu + inc_icpu
          , csTmem = x_tmem + Node.tMem node
          , csTdsk = x_tdsk + Node.tDsk node
          , csTcpu = x_tcpu + Node.tCpu node
          , csVcpu = x_vcpu + inc_vcpu
          , csXmem = x_xmem + Node.xMem node
          , csNmem = x_nmem + Node.nMem node
          , csNinst = x_ninst + length (Node.pList node)
          }

-- | Compute the total free disk and memory in the cluster.
totalResources :: Node.List -> CStats
totalResources nl =
    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
    in cs { csScore = compCV nl }

-- | Compute the delta between two cluster states.
--
-- This is used when doing allocations, to better understand the
-- available cluster resources. The return value is a triple of the
-- currently used values, the delta that was allocated, and what was
-- left unallocated.
computeAllocationDelta :: CStats -> CStats -> AllocStats
computeAllocationDelta cini cfin =
    let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
        CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
                csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
        rini = RSpec i_icpu i_imem i_idsk
        rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
        un_cpu = v_cpu - f_icpu
        runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
    in (rini, rfin, runa)
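
-- As a worked example (hypothetical numbers, not from any real
-- cluster): if initially csIcpu=4, csImem=4096 and csIdsk=100, and
-- finally csIcpu=8, csImem=8192 and csIdsk=200 with csTmem=16384,
-- csTdsk=500 and csVcpu=16, then the result is:
--
--   rini = RSpec 4 4096 100   -- used before the allocations
--   rfin = RSpec 4 4096 100   -- the delta that was allocated
--   runa = RSpec 8 8192 300   -- cpu/mem/disk left unallocated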

-- | The names and weights of the individual elements in the CV list.
detailedCVInfo :: [(Double, String)]
detailedCVInfo = [ (1,  "free_mem_cv")
                 , (1,  "free_disk_cv")
                 , (1,  "n1_cnt")
                 , (1,  "reserved_mem_cv")
                 , (4,  "offline_all_cnt")
                 , (16, "offline_pri_cnt")
                 , (1,  "vcpu_ratio_cv")
                 , (1,  "cpu_load_cv")
                 , (1,  "mem_load_cv")
                 , (1,  "disk_load_cv")
                 , (1,  "net_load_cv")
                 , (2,  "pri_tags_score")
                 ]

-- | The weights of the individual metrics, as extracted from
-- 'detailedCVInfo'.
detailedCVWeights :: [Double]
detailedCVWeights = map fst detailedCVInfo

-- | Compute the detailed cluster metrics (deviations and counts)
-- that make up the cluster score.
compDetailedCV :: Node.List -> [Double]
compDetailedCV nl =
    let
        all_nodes = Container.elems nl
        (offline, nodes) = partition Node.offline all_nodes
        mem_l = map Node.pMem nodes
        dsk_l = map Node.pDsk nodes
        -- metric: memory covariance
        mem_cv = stdDev mem_l
        -- metric: disk covariance
        dsk_cv = stdDev dsk_l
        -- metric: count of instances living on N1 failing nodes
        n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
                                                   length (Node.pList n)) .
                   filter Node.failN1 $ nodes :: Double
        res_l = map Node.pRem nodes
        -- metric: reserved memory covariance
        res_cv = stdDev res_l
        -- offline instances metrics
        offline_ipri = sum . map (length . Node.pList) $ offline
        offline_isec = sum . map (length . Node.sList) $ offline
        -- metric: count of instances on offline nodes
        off_score = fromIntegral (offline_ipri + offline_isec)::Double
        -- metric: count of primary instances on offline nodes (this
        -- helps with evacuation/failover of primary instances on
        -- 2-node clusters with one node offline)
        off_pri_score = fromIntegral offline_ipri::Double
        cpu_l = map Node.pCpu nodes
        -- metric: covariance of vcpu/pcpu ratio
        cpu_cv = stdDev cpu_l
        -- metrics: covariance of cpu, memory, disk and network load
        (c_load, m_load, d_load, n_load) = unzip4 $
            map (\n ->
                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
                         DynUtil c2 m2 d2 n2 = Node.utilPool n
                     in (c1/c2, m1/m2, d1/d2, n1/n2)
                ) nodes
        -- metric: conflicting instance count
        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
        pri_tags_score = fromIntegral pri_tags_inst::Double
    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
       , stdDev c_load, stdDev m_load, stdDev d_load, stdDev n_load
       , pri_tags_score ]

-- | Compute the /total/ cluster score, as the weighted sum of the
-- detailed metrics.
compCV :: Node.List -> Double
compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
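
-- For illustration (hypothetical metric values): with the weights
-- from 'detailedCVInfo' being [1,1,1,1,4,16,1,1,1,1,1,2], a detailed
-- metrics list of [0.1, 0.2, 0, 0.05, 0, 0, 0.3, 0, 0, 0, 0, 0]
-- gives a total score of 1*0.1 + 1*0.2 + 1*0.05 + 1*0.3 = 0.65:
--
-- > sum $ zipWith (*) detailedCVWeights [0.1, 0.2, 0, 0.05, 0, 0, 0.3, 0, 0, 0, 0, 0]
--
-- A lower score means a better-balanced cluster.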

-- | Compute online nodes from a Node.List.
getOnline :: Node.List -> [Node.Node]
getOnline = filter (not . Node.offline) . Container.elems

-- * hbal functions

-- | Compute best table. Note that the ordering of the arguments is important.
compareTables :: Table -> Table -> Table
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _) =
    if a_cv > b_cv then b else a

-- | Applies an instance move to a given node list and instance.
applyMove :: Node.List -> Instance.Instance
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
-- Failover (f)
applyMove nl inst Failover =
    let old_pdx = Instance.pNode inst
        old_sdx = Instance.sNode inst
        old_p = Container.find old_pdx nl
        old_s = Container.find old_sdx nl
        int_p = Node.removePri old_p inst
        int_s = Node.removeSec old_s inst
        force_p = Node.offline old_p
        new_nl = do -- Maybe monad
          new_p <- Node.addPriEx force_p int_s inst
          new_s <- Node.addSec int_p inst old_sdx
          let new_inst = Instance.setBoth inst old_sdx old_pdx
          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
                  new_inst, old_sdx, old_pdx)
    in new_nl

-- Replace the primary (f:, r:np, f)
applyMove nl inst (ReplacePrimary new_pdx) =
    let old_pdx = Instance.pNode inst
        old_sdx = Instance.sNode inst
        old_p = Container.find old_pdx nl
        old_s = Container.find old_sdx nl
        tgt_n = Container.find new_pdx nl
        int_p = Node.removePri old_p inst
        int_s = Node.removeSec old_s inst
        force_p = Node.offline old_p
        new_nl = do -- Maybe monad
          -- check that the current secondary can host the instance
          -- during the migration
          tmp_s <- Node.addPriEx force_p int_s inst
          let tmp_s' = Node.removePri tmp_s inst
          new_p <- Node.addPriEx force_p tgt_n inst
          new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
          let new_inst = Instance.setPri inst new_pdx
          return (Container.add new_pdx new_p $
                  Container.addTwo old_pdx int_p old_sdx new_s nl,
                  new_inst, new_pdx, old_sdx)
    in new_nl

-- Replace the secondary (r:ns)
applyMove nl inst (ReplaceSecondary new_sdx) =
    let old_pdx = Instance.pNode inst
        old_sdx = Instance.sNode inst
        old_s = Container.find old_sdx nl
        tgt_n = Container.find new_sdx nl
        int_s = Node.removeSec old_s inst
        force_s = Node.offline old_s
        new_inst = Instance.setSec inst new_sdx
        new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
                 \new_s -> return (Container.addTwo new_sdx
                                   new_s old_sdx int_s nl,
                                   new_inst, old_pdx, new_sdx)
    in new_nl

-- Replace the secondary and failover (r:np, f)
applyMove nl inst (ReplaceAndFailover new_pdx) =
    let old_pdx = Instance.pNode inst
        old_sdx = Instance.sNode inst
        old_p = Container.find old_pdx nl
        old_s = Container.find old_sdx nl
        tgt_n = Container.find new_pdx nl
        int_p = Node.removePri old_p inst
        int_s = Node.removeSec old_s inst
        force_s = Node.offline old_s
        new_nl = do -- Maybe monad
          new_p <- Node.addPri tgt_n inst
          new_s <- Node.addSecEx force_s int_p inst new_pdx
          let new_inst = Instance.setBoth inst new_pdx old_pdx
          return (Container.add new_pdx new_p $
                  Container.addTwo old_pdx new_s old_sdx int_s nl,
                  new_inst, new_pdx, old_pdx)
    in new_nl

-- Failover and replace the secondary (f, r:ns)
applyMove nl inst (FailoverAndReplace new_sdx) =
    let old_pdx = Instance.pNode inst
        old_sdx = Instance.sNode inst
        old_p = Container.find old_pdx nl
        old_s = Container.find old_sdx nl
        tgt_n = Container.find new_sdx nl
        int_p = Node.removePri old_p inst
        int_s = Node.removeSec old_s inst
        force_p = Node.offline old_p
        new_nl = do -- Maybe monad
          new_p <- Node.addPriEx force_p int_s inst
          new_s <- Node.addSecEx force_p tgt_n inst old_sdx
          let new_inst = Instance.setBoth inst old_sdx new_sdx
          return (Container.add new_sdx new_s $
                  Container.addTwo old_sdx new_p old_pdx int_p nl,
                  new_inst, old_sdx, new_sdx)
    in new_nl

-- | Tries to allocate an instance on one given node.
allocateOnSingle :: Node.List -> Instance.Instance -> Ndx
                 -> OpResult Node.AllocElement
allocateOnSingle nl inst new_pdx =
    let p = Container.find new_pdx nl
        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
    in  Node.addPri p inst >>= \new_p -> do
      let new_nl = Container.add new_pdx new_p nl
          -- score the updated list, so candidate nodes can be compared
          new_score = compCV new_nl
      return (new_nl, new_inst, [new_p], new_score)

-- | Tries to allocate an instance on a given pair of nodes.
allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx
               -> OpResult Node.AllocElement
allocateOnPair nl inst new_pdx new_sdx =
    let tgt_p = Container.find new_pdx nl
        tgt_s = Container.find new_sdx nl
    in do
      new_p <- Node.addPri tgt_p inst
      new_s <- Node.addSec tgt_s inst new_pdx
      let new_inst = Instance.setBoth inst new_pdx new_sdx
          new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
      return (new_nl, new_inst, [new_p, new_s], compCV new_nl)

-- | Tries to perform an instance move and returns the best table
-- between the original one and the new one.
checkSingleStep :: Table -- ^ The original table
                -> Instance.Instance -- ^ The instance to move
                -> Table -- ^ The current best table
                -> IMove -- ^ The move to apply
                -> Table -- ^ The final best table
checkSingleStep ini_tbl target cur_tbl move =
    let
        Table ini_nl ini_il _ ini_plc = ini_tbl
        tmp_resu = applyMove ini_nl target move
    in
      case tmp_resu of
        OpFail _ -> cur_tbl
        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
            let tgt_idx = Instance.idx target
                upd_cvar = compCV upd_nl
                upd_il = Container.add tgt_idx new_inst ini_il
                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
            in
              compareTables cur_tbl upd_tbl

-- | Given the status of the current secondary as a valid new node and
-- the current candidate target node, generate the possible moves for
-- an instance.
possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
              -> Ndx       -- ^ Target node candidate
              -> [IMove]   -- ^ List of valid result moves
possibleMoves True tdx =
    [ReplaceSecondary tdx,
     ReplaceAndFailover tdx,
     ReplacePrimary tdx,
     FailoverAndReplace tdx]

possibleMoves False tdx =
    [ReplaceSecondary tdx,
     ReplaceAndFailover tdx]

-- | Compute the best move for a given instance.
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
                  -> Bool              -- ^ Whether disk moves are allowed
                  -> Table             -- ^ Original table
                  -> Instance.Instance -- ^ Instance to move
                  -> Table             -- ^ Best new table for this instance
checkInstanceMove nodes_idx disk_moves ini_tbl target =
    let
        opdx = Instance.pNode target
        osdx = Instance.sNode target
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
        use_secondary = elem osdx nodes_idx
        aft_failover = if use_secondary -- if allowed to failover
                       then checkSingleStep ini_tbl target ini_tbl Failover
                       else ini_tbl
        all_moves = if disk_moves
                    then concatMap (possibleMoves use_secondary) nodes
                    else []
    in
      -- iterate over the possible nodes for this instance
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves

-- | Compute the best next move.
checkMove :: [Ndx]               -- ^ Allowed target node indices
          -> Bool                -- ^ Whether disk moves are allowed
          -> Table               -- ^ The current solution
          -> [Instance.Instance] -- ^ List of instances still to move
          -> Table               -- ^ The new solution
checkMove nodes_idx disk_moves ini_tbl victims =
    let Table _ _ _ ini_plc = ini_tbl
        -- we're using rwhnf from the Control.Parallel.Strategies
        -- package; we don't need to use rnf as that would force too
        -- much evaluation in single-threaded cases, and in the
        -- multi-threaded case the weak head normal form is enough to
        -- spark the evaluation
        tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves ini_tbl)
                 victims
        -- iterate over all instances, computing the best move
        best_tbl = foldl' compareTables ini_tbl tables
        Table _ _ _ best_plc = best_tbl
    in if length best_plc == length ini_plc
       then ini_tbl -- no advancement
       else best_tbl

-- | Check if we are allowed to go deeper in the balancing.
doNextBalance :: Table     -- ^ The starting table
              -> Int       -- ^ Remaining length
              -> Score     -- ^ Score at which to stop
              -> Bool      -- ^ Whether balancing should continue
doNextBalance ini_tbl max_rounds min_score =
    let Table _ _ ini_cv ini_plc = ini_tbl
        ini_plc_len = length ini_plc
    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score

-- | Run a balance move.
tryBalance :: Table       -- ^ The starting table
           -> Bool        -- ^ Allow disk moves
           -> Bool        -- ^ Only evacuate moves
           -> Score       -- ^ Min gain threshold
           -> Score       -- ^ Min gain
           -> Maybe Table -- ^ The resulting table, if an improving
                          -- move was found
tryBalance ini_tbl disk_moves evac_mode mg_limit min_gain =
    let Table ini_nl ini_il ini_cv _ = ini_tbl
        all_inst = Container.elems ini_il
        all_inst' = if evac_mode
                    then let bad_nodes = map Node.idx . filter Node.offline $
                                         Container.elems ini_nl
                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
                                          Instance.pNode e `elem` bad_nodes)
                            all_inst
                    else all_inst
        reloc_inst = filter Instance.movable all_inst'
        node_idx = map Node.idx . filter (not . Node.offline) $
                   Container.elems ini_nl
        fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
        (Table _ _ fin_cv _) = fin_tbl
    in
      if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
      then Just fin_tbl -- this round was successful, return the new table
      else Nothing

-- * Allocation functions

-- | Build failure stats out of a list of failures.
collapseFailures :: [FailMode] -> FailStats
collapseFailures flst =
    map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
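
-- For instance (hypothetical input, assuming 'FailMode' enumerates
-- memory, disk, CPU and N+1 failure modes): collapsing a failure
-- list such as @[FailMem, FailN1, FailMem]@ yields one (mode, count)
-- pair per constructor, in 'minBound' to 'maxBound' order, with a
-- count of 2 for the memory failures, 1 for the N+1 failure and 0
-- for every other mode.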

-- | Update current Allocation solution and failure stats with new
-- elements.
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }

concatAllocs as (OpGood ns@(_, _, _, nscore)) =
    let -- Choose the old or new solution, based on the cluster score
        cntok = asAllocs as
        osols = asSolutions as
        nsols = case osols of
                  [] -> [ns]
                  (_, _, _, oscore):[] ->
                      if oscore < nscore
                      then osols
                      else [ns]
                  -- FIXME: here we simply prepend to lists with more
                  -- than one element; we should instead abort, since
                  -- this is not a valid usage of this function
                  xs -> ns:xs
        nsuc = cntok + 1
    -- Note: we force evaluation of nsols here in order to keep the
    -- memory profile low - we know that we will need nsols for sure
    -- in the next cycle, so we force evaluation of nsols, since the
    -- foldl' in the caller will only evaluate the tuple, but not the
    -- elements of the tuple
    in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }

-- | Sums two allocation solutions (e.g. for two separate node groups).
sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
sumAllocs (AllocSolution af aa as al) (AllocSolution bf ba bs bl) =
    AllocSolution (af ++ bf) (aa + ba) (as ++ bs) (al ++ bl)

-- | Given a solution, generates a reasonable description for it.
describeSolution :: AllocSolution -> String
describeSolution as =
  let fcnt = asFailures as
      sols = asSolutions as
      freasons =
        intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
        filter ((> 0) . snd) . collapseFailures $ fcnt
  in if null sols
     then "No valid allocation solutions, failure reasons: " ++
          (if null fcnt
           then "unknown reasons"
           else freasons)
     else let (_, _, nodes, cv) = head sols
          in printf ("score: %.8f, successes %d, failures %d (%s)" ++
                     " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
             (intercalate "/" . map Node.name $ nodes)

-- | Annotates a solution with the appropriate string.
annotateSolution :: AllocSolution -> AllocSolution
annotateSolution as = as { asLog = describeSolution as : asLog as }

-- | Generate the valid node allocation singles or pairs for a new instance.
genAllocNodes :: Group.List        -- ^ Group list
              -> Node.List         -- ^ The node map
              -> Int               -- ^ The number of nodes required
              -> Bool              -- ^ Whether to drop unallocable
                                   -- nodes
              -> Result AllocNodes -- ^ The (monadic) result
genAllocNodes gl nl count drop_unalloc =
    let filter_fn = if drop_unalloc
                    then filter ((/=) AllocUnallocable . Group.allocPolicy .
                                     flip Container.find gl . Node.group)
                    else id
        all_nodes = filter_fn $ getOnline nl
        all_pairs = liftM2 (,) all_nodes all_nodes
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y &&
                                      Node.group x == Node.group y) all_pairs
    in case count of
         1 -> Ok (Left (map Node.idx all_nodes))
         2 -> Ok (Right (map (\(p, s) -> (Node.idx p, Node.idx s)) ok_pairs))
         _ -> Bad "Unsupported number of nodes, only one or two supported"
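
-- As an illustration (hypothetical indices): on a group with three
-- online nodes 0, 1 and 2, a one-node request returns
-- @Left [0,1,2]@, while a two-node request returns all ordered
-- same-group pairs with distinct elements, i.e.
-- @Right [(0,1),(0,2),(1,0),(1,2),(2,0),(2,1)]@.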

-- | Try to allocate an instance on the cluster.
tryAlloc :: (Monad m) =>
            Node.List         -- ^ The node list
         -> Instance.List     -- ^ The instance list
         -> Instance.Instance -- ^ The instance to allocate
         -> AllocNodes        -- ^ The allocation targets
         -> m AllocSolution   -- ^ Possible solution list
tryAlloc nl _ inst (Right ok_pairs) =
    let sols = foldl' (\cstate (p, s) ->
                           concatAllocs cstate $ allocateOnPair nl inst p s
                      ) emptySolution ok_pairs

    in if null ok_pairs -- means we have just one node
       then fail "Not enough online nodes"
       else return $ annotateSolution sols

tryAlloc nl _ inst (Left all_nodes) =
    let sols = foldl' (\cstate ->
                           concatAllocs cstate . allocateOnSingle nl inst
                      ) emptySolution all_nodes
    in if null all_nodes
       then fail "No online nodes"
       else return $ annotateSolution sols

-- | Given a group/result, describe it as a nice (list of) messages.
solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
solutionDescription gl (groupId, result) =
  case result of
    Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution)
    Bad message -> [printf "Group %s: error %s" gname message]
  where grp = Container.find groupId gl
        gname = Group.name grp
        pol = apolToString (Group.allocPolicy grp)

-- | From a list of possibly bad and possibly empty solutions, filter
-- only the groups with a valid result.
filterMGResults :: Group.List
                -> [(Gdx, Result AllocSolution)]
                -> [(Gdx, AllocSolution)]
filterMGResults gl =
  filter ((/= AllocUnallocable) . Group.allocPolicy .
             flip Container.find gl . fst) .
  filter (not . null . asSolutions . snd) .
  map (\(y, Ok x) -> (y, x)) .
  filter (isOk . snd)

-- | Sort multigroup results based on policy and score.
sortMGResults :: Group.List
              -> [(Gdx, AllocSolution)]
              -> [(Gdx, AllocSolution)]
sortMGResults gl sols =
    let extractScore (_, _, _, x) = x
        solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
                               (extractScore . head . asSolutions) sol)
    in sortBy (comparing solScore) sols

-- | Try to allocate an instance on a multi-group cluster.
tryMGAlloc :: Group.List           -- ^ The group list
           -> Node.List            -- ^ The node list
           -> Instance.List        -- ^ The instance list
           -> Instance.Instance    -- ^ The instance to allocate
           -> Int                  -- ^ Required number of nodes
           -> Result AllocSolution -- ^ Possible solution list
tryMGAlloc mggl mgnl mgil inst cnt =
  let groups = splitCluster mgnl mgil
      sols = map (\(gid, (nl, il)) ->
                   (gid, genAllocNodes mggl nl cnt False >>=
                       tryAlloc nl il inst))
             groups::[(Gdx, Result AllocSolution)]
      all_msgs = concatMap (solutionDescription mggl) sols
      goodSols = filterMGResults mggl sols
      sortedSols = sortMGResults mggl goodSols
  in if null sortedSols
     then Bad $ intercalate ", " all_msgs
     else let (final_group, final_sol) = head sortedSols
              final_name = Group.name $ Container.find final_group mggl
              selmsg = "Selected group: " ++ final_name
          in Ok $ final_sol { asLog = selmsg:all_msgs }

-- | Try to relocate an instance on the cluster.
tryReloc :: (Monad m) =>
            Node.List       -- ^ The node list
         -> Instance.List   -- ^ The instance list
         -> Idx             -- ^ The index of the instance to move
         -> Int             -- ^ The number of nodes required
         -> [Ndx]           -- ^ Nodes which should not be used
         -> m AllocSolution -- ^ Solution list
tryReloc nl il xid 1 ex_idx =
    let all_nodes = getOnline nl
        inst = Container.find xid il
        ex_idx' = Instance.pNode inst:ex_idx
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
        valid_idxes = map Node.idx valid_nodes
        sols1 = foldl' (\cstate x ->
                            let em = do
                                  (mnl, i, _, _) <-
                                      applyMove nl inst (ReplaceSecondary x)
                                  return (mnl, i, [Container.find x mnl],
                                          compCV mnl)
                            in concatAllocs cstate em
                       ) emptySolution valid_idxes
    in return sols1

tryReloc _ _ _ reqn _ = fail $ "Unsupported number of relocation \
                               \destinations required (" ++ show reqn ++
                               "), only one supported"

-- | Try to relocate an instance on a multi-group cluster.
tryMGReloc :: (Monad m) =>
              Group.List      -- ^ The group list
           -> Node.List       -- ^ The node list
           -> Instance.List   -- ^ The instance list
           -> Idx             -- ^ The index of the instance to move
           -> Int             -- ^ The number of nodes required
           -> [Ndx]           -- ^ Nodes which should not be used
           -> m AllocSolution -- ^ Solution list
tryMGReloc _ mgnl mgil xid ncount ex_ndx = do
  let groups = splitCluster mgnl mgil
      -- TODO: we only relocate inside the group for now
      inst = Container.find xid mgil
  (nl, il) <- case lookup (instancePriGroup mgnl inst) groups of
                Nothing -> fail $ "Cannot find group for instance " ++
                           Instance.name inst
                Just v -> return v
  tryReloc nl il xid ncount ex_ndx

-- | Change an instance's secondary node.
evacInstance :: (Monad m) =>
                [Ndx]                      -- ^ Excluded nodes
             -> Instance.List              -- ^ The current instance list
             -> (Node.List, AllocSolution) -- ^ The current state
             -> Idx                        -- ^ The instance to evacuate
             -> m (Node.List, AllocSolution)
evacInstance ex_ndx il (nl, old_as) idx = do
  -- FIXME: hardcoded one node here

  -- Longer explanation: evacuation is currently hardcoded to DRBD
  -- instances (which have one secondary); hence, even if the
  -- IAllocator protocol can request N nodes for an instance, and all
  -- the message parsing/loading pass this, this implementation only
  -- supports one; this situation needs to be revisited if we ever
  -- support more than one secondary, or if we change the storage
  -- model
  new_as <- tryReloc nl il idx 1 ex_ndx
  case asSolutions new_as of
    -- an individual relocation succeeded, we kind of compose the data
    -- from the two solutions
    csol@(nl', _, _, _):_ ->
        return (nl', new_as { asSolutions = csol:asSolutions old_as })
    -- this relocation failed, so we fail the entire evac
    _ -> fail $ "Can't evacuate instance " ++
         Instance.name (Container.find idx il) ++
             ": " ++ describeSolution new_as

-- | Try to evacuate a list of nodes.
tryEvac :: (Monad m) =>
            Node.List       -- ^ The node list
         -> Instance.List   -- ^ The instance list
         -> [Idx]           -- ^ Instances to be evacuated
         -> [Ndx]           -- ^ Restricted nodes (the ones being evacuated)
         -> m AllocSolution -- ^ Solution list
tryEvac nl il idxs ex_ndx = do
  (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptySolution) idxs
  return sol

-- | Multi-group evacuation of a list of nodes.
tryMGEvac :: (Monad m) =>
             Group.List -- ^ The group list
          -> Node.List       -- ^ The node list
          -> Instance.List   -- ^ The instance list
          -> [Ndx]           -- ^ Nodes to be evacuated
          -> m AllocSolution -- ^ Solution list
tryMGEvac _ nl il ex_ndx =
    let ex_nodes = map (`Container.find` nl) ex_ndx
        all_insts = nub . concatMap Node.sList $ ex_nodes
        gni = splitCluster nl il
        -- we run the instance index list through a couple of maps to
        -- finally get to a structure of the type [(group index,
        -- [instance indices])]
        all_insts' = map (\idx ->
                              (instancePriGroup nl (Container.find idx il),
                               idx)) all_insts
        all_insts'' = groupBy ((==) `on` fst) all_insts'
        all_insts3 = map (\xs -> let (gdxs, idxs) = unzip xs
                                 in (head gdxs, idxs)) all_insts''
    in do
      -- that done, we now add the per-group nl/il to the tuple
      all_insts4 <-
          mapM (\(gdx, idxs) ->
                case lookup gdx gni of
                    Nothing -> fail $ "Can't find group index " ++ show gdx
                    Just (gnl, gil) -> return (gdx, gnl, gil, idxs))
          all_insts3
      results <- mapM (\(_, gnl, gil, idxs) -> tryEvac gnl gil idxs ex_ndx)
                 all_insts4
      let sol = foldl' sumAllocs emptySolution results
      return $ annotateSolution sol

-- | Recursively place instances on the cluster until we're out of space.
iterateAlloc :: Node.List
             -> Instance.List
             -> Instance.Instance
             -> AllocNodes
             -> [Instance.Instance]
             -> [CStats]
             -> Result AllocResult
iterateAlloc nl il newinst allocnodes ixes cstats =
      let depth = length ixes
          newname = printf "new-%d" depth::String
          newidx = length (Container.elems il) + depth
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
      in case tryAlloc nl il newi2 allocnodes of
           Bad s -> Bad s
           Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
               case sols3 of
                 [] -> Ok (collapseFailures errs, nl, il, ixes, cstats)
                 (xnl, xi, _, _):[] ->
                     iterateAlloc xnl (Container.add newidx xi il)
                                  newinst allocnodes (xi:ixes)
                                  (totalResources xnl:cstats)
                 _ -> Bad "Internal error: multiple solutions for single\
                          \ allocation"

-- | The core of the tiered allocation mode.
tieredAlloc :: Node.List
            -> Instance.List
            -> Instance.Instance
            -> AllocNodes
            -> [Instance.Instance]
            -> [CStats]
            -> Result AllocResult
tieredAlloc nl il newinst allocnodes ixes cstats =
    case iterateAlloc nl il newinst allocnodes ixes cstats of
      Bad s -> Bad s
      Ok (errs, nl', il', ixes', cstats') ->
          case Instance.shrinkByType newinst . fst . last $
               sortBy (comparing snd) errs of
            Bad _ -> Ok (errs, nl', il', ixes', cstats')
            Ok newinst' ->
                tieredAlloc nl' il' newinst' allocnodes ixes' cstats'

-- | Compute the tiered spec string description from a list of
884
-- allocated instances.
885
tieredSpecMap :: [Instance.Instance]
886
              -> [String]
887
tieredSpecMap trl_ixes =
888
    let fin_trl_ixes = reverse trl_ixes
889
        ix_byspec = groupBy ((==) `on` Instance.specOf) fin_trl_ixes
890
        spec_map = map (\ixs -> (Instance.specOf $ head ixs, length ixs))
891
                   ix_byspec
892
    in  map (\(spec, cnt) -> printf "%d,%d,%d=%d" (rspecMem spec)
893
                             (rspecDsk spec) (rspecCpu spec) cnt) spec_map
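
-- Each output line has the form @mem,disk,cpu=count@; e.g. (made-up
-- figures) four instances allocated at a spec of mem=1024,
-- disk=51200 and cpu=2 would be rendered as @"1024,51200,2=4"@.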

-- * Formatting functions

-- | Given the original and final nodes, computes the relocation description.
computeMoves :: Instance.Instance -- ^ The instance to be moved
             -> String -- ^ The instance name
             -> IMove  -- ^ The move being performed
             -> String -- ^ New primary
             -> String -- ^ New secondary
             -> (String, [String])
                -- ^ Tuple of moves and commands list; the moves string
                -- contains either @/f/@ for failover or @/r:name/@ for
                -- replace secondary, while the command list holds
                -- gnt-instance commands (without that prefix),
                -- e.g. \"@failover instance1@\"
computeMoves i inam mv c d =
    case mv of
      Failover -> ("f", [mig])
      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
    where morf = if Instance.running i then "migrate" else "failover"
          mig = printf "%s -f %s" morf inam::String
          rep n = printf "replace-disks -n %s %s" n inam

-- | Converts a placement to string format.
printSolutionLine :: Node.List     -- ^ The node list
                  -> Instance.List -- ^ The instance list
                  -> Int           -- ^ Maximum node name length
                  -> Int           -- ^ Maximum instance name length
                  -> Placement     -- ^ The current placement
                  -> Int           -- ^ The index of the placement in
                                   -- the solution
                  -> (String, [String])
printSolutionLine nl il nmlen imlen plc pos =
    let
        pmlen = (2*nmlen + 1)
        (i, p, s, mv, c) = plc
        inst = Container.find i il
        inam = Instance.alias inst
        npri = Node.alias $ Container.find p nl
        nsec = Node.alias $ Container.find s nl
        opri = Node.alias $ Container.find (Instance.pNode inst) nl
        osec = Node.alias $ Container.find (Instance.sNode inst) nl
        (moves, cmds) = computeMoves inst inam mv npri nsec
        ostr = printf "%s:%s" opri osec::String
        nstr = printf "%s:%s" npri nsec::String
    in
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
       pos imlen inam pmlen ostr
       pmlen nstr c moves,
       cmds)

-- | Return the instance and involved nodes in an instance move.
involvedNodes :: Instance.List -> Placement -> [Ndx]
involvedNodes il plc =
    let (i, np, ns, _, _) = plc
        inst = Container.find i il
        op = Instance.pNode inst
        os = Instance.sNode inst
    in nub [np, ns, op, os]

-- | Inner function for splitJobs, that either appends the next job to
-- the current jobset, or starts a new jobset.
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
    | otherwise = ([n]:cjs, ndx)

-- | Break a list of moves into independent groups. Note that this
-- will reverse the order of jobs.
splitJobs :: [MoveJob] -> [JobSet]
splitJobs = fst . foldl mergeJobs ([], [])
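
-- To illustrate the grouping (hypothetical jobs): if job A touches
-- nodes [1,2], job B touches [3,4] and job C touches [2,5], then A
-- and B land in the same jobset (their node sets are disjoint),
-- while C conflicts with the accumulated node buffer and opens a
-- new jobset; the result is @[[C], [B, A]]@, i.e. in reversed order.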

-- | Given a list of commands, prefix them with @gnt-instance@ and
-- also beautify the display a little.
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
formatJob jsn jsl (sn, (_, _, _, cmds)) =
    let out =
            printf "  echo job %d/%d" jsn sn:
            printf "  check":
            map ("  gnt-instance " ++) cmds
    in if sn == 1
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
       else out

-- | Given a list of job sets, format them as a single multi-line
-- string, with each job rendered via 'formatJob'.
formatCmds :: [JobSet] -> String
formatCmds =
    unlines .
    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
                             (zip [1..] js)) .
    zip [1..]
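
-- The output reads like a commented shell script; for a single
-- jobset with one job carrying a failover command (hypothetical
-- instance name), it would look like:
--
-- > echo jobset 1, 1 jobs
-- >   echo job 1/1
-- >   check
-- >   gnt-instance failover -f instance1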

-- | Print the node list.
printNodes :: Node.List -> [String] -> String
printNodes nl fs =
    let fields = case fs of
          [] -> Node.defaultFields
          "+":rest -> Node.defaultFields ++ rest
          _ -> fs
        snl = sortBy (comparing Node.idx) (Container.elems nl)
        (header, isnum) = unzip $ map Node.showHeader fields
    in unlines . map ((:) ' ' . intercalate " ") $
       formatTable (header:map (Node.list fields) snl) isnum

-- | Print the instance list.
printInsts :: Node.List -> Instance.List -> String
printInsts nl il =
    let sil = sortBy (comparing Instance.idx) (Container.elems il)
        helper inst = [ if Instance.running inst then "R" else " "
                      , Instance.name inst
                      , Container.nameOf nl (Instance.pNode inst)
                      , let sdx = Instance.sNode inst
                        in if sdx == Node.noSecondary
                           then ""
                           else Container.nameOf nl sdx
                      , if Instance.auto_balance inst then "Y" else "N"
                      , printf "%3d" $ Instance.vcpus inst
                      , printf "%5d" $ Instance.mem inst
                      , printf "%5d" $ Instance.dsk inst `div` 1024
                      , printf "%5.3f" lC
                      , printf "%5.3f" lM
                      , printf "%5.3f" lD
                      , printf "%5.3f" lN
                      ]
            where DynUtil lC lM lD lN = Instance.util inst
        header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
                 , "vcpu", "mem", "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
        isnum = False:False:False:False:False:repeat True
    in unlines . map ((:) ' ' . intercalate " ") $
       formatTable (header:map helper sil) isnum

-- | Shows statistics for a given node list.
printStats :: Node.List -> String
printStats nl =
    let dcvs = compDetailedCV nl
        (weights, names) = unzip detailedCVInfo
        hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
        formatted = map (\(w, header, val) ->
                             printf "%s=%.8f(x%.2f)" header val w::String) hd
    in intercalate ", " formatted
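
-- Each metric is rendered as @name=value(xweight)@; e.g. (made-up
-- values) a fragment of the output could read:
--
-- > free_mem_cv=0.01234567(x1.00), offline_pri_cnt=0.00000000(x16.00)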

-- | Convert a placement into a list of OpCodes (basically a job).
iMoveToJob :: Node.List -> Instance.List
           -> Idx -> IMove -> [OpCodes.OpCode]
iMoveToJob nl il idx move =
    let inst = Container.find idx il
        iname = Instance.name inst
        lookNode = Just . Container.nameOf nl
        opF = OpCodes.OpInstanceMigrate iname True False True
        opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
                OpCodes.ReplaceNewSecondary [] Nothing
    in case move of
         Failover -> [ opF ]
         ReplacePrimary np -> [ opF, opR np, opF ]
         ReplaceSecondary ns -> [ opR ns ]
         ReplaceAndFailover np -> [ opR np, opF ]
         FailoverAndReplace ns -> [ opF, opR ns ]

-- * Node group functions

-- | Computes the group of an instance.
instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
instanceGroup nl i =
  let sidx = Instance.sNode i
      pnode = Container.find (Instance.pNode i) nl
      snode = if sidx == Node.noSecondary
              then pnode
              else Container.find sidx nl
      pgroup = Node.group pnode
      sgroup = Node.group snode
  in if pgroup /= sgroup
     then fail ("Instance placed across two node groups, primary " ++
                show pgroup ++ ", secondary " ++ show sgroup)
     else return pgroup

-- | Computes the group of an instance per the primary node.
instancePriGroup :: Node.List -> Instance.Instance -> Gdx
instancePriGroup nl i =
  let pnode = Container.find (Instance.pNode i) nl
  in  Node.group pnode

-- | Compute the list of badly allocated instances (split across node
-- groups).
findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
findSplitInstances nl =
  filter (not . isOk . instanceGroup nl) . Container.elems

-- | Splits a cluster into the component node groups.
splitCluster :: Node.List -> Instance.List ->
                [(Gdx, (Node.List, Instance.List))]
splitCluster nl il =
  let ngroups = Node.computeGroups (Container.elems nl)
  in map (\(guuid, nodes) ->
           let nidxs = map Node.idx nodes
               nodes' = zip nidxs nodes
               instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
           in (guuid, (Container.fromList nodes', instances))) ngroups