Statistics
| Branch: | Tag: | Revision:

root / Ganeti / HTools / Cluster.hs @ 848b65c9

History | View | Annotate | Download (33.9 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the "Main" module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009, 2010 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
    (
31
     -- * Types
32
      AllocSolution
33
    , Table(..)
34
    , CStats(..)
35
    , AllocStats
36
    -- * Generic functions
37
    , totalResources
38
    , computeAllocationDelta
39
    -- * First phase functions
40
    , computeBadItems
41
    -- * Second phase functions
42
    , printSolutionLine
43
    , formatCmds
44
    , involvedNodes
45
    , splitJobs
46
    -- * Display functions
47
    , printNodes
48
    , printInsts
49
    -- * Balacing functions
50
    , checkMove
51
    , doNextBalance
52
    , tryBalance
53
    , compCV
54
    , printStats
55
    , iMoveToJob
56
    -- * IAllocator functions
57
    , tryAlloc
58
    , tryReloc
59
    , tryEvac
60
    , collapseFailures
61
    -- * Allocation functions
62
    , iterateAlloc
63
    , tieredAlloc
64
    ) where
65

    
66
import Data.List
67
import Data.Ord (comparing)
68
import Text.Printf (printf)
69
import Control.Monad
70

    
71
import qualified Ganeti.HTools.Container as Container
72
import qualified Ganeti.HTools.Instance as Instance
73
import qualified Ganeti.HTools.Node as Node
74
import Ganeti.HTools.Types
75
import Ganeti.HTools.Utils
76
import qualified Ganeti.OpCodes as OpCodes
77

    
78
-- * Types
79

    
80
-- | Allocation\/relocation solution.
81
type AllocSolution = ([FailMode], Int, [(Score, Node.AllocElement)])
82

    
83
-- | The complete state for the balancing solution
84
data Table = Table Node.List Instance.List Score [Placement]
85
             deriving (Show)
86

    
87
data CStats = CStats { csFmem :: Int    -- ^ Cluster free mem
88
                     , csFdsk :: Int    -- ^ Cluster free disk
89
                     , csAmem :: Int    -- ^ Cluster allocatable mem
90
                     , csAdsk :: Int    -- ^ Cluster allocatable disk
91
                     , csAcpu :: Int    -- ^ Cluster allocatable cpus
92
                     , csMmem :: Int    -- ^ Max node allocatable mem
93
                     , csMdsk :: Int    -- ^ Max node allocatable disk
94
                     , csMcpu :: Int    -- ^ Max node allocatable cpu
95
                     , csImem :: Int    -- ^ Instance used mem
96
                     , csIdsk :: Int    -- ^ Instance used disk
97
                     , csIcpu :: Int    -- ^ Instance used cpu
98
                     , csTmem :: Double -- ^ Cluster total mem
99
                     , csTdsk :: Double -- ^ Cluster total disk
100
                     , csTcpu :: Double -- ^ Cluster total cpus
101
                     , csVcpu :: Int    -- ^ Cluster virtual cpus (if
102
                                        -- node pCpu has been set,
103
                                        -- otherwise -1)
104
                     , csXmem :: Int    -- ^ Unnacounted for mem
105
                     , csNmem :: Int    -- ^ Node own memory
106
                     , csScore :: Score -- ^ The cluster score
107
                     , csNinst :: Int   -- ^ The total number of instances
108
                     }
109
            deriving (Show)
110

    
111
-- | Currently used, possibly to allocate, unallocable
112
type AllocStats = (RSpec, RSpec, RSpec)
113

    
114
-- * Utility functions
115

    
116
-- | Verifies the N+1 status and return the affected nodes.
117
verifyN1 :: [Node.Node] -> [Node.Node]
118
verifyN1 = filter Node.failN1
119

    
120
{-| Computes the pair of bad nodes and instances.
121

    
122
The bad node list is computed via a simple 'verifyN1' check, and the
123
bad instance list is the list of primary and secondary instances of
124
those nodes.
125

    
126
-}
127
computeBadItems :: Node.List -> Instance.List ->
128
                   ([Node.Node], [Instance.Instance])
129
computeBadItems nl il =
130
  let bad_nodes = verifyN1 $ getOnline nl
131
      bad_instances = map (`Container.find` il) .
132
                      sort . nub $
133
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
134
  in
135
    (bad_nodes, bad_instances)
136

    
137
-- | Zero-initializer for the CStats type
138
emptyCStats :: CStats
139
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
140

    
141
-- | Update stats with data from a new node
142
updateCStats :: CStats -> Node.Node -> CStats
143
updateCStats cs node =
144
    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
145
                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
146
                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
147
                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
148
                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
149
                 csVcpu = x_vcpu,
150
                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
151
               }
152
            = cs
153
        inc_amem = Node.fMem node - Node.rMem node
154
        inc_amem' = if inc_amem > 0 then inc_amem else 0
155
        inc_adsk = Node.availDisk node
156
        inc_imem = truncate (Node.tMem node) - Node.nMem node
157
                   - Node.xMem node - Node.fMem node
158
        inc_icpu = Node.uCpu node
159
        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
160
        inc_vcpu = Node.hiCpu node
161

    
162
    in cs { csFmem = x_fmem + Node.fMem node
163
          , csFdsk = x_fdsk + Node.fDsk node
164
          , csAmem = x_amem + inc_amem'
165
          , csAdsk = x_adsk + inc_adsk
166
          , csAcpu = x_acpu
167
          , csMmem = max x_mmem inc_amem'
168
          , csMdsk = max x_mdsk inc_adsk
169
          , csMcpu = x_mcpu
170
          , csImem = x_imem + inc_imem
171
          , csIdsk = x_idsk + inc_idsk
172
          , csIcpu = x_icpu + inc_icpu
173
          , csTmem = x_tmem + Node.tMem node
174
          , csTdsk = x_tdsk + Node.tDsk node
175
          , csTcpu = x_tcpu + Node.tCpu node
176
          , csVcpu = x_vcpu + inc_vcpu
177
          , csXmem = x_xmem + Node.xMem node
178
          , csNmem = x_nmem + Node.nMem node
179
          , csNinst = x_ninst + length (Node.pList node)
180
          }
181

    
182
-- | Compute the total free disk and memory in the cluster.
183
totalResources :: Node.List -> CStats
184
totalResources nl =
185
    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
186
    in cs { csScore = compCV nl }
187

    
188
-- | Compute the delta between two cluster state.
189
--
190
-- This is used when doing allocations, to understand better the
191
-- available cluster resources. The return value is a triple of the
192
-- current used values, the delta that was still allocated, and what
193
-- was left unallocated.
194
computeAllocationDelta :: CStats -> CStats -> AllocStats
195
computeAllocationDelta cini cfin =
196
    let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
197
        CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
198
                csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
199
        rini = RSpec i_icpu i_imem i_idsk
200
        rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
201
        un_cpu = v_cpu - f_icpu
202
        runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
203
    in (rini, rfin, runa)
204

    
205
-- | The names and weights of the individual elements in the CV list
206
detailedCVInfo :: [(Double, String)]
207
detailedCVInfo = [ (1,  "free_mem_cv")
208
                 , (1,  "free_disk_cv")
209
                 , (1,  "n1_cnt")
210
                 , (1,  "reserved_mem_cv")
211
                 , (4,  "offline_all_cnt")
212
                 , (16, "offline_pri_cnt")
213
                 , (1,  "vcpu_ratio_cv")
214
                 , (1,  "cpu_load_cv")
215
                 , (1,  "mem_load_cv")
216
                 , (1,  "disk_load_cv")
217
                 , (1,  "net_load_cv")
218
                 , (1,  "pri_tags_score")
219
                 ]
220

    
221
detailedCVWeights :: [Double]
222
detailedCVWeights = map fst detailedCVInfo
223

    
224
-- | Compute the mem and disk covariance.
225
compDetailedCV :: Node.List -> [Double]
226
compDetailedCV nl =
227
    let
228
        all_nodes = Container.elems nl
229
        (offline, nodes) = partition Node.offline all_nodes
230
        mem_l = map Node.pMem nodes
231
        dsk_l = map Node.pDsk nodes
232
        -- metric: memory covariance
233
        mem_cv = varianceCoeff mem_l
234
        -- metric: disk covariance
235
        dsk_cv = varianceCoeff dsk_l
236
        -- metric: count of instances living on N1 failing nodes
237
        n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
238
                                                   length (Node.pList n)) .
239
                   filter Node.failN1 $ nodes :: Double
240
        res_l = map Node.pRem nodes
241
        -- metric: reserved memory covariance
242
        res_cv = varianceCoeff res_l
243
        -- offline instances metrics
244
        offline_ipri = sum . map (length . Node.pList) $ offline
245
        offline_isec = sum . map (length . Node.sList) $ offline
246
        -- metric: count of instances on offline nodes
247
        off_score = fromIntegral (offline_ipri + offline_isec)::Double
248
        -- metric: count of primary instances on offline nodes (this
249
        -- helps with evacuation/failover of primary instances on
250
        -- 2-node clusters with one node offline)
251
        off_pri_score = fromIntegral offline_ipri::Double
252
        cpu_l = map Node.pCpu nodes
253
        -- metric: covariance of vcpu/pcpu ratio
254
        cpu_cv = varianceCoeff cpu_l
255
        -- metrics: covariance of cpu, memory, disk and network load
256
        (c_load, m_load, d_load, n_load) = unzip4 $
257
            map (\n ->
258
                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
259
                         DynUtil c2 m2 d2 n2 = Node.utilPool n
260
                     in (c1/c2, m1/m2, d1/d2, n1/n2)
261
                ) nodes
262
        -- metric: conflicting instance count
263
        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
264
        pri_tags_score = fromIntegral pri_tags_inst::Double
265
    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
266
       , varianceCoeff c_load, varianceCoeff m_load
267
       , varianceCoeff d_load, varianceCoeff n_load
268
       , pri_tags_score ]
269

    
270
-- | Compute the /total/ variance.
271
compCV :: Node.List -> Double
272
compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
273

    
274
-- | Compute online nodes from a Node.List
275
getOnline :: Node.List -> [Node.Node]
276
getOnline = filter (not . Node.offline) . Container.elems
277

    
278
-- * hbal functions
279

    
280
-- | Compute best table. Note that the ordering of the arguments is important.
281
compareTables :: Table -> Table -> Table
282
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
283
    if a_cv > b_cv then b else a
284

    
285
-- | Applies an instance move to a given node list and instance.
286
applyMove :: Node.List -> Instance.Instance
287
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
288
-- Failover (f)
289
applyMove nl inst Failover =
290
    let old_pdx = Instance.pNode inst
291
        old_sdx = Instance.sNode inst
292
        old_p = Container.find old_pdx nl
293
        old_s = Container.find old_sdx nl
294
        int_p = Node.removePri old_p inst
295
        int_s = Node.removeSec old_s inst
296
        force_p = Node.offline old_p
297
        new_nl = do -- Maybe monad
298
          new_p <- Node.addPriEx force_p int_s inst
299
          new_s <- Node.addSec int_p inst old_sdx
300
          let new_inst = Instance.setBoth inst old_sdx old_pdx
301
          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
302
                  new_inst, old_sdx, old_pdx)
303
    in new_nl
304

    
305
-- Replace the primary (f:, r:np, f)
306
applyMove nl inst (ReplacePrimary new_pdx) =
307
    let old_pdx = Instance.pNode inst
308
        old_sdx = Instance.sNode inst
309
        old_p = Container.find old_pdx nl
310
        old_s = Container.find old_sdx nl
311
        tgt_n = Container.find new_pdx nl
312
        int_p = Node.removePri old_p inst
313
        int_s = Node.removeSec old_s inst
314
        force_p = Node.offline old_p
315
        new_nl = do -- Maybe monad
316
          -- check that the current secondary can host the instance
317
          -- during the migration
318
          tmp_s <- Node.addPriEx force_p int_s inst
319
          let tmp_s' = Node.removePri tmp_s inst
320
          new_p <- Node.addPriEx force_p tgt_n inst
321
          new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
322
          let new_inst = Instance.setPri inst new_pdx
323
          return (Container.add new_pdx new_p $
324
                  Container.addTwo old_pdx int_p old_sdx new_s nl,
325
                  new_inst, new_pdx, old_sdx)
326
    in new_nl
327

    
328
-- Replace the secondary (r:ns)
329
applyMove nl inst (ReplaceSecondary new_sdx) =
330
    let old_pdx = Instance.pNode inst
331
        old_sdx = Instance.sNode inst
332
        old_s = Container.find old_sdx nl
333
        tgt_n = Container.find new_sdx nl
334
        int_s = Node.removeSec old_s inst
335
        force_s = Node.offline old_s
336
        new_inst = Instance.setSec inst new_sdx
337
        new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
338
                 \new_s -> return (Container.addTwo new_sdx
339
                                   new_s old_sdx int_s nl,
340
                                   new_inst, old_pdx, new_sdx)
341
    in new_nl
342

    
343
-- Replace the secondary and failover (r:np, f)
344
applyMove nl inst (ReplaceAndFailover new_pdx) =
345
    let old_pdx = Instance.pNode inst
346
        old_sdx = Instance.sNode inst
347
        old_p = Container.find old_pdx nl
348
        old_s = Container.find old_sdx nl
349
        tgt_n = Container.find new_pdx nl
350
        int_p = Node.removePri old_p inst
351
        int_s = Node.removeSec old_s inst
352
        force_s = Node.offline old_s
353
        new_nl = do -- Maybe monad
354
          new_p <- Node.addPri tgt_n inst
355
          new_s <- Node.addSecEx force_s int_p inst new_pdx
356
          let new_inst = Instance.setBoth inst new_pdx old_pdx
357
          return (Container.add new_pdx new_p $
358
                  Container.addTwo old_pdx new_s old_sdx int_s nl,
359
                  new_inst, new_pdx, old_pdx)
360
    in new_nl
361

    
362
-- Failver and replace the secondary (f, r:ns)
363
applyMove nl inst (FailoverAndReplace new_sdx) =
364
    let old_pdx = Instance.pNode inst
365
        old_sdx = Instance.sNode inst
366
        old_p = Container.find old_pdx nl
367
        old_s = Container.find old_sdx nl
368
        tgt_n = Container.find new_sdx nl
369
        int_p = Node.removePri old_p inst
370
        int_s = Node.removeSec old_s inst
371
        force_p = Node.offline old_p
372
        new_nl = do -- Maybe monad
373
          new_p <- Node.addPriEx force_p int_s inst
374
          new_s <- Node.addSecEx force_p tgt_n inst old_sdx
375
          let new_inst = Instance.setBoth inst old_sdx new_sdx
376
          return (Container.add new_sdx new_s $
377
                  Container.addTwo old_sdx new_p old_pdx int_p nl,
378
                  new_inst, old_sdx, new_sdx)
379
    in new_nl
380

    
381
-- | Tries to allocate an instance on one given node.
382
allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
383
                 -> OpResult Node.AllocElement
384
allocateOnSingle nl inst p =
385
    let new_pdx = Node.idx p
386
        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
387
        new_nl = Node.addPri p inst >>= \new_p ->
388
                 return (Container.add new_pdx new_p nl, new_inst, [new_p])
389
    in new_nl
390

    
391
-- | Tries to allocate an instance on a given pair of nodes.
392
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
393
               -> OpResult Node.AllocElement
394
allocateOnPair nl inst tgt_p tgt_s =
395
    let new_pdx = Node.idx tgt_p
396
        new_sdx = Node.idx tgt_s
397
        new_nl = do -- Maybe monad
398
          new_p <- Node.addPri tgt_p inst
399
          new_s <- Node.addSec tgt_s inst new_pdx
400
          let new_inst = Instance.setBoth inst new_pdx new_sdx
401
          return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst,
402
                 [new_p, new_s])
403
    in new_nl
404

    
405
-- | Tries to perform an instance move and returns the best table
406
-- between the original one and the new one.
407
checkSingleStep :: Table -- ^ The original table
408
                -> Instance.Instance -- ^ The instance to move
409
                -> Table -- ^ The current best table
410
                -> IMove -- ^ The move to apply
411
                -> Table -- ^ The final best table
412
checkSingleStep ini_tbl target cur_tbl move =
413
    let
414
        Table ini_nl ini_il _ ini_plc = ini_tbl
415
        tmp_resu = applyMove ini_nl target move
416
    in
417
      case tmp_resu of
418
        OpFail _ -> cur_tbl
419
        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
420
            let tgt_idx = Instance.idx target
421
                upd_cvar = compCV upd_nl
422
                upd_il = Container.add tgt_idx new_inst ini_il
423
                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
424
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
425
            in
426
              compareTables cur_tbl upd_tbl
427

    
428
-- | Given the status of the current secondary as a valid new node and
429
-- the current candidate target node, generate the possible moves for
430
-- a instance.
431
possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
432
              -> Ndx       -- ^ Target node candidate
433
              -> [IMove]   -- ^ List of valid result moves
434
possibleMoves True tdx =
435
    [ReplaceSecondary tdx,
436
     ReplaceAndFailover tdx,
437
     ReplacePrimary tdx,
438
     FailoverAndReplace tdx]
439

    
440
possibleMoves False tdx =
441
    [ReplaceSecondary tdx,
442
     ReplaceAndFailover tdx]
443

    
444
-- | Compute the best move for a given instance.
445
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
446
                  -> Bool              -- ^ Whether disk moves are allowed
447
                  -> Table             -- ^ Original table
448
                  -> Instance.Instance -- ^ Instance to move
449
                  -> Table             -- ^ Best new table for this instance
450
checkInstanceMove nodes_idx disk_moves ini_tbl target =
451
    let
452
        opdx = Instance.pNode target
453
        osdx = Instance.sNode target
454
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
455
        use_secondary = elem osdx nodes_idx
456
        aft_failover = if use_secondary -- if allowed to failover
457
                       then checkSingleStep ini_tbl target ini_tbl Failover
458
                       else ini_tbl
459
        all_moves = if disk_moves
460
                    then concatMap (possibleMoves use_secondary) nodes
461
                    else []
462
    in
463
      -- iterate over the possible nodes for this instance
464
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
465

    
466
-- | Compute the best next move.
467
checkMove :: [Ndx]               -- ^ Allowed target node indices
468
          -> Bool                -- ^ Whether disk moves are allowed
469
          -> Table               -- ^ The current solution
470
          -> [Instance.Instance] -- ^ List of instances still to move
471
          -> Table               -- ^ The new solution
472
checkMove nodes_idx disk_moves ini_tbl victims =
473
    let Table _ _ _ ini_plc = ini_tbl
474
        -- iterate over all instances, computing the best move
475
        best_tbl =
476
            foldl'
477
            (\ step_tbl em ->
478
                 compareTables step_tbl $
479
                 checkInstanceMove nodes_idx disk_moves ini_tbl em)
480
            ini_tbl victims
481
        Table _ _ _ best_plc = best_tbl
482
    in if length best_plc == length ini_plc
483
       then ini_tbl -- no advancement
484
       else best_tbl
485

    
486
-- | Check if we are allowed to go deeper in the balancing
487
doNextBalance :: Table     -- ^ The starting table
488
              -> Int       -- ^ Remaining length
489
              -> Score     -- ^ Score at which to stop
490
              -> Bool      -- ^ The resulting table and commands
491
doNextBalance ini_tbl max_rounds min_score =
492
    let Table _ _ ini_cv ini_plc = ini_tbl
493
        ini_plc_len = length ini_plc
494
    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
495

    
496
-- | Run a balance move
497
tryBalance :: Table       -- ^ The starting table
498
           -> Bool        -- ^ Allow disk moves
499
           -> Bool        -- ^ Only evacuate moves
500
           -> Score       -- ^ Min gain threshold
501
           -> Score       -- ^ Min gain
502
           -> Maybe Table -- ^ The resulting table and commands
503
tryBalance ini_tbl disk_moves evac_mode mg_limit min_gain =
504
    let Table ini_nl ini_il ini_cv _ = ini_tbl
505
        all_inst = Container.elems ini_il
506
        all_inst' = if evac_mode
507
                    then let bad_nodes = map Node.idx . filter Node.offline $
508
                                         Container.elems ini_nl
509
                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
510
                                          Instance.pNode e `elem` bad_nodes)
511
                            all_inst
512
                    else all_inst
513
        reloc_inst = filter Instance.movable all_inst'
514
        node_idx = map Node.idx . filter (not . Node.offline) $
515
                   Container.elems ini_nl
516
        fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
517
        (Table _ _ fin_cv _) = fin_tbl
518
    in
519
      if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
520
      then Just fin_tbl -- this round made success, return the new table
521
      else Nothing
522

    
523
-- * Allocation functions
524

    
525
-- | Build failure stats out of a list of failures
526
collapseFailures :: [FailMode] -> FailStats
527
collapseFailures flst =
528
    map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
529

    
530
-- | Update current Allocation solution and failure stats with new
531
-- elements
532
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
533
concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols)
534

    
535
concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) =
536
    let nscore = compCV nl
537
        -- Choose the old or new solution, based on the cluster score
538
        nsols = case osols of
539
                  [] -> [(nscore, ns)]
540
                  (oscore, _):[] ->
541
                      if oscore < nscore
542
                      then osols
543
                      else [(nscore, ns)]
544
                  -- FIXME: here we simply concat to lists with more
545
                  -- than one element; we should instead abort, since
546
                  -- this is not a valid usage of this function
547
                  xs -> (nscore, ns):xs
548
        nsuc = cntok + 1
549
    -- Note: we force evaluation of nsols here in order to keep the
550
    -- memory profile low - we know that we will need nsols for sure
551
    -- in the next cycle, so we force evaluation of nsols, since the
552
    -- foldl' in the caller will only evaluate the tuple, but not the
553
    -- elements of the tuple
554
    in nsols `seq` nsuc `seq` (flst, nsuc, nsols)
555

    
556
-- | Try to allocate an instance on the cluster.
557
tryAlloc :: (Monad m) =>
558
            Node.List         -- ^ The node list
559
         -> Instance.List     -- ^ The instance list
560
         -> Instance.Instance -- ^ The instance to allocate
561
         -> Int               -- ^ Required number of nodes
562
         -> m AllocSolution   -- ^ Possible solution list
563
tryAlloc nl _ inst 2 =
564
    let all_nodes = getOnline nl
565
        all_pairs = liftM2 (,) all_nodes all_nodes
566
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
567
        sols = foldl' (\cstate (p, s) ->
568
                           concatAllocs cstate $ allocateOnPair nl inst p s
569
                      ) ([], 0, []) ok_pairs
570
    in return sols
571

    
572
tryAlloc nl _ inst 1 =
573
    let all_nodes = getOnline nl
574
        sols = foldl' (\cstate ->
575
                           concatAllocs cstate . allocateOnSingle nl inst
576
                      ) ([], 0, []) all_nodes
577
    in return sols
578

    
579
tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
580
                             \destinations required (" ++ show reqn ++
581
                                               "), only two supported"
582

    
583
-- | Try to allocate an instance on the cluster.
584
tryReloc :: (Monad m) =>
585
            Node.List       -- ^ The node list
586
         -> Instance.List   -- ^ The instance list
587
         -> Idx             -- ^ The index of the instance to move
588
         -> Int             -- ^ The number of nodes required
589
         -> [Ndx]           -- ^ Nodes which should not be used
590
         -> m AllocSolution -- ^ Solution list
591
tryReloc nl il xid 1 ex_idx =
592
    let all_nodes = getOnline nl
593
        inst = Container.find xid il
594
        ex_idx' = Instance.pNode inst:ex_idx
595
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
596
        valid_idxes = map Node.idx valid_nodes
597
        sols1 = foldl' (\cstate x ->
598
                            let em = do
599
                                  (mnl, i, _, _) <-
600
                                      applyMove nl inst (ReplaceSecondary x)
601
                                  return (mnl, i, [Container.find x mnl])
602
                            in concatAllocs cstate em
603
                       ) ([], 0, []) valid_idxes
604
    in return sols1
605

    
606
tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
607
                                \destinations required (" ++ show reqn ++
608
                                                  "), only one supported"
609

    
610
-- | Try to evacuate a list of nodes.
611
tryEvac :: (Monad m) =>
612
            Node.List       -- ^ The node list
613
         -> Instance.List   -- ^ The instance list
614
         -> [Ndx]           -- ^ Nodes to be evacuated
615
         -> m AllocSolution -- ^ Solution list
616
tryEvac nl il ex_ndx =
617
    let ex_nodes = map (`Container.find` nl) ex_ndx
618
        all_insts = nub . concatMap Node.sList $ ex_nodes
619
    in do
620
      (_, sol) <- foldM (\(nl', (_, _, rsols)) idx -> do
621
                           -- FIXME: hardcoded one node here
622
                           (fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx
623
                           case aes of
624
                             csol@(_, (nl'', _, _)):_ ->
625
                                 return (nl'', (fm, cs, csol:rsols))
626
                             _ -> fail $ "Can't evacuate instance " ++
627
                                  Instance.name (Container.find idx il)
628
                        ) (nl, ([], 0, [])) all_insts
629
      return sol
630

    
631
-- | Recursively place instances on the cluster until we're out of space
632
iterateAlloc :: Node.List
633
             -> Instance.List
634
             -> Instance.Instance
635
             -> Int
636
             -> [Instance.Instance]
637
             -> Result (FailStats, Node.List, Instance.List,
638
                        [Instance.Instance])
639
iterateAlloc nl il newinst nreq ixes =
640
      let depth = length ixes
641
          newname = printf "new-%d" depth::String
642
          newidx = length (Container.elems il) + depth
643
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
644
      in case tryAlloc nl il newi2 nreq of
645
           Bad s -> Bad s
646
           Ok (errs, _, sols3) ->
647
               case sols3 of
648
                 [] -> Ok (collapseFailures errs, nl, il, ixes)
649
                 (_, (xnl, xi, _)):[] ->
650
                     iterateAlloc xnl (Container.add newidx xi il)
651
                                  newinst nreq $! (xi:ixes)
652
                 _ -> Bad "Internal error: multiple solutions for single\
653
                          \ allocation"
654

    
655
tieredAlloc :: Node.List
656
            -> Instance.List
657
            -> Instance.Instance
658
            -> Int
659
            -> [Instance.Instance]
660
            -> Result (FailStats, Node.List, Instance.List,
661
                       [Instance.Instance])
662
tieredAlloc nl il newinst nreq ixes =
663
    case iterateAlloc nl il newinst nreq ixes of
664
      Bad s -> Bad s
665
      Ok (errs, nl', il', ixes') ->
666
          case Instance.shrinkByType newinst . fst . last $
667
               sortBy (comparing snd) errs of
668
            Bad _ -> Ok (errs, nl', il', ixes')
669
            Ok newinst' ->
670
                tieredAlloc nl' il' newinst' nreq ixes'
671

    
672
-- * Formatting functions
673

    
674
-- | Given the original and final nodes, computes the relocation description.
675
computeMoves :: Instance.Instance -- ^ The instance to be moved
676
             -> String -- ^ The instance name
677
             -> IMove  -- ^ The move being performed
678
             -> String -- ^ New primary
679
             -> String -- ^ New secondary
680
             -> (String, [String])
681
                -- ^ Tuple of moves and commands list; moves is containing
682
                -- either @/f/@ for failover or @/r:name/@ for replace
683
                -- secondary, while the command list holds gnt-instance
684
                -- commands (without that prefix), e.g \"@failover instance1@\"
685
computeMoves i inam mv c d =
686
    case mv of
687
      Failover -> ("f", [mig])
688
      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
689
      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
690
      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
691
      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
692
    where morf = if Instance.running i then "migrate" else "failover"
693
          mig = printf "%s -f %s" morf inam::String
694
          rep n = printf "replace-disks -n %s %s" n inam
695

    
696
-- | Converts a placement to string format.
697
printSolutionLine :: Node.List     -- ^ The node list
698
                  -> Instance.List -- ^ The instance list
699
                  -> Int           -- ^ Maximum node name length
700
                  -> Int           -- ^ Maximum instance name length
701
                  -> Placement     -- ^ The current placement
702
                  -> Int           -- ^ The index of the placement in
703
                                   -- the solution
704
                  -> (String, [String])
705
printSolutionLine nl il nmlen imlen plc pos =
706
    let
707
        pmlen = (2*nmlen + 1)
708
        (i, p, s, mv, c) = plc
709
        inst = Container.find i il
710
        inam = Instance.alias inst
711
        npri = Node.alias $ Container.find p nl
712
        nsec = Node.alias $ Container.find s nl
713
        opri = Node.alias $ Container.find (Instance.pNode inst) nl
714
        osec = Node.alias $ Container.find (Instance.sNode inst) nl
715
        (moves, cmds) =  computeMoves inst inam mv npri nsec
716
        ostr = printf "%s:%s" opri osec::String
717
        nstr = printf "%s:%s" npri nsec::String
718
    in
719
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
720
       pos imlen inam pmlen ostr
721
       pmlen nstr c moves,
722
       cmds)
723

    
724
-- | Return the instance and involved nodes in an instance move.
725
involvedNodes :: Instance.List -> Placement -> [Ndx]
726
involvedNodes il plc =
727
    let (i, np, ns, _, _) = plc
728
        inst = Container.find i il
729
        op = Instance.pNode inst
730
        os = Instance.sNode inst
731
    in nub [np, ns, op, os]
732

    
733
-- | Inner function for splitJobs, that either appends the next job to
734
-- the current jobset, or starts a new jobset.
735
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
736
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
737
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
738
    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
739
    | otherwise = ([n]:cjs, ndx)
740

    
741
-- | Break a list of moves into independent groups. Note that this
742
-- will reverse the order of jobs.
743
splitJobs :: [MoveJob] -> [JobSet]
744
splitJobs = fst . foldl mergeJobs ([], [])
745

    
746
-- | Given a list of commands, prefix them with @gnt-instance@ and
747
-- also beautify the display a little.
748
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
749
formatJob jsn jsl (sn, (_, _, _, cmds)) =
750
    let out =
751
            printf "  echo job %d/%d" jsn sn:
752
            printf "  check":
753
            map ("  gnt-instance " ++) cmds
754
    in if sn == 1
755
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
756
       else out
757

    
758
-- | Given a list of commands, prefix them with @gnt-instance@ and
759
-- also beautify the display a little.
760
formatCmds :: [JobSet] -> String
761
formatCmds =
762
    unlines .
763
    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
764
                             (zip [1..] js)) .
765
    zip [1..]
766

    
767
-- | Print the node list.
768
printNodes :: Node.List -> [String] -> String
769
printNodes nl fs =
770
    let fields = case fs of
771
          [] -> Node.defaultFields
772
          "+":rest -> Node.defaultFields ++ rest
773
          _ -> fs
774
        snl = sortBy (comparing Node.idx) (Container.elems nl)
775
        (header, isnum) = unzip $ map Node.showHeader fields
776
    in unlines . map ((:) ' ' .  intercalate " ") $
777
       formatTable (header:map (Node.list fields) snl) isnum
778

    
779
-- | Print the instance list.
780
printInsts :: Node.List -> Instance.List -> String
781
printInsts nl il =
782
    let sil = sortBy (comparing Instance.idx) (Container.elems il)
783
        helper inst = [ if Instance.running inst then "R" else " "
784
                      , Instance.name inst
785
                      , Container.nameOf nl (Instance.pNode inst)
786
                      , let sdx = Instance.sNode inst
787
                        in if sdx == Node.noSecondary
788
                           then  ""
789
                           else Container.nameOf nl sdx
790
                      , printf "%3d" $ Instance.vcpus inst
791
                      , printf "%5d" $ Instance.mem inst
792
                      , printf "%5d" $ Instance.dsk inst `div` 1024
793
                      , printf "%5.3f" lC
794
                      , printf "%5.3f" lM
795
                      , printf "%5.3f" lD
796
                      , printf "%5.3f" lN
797
                      ]
798
            where DynUtil lC lM lD lN = Instance.util inst
799
        header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
800
                 , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
801
        isnum = False:False:False:False:repeat True
802
    in unlines . map ((:) ' ' . intercalate " ") $
803
       formatTable (header:map helper sil) isnum
804

    
805
-- | Shows statistics for a given node list.
806
printStats :: Node.List -> String
807
printStats nl =
808
    let dcvs = compDetailedCV nl
809
        (weights, names) = unzip detailedCVInfo
810
        hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
811
        formatted = map (\(w, header, val) ->
812
                             printf "%s=%.8f(x%.2f)" header val w::String) hd
813
    in intercalate ", " formatted
814

    
815
-- | Convert a placement into a list of OpCodes (basically a job).
816
iMoveToJob :: Node.List -> Instance.List
817
          -> Idx -> IMove -> [OpCodes.OpCode]
818
iMoveToJob nl il idx move =
819
    let inst = Container.find idx il
820
        iname = Instance.name inst
821
        lookNode  = Just . Container.nameOf nl
822
        opF = if Instance.running inst
823
              then OpCodes.OpMigrateInstance iname True False
824
              else OpCodes.OpFailoverInstance iname False
825
        opR n = OpCodes.OpReplaceDisks iname (lookNode n)
826
                OpCodes.ReplaceNewSecondary [] Nothing
827
    in case move of
828
         Failover -> [ opF ]
829
         ReplacePrimary np -> [ opF, opR np, opF ]
830
         ReplaceSecondary ns -> [ opR ns ]
831
         ReplaceAndFailover np -> [ opR np, opF ]
832
         FailoverAndReplace ns -> [ opF, opR ns ]