Statistics
| Branch: | Tag: | Revision:

root / Ganeti / HTools / Cluster.hs @ 8a3b30ca

History | View | Annotate | Download (33.4 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the "Main" module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
    (
31
     -- * Types
32
      AllocSolution
33
    , Table(..)
34
    , CStats(..)
35
    , AllocStats
36
    -- * Generic functions
37
    , totalResources
38
    , computeAllocationDelta
39
    -- * First phase functions
40
    , computeBadItems
41
    -- * Second phase functions
42
    , printSolutionLine
43
    , formatCmds
44
    , involvedNodes
45
    , splitJobs
46
    -- * Display functions
47
    , printNodes
48
    , printInsts
49
    -- * Balacing functions
50
    , checkMove
51
    , doNextBalance
52
    , tryBalance
53
    , compCV
54
    , printStats
55
    , iMoveToJob
56
    -- * IAllocator functions
57
    , tryAlloc
58
    , tryReloc
59
    , tryEvac
60
    , collapseFailures
61
    -- * Allocation functions
62
    , iterateAlloc
63
    , tieredAlloc
64
    ) where
65

    
66
import Data.List
67
import Data.Ord (comparing)
68
import Text.Printf (printf)
69
import Control.Monad
70

    
71
import qualified Ganeti.HTools.Container as Container
72
import qualified Ganeti.HTools.Instance as Instance
73
import qualified Ganeti.HTools.Node as Node
74
import Ganeti.HTools.Types
75
import Ganeti.HTools.Utils
76
import qualified Ganeti.OpCodes as OpCodes
77

    
78
-- * Types
79

    
80
-- | Allocation\/relocation solution.
81
type AllocSolution = ([FailMode], Int, [(Score, Node.AllocElement)])
82

    
83
-- | The complete state for the balancing solution
84
data Table = Table Node.List Instance.List Score [Placement]
85
             deriving (Show)
86

    
87
data CStats = CStats { csFmem :: Int    -- ^ Cluster free mem
88
                     , csFdsk :: Int    -- ^ Cluster free disk
89
                     , csAmem :: Int    -- ^ Cluster allocatable mem
90
                     , csAdsk :: Int    -- ^ Cluster allocatable disk
91
                     , csAcpu :: Int    -- ^ Cluster allocatable cpus
92
                     , csMmem :: Int    -- ^ Max node allocatable mem
93
                     , csMdsk :: Int    -- ^ Max node allocatable disk
94
                     , csMcpu :: Int    -- ^ Max node allocatable cpu
95
                     , csImem :: Int    -- ^ Instance used mem
96
                     , csIdsk :: Int    -- ^ Instance used disk
97
                     , csIcpu :: Int    -- ^ Instance used cpu
98
                     , csTmem :: Double -- ^ Cluster total mem
99
                     , csTdsk :: Double -- ^ Cluster total disk
100
                     , csTcpu :: Double -- ^ Cluster total cpus
101
                     , csVcpu :: Int    -- ^ Cluster virtual cpus (if
102
                                        -- node pCpu has been set,
103
                                        -- otherwise -1)
104
                     , csXmem :: Int    -- ^ Unnacounted for mem
105
                     , csNmem :: Int    -- ^ Node own memory
106
                     , csScore :: Score -- ^ The cluster score
107
                     , csNinst :: Int   -- ^ The total number of instances
108
                     }
109
            deriving (Show)
110

    
111
-- | Currently used, possibly to allocate, unallocable
112
type AllocStats = (RSpec, RSpec, RSpec)
113

    
114
-- * Utility functions
115

    
116
-- | Verifies the N+1 status and return the affected nodes.
117
verifyN1 :: [Node.Node] -> [Node.Node]
118
verifyN1 = filter Node.failN1
119

    
120
{-| Computes the pair of bad nodes and instances.
121

    
122
The bad node list is computed via a simple 'verifyN1' check, and the
123
bad instance list is the list of primary and secondary instances of
124
those nodes.
125

    
126
-}
127
computeBadItems :: Node.List -> Instance.List ->
128
                   ([Node.Node], [Instance.Instance])
129
computeBadItems nl il =
130
  let bad_nodes = verifyN1 $ getOnline nl
131
      bad_instances = map (`Container.find` il) .
132
                      sort . nub $
133
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
134
  in
135
    (bad_nodes, bad_instances)
136

    
137
-- | Zero-initializer for the CStats type
138
emptyCStats :: CStats
139
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
140

    
141
-- | Update stats with data from a new node
142
updateCStats :: CStats -> Node.Node -> CStats
143
updateCStats cs node =
144
    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
145
                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
146
                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
147
                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
148
                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
149
                 csVcpu = x_vcpu,
150
                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
151
               }
152
            = cs
153
        inc_amem = Node.fMem node - Node.rMem node
154
        inc_amem' = if inc_amem > 0 then inc_amem else 0
155
        inc_adsk = Node.availDisk node
156
        inc_imem = truncate (Node.tMem node) - Node.nMem node
157
                   - Node.xMem node - Node.fMem node
158
        inc_icpu = Node.uCpu node
159
        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
160
        inc_vcpu = Node.hiCpu node
161

    
162
    in cs { csFmem = x_fmem + Node.fMem node
163
          , csFdsk = x_fdsk + Node.fDsk node
164
          , csAmem = x_amem + inc_amem'
165
          , csAdsk = x_adsk + inc_adsk
166
          , csAcpu = x_acpu
167
          , csMmem = max x_mmem inc_amem'
168
          , csMdsk = max x_mdsk inc_adsk
169
          , csMcpu = x_mcpu
170
          , csImem = x_imem + inc_imem
171
          , csIdsk = x_idsk + inc_idsk
172
          , csIcpu = x_icpu + inc_icpu
173
          , csTmem = x_tmem + Node.tMem node
174
          , csTdsk = x_tdsk + Node.tDsk node
175
          , csTcpu = x_tcpu + Node.tCpu node
176
          , csVcpu = x_vcpu + inc_vcpu
177
          , csXmem = x_xmem + Node.xMem node
178
          , csNmem = x_nmem + Node.nMem node
179
          , csNinst = x_ninst + length (Node.pList node)
180
          }
181

    
182
-- | Compute the total free disk and memory in the cluster.
183
totalResources :: Node.List -> CStats
184
totalResources nl =
185
    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
186
    in cs { csScore = compCV nl }
187

    
188
-- | Compute the delta between two cluster state.
189
--
190
-- This is used when doing allocations, to understand better the
191
-- available cluster resources. The return value is a triple of the
192
-- current used values, the delta that was still allocated, and what
193
-- was left unallocated.
194
computeAllocationDelta :: CStats -> CStats -> AllocStats
195
computeAllocationDelta cini cfin =
196
    let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
197
        CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
198
                csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
199
        rini = RSpec i_icpu i_imem i_idsk
200
        rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
201
        un_cpu = v_cpu - f_icpu
202
        runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
203
    in (rini, rfin, runa)
204

    
205
-- | The names and weights of the individual elements in the CV list
206
detailedCVInfo :: [(Double, String)]
207
detailedCVInfo = [ (1,  "free_mem_cv")
208
                 , (1,  "free_disk_cv")
209
                 , (1,  "n1_cnt")
210
                 , (1,  "reserved_mem_cv")
211
                 , (4,  "offline_all_cnt")
212
                 , (16, "offline_pri_cnt")
213
                 , (1,  "vcpu_ratio_cv")
214
                 , (1,  "cpu_load_cv")
215
                 , (1,  "mem_load_cv")
216
                 , (1,  "disk_load_cv")
217
                 , (1,  "net_load_cv")
218
                 , (1,  "pri_tags_score")
219
                 ]
220

    
221
detailedCVWeights :: [Double]
222
detailedCVWeights = map fst detailedCVInfo
223

    
224
-- | Compute the mem and disk covariance.
225
compDetailedCV :: Node.List -> [Double]
226
compDetailedCV nl =
227
    let
228
        all_nodes = Container.elems nl
229
        (offline, nodes) = partition Node.offline all_nodes
230
        mem_l = map Node.pMem nodes
231
        dsk_l = map Node.pDsk nodes
232
        -- metric: memory covariance
233
        mem_cv = varianceCoeff mem_l
234
        -- metric: disk covariance
235
        dsk_cv = varianceCoeff dsk_l
236
        n1_l = length $ filter Node.failN1 nodes
237
        -- metric: count of failN1 nodes
238
        n1_score = fromIntegral n1_l::Double
239
        res_l = map Node.pRem nodes
240
        -- metric: reserved memory covariance
241
        res_cv = varianceCoeff res_l
242
        -- offline instances metrics
243
        offline_ipri = sum . map (length . Node.pList) $ offline
244
        offline_isec = sum . map (length . Node.sList) $ offline
245
        -- metric: count of instances on offline nodes
246
        off_score = fromIntegral (offline_ipri + offline_isec)::Double
247
        -- metric: count of primary instances on offline nodes (this
248
        -- helps with evacuation/failover of primary instances on
249
        -- 2-node clusters with one node offline)
250
        off_pri_score = fromIntegral offline_ipri::Double
251
        cpu_l = map Node.pCpu nodes
252
        -- metric: covariance of vcpu/pcpu ratio
253
        cpu_cv = varianceCoeff cpu_l
254
        -- metrics: covariance of cpu, memory, disk and network load
255
        (c_load, m_load, d_load, n_load) = unzip4 $
256
            map (\n ->
257
                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
258
                         DynUtil c2 m2 d2 n2 = Node.utilPool n
259
                     in (c1/c2, m1/m2, d1/d2, n1/n2)
260
                ) nodes
261
        -- metric: conflicting instance count
262
        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
263
        pri_tags_score = fromIntegral pri_tags_inst::Double
264
    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
265
       , varianceCoeff c_load, varianceCoeff m_load
266
       , varianceCoeff d_load, varianceCoeff n_load
267
       , pri_tags_score ]
268

    
269
-- | Compute the /total/ variance.
270
compCV :: Node.List -> Double
271
compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
272

    
273
-- | Compute online nodes from a Node.List
274
getOnline :: Node.List -> [Node.Node]
275
getOnline = filter (not . Node.offline) . Container.elems
276

    
277
-- * hbal functions
278

    
279
-- | Compute best table. Note that the ordering of the arguments is important.
280
compareTables :: Table -> Table -> Table
281
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
282
    if a_cv > b_cv then b else a
283

    
284
-- | Applies an instance move to a given node list and instance.
285
applyMove :: Node.List -> Instance.Instance
286
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
287
-- Failover (f)
288
applyMove nl inst Failover =
289
    let old_pdx = Instance.pNode inst
290
        old_sdx = Instance.sNode inst
291
        old_p = Container.find old_pdx nl
292
        old_s = Container.find old_sdx nl
293
        int_p = Node.removePri old_p inst
294
        int_s = Node.removeSec old_s inst
295
        force_p = Node.offline old_p
296
        new_nl = do -- Maybe monad
297
          new_p <- Node.addPriEx force_p int_s inst
298
          new_s <- Node.addSec int_p inst old_sdx
299
          let new_inst = Instance.setBoth inst old_sdx old_pdx
300
          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
301
                  new_inst, old_sdx, old_pdx)
302
    in new_nl
303

    
304
-- Replace the primary (f:, r:np, f)
305
applyMove nl inst (ReplacePrimary new_pdx) =
306
    let old_pdx = Instance.pNode inst
307
        old_sdx = Instance.sNode inst
308
        old_p = Container.find old_pdx nl
309
        old_s = Container.find old_sdx nl
310
        tgt_n = Container.find new_pdx nl
311
        int_p = Node.removePri old_p inst
312
        int_s = Node.removeSec old_s inst
313
        force_p = Node.offline old_p
314
        new_nl = do -- Maybe monad
315
          -- check that the current secondary can host the instance
316
          -- during the migration
317
          tmp_s <- Node.addPriEx force_p int_s inst
318
          let tmp_s' = Node.removePri tmp_s inst
319
          new_p <- Node.addPriEx force_p tgt_n inst
320
          new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
321
          let new_inst = Instance.setPri inst new_pdx
322
          return (Container.add new_pdx new_p $
323
                  Container.addTwo old_pdx int_p old_sdx new_s nl,
324
                  new_inst, new_pdx, old_sdx)
325
    in new_nl
326

    
327
-- Replace the secondary (r:ns)
328
applyMove nl inst (ReplaceSecondary new_sdx) =
329
    let old_pdx = Instance.pNode inst
330
        old_sdx = Instance.sNode inst
331
        old_s = Container.find old_sdx nl
332
        tgt_n = Container.find new_sdx nl
333
        int_s = Node.removeSec old_s inst
334
        force_s = Node.offline old_s
335
        new_inst = Instance.setSec inst new_sdx
336
        new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
337
                 \new_s -> return (Container.addTwo new_sdx
338
                                   new_s old_sdx int_s nl,
339
                                   new_inst, old_pdx, new_sdx)
340
    in new_nl
341

    
342
-- Replace the secondary and failover (r:np, f)
343
applyMove nl inst (ReplaceAndFailover new_pdx) =
344
    let old_pdx = Instance.pNode inst
345
        old_sdx = Instance.sNode inst
346
        old_p = Container.find old_pdx nl
347
        old_s = Container.find old_sdx nl
348
        tgt_n = Container.find new_pdx nl
349
        int_p = Node.removePri old_p inst
350
        int_s = Node.removeSec old_s inst
351
        force_s = Node.offline old_s
352
        new_nl = do -- Maybe monad
353
          new_p <- Node.addPri tgt_n inst
354
          new_s <- Node.addSecEx force_s int_p inst new_pdx
355
          let new_inst = Instance.setBoth inst new_pdx old_pdx
356
          return (Container.add new_pdx new_p $
357
                  Container.addTwo old_pdx new_s old_sdx int_s nl,
358
                  new_inst, new_pdx, old_pdx)
359
    in new_nl
360

    
361
-- Failver and replace the secondary (f, r:ns)
362
applyMove nl inst (FailoverAndReplace new_sdx) =
363
    let old_pdx = Instance.pNode inst
364
        old_sdx = Instance.sNode inst
365
        old_p = Container.find old_pdx nl
366
        old_s = Container.find old_sdx nl
367
        tgt_n = Container.find new_sdx nl
368
        int_p = Node.removePri old_p inst
369
        int_s = Node.removeSec old_s inst
370
        force_p = Node.offline old_p
371
        new_nl = do -- Maybe monad
372
          new_p <- Node.addPriEx force_p int_s inst
373
          new_s <- Node.addSecEx force_p tgt_n inst old_sdx
374
          let new_inst = Instance.setBoth inst old_sdx new_sdx
375
          return (Container.add new_sdx new_s $
376
                  Container.addTwo old_sdx new_p old_pdx int_p nl,
377
                  new_inst, old_sdx, new_sdx)
378
    in new_nl
379

    
380
-- | Tries to allocate an instance on one given node.
381
allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
382
                 -> OpResult Node.AllocElement
383
allocateOnSingle nl inst p =
384
    let new_pdx = Node.idx p
385
        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
386
        new_nl = Node.addPri p inst >>= \new_p ->
387
                 return (Container.add new_pdx new_p nl, new_inst, [new_p])
388
    in new_nl
389

    
390
-- | Tries to allocate an instance on a given pair of nodes.
391
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
392
               -> OpResult Node.AllocElement
393
allocateOnPair nl inst tgt_p tgt_s =
394
    let new_pdx = Node.idx tgt_p
395
        new_sdx = Node.idx tgt_s
396
        new_nl = do -- Maybe monad
397
          new_p <- Node.addPri tgt_p inst
398
          new_s <- Node.addSec tgt_s inst new_pdx
399
          let new_inst = Instance.setBoth inst new_pdx new_sdx
400
          return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst,
401
                 [new_p, new_s])
402
    in new_nl
403

    
404
-- | Tries to perform an instance move and returns the best table
405
-- between the original one and the new one.
406
checkSingleStep :: Table -- ^ The original table
407
                -> Instance.Instance -- ^ The instance to move
408
                -> Table -- ^ The current best table
409
                -> IMove -- ^ The move to apply
410
                -> Table -- ^ The final best table
411
checkSingleStep ini_tbl target cur_tbl move =
412
    let
413
        Table ini_nl ini_il _ ini_plc = ini_tbl
414
        tmp_resu = applyMove ini_nl target move
415
    in
416
      case tmp_resu of
417
        OpFail _ -> cur_tbl
418
        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
419
            let tgt_idx = Instance.idx target
420
                upd_cvar = compCV upd_nl
421
                upd_il = Container.add tgt_idx new_inst ini_il
422
                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
423
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
424
            in
425
              compareTables cur_tbl upd_tbl
426

    
427
-- | Given the status of the current secondary as a valid new node and
428
-- the current candidate target node, generate the possible moves for
429
-- a instance.
430
possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
431
              -> Ndx       -- ^ Target node candidate
432
              -> [IMove]   -- ^ List of valid result moves
433
possibleMoves True tdx =
434
    [ReplaceSecondary tdx,
435
     ReplaceAndFailover tdx,
436
     ReplacePrimary tdx,
437
     FailoverAndReplace tdx]
438

    
439
possibleMoves False tdx =
440
    [ReplaceSecondary tdx,
441
     ReplaceAndFailover tdx]
442

    
443
-- | Compute the best move for a given instance.
444
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
445
                  -> Bool              -- ^ Whether disk moves are allowed
446
                  -> Table             -- ^ Original table
447
                  -> Instance.Instance -- ^ Instance to move
448
                  -> Table             -- ^ Best new table for this instance
449
checkInstanceMove nodes_idx disk_moves ini_tbl target =
450
    let
451
        opdx = Instance.pNode target
452
        osdx = Instance.sNode target
453
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
454
        use_secondary = elem osdx nodes_idx
455
        aft_failover = if use_secondary -- if allowed to failover
456
                       then checkSingleStep ini_tbl target ini_tbl Failover
457
                       else ini_tbl
458
        all_moves = if disk_moves
459
                    then concatMap (possibleMoves use_secondary) nodes
460
                    else []
461
    in
462
      -- iterate over the possible nodes for this instance
463
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
464

    
465
-- | Compute the best next move.
466
checkMove :: [Ndx]               -- ^ Allowed target node indices
467
          -> Bool                -- ^ Whether disk moves are allowed
468
          -> Table               -- ^ The current solution
469
          -> [Instance.Instance] -- ^ List of instances still to move
470
          -> Table               -- ^ The new solution
471
checkMove nodes_idx disk_moves ini_tbl victims =
472
    let Table _ _ _ ini_plc = ini_tbl
473
        -- iterate over all instances, computing the best move
474
        best_tbl =
475
            foldl'
476
            (\ step_tbl em ->
477
                 compareTables step_tbl $
478
                 checkInstanceMove nodes_idx disk_moves ini_tbl em)
479
            ini_tbl victims
480
        Table _ _ _ best_plc = best_tbl
481
    in if length best_plc == length ini_plc
482
       then ini_tbl -- no advancement
483
       else best_tbl
484

    
485
-- | Check if we are allowed to go deeper in the balancing
486
doNextBalance :: Table     -- ^ The starting table
487
              -> Int       -- ^ Remaining length
488
              -> Score     -- ^ Score at which to stop
489
              -> Bool      -- ^ The resulting table and commands
490
doNextBalance ini_tbl max_rounds min_score =
491
    let Table _ _ ini_cv ini_plc = ini_tbl
492
        ini_plc_len = length ini_plc
493
    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
494

    
495
-- | Run a balance move
496
tryBalance :: Table       -- ^ The starting table
497
           -> Bool        -- ^ Allow disk moves
498
           -> Bool        -- ^ Only evacuate moves
499
           -> Maybe Table -- ^ The resulting table and commands
500
tryBalance ini_tbl disk_moves evac_mode =
501
    let Table ini_nl ini_il ini_cv _ = ini_tbl
502
        all_inst = Container.elems ini_il
503
        all_inst' = if evac_mode
504
                    then let bad_nodes = map Node.idx . filter Node.offline $
505
                                         Container.elems ini_nl
506
                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
507
                                          Instance.pNode e `elem` bad_nodes)
508
                            all_inst
509
                    else all_inst
510
        reloc_inst = filter Instance.movable all_inst'
511
        node_idx = map Node.idx . filter (not . Node.offline) $
512
                   Container.elems ini_nl
513
        fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
514
        (Table _ _ fin_cv _) = fin_tbl
515
    in
516
      if fin_cv < ini_cv
517
      then Just fin_tbl -- this round made success, return the new table
518
      else Nothing
519

    
520
-- * Allocation functions
521

    
522
-- | Build failure stats out of a list of failures
523
collapseFailures :: [FailMode] -> FailStats
524
collapseFailures flst =
525
    map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
526

    
527
-- | Update current Allocation solution and failure stats with new
528
-- elements
529
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
530
concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols)
531

    
532
concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) =
533
    let nscore = compCV nl
534
        -- Choose the old or new solution, based on the cluster score
535
        nsols = case osols of
536
                  [] -> [(nscore, ns)]
537
                  (oscore, _):[] ->
538
                      if oscore < nscore
539
                      then osols
540
                      else [(nscore, ns)]
541
                  -- FIXME: here we simply concat to lists with more
542
                  -- than one element; we should instead abort, since
543
                  -- this is not a valid usage of this function
544
                  xs -> (nscore, ns):xs
545
        nsuc = cntok + 1
546
    -- Note: we force evaluation of nsols here in order to keep the
547
    -- memory profile low - we know that we will need nsols for sure
548
    -- in the next cycle, so we force evaluation of nsols, since the
549
    -- foldl' in the caller will only evaluate the tuple, but not the
550
    -- elements of the tuple
551
    in nsols `seq` nsuc `seq` (flst, nsuc, nsols)
552

    
553
-- | Try to allocate an instance on the cluster.
554
tryAlloc :: (Monad m) =>
555
            Node.List         -- ^ The node list
556
         -> Instance.List     -- ^ The instance list
557
         -> Instance.Instance -- ^ The instance to allocate
558
         -> Int               -- ^ Required number of nodes
559
         -> m AllocSolution   -- ^ Possible solution list
560
tryAlloc nl _ inst 2 =
561
    let all_nodes = getOnline nl
562
        all_pairs = liftM2 (,) all_nodes all_nodes
563
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
564
        sols = foldl' (\cstate (p, s) ->
565
                           concatAllocs cstate $ allocateOnPair nl inst p s
566
                      ) ([], 0, []) ok_pairs
567
    in return sols
568

    
569
tryAlloc nl _ inst 1 =
570
    let all_nodes = getOnline nl
571
        sols = foldl' (\cstate ->
572
                           concatAllocs cstate . allocateOnSingle nl inst
573
                      ) ([], 0, []) all_nodes
574
    in return sols
575

    
576
tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
577
                             \destinations required (" ++ show reqn ++
578
                                               "), only two supported"
579

    
580
-- | Try to allocate an instance on the cluster.
581
tryReloc :: (Monad m) =>
582
            Node.List       -- ^ The node list
583
         -> Instance.List   -- ^ The instance list
584
         -> Idx             -- ^ The index of the instance to move
585
         -> Int             -- ^ The number of nodes required
586
         -> [Ndx]           -- ^ Nodes which should not be used
587
         -> m AllocSolution -- ^ Solution list
588
tryReloc nl il xid 1 ex_idx =
589
    let all_nodes = getOnline nl
590
        inst = Container.find xid il
591
        ex_idx' = Instance.pNode inst:ex_idx
592
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
593
        valid_idxes = map Node.idx valid_nodes
594
        sols1 = foldl' (\cstate x ->
595
                            let em = do
596
                                  (mnl, i, _, _) <-
597
                                      applyMove nl inst (ReplaceSecondary x)
598
                                  return (mnl, i, [Container.find x mnl])
599
                            in concatAllocs cstate em
600
                       ) ([], 0, []) valid_idxes
601
    in return sols1
602

    
603
tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
604
                                \destinations required (" ++ show reqn ++
605
                                                  "), only one supported"
606

    
607
-- | Try to evacuate a list of nodes.
608
tryEvac :: (Monad m) =>
609
            Node.List       -- ^ The node list
610
         -> Instance.List   -- ^ The instance list
611
         -> [Ndx]           -- ^ Nodes to be evacuated
612
         -> m AllocSolution -- ^ Solution list
613
tryEvac nl il ex_ndx =
614
    let ex_nodes = map (`Container.find` nl) ex_ndx
615
        all_insts = nub . concatMap Node.sList $ ex_nodes
616
    in do
617
      (_, sol) <- foldM (\(nl', (_, _, rsols)) idx -> do
618
                           -- FIXME: hardcoded one node here
619
                           (fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx
620
                           case aes of
621
                             csol@(_, (nl'', _, _)):_ ->
622
                                 return (nl'', (fm, cs, csol:rsols))
623
                             _ -> fail $ "Can't evacuate instance " ++
624
                                  show idx
625
                        ) (nl, ([], 0, [])) all_insts
626
      return sol
627

    
628
-- | Recursively place instances on the cluster until we're out of space
629
iterateAlloc :: Node.List
630
             -> Instance.List
631
             -> Instance.Instance
632
             -> Int
633
             -> [Instance.Instance]
634
             -> Result (FailStats, Node.List, [Instance.Instance])
635
iterateAlloc nl il newinst nreq ixes =
636
      let depth = length ixes
637
          newname = printf "new-%d" depth::String
638
          newidx = length (Container.elems il) + depth
639
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
640
      in case tryAlloc nl il newi2 nreq of
641
           Bad s -> Bad s
642
           Ok (errs, _, sols3) ->
643
               case sols3 of
644
                 [] -> Ok (collapseFailures errs, nl, ixes)
645
                 (_, (xnl, xi, _)):[] ->
646
                     iterateAlloc xnl il newinst nreq $! (xi:ixes)
647
                 _ -> Bad "Internal error: multiple solutions for single\
648
                          \ allocation"
649

    
650
tieredAlloc :: Node.List
651
            -> Instance.List
652
            -> Instance.Instance
653
            -> Int
654
            -> [Instance.Instance]
655
            -> Result (FailStats, Node.List, [Instance.Instance])
656
tieredAlloc nl il newinst nreq ixes =
657
    case iterateAlloc nl il newinst nreq ixes of
658
      Bad s -> Bad s
659
      Ok (errs, nl', ixes') ->
660
          case Instance.shrinkByType newinst . fst . last $
661
               sortBy (comparing snd) errs of
662
            Bad _ -> Ok (errs, nl', ixes')
663
            Ok newinst' ->
664
                tieredAlloc nl' il newinst' nreq ixes'
665

    
666
-- * Formatting functions
667

    
668
-- | Given the original and final nodes, computes the relocation description.
669
computeMoves :: Instance.Instance -- ^ The instance to be moved
670
             -> String -- ^ The instance name
671
             -> IMove  -- ^ The move being performed
672
             -> String -- ^ New primary
673
             -> String -- ^ New secondary
674
             -> (String, [String])
675
                -- ^ Tuple of moves and commands list; moves is containing
676
                -- either @/f/@ for failover or @/r:name/@ for replace
677
                -- secondary, while the command list holds gnt-instance
678
                -- commands (without that prefix), e.g \"@failover instance1@\"
679
computeMoves i inam mv c d =
680
    case mv of
681
      Failover -> ("f", [mig])
682
      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
683
      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
684
      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
685
      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
686
    where morf = if Instance.running i then "migrate" else "failover"
687
          mig = printf "%s -f %s" morf inam::String
688
          rep n = printf "replace-disks -n %s %s" n inam
689

    
690
-- | Converts a placement to string format.
691
printSolutionLine :: Node.List     -- ^ The node list
692
                  -> Instance.List -- ^ The instance list
693
                  -> Int           -- ^ Maximum node name length
694
                  -> Int           -- ^ Maximum instance name length
695
                  -> Placement     -- ^ The current placement
696
                  -> Int           -- ^ The index of the placement in
697
                                   -- the solution
698
                  -> (String, [String])
699
printSolutionLine nl il nmlen imlen plc pos =
700
    let
701
        pmlen = (2*nmlen + 1)
702
        (i, p, s, mv, c) = plc
703
        inst = Container.find i il
704
        inam = Instance.alias inst
705
        npri = Node.alias $ Container.find p nl
706
        nsec = Node.alias $ Container.find s nl
707
        opri = Node.alias $ Container.find (Instance.pNode inst) nl
708
        osec = Node.alias $ Container.find (Instance.sNode inst) nl
709
        (moves, cmds) =  computeMoves inst inam mv npri nsec
710
        ostr = printf "%s:%s" opri osec::String
711
        nstr = printf "%s:%s" npri nsec::String
712
    in
713
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
714
       pos imlen inam pmlen ostr
715
       pmlen nstr c moves,
716
       cmds)
717

    
718
-- | Return the instance and involved nodes in an instance move.
719
involvedNodes :: Instance.List -> Placement -> [Ndx]
720
involvedNodes il plc =
721
    let (i, np, ns, _, _) = plc
722
        inst = Container.find i il
723
        op = Instance.pNode inst
724
        os = Instance.sNode inst
725
    in nub [np, ns, op, os]
726

    
727
-- | Inner function for splitJobs, that either appends the next job to
728
-- the current jobset, or starts a new jobset.
729
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
730
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
731
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
732
    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
733
    | otherwise = ([n]:cjs, ndx)
734

    
735
-- | Break a list of moves into independent groups. Note that this
736
-- will reverse the order of jobs.
737
splitJobs :: [MoveJob] -> [JobSet]
738
splitJobs = fst . foldl mergeJobs ([], [])
739

    
740
-- | Given a list of commands, prefix them with @gnt-instance@ and
741
-- also beautify the display a little.
742
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
743
formatJob jsn jsl (sn, (_, _, _, cmds)) =
744
    let out =
745
            printf "  echo job %d/%d" jsn sn:
746
            printf "  check":
747
            map ("  gnt-instance " ++) cmds
748
    in if sn == 1
749
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
750
       else out
751

    
752
-- | Given a list of commands, prefix them with @gnt-instance@ and
753
-- also beautify the display a little.
754
formatCmds :: [JobSet] -> String
755
formatCmds =
756
    unlines .
757
    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
758
                             (zip [1..] js)) .
759
    zip [1..]
760

    
761
-- | Print the node list.
762
printNodes :: Node.List -> [String] -> String
763
printNodes nl fs =
764
    let fields = case fs of
765
          [] -> Node.defaultFields
766
          "+":rest -> Node.defaultFields ++ rest
767
          _ -> fs
768
        snl = sortBy (comparing Node.idx) (Container.elems nl)
769
        (header, isnum) = unzip $ map Node.showHeader fields
770
    in unlines . map ((:) ' ' .  intercalate " ") $
771
       formatTable (header:map (Node.list fields) snl) isnum
772

    
773
-- | Print the instance list.
774
printInsts :: Node.List -> Instance.List -> String
775
printInsts nl il =
776
    let sil = sortBy (comparing Instance.idx) (Container.elems il)
777
        helper inst = [ if Instance.running inst then "R" else " "
778
                      , Instance.name inst
779
                      , Container.nameOf nl (Instance.pNode inst)
780
                      , let sdx = Instance.sNode inst
781
                        in if sdx == Node.noSecondary
782
                           then  ""
783
                           else Container.nameOf nl sdx
784
                      , printf "%3d" $ Instance.vcpus inst
785
                      , printf "%5d" $ Instance.mem inst
786
                      , printf "%5d" $ Instance.dsk inst `div` 1024
787
                      , printf "%5.3f" lC
788
                      , printf "%5.3f" lM
789
                      , printf "%5.3f" lD
790
                      , printf "%5.3f" lN
791
                      ]
792
            where DynUtil lC lM lD lN = Instance.util inst
793
        header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
794
                 , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
795
        isnum = False:False:False:False:repeat True
796
    in unlines . map ((:) ' ' . intercalate " ") $
797
       formatTable (header:map helper sil) isnum
798

    
799
-- | Shows statistics for a given node list.
800
printStats :: Node.List -> String
801
printStats nl =
802
    let dcvs = compDetailedCV nl
803
        (weights, names) = unzip detailedCVInfo
804
        hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
805
        formatted = map (\(w, header, val) ->
806
                             printf "%s=%.8f(x%.2f)" header val w::String) hd
807
    in intercalate ", " formatted
808

    
809
-- | Convert a placement into a list of OpCodes (basically a job).
810
iMoveToJob :: Node.List -> Instance.List
811
          -> Idx -> IMove -> [OpCodes.OpCode]
812
iMoveToJob nl il idx move =
813
    let inst = Container.find idx il
814
        iname = Instance.name inst
815
        lookNode  = Just . Container.nameOf nl
816
        opF = if Instance.running inst
817
              then OpCodes.OpMigrateInstance iname True False
818
              else OpCodes.OpFailoverInstance iname False
819
        opR n = OpCodes.OpReplaceDisks iname (lookNode n)
820
                OpCodes.ReplaceNewSecondary [] Nothing
821
    in case move of
822
         Failover -> [ opF ]
823
         ReplacePrimary np -> [ opF, opR np, opF ]
824
         ReplaceSecondary ns -> [ opR ns ]
825
         ReplaceAndFailover np -> [ opR np, opF ]
826
         FailoverAndReplace ns -> [ opF, opR ns ]