Statistics
| Branch: | Tag: | Revision:

root / Ganeti / HTools / Cluster.hs @ 2cae47e9

History | View | Annotate | Download (33.1 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the "Main" module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
    (
31
     -- * Types
32
      AllocSolution
33
    , Table(..)
34
    , CStats(..)
35
    , AllocStats
36
    -- * Generic functions
37
    , totalResources
38
    , computeAllocationDelta
39
    -- * First phase functions
40
    , computeBadItems
41
    -- * Second phase functions
42
    , printSolutionLine
43
    , formatCmds
44
    , involvedNodes
45
    , splitJobs
46
    -- * Display functions
47
    , printNodes
48
    , printInsts
49
    -- * Balacing functions
50
    , checkMove
51
    , doNextBalance
52
    , tryBalance
53
    , compCV
54
    , printStats
55
    , iMoveToJob
56
    -- * IAllocator functions
57
    , tryAlloc
58
    , tryReloc
59
    , tryEvac
60
    , collapseFailures
61
    -- * Allocation functions
62
    , iterateAlloc
63
    , tieredAlloc
64
    ) where
65

    
66
import Data.List
67
import Data.Ord (comparing)
68
import Text.Printf (printf)
69
import Control.Monad
70

    
71
import qualified Ganeti.HTools.Container as Container
72
import qualified Ganeti.HTools.Instance as Instance
73
import qualified Ganeti.HTools.Node as Node
74
import Ganeti.HTools.Types
75
import Ganeti.HTools.Utils
76
import qualified Ganeti.OpCodes as OpCodes
77

    
78
-- * Types
79

    
80
-- | Allocation\/relocation solution.
81
type AllocSolution = ([FailMode], Int, [(Score, Node.AllocElement)])
82

    
83
-- | The complete state for the balancing solution
84
data Table = Table Node.List Instance.List Score [Placement]
85
             deriving (Show)
86

    
87
data CStats = CStats { csFmem :: Int    -- ^ Cluster free mem
88
                     , csFdsk :: Int    -- ^ Cluster free disk
89
                     , csAmem :: Int    -- ^ Cluster allocatable mem
90
                     , csAdsk :: Int    -- ^ Cluster allocatable disk
91
                     , csAcpu :: Int    -- ^ Cluster allocatable cpus
92
                     , csMmem :: Int    -- ^ Max node allocatable mem
93
                     , csMdsk :: Int    -- ^ Max node allocatable disk
94
                     , csMcpu :: Int    -- ^ Max node allocatable cpu
95
                     , csImem :: Int    -- ^ Instance used mem
96
                     , csIdsk :: Int    -- ^ Instance used disk
97
                     , csIcpu :: Int    -- ^ Instance used cpu
98
                     , csTmem :: Double -- ^ Cluster total mem
99
                     , csTdsk :: Double -- ^ Cluster total disk
100
                     , csTcpu :: Double -- ^ Cluster total cpus
101
                     , csVcpu :: Int    -- ^ Cluster virtual cpus (if
102
                                        -- node pCpu has been set,
103
                                        -- otherwise -1)
104
                     , csXmem :: Int    -- ^ Unnacounted for mem
105
                     , csNmem :: Int    -- ^ Node own memory
106
                     , csScore :: Score -- ^ The cluster score
107
                     , csNinst :: Int   -- ^ The total number of instances
108
                     }
109
            deriving (Show)
110

    
111
-- | Currently used, possibly to allocate, unallocable
112
type AllocStats = (RSpec, RSpec, RSpec)
113

    
114
-- * Utility functions
115

    
116
-- | Verifies the N+1 status and return the affected nodes.
117
verifyN1 :: [Node.Node] -> [Node.Node]
118
verifyN1 = filter Node.failN1
119

    
120
{-| Computes the pair of bad nodes and instances.
121

    
122
The bad node list is computed via a simple 'verifyN1' check, and the
123
bad instance list is the list of primary and secondary instances of
124
those nodes.
125

    
126
-}
127
computeBadItems :: Node.List -> Instance.List ->
128
                   ([Node.Node], [Instance.Instance])
129
computeBadItems nl il =
130
  let bad_nodes = verifyN1 $ getOnline nl
131
      bad_instances = map (`Container.find` il) .
132
                      sort . nub $
133
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
134
  in
135
    (bad_nodes, bad_instances)
136

    
137
-- | Zero-initializer for the CStats type
138
emptyCStats :: CStats
139
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
140

    
141
-- | Update stats with data from a new node
142
updateCStats :: CStats -> Node.Node -> CStats
143
updateCStats cs node =
144
    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
145
                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
146
                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
147
                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
148
                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
149
                 csVcpu = x_vcpu,
150
                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
151
               }
152
            = cs
153
        inc_amem = Node.fMem node - Node.rMem node
154
        inc_amem' = if inc_amem > 0 then inc_amem else 0
155
        inc_adsk = Node.availDisk node
156
        inc_imem = truncate (Node.tMem node) - Node.nMem node
157
                   - Node.xMem node - Node.fMem node
158
        inc_icpu = Node.uCpu node
159
        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
160
        inc_vcpu = Node.hiCpu node
161

    
162
    in cs { csFmem = x_fmem + Node.fMem node
163
          , csFdsk = x_fdsk + Node.fDsk node
164
          , csAmem = x_amem + inc_amem'
165
          , csAdsk = x_adsk + inc_adsk
166
          , csAcpu = x_acpu
167
          , csMmem = max x_mmem inc_amem'
168
          , csMdsk = max x_mdsk inc_adsk
169
          , csMcpu = x_mcpu
170
          , csImem = x_imem + inc_imem
171
          , csIdsk = x_idsk + inc_idsk
172
          , csIcpu = x_icpu + inc_icpu
173
          , csTmem = x_tmem + Node.tMem node
174
          , csTdsk = x_tdsk + Node.tDsk node
175
          , csTcpu = x_tcpu + Node.tCpu node
176
          , csVcpu = x_vcpu + inc_vcpu
177
          , csXmem = x_xmem + Node.xMem node
178
          , csNmem = x_nmem + Node.nMem node
179
          , csNinst = x_ninst + length (Node.pList node)
180
          }
181

    
182
-- | Compute the total free disk and memory in the cluster.
183
totalResources :: Node.List -> CStats
184
totalResources nl =
185
    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
186
    in cs { csScore = compCV nl }
187

    
188
-- | Compute the delta between two cluster state.
189
--
190
-- This is used when doing allocations, to understand better the
191
-- available cluster resources. The return value is a triple of the
192
-- current used values, the delta that was still allocated, and what
193
-- was left unallocated.
194
computeAllocationDelta :: CStats -> CStats -> AllocStats
195
computeAllocationDelta cini cfin =
196
    let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
197
        CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
198
                csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
199
        rini = RSpec i_icpu i_imem i_idsk
200
        rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
201
        un_cpu = v_cpu - f_icpu
202
        runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
203
    in (rini, rfin, runa)
204

    
205
-- | The names of the individual elements in the CV list
206
detailedCVNames :: [String]
207
detailedCVNames = [ "free_mem_cv"
208
                  , "free_disk_cv"
209
                  , "n1_cnt"
210
                  , "reserved_mem_cv"
211
                  , "offline_all_cnt"
212
                  , "offline_pri_cnt"
213
                  , "vcpu_ratio_cv"
214
                  , "cpu_load_cv"
215
                  , "mem_load_cv"
216
                  , "disk_load_cv"
217
                  , "net_load_cv"
218
                  , "pri_tags_score"
219
                  ]
220

    
221
-- | Compute the mem and disk covariance.
222
compDetailedCV :: Node.List -> [Double]
223
compDetailedCV nl =
224
    let
225
        all_nodes = Container.elems nl
226
        (offline, nodes) = partition Node.offline all_nodes
227
        mem_l = map Node.pMem nodes
228
        dsk_l = map Node.pDsk nodes
229
        -- metric: memory covariance
230
        mem_cv = varianceCoeff mem_l
231
        -- metric: disk covariance
232
        dsk_cv = varianceCoeff dsk_l
233
        n1_l = length $ filter Node.failN1 nodes
234
        -- metric: count of failN1 nodes
235
        n1_score = fromIntegral n1_l::Double
236
        res_l = map Node.pRem nodes
237
        -- metric: reserved memory covariance
238
        res_cv = varianceCoeff res_l
239
        -- offline instances metrics
240
        offline_ipri = sum . map (length . Node.pList) $ offline
241
        offline_isec = sum . map (length . Node.sList) $ offline
242
        -- metric: count of instances on offline nodes
243
        off_score = fromIntegral (offline_ipri + offline_isec)::Double
244
        -- metric: count of primary instances on offline nodes (this
245
        -- helps with evacuation/failover of primary instances on
246
        -- 2-node clusters with one node offline)
247
        off_pri_score = fromIntegral offline_ipri::Double
248
        cpu_l = map Node.pCpu nodes
249
        -- metric: covariance of vcpu/pcpu ratio
250
        cpu_cv = varianceCoeff cpu_l
251
        -- metrics: covariance of cpu, memory, disk and network load
252
        (c_load, m_load, d_load, n_load) = unzip4 $
253
            map (\n ->
254
                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
255
                         DynUtil c2 m2 d2 n2 = Node.utilPool n
256
                     in (c1/c2, m1/m2, d1/d2, n1/n2)
257
                ) nodes
258
        -- metric: conflicting instance count
259
        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
260
        pri_tags_score = fromIntegral pri_tags_inst::Double
261
    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
262
       , varianceCoeff c_load, varianceCoeff m_load
263
       , varianceCoeff d_load, varianceCoeff n_load
264
       , pri_tags_score ]
265

    
266
-- | Compute the /total/ variance.
267
compCV :: Node.List -> Double
268
compCV = sum . compDetailedCV
269

    
270
-- | Compute online nodes from a Node.List
271
getOnline :: Node.List -> [Node.Node]
272
getOnline = filter (not . Node.offline) . Container.elems
273

    
274
-- * hbal functions
275

    
276
-- | Compute best table. Note that the ordering of the arguments is important.
277
compareTables :: Table -> Table -> Table
278
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
279
    if a_cv > b_cv then b else a
280

    
281
-- | Applies an instance move to a given node list and instance.
282
applyMove :: Node.List -> Instance.Instance
283
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
284
-- Failover (f)
285
applyMove nl inst Failover =
286
    let old_pdx = Instance.pNode inst
287
        old_sdx = Instance.sNode inst
288
        old_p = Container.find old_pdx nl
289
        old_s = Container.find old_sdx nl
290
        int_p = Node.removePri old_p inst
291
        int_s = Node.removeSec old_s inst
292
        force_p = Node.offline old_p
293
        new_nl = do -- Maybe monad
294
          new_p <- Node.addPriEx force_p int_s inst
295
          new_s <- Node.addSec int_p inst old_sdx
296
          let new_inst = Instance.setBoth inst old_sdx old_pdx
297
          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
298
                  new_inst, old_sdx, old_pdx)
299
    in new_nl
300

    
301
-- Replace the primary (f:, r:np, f)
302
applyMove nl inst (ReplacePrimary new_pdx) =
303
    let old_pdx = Instance.pNode inst
304
        old_sdx = Instance.sNode inst
305
        old_p = Container.find old_pdx nl
306
        old_s = Container.find old_sdx nl
307
        tgt_n = Container.find new_pdx nl
308
        int_p = Node.removePri old_p inst
309
        int_s = Node.removeSec old_s inst
310
        force_p = Node.offline old_p
311
        new_nl = do -- Maybe monad
312
          -- check that the current secondary can host the instance
313
          -- during the migration
314
          tmp_s <- Node.addPriEx force_p int_s inst
315
          let tmp_s' = Node.removePri tmp_s inst
316
          new_p <- Node.addPriEx force_p tgt_n inst
317
          new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
318
          let new_inst = Instance.setPri inst new_pdx
319
          return (Container.add new_pdx new_p $
320
                  Container.addTwo old_pdx int_p old_sdx new_s nl,
321
                  new_inst, new_pdx, old_sdx)
322
    in new_nl
323

    
324
-- Replace the secondary (r:ns)
325
applyMove nl inst (ReplaceSecondary new_sdx) =
326
    let old_pdx = Instance.pNode inst
327
        old_sdx = Instance.sNode inst
328
        old_s = Container.find old_sdx nl
329
        tgt_n = Container.find new_sdx nl
330
        int_s = Node.removeSec old_s inst
331
        force_s = Node.offline old_s
332
        new_inst = Instance.setSec inst new_sdx
333
        new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
334
                 \new_s -> return (Container.addTwo new_sdx
335
                                   new_s old_sdx int_s nl,
336
                                   new_inst, old_pdx, new_sdx)
337
    in new_nl
338

    
339
-- Replace the secondary and failover (r:np, f)
340
applyMove nl inst (ReplaceAndFailover new_pdx) =
341
    let old_pdx = Instance.pNode inst
342
        old_sdx = Instance.sNode inst
343
        old_p = Container.find old_pdx nl
344
        old_s = Container.find old_sdx nl
345
        tgt_n = Container.find new_pdx nl
346
        int_p = Node.removePri old_p inst
347
        int_s = Node.removeSec old_s inst
348
        force_s = Node.offline old_s
349
        new_nl = do -- Maybe monad
350
          new_p <- Node.addPri tgt_n inst
351
          new_s <- Node.addSecEx force_s int_p inst new_pdx
352
          let new_inst = Instance.setBoth inst new_pdx old_pdx
353
          return (Container.add new_pdx new_p $
354
                  Container.addTwo old_pdx new_s old_sdx int_s nl,
355
                  new_inst, new_pdx, old_pdx)
356
    in new_nl
357

    
358
-- Failver and replace the secondary (f, r:ns)
359
applyMove nl inst (FailoverAndReplace new_sdx) =
360
    let old_pdx = Instance.pNode inst
361
        old_sdx = Instance.sNode inst
362
        old_p = Container.find old_pdx nl
363
        old_s = Container.find old_sdx nl
364
        tgt_n = Container.find new_sdx nl
365
        int_p = Node.removePri old_p inst
366
        int_s = Node.removeSec old_s inst
367
        force_p = Node.offline old_p
368
        new_nl = do -- Maybe monad
369
          new_p <- Node.addPriEx force_p int_s inst
370
          new_s <- Node.addSecEx force_p tgt_n inst old_sdx
371
          let new_inst = Instance.setBoth inst old_sdx new_sdx
372
          return (Container.add new_sdx new_s $
373
                  Container.addTwo old_sdx new_p old_pdx int_p nl,
374
                  new_inst, old_sdx, new_sdx)
375
    in new_nl
376

    
377
-- | Tries to allocate an instance on one given node.
378
allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
379
                 -> OpResult Node.AllocElement
380
allocateOnSingle nl inst p =
381
    let new_pdx = Node.idx p
382
        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
383
        new_nl = Node.addPri p inst >>= \new_p ->
384
                 return (Container.add new_pdx new_p nl, new_inst, [new_p])
385
    in new_nl
386

    
387
-- | Tries to allocate an instance on a given pair of nodes.
388
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
389
               -> OpResult Node.AllocElement
390
allocateOnPair nl inst tgt_p tgt_s =
391
    let new_pdx = Node.idx tgt_p
392
        new_sdx = Node.idx tgt_s
393
        new_nl = do -- Maybe monad
394
          new_p <- Node.addPri tgt_p inst
395
          new_s <- Node.addSec tgt_s inst new_pdx
396
          let new_inst = Instance.setBoth inst new_pdx new_sdx
397
          return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst,
398
                 [new_p, new_s])
399
    in new_nl
400

    
401
-- | Tries to perform an instance move and returns the best table
402
-- between the original one and the new one.
403
checkSingleStep :: Table -- ^ The original table
404
                -> Instance.Instance -- ^ The instance to move
405
                -> Table -- ^ The current best table
406
                -> IMove -- ^ The move to apply
407
                -> Table -- ^ The final best table
408
checkSingleStep ini_tbl target cur_tbl move =
409
    let
410
        Table ini_nl ini_il _ ini_plc = ini_tbl
411
        tmp_resu = applyMove ini_nl target move
412
    in
413
      case tmp_resu of
414
        OpFail _ -> cur_tbl
415
        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
416
            let tgt_idx = Instance.idx target
417
                upd_cvar = compCV upd_nl
418
                upd_il = Container.add tgt_idx new_inst ini_il
419
                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
420
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
421
            in
422
              compareTables cur_tbl upd_tbl
423

    
424
-- | Given the status of the current secondary as a valid new node and
425
-- the current candidate target node, generate the possible moves for
426
-- a instance.
427
possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
428
              -> Ndx       -- ^ Target node candidate
429
              -> [IMove]   -- ^ List of valid result moves
430
possibleMoves True tdx =
431
    [ReplaceSecondary tdx,
432
     ReplaceAndFailover tdx,
433
     ReplacePrimary tdx,
434
     FailoverAndReplace tdx]
435

    
436
possibleMoves False tdx =
437
    [ReplaceSecondary tdx,
438
     ReplaceAndFailover tdx]
439

    
440
-- | Compute the best move for a given instance.
441
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
442
                  -> Bool              -- ^ Whether disk moves are allowed
443
                  -> Table             -- ^ Original table
444
                  -> Instance.Instance -- ^ Instance to move
445
                  -> Table             -- ^ Best new table for this instance
446
checkInstanceMove nodes_idx disk_moves ini_tbl target =
447
    let
448
        opdx = Instance.pNode target
449
        osdx = Instance.sNode target
450
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
451
        use_secondary = elem osdx nodes_idx
452
        aft_failover = if use_secondary -- if allowed to failover
453
                       then checkSingleStep ini_tbl target ini_tbl Failover
454
                       else ini_tbl
455
        all_moves = if disk_moves
456
                    then concatMap (possibleMoves use_secondary) nodes
457
                    else []
458
    in
459
      -- iterate over the possible nodes for this instance
460
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
461

    
462
-- | Compute the best next move.
463
checkMove :: [Ndx]               -- ^ Allowed target node indices
464
          -> Bool                -- ^ Whether disk moves are allowed
465
          -> Table               -- ^ The current solution
466
          -> [Instance.Instance] -- ^ List of instances still to move
467
          -> Table               -- ^ The new solution
468
checkMove nodes_idx disk_moves ini_tbl victims =
469
    let Table _ _ _ ini_plc = ini_tbl
470
        -- iterate over all instances, computing the best move
471
        best_tbl =
472
            foldl'
473
            (\ step_tbl em ->
474
                 compareTables step_tbl $
475
                 checkInstanceMove nodes_idx disk_moves ini_tbl em)
476
            ini_tbl victims
477
        Table _ _ _ best_plc = best_tbl
478
    in if length best_plc == length ini_plc
479
       then ini_tbl -- no advancement
480
       else best_tbl
481

    
482
-- | Check if we are allowed to go deeper in the balancing
483
doNextBalance :: Table     -- ^ The starting table
484
              -> Int       -- ^ Remaining length
485
              -> Score     -- ^ Score at which to stop
486
              -> Bool      -- ^ The resulting table and commands
487
doNextBalance ini_tbl max_rounds min_score =
488
    let Table _ _ ini_cv ini_plc = ini_tbl
489
        ini_plc_len = length ini_plc
490
    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
491

    
492
-- | Run a balance move
493
tryBalance :: Table       -- ^ The starting table
494
           -> Bool        -- ^ Allow disk moves
495
           -> Bool        -- ^ Only evacuate moves
496
           -> Maybe Table -- ^ The resulting table and commands
497
tryBalance ini_tbl disk_moves evac_mode =
498
    let Table ini_nl ini_il ini_cv _ = ini_tbl
499
        all_inst = Container.elems ini_il
500
        all_inst' = if evac_mode
501
                    then let bad_nodes = map Node.idx . filter Node.offline $
502
                                         Container.elems ini_nl
503
                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
504
                                          Instance.pNode e `elem` bad_nodes)
505
                            all_inst
506
                    else all_inst
507
        reloc_inst = filter Instance.movable all_inst'
508
        node_idx = map Node.idx . filter (not . Node.offline) $
509
                   Container.elems ini_nl
510
        fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
511
        (Table _ _ fin_cv _) = fin_tbl
512
    in
513
      if fin_cv < ini_cv
514
      then Just fin_tbl -- this round made success, return the new table
515
      else Nothing
516

    
517
-- * Allocation functions
518

    
519
-- | Build failure stats out of a list of failures
520
collapseFailures :: [FailMode] -> FailStats
521
collapseFailures flst =
522
    map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
523

    
524
-- | Update current Allocation solution and failure stats with new
525
-- elements
526
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
527
concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols)
528

    
529
concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) =
530
    let nscore = compCV nl
531
        -- Choose the old or new solution, based on the cluster score
532
        nsols = case osols of
533
                  [] -> [(nscore, ns)]
534
                  (oscore, _):[] ->
535
                      if oscore < nscore
536
                      then osols
537
                      else [(nscore, ns)]
538
                  -- FIXME: here we simply concat to lists with more
539
                  -- than one element; we should instead abort, since
540
                  -- this is not a valid usage of this function
541
                  xs -> (nscore, ns):xs
542
        nsuc = cntok + 1
543
    -- Note: we force evaluation of nsols here in order to keep the
544
    -- memory profile low - we know that we will need nsols for sure
545
    -- in the next cycle, so we force evaluation of nsols, since the
546
    -- foldl' in the caller will only evaluate the tuple, but not the
547
    -- elements of the tuple
548
    in nsols `seq` nsuc `seq` (flst, nsuc, nsols)
549

    
550
-- | Try to allocate an instance on the cluster.
551
tryAlloc :: (Monad m) =>
552
            Node.List         -- ^ The node list
553
         -> Instance.List     -- ^ The instance list
554
         -> Instance.Instance -- ^ The instance to allocate
555
         -> Int               -- ^ Required number of nodes
556
         -> m AllocSolution   -- ^ Possible solution list
557
tryAlloc nl _ inst 2 =
558
    let all_nodes = getOnline nl
559
        all_pairs = liftM2 (,) all_nodes all_nodes
560
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
561
        sols = foldl' (\cstate (p, s) ->
562
                           concatAllocs cstate $ allocateOnPair nl inst p s
563
                      ) ([], 0, []) ok_pairs
564
    in return sols
565

    
566
tryAlloc nl _ inst 1 =
567
    let all_nodes = getOnline nl
568
        sols = foldl' (\cstate ->
569
                           concatAllocs cstate . allocateOnSingle nl inst
570
                      ) ([], 0, []) all_nodes
571
    in return sols
572

    
573
tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
574
                             \destinations required (" ++ show reqn ++
575
                                               "), only two supported"
576

    
577
-- | Try to allocate an instance on the cluster.
578
tryReloc :: (Monad m) =>
579
            Node.List       -- ^ The node list
580
         -> Instance.List   -- ^ The instance list
581
         -> Idx             -- ^ The index of the instance to move
582
         -> Int             -- ^ The number of nodes required
583
         -> [Ndx]           -- ^ Nodes which should not be used
584
         -> m AllocSolution -- ^ Solution list
585
tryReloc nl il xid 1 ex_idx =
586
    let all_nodes = getOnline nl
587
        inst = Container.find xid il
588
        ex_idx' = Instance.pNode inst:ex_idx
589
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
590
        valid_idxes = map Node.idx valid_nodes
591
        sols1 = foldl' (\cstate x ->
592
                            let em = do
593
                                  (mnl, i, _, _) <-
594
                                      applyMove nl inst (ReplaceSecondary x)
595
                                  return (mnl, i, [Container.find x mnl])
596
                            in concatAllocs cstate em
597
                       ) ([], 0, []) valid_idxes
598
    in return sols1
599

    
600
tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
601
                                \destinations required (" ++ show reqn ++
602
                                                  "), only one supported"
603

    
604
-- | Try to evacuate a list of nodes.
605
tryEvac :: (Monad m) =>
606
            Node.List       -- ^ The node list
607
         -> Instance.List   -- ^ The instance list
608
         -> [Ndx]           -- ^ Nodes to be evacuated
609
         -> m AllocSolution -- ^ Solution list
610
tryEvac nl il ex_ndx =
611
    let ex_nodes = map (`Container.find` nl) ex_ndx
612
        all_insts = nub . concatMap Node.sList $ ex_nodes
613
    in do
614
      (_, sol) <- foldM (\(nl', (_, _, rsols)) idx -> do
615
                           -- FIXME: hardcoded one node here
616
                           (fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx
617
                           case aes of
618
                             csol@(_, (nl'', _, _)):_ ->
619
                                 return (nl'', (fm, cs, csol:rsols))
620
                             _ -> fail $ "Can't evacuate instance " ++
621
                                  show idx
622
                        ) (nl, ([], 0, [])) all_insts
623
      return sol
624

    
625
-- | Recursively place instances on the cluster until we're out of space
626
iterateAlloc :: Node.List
627
             -> Instance.List
628
             -> Instance.Instance
629
             -> Int
630
             -> [Instance.Instance]
631
             -> Result (FailStats, Node.List, [Instance.Instance])
632
iterateAlloc nl il newinst nreq ixes =
633
      let depth = length ixes
634
          newname = printf "new-%d" depth::String
635
          newidx = length (Container.elems il) + depth
636
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
637
      in case tryAlloc nl il newi2 nreq of
638
           Bad s -> Bad s
639
           Ok (errs, _, sols3) ->
640
               case sols3 of
641
                 [] -> Ok (collapseFailures errs, nl, ixes)
642
                 (_, (xnl, xi, _)):[] ->
643
                     iterateAlloc xnl il newinst nreq $! (xi:ixes)
644
                 _ -> Bad "Internal error: multiple solutions for single\
645
                          \ allocation"
646

    
647
tieredAlloc :: Node.List
648
            -> Instance.List
649
            -> Instance.Instance
650
            -> Int
651
            -> [Instance.Instance]
652
            -> Result (FailStats, Node.List, [Instance.Instance])
653
tieredAlloc nl il newinst nreq ixes =
654
    case iterateAlloc nl il newinst nreq ixes of
655
      Bad s -> Bad s
656
      Ok (errs, nl', ixes') ->
657
          case Instance.shrinkByType newinst . fst . last $
658
               sortBy (comparing snd) errs of
659
            Bad _ -> Ok (errs, nl', ixes')
660
            Ok newinst' ->
661
                tieredAlloc nl' il newinst' nreq ixes'
662

    
663
-- * Formatting functions
664

    
665
-- | Given the original and final nodes, computes the relocation description.
666
computeMoves :: Instance.Instance -- ^ The instance to be moved
667
             -> String -- ^ The instance name
668
             -> IMove  -- ^ The move being performed
669
             -> String -- ^ New primary
670
             -> String -- ^ New secondary
671
             -> (String, [String])
672
                -- ^ Tuple of moves and commands list; moves is containing
673
                -- either @/f/@ for failover or @/r:name/@ for replace
674
                -- secondary, while the command list holds gnt-instance
675
                -- commands (without that prefix), e.g \"@failover instance1@\"
676
computeMoves i inam mv c d =
677
    case mv of
678
      Failover -> ("f", [mig])
679
      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
680
      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
681
      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
682
      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
683
    where morf = if Instance.running i then "migrate" else "failover"
684
          mig = printf "%s -f %s" morf inam::String
685
          rep n = printf "replace-disks -n %s %s" n inam
686

    
687
-- | Converts a placement to string format.
688
printSolutionLine :: Node.List     -- ^ The node list
689
                  -> Instance.List -- ^ The instance list
690
                  -> Int           -- ^ Maximum node name length
691
                  -> Int           -- ^ Maximum instance name length
692
                  -> Placement     -- ^ The current placement
693
                  -> Int           -- ^ The index of the placement in
694
                                   -- the solution
695
                  -> (String, [String])
696
printSolutionLine nl il nmlen imlen plc pos =
697
    let
698
        pmlen = (2*nmlen + 1)
699
        (i, p, s, mv, c) = plc
700
        inst = Container.find i il
701
        inam = Instance.alias inst
702
        npri = Node.alias $ Container.find p nl
703
        nsec = Node.alias $ Container.find s nl
704
        opri = Node.alias $ Container.find (Instance.pNode inst) nl
705
        osec = Node.alias $ Container.find (Instance.sNode inst) nl
706
        (moves, cmds) =  computeMoves inst inam mv npri nsec
707
        ostr = printf "%s:%s" opri osec::String
708
        nstr = printf "%s:%s" npri nsec::String
709
    in
710
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
711
       pos imlen inam pmlen ostr
712
       pmlen nstr c moves,
713
       cmds)
714

    
715
-- | Return the instance and involved nodes in an instance move.
716
involvedNodes :: Instance.List -> Placement -> [Ndx]
717
involvedNodes il plc =
718
    let (i, np, ns, _, _) = plc
719
        inst = Container.find i il
720
        op = Instance.pNode inst
721
        os = Instance.sNode inst
722
    in nub [np, ns, op, os]
723

    
724
-- | Inner function for splitJobs, that either appends the next job to
725
-- the current jobset, or starts a new jobset.
726
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
727
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
728
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
729
    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
730
    | otherwise = ([n]:cjs, ndx)
731

    
732
-- | Break a list of moves into independent groups. Note that this
733
-- will reverse the order of jobs.
734
splitJobs :: [MoveJob] -> [JobSet]
735
splitJobs = fst . foldl mergeJobs ([], [])
736

    
737
-- | Given a list of commands, prefix them with @gnt-instance@ and
738
-- also beautify the display a little.
739
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
740
formatJob jsn jsl (sn, (_, _, _, cmds)) =
741
    let out =
742
            printf "  echo job %d/%d" jsn sn:
743
            printf "  check":
744
            map ("  gnt-instance " ++) cmds
745
    in if sn == 1
746
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
747
       else out
748

    
749
-- | Given a list of commands, prefix them with @gnt-instance@ and
750
-- also beautify the display a little.
751
formatCmds :: [JobSet] -> String
752
formatCmds =
753
    unlines .
754
    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
755
                             (zip [1..] js)) .
756
    zip [1..]
757

    
758
-- | Print the node list.
759
printNodes :: Node.List -> [String] -> String
760
printNodes nl fs =
761
    let fields = case fs of
762
          [] -> Node.defaultFields
763
          "+":rest -> Node.defaultFields ++ rest
764
          _ -> fs
765
        snl = sortBy (comparing Node.idx) (Container.elems nl)
766
        (header, isnum) = unzip $ map Node.showHeader fields
767
    in unlines . map ((:) ' ' .  intercalate " ") $
768
       formatTable (header:map (Node.list fields) snl) isnum
769

    
770
-- | Print the instance list.
771
printInsts :: Node.List -> Instance.List -> String
772
printInsts nl il =
773
    let sil = sortBy (comparing Instance.idx) (Container.elems il)
774
        helper inst = [ if Instance.running inst then "R" else " "
775
                      , Instance.name inst
776
                      , Container.nameOf nl (Instance.pNode inst)
777
                      , let sdx = Instance.sNode inst
778
                        in if sdx == Node.noSecondary
779
                           then  ""
780
                           else Container.nameOf nl sdx
781
                      , printf "%3d" $ Instance.vcpus inst
782
                      , printf "%5d" $ Instance.mem inst
783
                      , printf "%5d" $ Instance.dsk inst `div` 1024
784
                      , printf "%5.3f" lC
785
                      , printf "%5.3f" lM
786
                      , printf "%5.3f" lD
787
                      , printf "%5.3f" lN
788
                      ]
789
            where DynUtil lC lM lD lN = Instance.util inst
790
        header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
791
                 , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
792
        isnum = False:False:False:False:repeat True
793
    in unlines . map ((:) ' ' . intercalate " ") $
794
       formatTable (header:map helper sil) isnum
795

    
796
-- | Shows statistics for a given node list.
797
printStats :: Node.List -> String
798
printStats nl =
799
    let dcvs = compDetailedCV nl
800
        hd = zip (detailedCVNames ++ repeat "unknown") dcvs
801
        formatted = map (\(header, val) ->
802
                             printf "%s=%.8f" header val::String) hd
803
    in intercalate ", " formatted
804

    
805
-- | Convert a placement into a list of OpCodes (basically a job).
806
iMoveToJob :: Node.List -> Instance.List
807
          -> Idx -> IMove -> [OpCodes.OpCode]
808
iMoveToJob nl il idx move =
809
    let inst = Container.find idx il
810
        iname = Instance.name inst
811
        lookNode  = Just . Container.nameOf nl
812
        opF = if Instance.running inst
813
              then OpCodes.OpMigrateInstance iname True False
814
              else OpCodes.OpFailoverInstance iname False
815
        opR n = OpCodes.OpReplaceDisks iname (lookNode n)
816
                OpCodes.ReplaceNewSecondary [] Nothing
817
    in case move of
818
         Failover -> [ opF ]
819
         ReplacePrimary np -> [ opF, opR np, opF ]
820
         ReplaceSecondary ns -> [ opR ns ]
821
         ReplaceAndFailover np -> [ opR np, opF ]
822
         FailoverAndReplace ns -> [ opF, opR ns ]