Statistics
| Branch: | Tag: | Revision:

root / Ganeti / HTools / Cluster.hs @ f4c0b8c5

History | View | Annotate | Download (31.6 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the "Main" module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
    (
31
     -- * Types
32
      AllocSolution
33
    , Table(..)
34
    , CStats(..)
35
    , AllocStats
36
    -- * Generic functions
37
    , totalResources
38
    , computeAllocationDelta
39
    -- * First phase functions
40
    , computeBadItems
41
    -- * Second phase functions
42
    , printSolution
43
    , printSolutionLine
44
    , formatCmds
45
    , involvedNodes
46
    , splitJobs
47
    -- * Display functions
48
    , printNodes
49
    , printInsts
50
    -- * Balacing functions
51
    , checkMove
52
    , doNextBalance
53
    , tryBalance
54
    , compCV
55
    , printStats
56
    , iMoveToJob
57
    -- * IAllocator functions
58
    , tryAlloc
59
    , tryReloc
60
    , tryEvac
61
    , collapseFailures
62
    ) where
63

    
64
import Data.List
65
import Data.Ord (comparing)
66
import Text.Printf (printf)
67
import Control.Monad
68

    
69
import qualified Ganeti.HTools.Container as Container
70
import qualified Ganeti.HTools.Instance as Instance
71
import qualified Ganeti.HTools.Node as Node
72
import Ganeti.HTools.Types
73
import Ganeti.HTools.Utils
74
import qualified Ganeti.OpCodes as OpCodes
75

    
76
-- * Types
77

    
78
-- | Allocation\/relocation solution.
79
type AllocSolution = ([FailMode], Int, [(Score, Node.AllocElement)])
80

    
81
-- | The complete state for the balancing solution
82
data Table = Table Node.List Instance.List Score [Placement]
83
             deriving (Show)
84

    
85
data CStats = CStats { csFmem :: Int    -- ^ Cluster free mem
86
                     , csFdsk :: Int    -- ^ Cluster free disk
87
                     , csAmem :: Int    -- ^ Cluster allocatable mem
88
                     , csAdsk :: Int    -- ^ Cluster allocatable disk
89
                     , csAcpu :: Int    -- ^ Cluster allocatable cpus
90
                     , csMmem :: Int    -- ^ Max node allocatable mem
91
                     , csMdsk :: Int    -- ^ Max node allocatable disk
92
                     , csMcpu :: Int    -- ^ Max node allocatable cpu
93
                     , csImem :: Int    -- ^ Instance used mem
94
                     , csIdsk :: Int    -- ^ Instance used disk
95
                     , csIcpu :: Int    -- ^ Instance used cpu
96
                     , csTmem :: Double -- ^ Cluster total mem
97
                     , csTdsk :: Double -- ^ Cluster total disk
98
                     , csTcpu :: Double -- ^ Cluster total cpus
99
                     , csVcpu :: Int    -- ^ Cluster virtual cpus (if
100
                                        -- node pCpu has been set,
101
                                        -- otherwise -1)
102
                     , csXmem :: Int    -- ^ Unnacounted for mem
103
                     , csNmem :: Int    -- ^ Node own memory
104
                     , csScore :: Score -- ^ The cluster score
105
                     , csNinst :: Int   -- ^ The total number of instances
106
                     }
107

    
108
-- | Currently used, possibly to allocate, unallocable
109
type AllocStats = (RSpec, RSpec, RSpec)
110

    
111
-- * Utility functions
112

    
113
-- | Verifies the N+1 status and return the affected nodes.
114
verifyN1 :: [Node.Node] -> [Node.Node]
115
verifyN1 = filter Node.failN1
116

    
117
{-| Computes the pair of bad nodes and instances.
118

    
119
The bad node list is computed via a simple 'verifyN1' check, and the
120
bad instance list is the list of primary and secondary instances of
121
those nodes.
122

    
123
-}
124
computeBadItems :: Node.List -> Instance.List ->
125
                   ([Node.Node], [Instance.Instance])
126
computeBadItems nl il =
127
  let bad_nodes = verifyN1 $ getOnline nl
128
      bad_instances = map (`Container.find` il) .
129
                      sort . nub $
130
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
131
  in
132
    (bad_nodes, bad_instances)
133

    
134
-- | Zero-initializer for the CStats type
135
emptyCStats :: CStats
136
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
137

    
138
-- | Update stats with data from a new node
139
updateCStats :: CStats -> Node.Node -> CStats
140
updateCStats cs node =
141
    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
142
                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
143
                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
144
                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
145
                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
146
                 csVcpu = x_vcpu,
147
                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
148
               }
149
            = cs
150
        inc_amem = Node.fMem node - Node.rMem node
151
        inc_amem' = if inc_amem > 0 then inc_amem else 0
152
        inc_adsk = Node.availDisk node
153
        inc_imem = truncate (Node.tMem node) - Node.nMem node
154
                   - Node.xMem node - Node.fMem node
155
        inc_icpu = Node.uCpu node
156
        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
157
        inc_vcpu = Node.hiCpu node
158

    
159
    in cs { csFmem = x_fmem + Node.fMem node
160
          , csFdsk = x_fdsk + Node.fDsk node
161
          , csAmem = x_amem + inc_amem'
162
          , csAdsk = x_adsk + inc_adsk
163
          , csAcpu = x_acpu
164
          , csMmem = max x_mmem inc_amem'
165
          , csMdsk = max x_mdsk inc_adsk
166
          , csMcpu = x_mcpu
167
          , csImem = x_imem + inc_imem
168
          , csIdsk = x_idsk + inc_idsk
169
          , csIcpu = x_icpu + inc_icpu
170
          , csTmem = x_tmem + Node.tMem node
171
          , csTdsk = x_tdsk + Node.tDsk node
172
          , csTcpu = x_tcpu + Node.tCpu node
173
          , csVcpu = x_vcpu + inc_vcpu
174
          , csXmem = x_xmem + Node.xMem node
175
          , csNmem = x_nmem + Node.nMem node
176
          , csNinst = x_ninst + length (Node.pList node)
177
          }
178

    
179
-- | Compute the total free disk and memory in the cluster.
180
totalResources :: Node.List -> CStats
181
totalResources nl =
182
    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
183
    in cs { csScore = compCV nl }
184

    
185
-- | Compute the delta between two cluster state.
186
--
187
-- This is used when doing allocations, to understand better the
188
-- available cluster resources. The return value is a triple of the
189
-- current used values, the delta that was still allocated, and what
190
-- was left unallocated.
191
computeAllocationDelta :: CStats -> CStats -> AllocStats
192
computeAllocationDelta cini cfin =
193
    let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
194
        CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
195
                csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
196
        rini = RSpec i_icpu i_imem i_idsk
197
        rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
198
        un_cpu = v_cpu - f_icpu
199
        runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
200
    in (rini, rfin, runa)
201

    
202
-- | The names of the individual elements in the CV list
203
detailedCVNames :: [String]
204
detailedCVNames = [ "free_mem_cv"
205
                  , "free_disk_cv"
206
                  , "n1_cnt"
207
                  , "reserved_mem_cv"
208
                  , "offline_all_cnt"
209
                  , "offline_pri_cnt"
210
                  , "vcpu_ratio_cv"
211
                  , "cpu_load_cv"
212
                  , "mem_load_cv"
213
                  , "disk_load_cv"
214
                  , "net_load_cv"
215
                  , "pri_tags_score"
216
                  ]
217

    
218
-- | Compute the mem and disk covariance.
219
compDetailedCV :: Node.List -> [Double]
220
compDetailedCV nl =
221
    let
222
        all_nodes = Container.elems nl
223
        (offline, nodes) = partition Node.offline all_nodes
224
        mem_l = map Node.pMem nodes
225
        dsk_l = map Node.pDsk nodes
226
        -- metric: memory covariance
227
        mem_cv = varianceCoeff mem_l
228
        -- metric: disk covariance
229
        dsk_cv = varianceCoeff dsk_l
230
        n1_l = length $ filter Node.failN1 nodes
231
        -- metric: count of failN1 nodes
232
        n1_score = fromIntegral n1_l::Double
233
        res_l = map Node.pRem nodes
234
        -- metric: reserved memory covariance
235
        res_cv = varianceCoeff res_l
236
        -- offline instances metrics
237
        offline_ipri = sum . map (length . Node.pList) $ offline
238
        offline_isec = sum . map (length . Node.sList) $ offline
239
        -- metric: count of instances on offline nodes
240
        off_score = fromIntegral (offline_ipri + offline_isec)::Double
241
        -- metric: count of primary instances on offline nodes (this
242
        -- helps with evacuation/failover of primary instances on
243
        -- 2-node clusters with one node offline)
244
        off_pri_score = fromIntegral offline_ipri::Double
245
        cpu_l = map Node.pCpu nodes
246
        -- metric: covariance of vcpu/pcpu ratio
247
        cpu_cv = varianceCoeff cpu_l
248
        -- metrics: covariance of cpu, memory, disk and network load
249
        (c_load, m_load, d_load, n_load) = unzip4 $
250
            map (\n ->
251
                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
252
                         DynUtil c2 m2 d2 n2 = Node.utilPool n
253
                     in (c1/c2, m1/m2, d1/d2, n1/n2)
254
                ) nodes
255
        -- metric: conflicting instance count
256
        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
257
        pri_tags_score = fromIntegral pri_tags_inst::Double
258
    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
259
       , varianceCoeff c_load, varianceCoeff m_load
260
       , varianceCoeff d_load, varianceCoeff n_load
261
       , pri_tags_score ]
262

    
263
-- | Compute the /total/ variance.
264
compCV :: Node.List -> Double
265
compCV = sum . compDetailedCV
266

    
267
-- | Compute online nodes from a Node.List
268
getOnline :: Node.List -> [Node.Node]
269
getOnline = filter (not . Node.offline) . Container.elems
270

    
271
-- * hbal functions
272

    
273
-- | Compute best table. Note that the ordering of the arguments is important.
274
compareTables :: Table -> Table -> Table
275
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
276
    if a_cv > b_cv then b else a
277

    
278
-- | Applies an instance move to a given node list and instance.
279
applyMove :: Node.List -> Instance.Instance
280
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
281
-- Failover (f)
282
applyMove nl inst Failover =
283
    let old_pdx = Instance.pNode inst
284
        old_sdx = Instance.sNode inst
285
        old_p = Container.find old_pdx nl
286
        old_s = Container.find old_sdx nl
287
        int_p = Node.removePri old_p inst
288
        int_s = Node.removeSec old_s inst
289
        new_nl = do -- Maybe monad
290
          new_p <- Node.addPri int_s inst
291
          new_s <- Node.addSec int_p inst old_sdx
292
          let new_inst = Instance.setBoth inst old_sdx old_pdx
293
          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
294
                  new_inst, old_sdx, old_pdx)
295
    in new_nl
296

    
297
-- Replace the primary (f:, r:np, f)
298
applyMove nl inst (ReplacePrimary new_pdx) =
299
    let old_pdx = Instance.pNode inst
300
        old_sdx = Instance.sNode inst
301
        old_p = Container.find old_pdx nl
302
        old_s = Container.find old_sdx nl
303
        tgt_n = Container.find new_pdx nl
304
        int_p = Node.removePri old_p inst
305
        int_s = Node.removeSec old_s inst
306
        new_nl = do -- Maybe monad
307
          -- check that the current secondary can host the instance
308
          -- during the migration
309
          tmp_s <- Node.addPri int_s inst
310
          let tmp_s' = Node.removePri tmp_s inst
311
          new_p <- Node.addPri tgt_n inst
312
          new_s <- Node.addSec tmp_s' inst new_pdx
313
          let new_inst = Instance.setPri inst new_pdx
314
          return (Container.add new_pdx new_p $
315
                  Container.addTwo old_pdx int_p old_sdx new_s nl,
316
                  new_inst, new_pdx, old_sdx)
317
    in new_nl
318

    
319
-- Replace the secondary (r:ns)
320
applyMove nl inst (ReplaceSecondary new_sdx) =
321
    let old_pdx = Instance.pNode inst
322
        old_sdx = Instance.sNode inst
323
        old_s = Container.find old_sdx nl
324
        tgt_n = Container.find new_sdx nl
325
        int_s = Node.removeSec old_s inst
326
        new_inst = Instance.setSec inst new_sdx
327
        new_nl = Node.addSec tgt_n inst old_pdx >>=
328
                 \new_s -> return (Container.addTwo new_sdx
329
                                   new_s old_sdx int_s nl,
330
                                   new_inst, old_pdx, new_sdx)
331
    in new_nl
332

    
333
-- Replace the secondary and failover (r:np, f)
334
applyMove nl inst (ReplaceAndFailover new_pdx) =
335
    let old_pdx = Instance.pNode inst
336
        old_sdx = Instance.sNode inst
337
        old_p = Container.find old_pdx nl
338
        old_s = Container.find old_sdx nl
339
        tgt_n = Container.find new_pdx nl
340
        int_p = Node.removePri old_p inst
341
        int_s = Node.removeSec old_s inst
342
        new_nl = do -- Maybe monad
343
          new_p <- Node.addPri tgt_n inst
344
          new_s <- Node.addSec int_p inst new_pdx
345
          let new_inst = Instance.setBoth inst new_pdx old_pdx
346
          return (Container.add new_pdx new_p $
347
                  Container.addTwo old_pdx new_s old_sdx int_s nl,
348
                  new_inst, new_pdx, old_pdx)
349
    in new_nl
350

    
351
-- Failver and replace the secondary (f, r:ns)
352
applyMove nl inst (FailoverAndReplace new_sdx) =
353
    let old_pdx = Instance.pNode inst
354
        old_sdx = Instance.sNode inst
355
        old_p = Container.find old_pdx nl
356
        old_s = Container.find old_sdx nl
357
        tgt_n = Container.find new_sdx nl
358
        int_p = Node.removePri old_p inst
359
        int_s = Node.removeSec old_s inst
360
        new_nl = do -- Maybe monad
361
          new_p <- Node.addPri int_s inst
362
          new_s <- Node.addSec tgt_n inst old_sdx
363
          let new_inst = Instance.setBoth inst old_sdx new_sdx
364
          return (Container.add new_sdx new_s $
365
                  Container.addTwo old_sdx new_p old_pdx int_p nl,
366
                  new_inst, old_sdx, new_sdx)
367
    in new_nl
368

    
369
-- | Tries to allocate an instance on one given node.
370
allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
371
                 -> OpResult Node.AllocElement
372
allocateOnSingle nl inst p =
373
    let new_pdx = Node.idx p
374
        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
375
        new_nl = Node.addPri p inst >>= \new_p ->
376
                 return (Container.add new_pdx new_p nl, new_inst, [new_p])
377
    in new_nl
378

    
379
-- | Tries to allocate an instance on a given pair of nodes.
380
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
381
               -> OpResult Node.AllocElement
382
allocateOnPair nl inst tgt_p tgt_s =
383
    let new_pdx = Node.idx tgt_p
384
        new_sdx = Node.idx tgt_s
385
        new_nl = do -- Maybe monad
386
          new_p <- Node.addPri tgt_p inst
387
          new_s <- Node.addSec tgt_s inst new_pdx
388
          let new_inst = Instance.setBoth inst new_pdx new_sdx
389
          return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst,
390
                 [new_p, new_s])
391
    in new_nl
392

    
393
-- | Tries to perform an instance move and returns the best table
394
-- between the original one and the new one.
395
checkSingleStep :: Table -- ^ The original table
396
                -> Instance.Instance -- ^ The instance to move
397
                -> Table -- ^ The current best table
398
                -> IMove -- ^ The move to apply
399
                -> Table -- ^ The final best table
400
checkSingleStep ini_tbl target cur_tbl move =
401
    let
402
        Table ini_nl ini_il _ ini_plc = ini_tbl
403
        tmp_resu = applyMove ini_nl target move
404
    in
405
      case tmp_resu of
406
        OpFail _ -> cur_tbl
407
        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
408
            let tgt_idx = Instance.idx target
409
                upd_cvar = compCV upd_nl
410
                upd_il = Container.add tgt_idx new_inst ini_il
411
                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
412
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
413
            in
414
              compareTables cur_tbl upd_tbl
415

    
416
-- | Given the status of the current secondary as a valid new node and
417
-- the current candidate target node, generate the possible moves for
418
-- a instance.
419
possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
420
              -> Ndx       -- ^ Target node candidate
421
              -> [IMove]   -- ^ List of valid result moves
422
possibleMoves True tdx =
423
    [ReplaceSecondary tdx,
424
     ReplaceAndFailover tdx,
425
     ReplacePrimary tdx,
426
     FailoverAndReplace tdx]
427

    
428
possibleMoves False tdx =
429
    [ReplaceSecondary tdx,
430
     ReplaceAndFailover tdx]
431

    
432
-- | Compute the best move for a given instance.
433
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
434
                  -> Bool              -- ^ Whether disk moves are allowed
435
                  -> Table             -- ^ Original table
436
                  -> Instance.Instance -- ^ Instance to move
437
                  -> Table             -- ^ Best new table for this instance
438
checkInstanceMove nodes_idx disk_moves ini_tbl target =
439
    let
440
        opdx = Instance.pNode target
441
        osdx = Instance.sNode target
442
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
443
        use_secondary = elem osdx nodes_idx
444
        aft_failover = if use_secondary -- if allowed to failover
445
                       then checkSingleStep ini_tbl target ini_tbl Failover
446
                       else ini_tbl
447
        all_moves = if disk_moves
448
                    then concatMap (possibleMoves use_secondary) nodes
449
                    else []
450
    in
451
      -- iterate over the possible nodes for this instance
452
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
453

    
454
-- | Compute the best next move.
455
checkMove :: [Ndx]               -- ^ Allowed target node indices
456
          -> Bool                -- ^ Whether disk moves are allowed
457
          -> Table               -- ^ The current solution
458
          -> [Instance.Instance] -- ^ List of instances still to move
459
          -> Table               -- ^ The new solution
460
checkMove nodes_idx disk_moves ini_tbl victims =
461
    let Table _ _ _ ini_plc = ini_tbl
462
        -- iterate over all instances, computing the best move
463
        best_tbl =
464
            foldl'
465
            (\ step_tbl em ->
466
                 compareTables step_tbl $
467
                 checkInstanceMove nodes_idx disk_moves ini_tbl em)
468
            ini_tbl victims
469
        Table _ _ _ best_plc = best_tbl
470
    in if length best_plc == length ini_plc
471
       then ini_tbl -- no advancement
472
       else best_tbl
473

    
474
-- | Check if we are allowed to go deeper in the balancing
475

    
476
doNextBalance :: Table       -- ^ The starting table
477
              -> Int         -- ^ Remaining length
478
              -> Score       -- ^ Score at which to stop
479
              -> Bool -- ^ The resulting table and commands
480
doNextBalance ini_tbl max_rounds min_score =
481
    let Table _ _ ini_cv ini_plc = ini_tbl
482
        ini_plc_len = length ini_plc
483
    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
484

    
485
-- | Run a balance move
486

    
487
tryBalance :: Table       -- ^ The starting table
488
           -> Bool        -- ^ Allow disk moves
489
           -> Bool        -- ^ Only evacuate moves
490
           -> Maybe Table -- ^ The resulting table and commands
491
tryBalance ini_tbl disk_moves evac_mode =
492
    let Table ini_nl ini_il ini_cv _ = ini_tbl
493
        all_inst = Container.elems ini_il
494
        all_inst' = if evac_mode
495
                    then let bad_nodes = map Node.idx . filter Node.offline $
496
                                         Container.elems ini_nl
497
                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
498
                                          Instance.pNode e `elem` bad_nodes)
499
                            all_inst
500
                    else all_inst
501
        reloc_inst = filter Instance.movable all_inst'
502
        node_idx = map Node.idx . filter (not . Node.offline) $
503
                   Container.elems ini_nl
504
        fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
505
        (Table _ _ fin_cv _) = fin_tbl
506
    in
507
      if fin_cv < ini_cv
508
      then Just fin_tbl -- this round made success, return the new table
509
      else Nothing
510

    
511
-- * Allocation functions
512

    
513
-- | Build failure stats out of a list of failures
514
collapseFailures :: [FailMode] -> FailStats
515
collapseFailures flst =
516
    map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
517

    
518
-- | Update current Allocation solution and failure stats with new
519
-- elements
520
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
521
concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols)
522

    
523
concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) =
524
    let nscore = compCV nl
525
        -- Choose the old or new solution, based on the cluster score
526
        nsols = case osols of
527
                  [] -> [(nscore, ns)]
528
                  (oscore, _):[] ->
529
                      if oscore < nscore
530
                      then osols
531
                      else [(nscore, ns)]
532
                  -- FIXME: here we simply concat to lists with more
533
                  -- than one element; we should instead abort, since
534
                  -- this is not a valid usage of this function
535
                  xs -> (nscore, ns):xs
536
        nsuc = cntok + 1
537
    -- Note: we force evaluation of nsols here in order to keep the
538
    -- memory profile low - we know that we will need nsols for sure
539
    -- in the next cycle, so we force evaluation of nsols, since the
540
    -- foldl' in the caller will only evaluate the tuple, but not the
541
    -- elements of the tuple
542
    in nsols `seq` nsuc `seq` (flst, nsuc, nsols)
543

    
544
-- | Try to allocate an instance on the cluster.
545
tryAlloc :: (Monad m) =>
546
            Node.List         -- ^ The node list
547
         -> Instance.List     -- ^ The instance list
548
         -> Instance.Instance -- ^ The instance to allocate
549
         -> Int               -- ^ Required number of nodes
550
         -> m AllocSolution   -- ^ Possible solution list
551
tryAlloc nl _ inst 2 =
552
    let all_nodes = getOnline nl
553
        all_pairs = liftM2 (,) all_nodes all_nodes
554
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
555
        sols = foldl' (\cstate (p, s) ->
556
                           concatAllocs cstate $ allocateOnPair nl inst p s
557
                      ) ([], 0, []) ok_pairs
558
    in return sols
559

    
560
tryAlloc nl _ inst 1 =
561
    let all_nodes = getOnline nl
562
        sols = foldl' (\cstate ->
563
                           concatAllocs cstate . allocateOnSingle nl inst
564
                      ) ([], 0, []) all_nodes
565
    in return sols
566

    
567
tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
568
                             \destinations required (" ++ show reqn ++
569
                                               "), only two supported"
570

    
571
-- | Try to allocate an instance on the cluster.
572
tryReloc :: (Monad m) =>
573
            Node.List       -- ^ The node list
574
         -> Instance.List   -- ^ The instance list
575
         -> Idx             -- ^ The index of the instance to move
576
         -> Int             -- ^ The number of nodes required
577
         -> [Ndx]           -- ^ Nodes which should not be used
578
         -> m AllocSolution -- ^ Solution list
579
tryReloc nl il xid 1 ex_idx =
580
    let all_nodes = getOnline nl
581
        inst = Container.find xid il
582
        ex_idx' = Instance.pNode inst:ex_idx
583
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
584
        valid_idxes = map Node.idx valid_nodes
585
        sols1 = foldl' (\cstate x ->
586
                            let em = do
587
                                  (mnl, i, _, _) <-
588
                                      applyMove nl inst (ReplaceSecondary x)
589
                                  return (mnl, i, [Container.find x mnl])
590
                            in concatAllocs cstate em
591
                       ) ([], 0, []) valid_idxes
592
    in return sols1
593

    
594
tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
595
                                \destinations required (" ++ show reqn ++
596
                                                  "), only one supported"
597

    
598
-- | Try to allocate an instance on the cluster.
599
tryEvac :: (Monad m) =>
600
            Node.List       -- ^ The node list
601
         -> Instance.List   -- ^ The instance list
602
         -> [Ndx]           -- ^ Nodes to be evacuated
603
         -> m AllocSolution -- ^ Solution list
604
tryEvac nl il ex_ndx =
605
    let ex_nodes = map (`Container.find` nl) ex_ndx
606
        all_insts = nub . concatMap Node.sList $ ex_nodes
607
    in do
608
      (_, sol) <- foldM (\(nl', (_, _, rsols)) idx -> do
609
                           -- FIXME: hardcoded one node here
610
                           (fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx
611
                           case aes of
612
                             csol@(_, (nl'', _, _)):_ ->
613
                                 return (nl'', (fm, cs, csol:rsols))
614
                             _ -> fail $ "Can't evacuate instance " ++
615
                                  show idx
616
                        ) (nl, ([], 0, [])) all_insts
617
      return sol
618

    
619
-- * Formatting functions
620

    
621
-- | Given the original and final nodes, computes the relocation description.
622
computeMoves :: Instance.Instance -- ^ The instance to be moved
623
             -> String -- ^ The instance name
624
             -> IMove  -- ^ The move being performed
625
             -> String -- ^ New primary
626
             -> String -- ^ New secondary
627
             -> (String, [String])
628
                -- ^ Tuple of moves and commands list; moves is containing
629
                -- either @/f/@ for failover or @/r:name/@ for replace
630
                -- secondary, while the command list holds gnt-instance
631
                -- commands (without that prefix), e.g \"@failover instance1@\"
632
computeMoves i inam mv c d =
633
    case mv of
634
      Failover -> ("f", [mig])
635
      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
636
      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
637
      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
638
      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
639
    where morf = if Instance.running i then "migrate" else "failover"
640
          mig = printf "%s -f %s" morf inam::String
641
          rep n = printf "replace-disks -n %s %s" n inam
642

    
643
-- | Converts a placement to string format.
644
printSolutionLine :: Node.List     -- ^ The node list
645
                  -> Instance.List -- ^ The instance list
646
                  -> Int           -- ^ Maximum node name length
647
                  -> Int           -- ^ Maximum instance name length
648
                  -> Placement     -- ^ The current placement
649
                  -> Int           -- ^ The index of the placement in
650
                                   -- the solution
651
                  -> (String, [String])
652
printSolutionLine nl il nmlen imlen plc pos =
653
    let
654
        pmlen = (2*nmlen + 1)
655
        (i, p, s, mv, c) = plc
656
        inst = Container.find i il
657
        inam = Instance.name inst
658
        npri = Container.nameOf nl p
659
        nsec = Container.nameOf nl s
660
        opri = Container.nameOf nl $ Instance.pNode inst
661
        osec = Container.nameOf nl $ Instance.sNode inst
662
        (moves, cmds) =  computeMoves inst inam mv npri nsec
663
        ostr = printf "%s:%s" opri osec::String
664
        nstr = printf "%s:%s" npri nsec::String
665
    in
666
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
667
       pos imlen inam pmlen ostr
668
       pmlen nstr c moves,
669
       cmds)
670

    
671
-- | Return the instance and involved nodes in an instance move.
672
involvedNodes :: Instance.List -> Placement -> [Ndx]
673
involvedNodes il plc =
674
    let (i, np, ns, _, _) = plc
675
        inst = Container.find i il
676
        op = Instance.pNode inst
677
        os = Instance.sNode inst
678
    in nub [np, ns, op, os]
679

    
680
-- | Inner function for splitJobs, that either appends the next job to
681
-- the current jobset, or starts a new jobset.
682
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
683
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
684
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
685
    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
686
    | otherwise = ([n]:cjs, ndx)
687

    
688
-- | Break a list of moves into independent groups. Note that this
689
-- will reverse the order of jobs.
690
splitJobs :: [MoveJob] -> [JobSet]
691
splitJobs = fst . foldl mergeJobs ([], [])
692

    
693
-- | Given a list of commands, prefix them with @gnt-instance@ and
694
-- also beautify the display a little.
695
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
696
formatJob jsn jsl (sn, (_, _, _, cmds)) =
697
    let out =
698
            printf "  echo job %d/%d" jsn sn:
699
            printf "  check":
700
            map ("  gnt-instance " ++) cmds
701
    in if sn == 1
702
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
703
       else out
704

    
705
-- | Given a list of commands, prefix them with @gnt-instance@ and
706
-- also beautify the display a little.
707
formatCmds :: [JobSet] -> String
708
formatCmds =
709
    unlines .
710
    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
711
                             (zip [1..] js)) .
712
    zip [1..]
713

    
714
-- | Converts a solution to string format.
715
printSolution :: Node.List
716
              -> Instance.List
717
              -> [Placement]
718
              -> ([String], [[String]])
719
printSolution nl il sol =
720
    let
721
        nmlen = Container.maxNameLen nl
722
        imlen = Container.maxNameLen il
723
    in
724
      unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..]
725

    
726
-- | Print the node list.
727
printNodes :: Node.List -> [String] -> String
728
printNodes nl fs =
729
    let fields = if null fs
730
                 then Node.defaultFields
731
                 else fs
732
        snl = sortBy (comparing Node.idx) (Container.elems nl)
733
        (header, isnum) = unzip $ map Node.showHeader fields
734
    in unlines . map ((:) ' ' .  intercalate " ") $
735
       formatTable (header:map (Node.list fields) snl) isnum
736

    
737
-- | Print the instance list.
738
printInsts :: Node.List -> Instance.List -> String
739
printInsts nl il =
740
    let sil = sortBy (comparing Instance.idx) (Container.elems il)
741
        helper inst = [ if Instance.running inst then "R" else " "
742
                      , Instance.name inst
743
                      , Container.nameOf nl (Instance.pNode inst)
744
                      , let sdx = Instance.sNode inst
745
                        in if sdx == Node.noSecondary
746
                           then  ""
747
                           else Container.nameOf nl sdx
748
                      , printf "%3d" $ Instance.vcpus inst
749
                      , printf "%5d" $ Instance.mem inst
750
                      , printf "%5d" $ Instance.dsk inst `div` 1024
751
                      , printf "%5.3f" lC
752
                      , printf "%5.3f" lM
753
                      , printf "%5.3f" lD
754
                      , printf "%5.3f" lN
755
                      ]
756
            where DynUtil lC lM lD lN = Instance.util inst
757
        header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
758
                 , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
759
        isnum = False:False:False:False:repeat True
760
    in unlines . map ((:) ' ' . intercalate " ") $
761
       formatTable (header:map helper sil) isnum
762

    
763
-- | Shows statistics for a given node list.
764
printStats :: Node.List -> String
765
printStats nl =
766
    let dcvs = compDetailedCV nl
767
        hd = zip (detailedCVNames ++ repeat "unknown") dcvs
768
        formatted = map (\(header, val) ->
769
                             printf "%s=%.8f" header val::String) hd
770
    in intercalate ", " formatted
771

    
772
-- | Convert a placement into a list of OpCodes (basically a job).
773
iMoveToJob :: String -> Node.List -> Instance.List
774
          -> Idx -> IMove -> [OpCodes.OpCode]
775
iMoveToJob csf nl il idx move =
776
    let inst = Container.find idx il
777
        iname = Instance.name inst ++ csf
778
        lookNode n = Just (Container.nameOf nl n ++ csf)
779
        opF = if Instance.running inst
780
              then OpCodes.OpMigrateInstance iname True False
781
              else OpCodes.OpFailoverInstance iname False
782
        opR n = OpCodes.OpReplaceDisks iname (lookNode n)
783
                OpCodes.ReplaceNewSecondary [] Nothing
784
    in case move of
785
         Failover -> [ opF ]
786
         ReplacePrimary np -> [ opF, opR np, opF ]
787
         ReplaceSecondary ns -> [ opR ns ]
788
         ReplaceAndFailover np -> [ opR np, opF ]
789
         FailoverAndReplace ns -> [ opF, opR ns ]