Statistics
| Branch: | Tag: | Revision:

root / Ganeti / HTools / Cluster.hs @ 86ecce4a

History | View | Annotate | Download (30.8 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the "Main" module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
    (
31
     -- * Types
32
      AllocSolution
33
    , Table(..)
34
    , CStats(..)
35
    -- * Generic functions
36
    , totalResources
37
    -- * First phase functions
38
    , computeBadItems
39
    -- * Second phase functions
40
    , printSolution
41
    , printSolutionLine
42
    , formatCmds
43
    , involvedNodes
44
    , splitJobs
45
    -- * Display functions
46
    , printNodes
47
    , printInsts
48
    -- * Balacing functions
49
    , checkMove
50
    , doNextBalance
51
    , tryBalance
52
    , compCV
53
    , printStats
54
    , iMoveToJob
55
    -- * IAllocator functions
56
    , tryAlloc
57
    , tryReloc
58
    , tryEvac
59
    , collapseFailures
60
    ) where
61

    
62
import Data.List
63
import Data.Ord (comparing)
64
import Text.Printf (printf)
65
import Control.Monad
66

    
67
import qualified Ganeti.HTools.Container as Container
68
import qualified Ganeti.HTools.Instance as Instance
69
import qualified Ganeti.HTools.Node as Node
70
import Ganeti.HTools.Types
71
import Ganeti.HTools.Utils
72
import qualified Ganeti.OpCodes as OpCodes
73

    
74
-- * Types
75

    
76
-- | Allocation\/relocation solution.
77
type AllocSolution = ([FailMode], Int, [(Score, Node.AllocElement)])
78

    
79
-- | The complete state for the balancing solution
80
data Table = Table Node.List Instance.List Score [Placement]
81
             deriving (Show)
82

    
83
data CStats = CStats { csFmem :: Int    -- ^ Cluster free mem
84
                     , csFdsk :: Int    -- ^ Cluster free disk
85
                     , csAmem :: Int    -- ^ Cluster allocatable mem
86
                     , csAdsk :: Int    -- ^ Cluster allocatable disk
87
                     , csAcpu :: Int    -- ^ Cluster allocatable cpus
88
                     , csMmem :: Int    -- ^ Max node allocatable mem
89
                     , csMdsk :: Int    -- ^ Max node allocatable disk
90
                     , csMcpu :: Int    -- ^ Max node allocatable cpu
91
                     , csImem :: Int    -- ^ Instance used mem
92
                     , csIdsk :: Int    -- ^ Instance used disk
93
                     , csIcpu :: Int    -- ^ Instance used cpu
94
                     , csTmem :: Double -- ^ Cluster total mem
95
                     , csTdsk :: Double -- ^ Cluster total disk
96
                     , csTcpu :: Double -- ^ Cluster total cpus
97
                     , csVcpu :: Int    -- ^ Cluster virtual cpus (if
98
                                        -- node pCpu has been set,
99
                                        -- otherwise -1)
100
                     , csXmem :: Int    -- ^ Unnacounted for mem
101
                     , csNmem :: Int    -- ^ Node own memory
102
                     , csScore :: Score -- ^ The cluster score
103
                     , csNinst :: Int   -- ^ The total number of instances
104
                     }
105

    
106
-- * Utility functions
107

    
108
-- | Verifies the N+1 status and return the affected nodes.
109
verifyN1 :: [Node.Node] -> [Node.Node]
110
verifyN1 = filter Node.failN1
111

    
112
{-| Computes the pair of bad nodes and instances.
113

    
114
The bad node list is computed via a simple 'verifyN1' check, and the
115
bad instance list is the list of primary and secondary instances of
116
those nodes.
117

    
118
-}
119
computeBadItems :: Node.List -> Instance.List ->
120
                   ([Node.Node], [Instance.Instance])
121
computeBadItems nl il =
122
  let bad_nodes = verifyN1 $ getOnline nl
123
      bad_instances = map (`Container.find` il) .
124
                      sort . nub $
125
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
126
  in
127
    (bad_nodes, bad_instances)
128

    
129
-- | Zero-initializer for the CStats type
130
emptyCStats :: CStats
131
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
132

    
133
-- | Update stats with data from a new node
134
updateCStats :: CStats -> Node.Node -> CStats
135
updateCStats cs node =
136
    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
137
                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
138
                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
139
                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
140
                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
141
                 csVcpu = x_vcpu,
142
                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
143
               }
144
            = cs
145
        inc_amem = Node.fMem node - Node.rMem node
146
        inc_amem' = if inc_amem > 0 then inc_amem else 0
147
        inc_adsk = Node.availDisk node
148
        inc_imem = truncate (Node.tMem node) - Node.nMem node
149
                   - Node.xMem node - Node.fMem node
150
        inc_icpu = Node.uCpu node
151
        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
152
        inc_vcpu = Node.hiCpu node
153

    
154
    in cs { csFmem = x_fmem + Node.fMem node
155
          , csFdsk = x_fdsk + Node.fDsk node
156
          , csAmem = x_amem + inc_amem'
157
          , csAdsk = x_adsk + inc_adsk
158
          , csAcpu = x_acpu
159
          , csMmem = max x_mmem inc_amem'
160
          , csMdsk = max x_mdsk inc_adsk
161
          , csMcpu = x_mcpu
162
          , csImem = x_imem + inc_imem
163
          , csIdsk = x_idsk + inc_idsk
164
          , csIcpu = x_icpu + inc_icpu
165
          , csTmem = x_tmem + Node.tMem node
166
          , csTdsk = x_tdsk + Node.tDsk node
167
          , csTcpu = x_tcpu + Node.tCpu node
168
          , csVcpu = if inc_vcpu == Node.noLimitInt
169
                     then Node.noLimitInt
170
                     else x_vcpu + inc_vcpu
171
          , csXmem = x_xmem + Node.xMem node
172
          , csNmem = x_nmem + Node.nMem node
173
          , csNinst = x_ninst + length (Node.pList node)
174
          }
175

    
176
-- | Compute the total free disk and memory in the cluster.
177
totalResources :: Node.List -> CStats
178
totalResources nl =
179
    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
180
    in cs { csScore = compCV nl }
181

    
182
-- | The names of the individual elements in the CV list
183
detailedCVNames :: [String]
184
detailedCVNames = [ "free_mem_cv"
185
                  , "free_disk_cv"
186
                  , "n1_cnt"
187
                  , "reserved_mem_cv"
188
                  , "offline_all_cnt"
189
                  , "offline_pri_cnt"
190
                  , "vcpu_ratio_cv"
191
                  , "cpu_load_cv"
192
                  , "mem_load_cv"
193
                  , "disk_load_cv"
194
                  , "net_load_cv"
195
                  , "pri_tags_score"
196
                  ]
197

    
198
-- | Compute the mem and disk covariance.
199
compDetailedCV :: Node.List -> [Double]
200
compDetailedCV nl =
201
    let
202
        all_nodes = Container.elems nl
203
        (offline, nodes) = partition Node.offline all_nodes
204
        mem_l = map Node.pMem nodes
205
        dsk_l = map Node.pDsk nodes
206
        -- metric: memory covariance
207
        mem_cv = varianceCoeff mem_l
208
        -- metric: disk covariance
209
        dsk_cv = varianceCoeff dsk_l
210
        n1_l = length $ filter Node.failN1 nodes
211
        -- metric: count of failN1 nodes
212
        n1_score = fromIntegral n1_l::Double
213
        res_l = map Node.pRem nodes
214
        -- metric: reserved memory covariance
215
        res_cv = varianceCoeff res_l
216
        -- offline instances metrics
217
        offline_ipri = sum . map (length . Node.pList) $ offline
218
        offline_isec = sum . map (length . Node.sList) $ offline
219
        -- metric: count of instances on offline nodes
220
        off_score = fromIntegral (offline_ipri + offline_isec)::Double
221
        -- metric: count of primary instances on offline nodes (this
222
        -- helps with evacuation/failover of primary instances on
223
        -- 2-node clusters with one node offline)
224
        off_pri_score = fromIntegral offline_ipri::Double
225
        cpu_l = map Node.pCpu nodes
226
        -- metric: covariance of vcpu/pcpu ratio
227
        cpu_cv = varianceCoeff cpu_l
228
        -- metrics: covariance of cpu, memory, disk and network load
229
        (c_load, m_load, d_load, n_load) = unzip4 $
230
            map (\n ->
231
                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
232
                         DynUtil c2 m2 d2 n2 = Node.utilPool n
233
                     in (c1/c2, m1/m2, d1/d2, n1/n2)
234
                ) nodes
235
        -- metric: conflicting instance count
236
        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
237
        pri_tags_score = fromIntegral pri_tags_inst::Double
238
    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
239
       , varianceCoeff c_load, varianceCoeff m_load
240
       , varianceCoeff d_load, varianceCoeff n_load
241
       , pri_tags_score ]
242

    
243
-- | Compute the /total/ variance.
244
compCV :: Node.List -> Double
245
compCV = sum . compDetailedCV
246

    
247
-- | Compute online nodes from a Node.List
248
getOnline :: Node.List -> [Node.Node]
249
getOnline = filter (not . Node.offline) . Container.elems
250

    
251
-- * hbal functions
252

    
253
-- | Compute best table. Note that the ordering of the arguments is important.
254
compareTables :: Table -> Table -> Table
255
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
256
    if a_cv > b_cv then b else a
257

    
258
-- | Applies an instance move to a given node list and instance.
259
applyMove :: Node.List -> Instance.Instance
260
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
261
-- Failover (f)
262
applyMove nl inst Failover =
263
    let old_pdx = Instance.pNode inst
264
        old_sdx = Instance.sNode inst
265
        old_p = Container.find old_pdx nl
266
        old_s = Container.find old_sdx nl
267
        int_p = Node.removePri old_p inst
268
        int_s = Node.removeSec old_s inst
269
        new_nl = do -- Maybe monad
270
          new_p <- Node.addPri int_s inst
271
          new_s <- Node.addSec int_p inst old_sdx
272
          let new_inst = Instance.setBoth inst old_sdx old_pdx
273
          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
274
                  new_inst, old_sdx, old_pdx)
275
    in new_nl
276

    
277
-- Replace the primary (f:, r:np, f)
278
applyMove nl inst (ReplacePrimary new_pdx) =
279
    let old_pdx = Instance.pNode inst
280
        old_sdx = Instance.sNode inst
281
        old_p = Container.find old_pdx nl
282
        old_s = Container.find old_sdx nl
283
        tgt_n = Container.find new_pdx nl
284
        int_p = Node.removePri old_p inst
285
        int_s = Node.removeSec old_s inst
286
        new_nl = do -- Maybe monad
287
          -- check that the current secondary can host the instance
288
          -- during the migration
289
          tmp_s <- Node.addPri int_s inst
290
          let tmp_s' = Node.removePri tmp_s inst
291
          new_p <- Node.addPri tgt_n inst
292
          new_s <- Node.addSec tmp_s' inst new_pdx
293
          let new_inst = Instance.setPri inst new_pdx
294
          return (Container.add new_pdx new_p $
295
                  Container.addTwo old_pdx int_p old_sdx new_s nl,
296
                  new_inst, new_pdx, old_sdx)
297
    in new_nl
298

    
299
-- Replace the secondary (r:ns)
300
applyMove nl inst (ReplaceSecondary new_sdx) =
301
    let old_pdx = Instance.pNode inst
302
        old_sdx = Instance.sNode inst
303
        old_s = Container.find old_sdx nl
304
        tgt_n = Container.find new_sdx nl
305
        int_s = Node.removeSec old_s inst
306
        new_inst = Instance.setSec inst new_sdx
307
        new_nl = Node.addSec tgt_n inst old_pdx >>=
308
                 \new_s -> return (Container.addTwo new_sdx
309
                                   new_s old_sdx int_s nl,
310
                                   new_inst, old_pdx, new_sdx)
311
    in new_nl
312

    
313
-- Replace the secondary and failover (r:np, f)
314
applyMove nl inst (ReplaceAndFailover new_pdx) =
315
    let old_pdx = Instance.pNode inst
316
        old_sdx = Instance.sNode inst
317
        old_p = Container.find old_pdx nl
318
        old_s = Container.find old_sdx nl
319
        tgt_n = Container.find new_pdx nl
320
        int_p = Node.removePri old_p inst
321
        int_s = Node.removeSec old_s inst
322
        new_nl = do -- Maybe monad
323
          new_p <- Node.addPri tgt_n inst
324
          new_s <- Node.addSec int_p inst new_pdx
325
          let new_inst = Instance.setBoth inst new_pdx old_pdx
326
          return (Container.add new_pdx new_p $
327
                  Container.addTwo old_pdx new_s old_sdx int_s nl,
328
                  new_inst, new_pdx, old_pdx)
329
    in new_nl
330

    
331
-- Failver and replace the secondary (f, r:ns)
332
applyMove nl inst (FailoverAndReplace new_sdx) =
333
    let old_pdx = Instance.pNode inst
334
        old_sdx = Instance.sNode inst
335
        old_p = Container.find old_pdx nl
336
        old_s = Container.find old_sdx nl
337
        tgt_n = Container.find new_sdx nl
338
        int_p = Node.removePri old_p inst
339
        int_s = Node.removeSec old_s inst
340
        new_nl = do -- Maybe monad
341
          new_p <- Node.addPri int_s inst
342
          new_s <- Node.addSec tgt_n inst old_sdx
343
          let new_inst = Instance.setBoth inst old_sdx new_sdx
344
          return (Container.add new_sdx new_s $
345
                  Container.addTwo old_sdx new_p old_pdx int_p nl,
346
                  new_inst, old_sdx, new_sdx)
347
    in new_nl
348

    
349
-- | Tries to allocate an instance on one given node.
350
allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
351
                 -> OpResult Node.AllocElement
352
allocateOnSingle nl inst p =
353
    let new_pdx = Node.idx p
354
        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
355
        new_nl = Node.addPri p inst >>= \new_p ->
356
                 return (Container.add new_pdx new_p nl, new_inst, [new_p])
357
    in new_nl
358

    
359
-- | Tries to allocate an instance on a given pair of nodes.
360
allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
361
               -> OpResult Node.AllocElement
362
allocateOnPair nl inst tgt_p tgt_s =
363
    let new_pdx = Node.idx tgt_p
364
        new_sdx = Node.idx tgt_s
365
        new_nl = do -- Maybe monad
366
          new_p <- Node.addPri tgt_p inst
367
          new_s <- Node.addSec tgt_s inst new_pdx
368
          let new_inst = Instance.setBoth inst new_pdx new_sdx
369
          return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst,
370
                 [new_p, new_s])
371
    in new_nl
372

    
373
-- | Tries to perform an instance move and returns the best table
374
-- between the original one and the new one.
375
checkSingleStep :: Table -- ^ The original table
376
                -> Instance.Instance -- ^ The instance to move
377
                -> Table -- ^ The current best table
378
                -> IMove -- ^ The move to apply
379
                -> Table -- ^ The final best table
380
checkSingleStep ini_tbl target cur_tbl move =
381
    let
382
        Table ini_nl ini_il _ ini_plc = ini_tbl
383
        tmp_resu = applyMove ini_nl target move
384
    in
385
      case tmp_resu of
386
        OpFail _ -> cur_tbl
387
        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
388
            let tgt_idx = Instance.idx target
389
                upd_cvar = compCV upd_nl
390
                upd_il = Container.add tgt_idx new_inst ini_il
391
                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
392
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
393
            in
394
              compareTables cur_tbl upd_tbl
395

    
396
-- | Given the status of the current secondary as a valid new node and
397
-- the current candidate target node, generate the possible moves for
398
-- a instance.
399
possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
400
              -> Ndx       -- ^ Target node candidate
401
              -> [IMove]   -- ^ List of valid result moves
402
possibleMoves True tdx =
403
    [ReplaceSecondary tdx,
404
     ReplaceAndFailover tdx,
405
     ReplacePrimary tdx,
406
     FailoverAndReplace tdx]
407

    
408
possibleMoves False tdx =
409
    [ReplaceSecondary tdx,
410
     ReplaceAndFailover tdx]
411

    
412
-- | Compute the best move for a given instance.
413
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
414
                  -> Bool              -- ^ Whether disk moves are allowed
415
                  -> Table             -- ^ Original table
416
                  -> Instance.Instance -- ^ Instance to move
417
                  -> Table             -- ^ Best new table for this instance
418
checkInstanceMove nodes_idx disk_moves ini_tbl target =
419
    let
420
        opdx = Instance.pNode target
421
        osdx = Instance.sNode target
422
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
423
        use_secondary = elem osdx nodes_idx
424
        aft_failover = if use_secondary -- if allowed to failover
425
                       then checkSingleStep ini_tbl target ini_tbl Failover
426
                       else ini_tbl
427
        all_moves = if disk_moves
428
                    then concatMap (possibleMoves use_secondary) nodes
429
                    else []
430
    in
431
      -- iterate over the possible nodes for this instance
432
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
433

    
434
-- | Compute the best next move.
435
checkMove :: [Ndx]               -- ^ Allowed target node indices
436
          -> Bool                -- ^ Whether disk moves are allowed
437
          -> Table               -- ^ The current solution
438
          -> [Instance.Instance] -- ^ List of instances still to move
439
          -> Table               -- ^ The new solution
440
checkMove nodes_idx disk_moves ini_tbl victims =
441
    let Table _ _ _ ini_plc = ini_tbl
442
        -- iterate over all instances, computing the best move
443
        best_tbl =
444
            foldl'
445
            (\ step_tbl em ->
446
                 compareTables step_tbl $
447
                 checkInstanceMove nodes_idx disk_moves ini_tbl em)
448
            ini_tbl victims
449
        Table _ _ _ best_plc = best_tbl
450
    in if length best_plc == length ini_plc
451
       then ini_tbl -- no advancement
452
       else best_tbl
453

    
454
-- | Check if we are allowed to go deeper in the balancing
455

    
456
doNextBalance :: Table       -- ^ The starting table
457
              -> Int         -- ^ Remaining length
458
              -> Score       -- ^ Score at which to stop
459
              -> Bool -- ^ The resulting table and commands
460
doNextBalance ini_tbl max_rounds min_score =
461
    let Table _ _ ini_cv ini_plc = ini_tbl
462
        ini_plc_len = length ini_plc
463
    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
464

    
465
-- | Run a balance move
466

    
467
tryBalance :: Table       -- ^ The starting table
468
           -> Bool        -- ^ Allow disk moves
469
           -> Bool        -- ^ Only evacuate moves
470
           -> Maybe Table -- ^ The resulting table and commands
471
tryBalance ini_tbl disk_moves evac_mode =
472
    let Table ini_nl ini_il ini_cv _ = ini_tbl
473
        all_inst = Container.elems ini_il
474
        all_inst' = if evac_mode
475
                    then let bad_nodes = map Node.idx . filter Node.offline $
476
                                         Container.elems ini_nl
477
                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
478
                                          Instance.pNode e `elem` bad_nodes)
479
                            all_inst
480
                    else all_inst
481
        reloc_inst = filter Instance.movable all_inst'
482
        node_idx = map Node.idx . filter (not . Node.offline) $
483
                   Container.elems ini_nl
484
        fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
485
        (Table _ _ fin_cv _) = fin_tbl
486
    in
487
      if fin_cv < ini_cv
488
      then Just fin_tbl -- this round made success, return the new table
489
      else Nothing
490

    
491
-- * Allocation functions
492

    
493
-- | Build failure stats out of a list of failures
494
collapseFailures :: [FailMode] -> FailStats
495
collapseFailures flst =
496
    map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
497

    
498
-- | Update current Allocation solution and failure stats with new
499
-- elements
500
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
501
concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols)
502

    
503
concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) =
504
    let nscore = compCV nl
505
        -- Choose the old or new solution, based on the cluster score
506
        nsols = case osols of
507
                  [] -> [(nscore, ns)]
508
                  (oscore, _):[] ->
509
                      if oscore < nscore
510
                      then osols
511
                      else [(nscore, ns)]
512
                  -- FIXME: here we simply concat to lists with more
513
                  -- than one element; we should instead abort, since
514
                  -- this is not a valid usage of this function
515
                  xs -> (nscore, ns):xs
516
        nsuc = cntok + 1
517
    -- Note: we force evaluation of nsols here in order to keep the
518
    -- memory profile low - we know that we will need nsols for sure
519
    -- in the next cycle, so we force evaluation of nsols, since the
520
    -- foldl' in the caller will only evaluate the tuple, but not the
521
    -- elements of the tuple
522
    in nsols `seq` nsuc `seq` (flst, nsuc, nsols)
523

    
524
-- | Try to allocate an instance on the cluster.
525
tryAlloc :: (Monad m) =>
526
            Node.List         -- ^ The node list
527
         -> Instance.List     -- ^ The instance list
528
         -> Instance.Instance -- ^ The instance to allocate
529
         -> Int               -- ^ Required number of nodes
530
         -> m AllocSolution   -- ^ Possible solution list
531
tryAlloc nl _ inst 2 =
532
    let all_nodes = getOnline nl
533
        all_pairs = liftM2 (,) all_nodes all_nodes
534
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
535
        sols = foldl' (\cstate (p, s) ->
536
                           concatAllocs cstate $ allocateOnPair nl inst p s
537
                      ) ([], 0, []) ok_pairs
538
    in return sols
539

    
540
tryAlloc nl _ inst 1 =
541
    let all_nodes = getOnline nl
542
        sols = foldl' (\cstate ->
543
                           concatAllocs cstate . allocateOnSingle nl inst
544
                      ) ([], 0, []) all_nodes
545
    in return sols
546

    
547
tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
548
                             \destinations required (" ++ show reqn ++
549
                                               "), only two supported"
550

    
551
-- | Try to allocate an instance on the cluster.
552
tryReloc :: (Monad m) =>
553
            Node.List       -- ^ The node list
554
         -> Instance.List   -- ^ The instance list
555
         -> Idx             -- ^ The index of the instance to move
556
         -> Int             -- ^ The number of nodes required
557
         -> [Ndx]           -- ^ Nodes which should not be used
558
         -> m AllocSolution -- ^ Solution list
559
tryReloc nl il xid 1 ex_idx =
560
    let all_nodes = getOnline nl
561
        inst = Container.find xid il
562
        ex_idx' = Instance.pNode inst:ex_idx
563
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
564
        valid_idxes = map Node.idx valid_nodes
565
        sols1 = foldl' (\cstate x ->
566
                            let em = do
567
                                  (mnl, i, _, _) <-
568
                                      applyMove nl inst (ReplaceSecondary x)
569
                                  return (mnl, i, [Container.find x mnl])
570
                            in concatAllocs cstate em
571
                       ) ([], 0, []) valid_idxes
572
    in return sols1
573

    
574
tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
575
                                \destinations required (" ++ show reqn ++
576
                                                  "), only one supported"
577

    
578
-- | Try to allocate an instance on the cluster.
579
tryEvac :: (Monad m) =>
580
            Node.List       -- ^ The node list
581
         -> Instance.List   -- ^ The instance list
582
         -> [Ndx]           -- ^ Nodes to be evacuated
583
         -> m AllocSolution -- ^ Solution list
584
tryEvac nl il ex_ndx =
585
    let ex_nodes = map (`Container.find` nl) ex_ndx
586
        all_insts = nub . concatMap Node.sList $ ex_nodes
587
    in do
588
      (_, sol) <- foldM (\(nl', (_, _, rsols)) idx -> do
589
                           -- FIXME: hardcoded one node here
590
                           (fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx
591
                           case aes of
592
                             csol@(_, (nl'', _, _)):_ ->
593
                                 return (nl'', (fm, cs, csol:rsols))
594
                             _ -> fail $ "Can't evacuate instance " ++
595
                                  show idx
596
                        ) (nl, ([], 0, [])) all_insts
597
      return sol
598

    
599
-- * Formatting functions
600

    
601
-- | Given the original and final nodes, computes the relocation description.
602
computeMoves :: Instance.Instance -- ^ The instance to be moved
603
             -> String -- ^ The instance name
604
             -> IMove  -- ^ The move being performed
605
             -> String -- ^ New primary
606
             -> String -- ^ New secondary
607
             -> (String, [String])
608
                -- ^ Tuple of moves and commands list; moves is containing
609
                -- either @/f/@ for failover or @/r:name/@ for replace
610
                -- secondary, while the command list holds gnt-instance
611
                -- commands (without that prefix), e.g \"@failover instance1@\"
612
computeMoves i inam mv c d =
613
    case mv of
614
      Failover -> ("f", [mig])
615
      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
616
      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
617
      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
618
      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
619
    where morf = if Instance.running i then "migrate" else "failover"
620
          mig = printf "%s -f %s" morf inam::String
621
          rep n = printf "replace-disks -n %s %s" n inam
622

    
623
-- | Converts a placement to string format.
624
printSolutionLine :: Node.List     -- ^ The node list
625
                  -> Instance.List -- ^ The instance list
626
                  -> Int           -- ^ Maximum node name length
627
                  -> Int           -- ^ Maximum instance name length
628
                  -> Placement     -- ^ The current placement
629
                  -> Int           -- ^ The index of the placement in
630
                                   -- the solution
631
                  -> (String, [String])
632
printSolutionLine nl il nmlen imlen plc pos =
633
    let
634
        pmlen = (2*nmlen + 1)
635
        (i, p, s, mv, c) = plc
636
        inst = Container.find i il
637
        inam = Instance.name inst
638
        npri = Container.nameOf nl p
639
        nsec = Container.nameOf nl s
640
        opri = Container.nameOf nl $ Instance.pNode inst
641
        osec = Container.nameOf nl $ Instance.sNode inst
642
        (moves, cmds) =  computeMoves inst inam mv npri nsec
643
        ostr = printf "%s:%s" opri osec::String
644
        nstr = printf "%s:%s" npri nsec::String
645
    in
646
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
647
       pos imlen inam pmlen ostr
648
       pmlen nstr c moves,
649
       cmds)
650

    
651
-- | Return the instance and involved nodes in an instance move.
652
involvedNodes :: Instance.List -> Placement -> [Ndx]
653
involvedNodes il plc =
654
    let (i, np, ns, _, _) = plc
655
        inst = Container.find i il
656
        op = Instance.pNode inst
657
        os = Instance.sNode inst
658
    in nub [np, ns, op, os]
659

    
660
-- | Inner function for splitJobs, that either appends the next job to
661
-- the current jobset, or starts a new jobset.
662
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
663
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
664
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
665
    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
666
    | otherwise = ([n]:cjs, ndx)
667

    
668
-- | Break a list of moves into independent groups. Note that this
669
-- will reverse the order of jobs.
670
splitJobs :: [MoveJob] -> [JobSet]
671
splitJobs = fst . foldl mergeJobs ([], [])
672

    
673
-- | Given a list of commands, prefix them with @gnt-instance@ and
674
-- also beautify the display a little.
675
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
676
formatJob jsn jsl (sn, (_, _, _, cmds)) =
677
    let out =
678
            printf "  echo job %d/%d" jsn sn:
679
            printf "  check":
680
            map ("  gnt-instance " ++) cmds
681
    in if sn == 1
682
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
683
       else out
684

    
685
-- | Given a list of commands, prefix them with @gnt-instance@ and
686
-- also beautify the display a little.
687
formatCmds :: [JobSet] -> String
688
formatCmds =
689
    unlines .
690
    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
691
                             (zip [1..] js)) .
692
    zip [1..]
693

    
694
-- | Converts a solution to string format.
695
printSolution :: Node.List
696
              -> Instance.List
697
              -> [Placement]
698
              -> ([String], [[String]])
699
printSolution nl il sol =
700
    let
701
        nmlen = Container.maxNameLen nl
702
        imlen = Container.maxNameLen il
703
    in
704
      unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..]
705

    
706
-- | Print the node list.
707
printNodes :: Node.List -> [String] -> String
708
printNodes nl fs =
709
    let fields = if null fs
710
                 then Node.defaultFields
711
                 else fs
712
        snl = sortBy (comparing Node.idx) (Container.elems nl)
713
        (header, isnum) = unzip $ map Node.showHeader fields
714
    in unlines . map ((:) ' ' .  intercalate " ") $
715
       formatTable (header:map (Node.list fields) snl) isnum
716

    
717
-- | Print the instance list.
718
printInsts :: Node.List -> Instance.List -> String
719
printInsts nl il =
720
    let sil = sortBy (comparing Instance.idx) (Container.elems il)
721
        helper inst = [ if Instance.running inst then "R" else " "
722
                      , Instance.name inst
723
                      , Container.nameOf nl (Instance.pNode inst)
724
                      , let sdx = Instance.sNode inst
725
                        in if sdx == Node.noSecondary
726
                           then  ""
727
                           else Container.nameOf nl sdx
728
                      , printf "%3d" $ Instance.vcpus inst
729
                      , printf "%5d" $ Instance.mem inst
730
                      , printf "%5d" $ Instance.dsk inst `div` 1024
731
                      , printf "%5.3f" lC
732
                      , printf "%5.3f" lM
733
                      , printf "%5.3f" lD
734
                      , printf "%5.3f" lN
735
                      ]
736
            where DynUtil lC lM lD lN = Instance.util inst
737
        header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
738
                 , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
739
        isnum = False:False:False:False:repeat True
740
    in unlines . map ((:) ' ' . intercalate " ") $
741
       formatTable (header:map helper sil) isnum
742

    
743
-- | Shows statistics for a given node list.
744
printStats :: Node.List -> String
745
printStats nl =
746
    let dcvs = compDetailedCV nl
747
        hd = zip (detailedCVNames ++ repeat "unknown") dcvs
748
        formatted = map (\(header, val) ->
749
                             printf "%s=%.8f" header val::String) hd
750
    in intercalate ", " formatted
751

    
752
-- | Convert a placement into a list of OpCodes (basically a job).
753
iMoveToJob :: String -> Node.List -> Instance.List
754
          -> Idx -> IMove -> [OpCodes.OpCode]
755
iMoveToJob csf nl il idx move =
756
    let inst = Container.find idx il
757
        iname = Instance.name inst ++ csf
758
        lookNode n = Just (Container.nameOf nl n ++ csf)
759
        opF = if Instance.running inst
760
              then OpCodes.OpMigrateInstance iname True False
761
              else OpCodes.OpFailoverInstance iname False
762
        opR n = OpCodes.OpReplaceDisks iname (lookNode n)
763
                OpCodes.ReplaceNewSecondary [] Nothing
764
    in case move of
765
         Failover -> [ opF ]
766
         ReplacePrimary np -> [ opF, opR np, opF ]
767
         ReplaceSecondary ns -> [ opR ns ]
768
         ReplaceAndFailover np -> [ opR np, opF ]
769
         FailoverAndReplace ns -> [ opF, opR ns ]