Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Cluster.hs @ 418a9d72

History | View | Annotate | Download (69.2 kB)

1
{-| Implementation of cluster-wide logic.
2

    
3
This module holds all pure cluster-logic; I\/O related functionality
4
goes into the /Main/ module for the individual binaries.
5

    
6
-}
7

    
8
{-
9

    
10
Copyright (C) 2009, 2010, 2011, 2012, 2013 Google Inc.
11

    
12
This program is free software; you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation; either version 2 of the License, or
15
(at your option) any later version.
16

    
17
This program is distributed in the hope that it will be useful, but
18
WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20
General Public License for more details.
21

    
22
You should have received a copy of the GNU General Public License
23
along with this program; if not, write to the Free Software
24
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25
02110-1301, USA.
26

    
27
-}
28

    
29
module Ganeti.HTools.Cluster
30
  (
31
    -- * Types
32
    AllocSolution(..)
33
  , EvacSolution(..)
34
  , Table(..)
35
  , CStats(..)
36
  , AllocNodes
37
  , AllocResult
38
  , AllocMethod
39
  , AllocSolutionList
40
  -- * Generic functions
41
  , totalResources
42
  , computeAllocationDelta
43
  -- * First phase functions
44
  , computeBadItems
45
  -- * Second phase functions
46
  , printSolutionLine
47
  , formatCmds
48
  , involvedNodes
49
  , splitJobs
50
  -- * Display functions
51
  , printNodes
52
  , printInsts
53
  -- * Balacing functions
54
  , checkMove
55
  , doNextBalance
56
  , tryBalance
57
  , compCV
58
  , compCVNodes
59
  , compDetailedCV
60
  , printStats
61
  , iMoveToJob
62
  -- * IAllocator functions
63
  , genAllocNodes
64
  , tryAlloc
65
  , tryMGAlloc
66
  , tryNodeEvac
67
  , tryChangeGroup
68
  , collapseFailures
69
  , allocList
70
  -- * Allocation functions
71
  , iterateAlloc
72
  , tieredAlloc
73
  -- * Node group functions
74
  , instanceGroup
75
  , findSplitInstances
76
  , splitCluster
77
  ) where
78

    
79
import Control.Applicative (liftA2)
80
import Control.Arrow ((&&&))
81
import qualified Data.IntSet as IntSet
82
import Data.List
83
import Data.Maybe (fromJust, fromMaybe, isJust, isNothing)
84
import Data.Ord (comparing)
85
import Text.Printf (printf)
86

    
87
import Ganeti.BasicTypes
88
import qualified Ganeti.HTools.Container as Container
89
import qualified Ganeti.HTools.Instance as Instance
90
import qualified Ganeti.HTools.Nic as Nic
91
import qualified Ganeti.HTools.Node as Node
92
import qualified Ganeti.HTools.Group as Group
93
import Ganeti.HTools.Types
94
import Ganeti.Compat
95
import qualified Ganeti.OpCodes as OpCodes
96
import Ganeti.Utils
97
import Ganeti.Types (mkNonEmpty)
98

    
99
-- * Types
100

    
101
-- | Allocation\/relocation solution.
102
data AllocSolution = AllocSolution
103
  { asFailures :: [FailMode]              -- ^ Failure counts
104
  , asAllocs   :: Int                     -- ^ Good allocation count
105
  , asSolution :: Maybe Node.AllocElement -- ^ The actual allocation result
106
  , asLog      :: [String]                -- ^ Informational messages
107
  }
108

    
109
-- | Node evacuation/group change iallocator result type. This result
110
-- type consists of actual opcodes (a restricted subset) that are
111
-- transmitted back to Ganeti.
112
data EvacSolution = EvacSolution
113
  { esMoved   :: [(Idx, Gdx, [Ndx])]  -- ^ Instances moved successfully
114
  , esFailed  :: [(Idx, String)]      -- ^ Instances which were not
115
                                      -- relocated
116
  , esOpCodes :: [[OpCodes.OpCode]]   -- ^ List of jobs
117
  } deriving (Show)
118

    
119
-- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
120
type AllocResult = (FailStats, Node.List, Instance.List,
121
                    [Instance.Instance], [CStats])
122

    
123
-- | Type alias for easier handling.
124
type AllocSolutionList = [(Instance.Instance, AllocSolution)]
125

    
126
-- | A type denoting the valid allocation mode/pairs.
127
--
128
-- For a one-node allocation, this will be a @Left ['Ndx']@, whereas
129
-- for a two-node allocation, this will be a @Right [('Ndx',
130
-- ['Ndx'])]@. In the latter case, the list is basically an
131
-- association list, grouped by primary node and holding the potential
132
-- secondary nodes in the sub-list.
133
type AllocNodes = Either [Ndx] [(Ndx, [Ndx])]
134

    
135
-- | The empty solution we start with when computing allocations.
136
emptyAllocSolution :: AllocSolution
137
emptyAllocSolution = AllocSolution { asFailures = [], asAllocs = 0
138
                                   , asSolution = Nothing, asLog = [] }
139

    
140
-- | The empty evac solution.
141
emptyEvacSolution :: EvacSolution
142
emptyEvacSolution = EvacSolution { esMoved = []
143
                                 , esFailed = []
144
                                 , esOpCodes = []
145
                                 }
146

    
147
-- | The complete state for the balancing solution.
148
data Table = Table Node.List Instance.List Score [Placement]
149
             deriving (Show)
150

    
151
-- | Cluster statistics data type.
152
data CStats = CStats
153
  { csFmem :: Integer -- ^ Cluster free mem
154
  , csFdsk :: Integer -- ^ Cluster free disk
155
  , csAmem :: Integer -- ^ Cluster allocatable mem
156
  , csAdsk :: Integer -- ^ Cluster allocatable disk
157
  , csAcpu :: Integer -- ^ Cluster allocatable cpus
158
  , csMmem :: Integer -- ^ Max node allocatable mem
159
  , csMdsk :: Integer -- ^ Max node allocatable disk
160
  , csMcpu :: Integer -- ^ Max node allocatable cpu
161
  , csImem :: Integer -- ^ Instance used mem
162
  , csIdsk :: Integer -- ^ Instance used disk
163
  , csIcpu :: Integer -- ^ Instance used cpu
164
  , csTmem :: Double  -- ^ Cluster total mem
165
  , csTdsk :: Double  -- ^ Cluster total disk
166
  , csTcpu :: Double  -- ^ Cluster total cpus
167
  , csVcpu :: Integer -- ^ Cluster total virtual cpus
168
  , csNcpu :: Double  -- ^ Equivalent to 'csIcpu' but in terms of
169
                      -- physical CPUs, i.e. normalised used phys CPUs
170
  , csXmem :: Integer -- ^ Unnacounted for mem
171
  , csNmem :: Integer -- ^ Node own memory
172
  , csScore :: Score  -- ^ The cluster score
173
  , csNinst :: Int    -- ^ The total number of instances
174
  } deriving (Show)
175

    
176
-- | A simple type for allocation functions.
177
type AllocMethod =  Node.List           -- ^ Node list
178
                 -> Instance.List       -- ^ Instance list
179
                 -> Maybe Int           -- ^ Optional allocation limit
180
                 -> Instance.Instance   -- ^ Instance spec for allocation
181
                 -> AllocNodes          -- ^ Which nodes we should allocate on
182
                 -> [Instance.Instance] -- ^ Allocated instances
183
                 -> [CStats]            -- ^ Running cluster stats
184
                 -> Result AllocResult  -- ^ Allocation result
185

    
186
-- | A simple type for the running solution of evacuations.
187
type EvacInnerState =
188
  Either String (Node.List, Instance.Instance, Score, Ndx)
189

    
190
-- * Utility functions
191

    
192
-- | Verifies the N+1 status and return the affected nodes.
193
verifyN1 :: [Node.Node] -> [Node.Node]
194
verifyN1 = filter Node.failN1
195

    
196
{-| Computes the pair of bad nodes and instances.
197

    
198
The bad node list is computed via a simple 'verifyN1' check, and the
199
bad instance list is the list of primary and secondary instances of
200
those nodes.
201

    
202
-}
203
computeBadItems :: Node.List -> Instance.List ->
204
                   ([Node.Node], [Instance.Instance])
205
computeBadItems nl il =
206
  let bad_nodes = verifyN1 $ getOnline nl
207
      bad_instances = map (`Container.find` il) .
208
                      sort . nub $
209
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
210
  in
211
    (bad_nodes, bad_instances)
212

    
213
-- | Extracts the node pairs for an instance. This can fail if the
214
-- instance is single-homed. FIXME: this needs to be improved,
215
-- together with the general enhancement for handling non-DRBD moves.
216
instanceNodes :: Node.List -> Instance.Instance ->
217
                 (Ndx, Ndx, Node.Node, Node.Node)
218
instanceNodes nl inst =
219
  let old_pdx = Instance.pNode inst
220
      old_sdx = Instance.sNode inst
221
      old_p = Container.find old_pdx nl
222
      old_s = Container.find old_sdx nl
223
  in (old_pdx, old_sdx, old_p, old_s)
224

    
225
-- | Zero-initializer for the CStats type.
226
emptyCStats :: CStats
227
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
228

    
229
-- | Update stats with data from a new node.
230
updateCStats :: CStats -> Node.Node -> CStats
231
updateCStats cs node =
232
  let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
233
               csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
234
               csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
235
               csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
236
               csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
237
               csVcpu = x_vcpu, csNcpu = x_ncpu,
238
               csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
239
             }
240
        = cs
241
      inc_amem = Node.fMem node - Node.rMem node
242
      inc_amem' = if inc_amem > 0 then inc_amem else 0
243
      inc_adsk = Node.availDisk node
244
      inc_imem = truncate (Node.tMem node) - Node.nMem node
245
                 - Node.xMem node - Node.fMem node
246
      inc_icpu = Node.uCpu node
247
      inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
248
      inc_vcpu = Node.hiCpu node
249
      inc_acpu = Node.availCpu node
250
      inc_ncpu = fromIntegral (Node.uCpu node) /
251
                 iPolicyVcpuRatio (Node.iPolicy node)
252
  in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
253
        , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
254
        , csAmem = x_amem + fromIntegral inc_amem'
255
        , csAdsk = x_adsk + fromIntegral inc_adsk
256
        , csAcpu = x_acpu + fromIntegral inc_acpu
257
        , csMmem = max x_mmem (fromIntegral inc_amem')
258
        , csMdsk = max x_mdsk (fromIntegral inc_adsk)
259
        , csMcpu = max x_mcpu (fromIntegral inc_acpu)
260
        , csImem = x_imem + fromIntegral inc_imem
261
        , csIdsk = x_idsk + fromIntegral inc_idsk
262
        , csIcpu = x_icpu + fromIntegral inc_icpu
263
        , csTmem = x_tmem + Node.tMem node
264
        , csTdsk = x_tdsk + Node.tDsk node
265
        , csTcpu = x_tcpu + Node.tCpu node
266
        , csVcpu = x_vcpu + fromIntegral inc_vcpu
267
        , csNcpu = x_ncpu + inc_ncpu
268
        , csXmem = x_xmem + fromIntegral (Node.xMem node)
269
        , csNmem = x_nmem + fromIntegral (Node.nMem node)
270
        , csNinst = x_ninst + length (Node.pList node)
271
        }
272

    
273
-- | Compute the total free disk and memory in the cluster.
274
totalResources :: Node.List -> CStats
275
totalResources nl =
276
  let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
277
  in cs { csScore = compCV nl }
278

    
279
-- | Compute the delta between two cluster state.
280
--
281
-- This is used when doing allocations, to understand better the
282
-- available cluster resources. The return value is a triple of the
283
-- current used values, the delta that was still allocated, and what
284
-- was left unallocated.
285
computeAllocationDelta :: CStats -> CStats -> AllocStats
286
computeAllocationDelta cini cfin =
287
  let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu,
288
              csNcpu = i_ncpu } = cini
289
      CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
290
              csTmem = t_mem, csTdsk = t_dsk, csVcpu = f_vcpu,
291
              csNcpu = f_ncpu, csTcpu = f_tcpu } = cfin
292
      rini = AllocInfo { allocInfoVCpus = fromIntegral i_icpu
293
                       , allocInfoNCpus = i_ncpu
294
                       , allocInfoMem   = fromIntegral i_imem
295
                       , allocInfoDisk  = fromIntegral i_idsk
296
                       }
297
      rfin = AllocInfo { allocInfoVCpus = fromIntegral (f_icpu - i_icpu)
298
                       , allocInfoNCpus = f_ncpu - i_ncpu
299
                       , allocInfoMem   = fromIntegral (f_imem - i_imem)
300
                       , allocInfoDisk  = fromIntegral (f_idsk - i_idsk)
301
                       }
302
      runa = AllocInfo { allocInfoVCpus = fromIntegral (f_vcpu - f_icpu)
303
                       , allocInfoNCpus = f_tcpu - f_ncpu
304
                       , allocInfoMem   = truncate t_mem - fromIntegral f_imem
305
                       , allocInfoDisk  = truncate t_dsk - fromIntegral f_idsk
306
                       }
307
  in (rini, rfin, runa)
308

    
309
-- | The names and weights of the individual elements in the CV list.
310
detailedCVInfo :: [(Double, String)]
311
detailedCVInfo = [ (1,  "free_mem_cv")
312
                 , (1,  "free_disk_cv")
313
                 , (1,  "n1_cnt")
314
                 , (1,  "reserved_mem_cv")
315
                 , (4,  "offline_all_cnt")
316
                 , (16, "offline_pri_cnt")
317
                 , (1,  "vcpu_ratio_cv")
318
                 , (1,  "cpu_load_cv")
319
                 , (1,  "mem_load_cv")
320
                 , (1,  "disk_load_cv")
321
                 , (1,  "net_load_cv")
322
                 , (2,  "pri_tags_score")
323
                 , (1,  "spindles_cv")
324
                 ]
325

    
326
-- | Holds the weights used by 'compCVNodes' for each metric.
327
detailedCVWeights :: [Double]
328
detailedCVWeights = map fst detailedCVInfo
329

    
330
-- | Compute the mem and disk covariance.
331
compDetailedCV :: [Node.Node] -> [Double]
332
compDetailedCV all_nodes =
333
  let (offline, nodes) = partition Node.offline all_nodes
334
      mem_l = map Node.pMem nodes
335
      dsk_l = map Node.pDsk nodes
336
      -- metric: memory covariance
337
      mem_cv = stdDev mem_l
338
      -- metric: disk covariance
339
      dsk_cv = stdDev dsk_l
340
      -- metric: count of instances living on N1 failing nodes
341
      n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
342
                                                 length (Node.pList n)) .
343
                 filter Node.failN1 $ nodes :: Double
344
      res_l = map Node.pRem nodes
345
      -- metric: reserved memory covariance
346
      res_cv = stdDev res_l
347
      -- offline instances metrics
348
      offline_ipri = sum . map (length . Node.pList) $ offline
349
      offline_isec = sum . map (length . Node.sList) $ offline
350
      -- metric: count of instances on offline nodes
351
      off_score = fromIntegral (offline_ipri + offline_isec)::Double
352
      -- metric: count of primary instances on offline nodes (this
353
      -- helps with evacuation/failover of primary instances on
354
      -- 2-node clusters with one node offline)
355
      off_pri_score = fromIntegral offline_ipri::Double
356
      cpu_l = map Node.pCpu nodes
357
      -- metric: covariance of vcpu/pcpu ratio
358
      cpu_cv = stdDev cpu_l
359
      -- metrics: covariance of cpu, memory, disk and network load
360
      (c_load, m_load, d_load, n_load) =
361
        unzip4 $ map (\n ->
362
                      let DynUtil c1 m1 d1 n1 = Node.utilLoad n
363
                          DynUtil c2 m2 d2 n2 = Node.utilPool n
364
                      in (c1/c2, m1/m2, d1/d2, n1/n2)) nodes
365
      -- metric: conflicting instance count
366
      pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
367
      pri_tags_score = fromIntegral pri_tags_inst::Double
368
      -- metric: spindles %
369
      spindles_cv = map (\n -> Node.instSpindles n / Node.hiSpindles n) nodes
370
  in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
371
     , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
372
     , pri_tags_score, stdDev spindles_cv ]
373

    
374
-- | Compute the /total/ variance.
375
compCVNodes :: [Node.Node] -> Double
376
compCVNodes = sum . zipWith (*) detailedCVWeights . compDetailedCV
377

    
378
-- | Wrapper over 'compCVNodes' for callers that have a 'Node.List'.
379
compCV :: Node.List -> Double
380
compCV = compCVNodes . Container.elems
381

    
382
-- | Compute online nodes from a 'Node.List'.
383
getOnline :: Node.List -> [Node.Node]
384
getOnline = filter (not . Node.offline) . Container.elems
385

    
386
-- * Balancing functions
387

    
388
-- | Compute best table. Note that the ordering of the arguments is important.
389
compareTables :: Table -> Table -> Table
390
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
391
  if a_cv > b_cv then b else a
392

    
393
-- | Applies an instance move to a given node list and instance.
394
applyMove :: Node.List -> Instance.Instance
395
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
396
-- Failover (f)
397
applyMove nl inst Failover =
398
  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
399
      int_p = Node.removePri old_p inst
400
      int_s = Node.removeSec old_s inst
401
      new_nl = do -- Maybe monad
402
        new_p <- Node.addPriEx (Node.offline old_p) int_s inst
403
        new_s <- Node.addSec int_p inst old_sdx
404
        let new_inst = Instance.setBoth inst old_sdx old_pdx
405
        return (Container.addTwo old_pdx new_s old_sdx new_p nl,
406
                new_inst, old_sdx, old_pdx)
407
  in new_nl
408

    
409
-- Failover to any (fa)
410
applyMove nl inst (FailoverToAny new_pdx) = do
411
  let (old_pdx, old_sdx, old_pnode, _) = instanceNodes nl inst
412
      new_pnode = Container.find new_pdx nl
413
      force_failover = Node.offline old_pnode
414
  new_pnode' <- Node.addPriEx force_failover new_pnode inst
415
  let old_pnode' = Node.removePri old_pnode inst
416
      inst' = Instance.setPri inst new_pdx
417
      nl' = Container.addTwo old_pdx old_pnode' new_pdx new_pnode' nl
418
  return (nl', inst', new_pdx, old_sdx)
419

    
420
-- Replace the primary (f:, r:np, f)
421
applyMove nl inst (ReplacePrimary new_pdx) =
422
  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
423
      tgt_n = Container.find new_pdx nl
424
      int_p = Node.removePri old_p inst
425
      int_s = Node.removeSec old_s inst
426
      force_p = Node.offline old_p
427
      new_nl = do -- Maybe monad
428
                  -- check that the current secondary can host the instance
429
                  -- during the migration
430
        tmp_s <- Node.addPriEx force_p int_s inst
431
        let tmp_s' = Node.removePri tmp_s inst
432
        new_p <- Node.addPriEx force_p tgt_n inst
433
        new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
434
        let new_inst = Instance.setPri inst new_pdx
435
        return (Container.add new_pdx new_p $
436
                Container.addTwo old_pdx int_p old_sdx new_s nl,
437
                new_inst, new_pdx, old_sdx)
438
  in new_nl
439

    
440
-- Replace the secondary (r:ns)
441
applyMove nl inst (ReplaceSecondary new_sdx) =
442
  let old_pdx = Instance.pNode inst
443
      old_sdx = Instance.sNode inst
444
      old_s = Container.find old_sdx nl
445
      tgt_n = Container.find new_sdx nl
446
      int_s = Node.removeSec old_s inst
447
      force_s = Node.offline old_s
448
      new_inst = Instance.setSec inst new_sdx
449
      new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
450
               \new_s -> return (Container.addTwo new_sdx
451
                                 new_s old_sdx int_s nl,
452
                                 new_inst, old_pdx, new_sdx)
453
  in new_nl
454

    
455
-- Replace the secondary and failover (r:np, f)
456
applyMove nl inst (ReplaceAndFailover new_pdx) =
457
  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
458
      tgt_n = Container.find new_pdx nl
459
      int_p = Node.removePri old_p inst
460
      int_s = Node.removeSec old_s inst
461
      force_s = Node.offline old_s
462
      new_nl = do -- Maybe monad
463
        new_p <- Node.addPri tgt_n inst
464
        new_s <- Node.addSecEx force_s int_p inst new_pdx
465
        let new_inst = Instance.setBoth inst new_pdx old_pdx
466
        return (Container.add new_pdx new_p $
467
                Container.addTwo old_pdx new_s old_sdx int_s nl,
468
                new_inst, new_pdx, old_pdx)
469
  in new_nl
470

    
471
-- Failver and replace the secondary (f, r:ns)
472
applyMove nl inst (FailoverAndReplace new_sdx) =
473
  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
474
      tgt_n = Container.find new_sdx nl
475
      int_p = Node.removePri old_p inst
476
      int_s = Node.removeSec old_s inst
477
      force_p = Node.offline old_p
478
      new_nl = do -- Maybe monad
479
        new_p <- Node.addPriEx force_p int_s inst
480
        new_s <- Node.addSecEx force_p tgt_n inst old_sdx
481
        let new_inst = Instance.setBoth inst old_sdx new_sdx
482
        return (Container.add new_sdx new_s $
483
                Container.addTwo old_sdx new_p old_pdx int_p nl,
484
                new_inst, old_sdx, new_sdx)
485
  in new_nl
486

    
487
-- | Tries to allocate an instance on one given node.
488
allocateOnSingle :: Node.List -> Instance.Instance -> Ndx
489
                 -> OpResult Node.AllocElement
490
allocateOnSingle nl inst new_pdx =
491
  let p = Container.find new_pdx nl
492
      new_inst = Instance.setBoth inst new_pdx Node.noSecondary
493
  in do
494
    Instance.instMatchesPolicy inst (Node.iPolicy p)
495
    new_p <- Node.addPri p inst
496
    let new_nl = Container.add new_pdx new_p nl
497
        new_score = compCV new_nl
498
    return (new_nl, new_inst, [new_p], new_score)
499

    
500
-- | Tries to allocate an instance on a given pair of nodes.
501
allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx
502
               -> OpResult Node.AllocElement
503
allocateOnPair nl inst new_pdx new_sdx =
504
  let tgt_p = Container.find new_pdx nl
505
      tgt_s = Container.find new_sdx nl
506
  in do
507
    Instance.instMatchesPolicy inst (Node.iPolicy tgt_p)
508
    new_p <- Node.addPri tgt_p inst
509
    new_s <- Node.addSec tgt_s inst new_pdx
510
    let new_inst = Instance.setBoth inst new_pdx new_sdx
511
        new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
512
    return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
513

    
514
-- | Tries to perform an instance move and returns the best table
515
-- between the original one and the new one.
516
checkSingleStep :: Table -- ^ The original table
517
                -> Instance.Instance -- ^ The instance to move
518
                -> Table -- ^ The current best table
519
                -> IMove -- ^ The move to apply
520
                -> Table -- ^ The final best table
521
checkSingleStep ini_tbl target cur_tbl move =
522
  let Table ini_nl ini_il _ ini_plc = ini_tbl
523
      tmp_resu = applyMove ini_nl target move
524
  in case tmp_resu of
525
       Bad _ -> cur_tbl
526
       Ok (upd_nl, new_inst, pri_idx, sec_idx) ->
527
         let tgt_idx = Instance.idx target
528
             upd_cvar = compCV upd_nl
529
             upd_il = Container.add tgt_idx new_inst ini_il
530
             upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
531
             upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
532
         in compareTables cur_tbl upd_tbl
533

    
534
-- | Given the status of the current secondary as a valid new node and
535
-- the current candidate target node, generate the possible moves for
536
-- a instance.
537
possibleMoves :: MirrorType -- ^ The mirroring type of the instance
538
              -> Bool       -- ^ Whether the secondary node is a valid new node
539
              -> Bool       -- ^ Whether we can change the primary node
540
              -> Ndx        -- ^ Target node candidate
541
              -> [IMove]    -- ^ List of valid result moves
542

    
543
possibleMoves MirrorNone _ _ _ = []
544

    
545
possibleMoves MirrorExternal _ False _ = []
546

    
547
possibleMoves MirrorExternal _ True tdx =
548
  [ FailoverToAny tdx ]
549

    
550
possibleMoves MirrorInternal _ False tdx =
551
  [ ReplaceSecondary tdx ]
552

    
553
possibleMoves MirrorInternal True True tdx =
554
  [ ReplaceSecondary tdx
555
  , ReplaceAndFailover tdx
556
  , ReplacePrimary tdx
557
  , FailoverAndReplace tdx
558
  ]
559

    
560
possibleMoves MirrorInternal False True tdx =
561
  [ ReplaceSecondary tdx
562
  , ReplaceAndFailover tdx
563
  ]
564

    
565
-- | Compute the best move for a given instance.
566
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
567
                  -> Bool              -- ^ Whether disk moves are allowed
568
                  -> Bool              -- ^ Whether instance moves are allowed
569
                  -> Table             -- ^ Original table
570
                  -> Instance.Instance -- ^ Instance to move
571
                  -> Table             -- ^ Best new table for this instance
572
checkInstanceMove nodes_idx disk_moves inst_moves ini_tbl target =
573
  let opdx = Instance.pNode target
574
      osdx = Instance.sNode target
575
      bad_nodes = [opdx, osdx]
576
      nodes = filter (`notElem` bad_nodes) nodes_idx
577
      mir_type = Instance.mirrorType target
578
      use_secondary = elem osdx nodes_idx && inst_moves
579
      aft_failover = if mir_type == MirrorInternal && use_secondary
580
                       -- if drbd and allowed to failover
581
                       then checkSingleStep ini_tbl target ini_tbl Failover
582
                       else ini_tbl
583
      all_moves =
584
        if disk_moves
585
          then concatMap (possibleMoves mir_type use_secondary inst_moves)
586
               nodes
587
          else []
588
    in
589
      -- iterate over the possible nodes for this instance
590
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
591

    
592
-- | Compute the best next move.
593
checkMove :: [Ndx]               -- ^ Allowed target node indices
594
          -> Bool                -- ^ Whether disk moves are allowed
595
          -> Bool                -- ^ Whether instance moves are allowed
596
          -> Table               -- ^ The current solution
597
          -> [Instance.Instance] -- ^ List of instances still to move
598
          -> Table               -- ^ The new solution
599
checkMove nodes_idx disk_moves inst_moves ini_tbl victims =
600
  let Table _ _ _ ini_plc = ini_tbl
601
      -- we're using rwhnf from the Control.Parallel.Strategies
602
      -- package; we don't need to use rnf as that would force too
603
      -- much evaluation in single-threaded cases, and in
604
      -- multi-threaded case the weak head normal form is enough to
605
      -- spark the evaluation
606
      tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves
607
                             inst_moves ini_tbl)
608
               victims
609
      -- iterate over all instances, computing the best move
610
      best_tbl = foldl' compareTables ini_tbl tables
611
      Table _ _ _ best_plc = best_tbl
612
  in if length best_plc == length ini_plc
613
       then ini_tbl -- no advancement
614
       else best_tbl
615

    
616
-- | Check if we are allowed to go deeper in the balancing.
617
doNextBalance :: Table     -- ^ The starting table
618
              -> Int       -- ^ Remaining length
619
              -> Score     -- ^ Score at which to stop
620
              -> Bool      -- ^ The resulting table and commands
621
doNextBalance ini_tbl max_rounds min_score =
622
  let Table _ _ ini_cv ini_plc = ini_tbl
623
      ini_plc_len = length ini_plc
624
  in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
625

    
626
-- | Run a balance move.
627
tryBalance :: Table       -- ^ The starting table
628
           -> Bool        -- ^ Allow disk moves
629
           -> Bool        -- ^ Allow instance moves
630
           -> Bool        -- ^ Only evacuate moves
631
           -> Score       -- ^ Min gain threshold
632
           -> Score       -- ^ Min gain
633
           -> Maybe Table -- ^ The resulting table and commands
634
tryBalance ini_tbl disk_moves inst_moves evac_mode mg_limit min_gain =
635
    let Table ini_nl ini_il ini_cv _ = ini_tbl
636
        all_inst = Container.elems ini_il
637
        all_nodes = Container.elems ini_nl
638
        (offline_nodes, online_nodes) = partition Node.offline all_nodes
639
        all_inst' = if evac_mode
640
                      then let bad_nodes = map Node.idx offline_nodes
641
                           in filter (any (`elem` bad_nodes) .
642
                                          Instance.allNodes) all_inst
643
                      else all_inst
644
        reloc_inst = filter (\i -> Instance.movable i &&
645
                                   Instance.autoBalance i) all_inst'
646
        node_idx = map Node.idx online_nodes
647
        fin_tbl = checkMove node_idx disk_moves inst_moves ini_tbl reloc_inst
648
        (Table _ _ fin_cv _) = fin_tbl
649
    in
650
      if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
651
      then Just fin_tbl -- this round made success, return the new table
652
      else Nothing
653

    
654
-- * Allocation functions
655

    
656
-- | Build failure stats out of a list of failures.
657
collapseFailures :: [FailMode] -> FailStats
658
collapseFailures flst =
659
    map (\k -> (k, foldl' (\a e -> if e == k then a + 1 else a) 0 flst))
660
            [minBound..maxBound]
661

    
662
-- | Compares two Maybe AllocElement and chooses the best score.
663
bestAllocElement :: Maybe Node.AllocElement
664
                 -> Maybe Node.AllocElement
665
                 -> Maybe Node.AllocElement
666
bestAllocElement a Nothing = a
667
bestAllocElement Nothing b = b
668
bestAllocElement a@(Just (_, _, _, ascore)) b@(Just (_, _, _, bscore)) =
669
  if ascore < bscore then a else b
670

    
671
-- | Update current Allocation solution and failure stats with new
672
-- elements.
673
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
674
concatAllocs as (Bad reason) = as { asFailures = reason : asFailures as }
675

    
676
concatAllocs as (Ok ns) =
677
  let -- Choose the old or new solution, based on the cluster score
678
    cntok = asAllocs as
679
    osols = asSolution as
680
    nsols = bestAllocElement osols (Just ns)
681
    nsuc = cntok + 1
682
    -- Note: we force evaluation of nsols here in order to keep the
683
    -- memory profile low - we know that we will need nsols for sure
684
    -- in the next cycle, so we force evaluation of nsols, since the
685
    -- foldl' in the caller will only evaluate the tuple, but not the
686
    -- elements of the tuple
687
  in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolution = nsols }
688

    
689
-- | Sums two 'AllocSolution' structures.
690
sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
691
sumAllocs (AllocSolution aFails aAllocs aSols aLog)
692
          (AllocSolution bFails bAllocs bSols bLog) =
693
  -- note: we add b first, since usually it will be smaller; when
694
  -- fold'ing, a will grow and grow whereas b is the per-group
695
  -- result, hence smaller
696
  let nFails  = bFails ++ aFails
697
      nAllocs = aAllocs + bAllocs
698
      nSols   = bestAllocElement aSols bSols
699
      nLog    = bLog ++ aLog
700
  in AllocSolution nFails nAllocs nSols nLog
701

    
702
-- | Given a solution, generates a reasonable description for it.
703
describeSolution :: AllocSolution -> String
704
describeSolution as =
705
  let fcnt = asFailures as
706
      sols = asSolution as
707
      freasons =
708
        intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
709
        filter ((> 0) . snd) . collapseFailures $ fcnt
710
  in case sols of
711
     Nothing -> "No valid allocation solutions, failure reasons: " ++
712
                (if null fcnt then "unknown reasons" else freasons)
713
     Just (_, _, nodes, cv) ->
714
         printf ("score: %.8f, successes %d, failures %d (%s)" ++
715
                 " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
716
               (intercalate "/" . map Node.name $ nodes)
717

    
718
-- | Annotates a solution with the appropriate string.
719
annotateSolution :: AllocSolution -> AllocSolution
720
annotateSolution as = as { asLog = describeSolution as : asLog as }
721

    
722
-- | Reverses an evacuation solution.
723
--
724
-- Rationale: we always concat the results to the top of the lists, so
725
-- for proper jobset execution, we should reverse all lists.
726
reverseEvacSolution :: EvacSolution -> EvacSolution
727
reverseEvacSolution (EvacSolution f m o) =
728
  EvacSolution (reverse f) (reverse m) (reverse o)
729

    
730
-- | Generate the valid node allocation singles or pairs for a new instance.
731
genAllocNodes :: Group.List        -- ^ Group list
732
              -> Node.List         -- ^ The node map
733
              -> Int               -- ^ The number of nodes required
734
              -> Bool              -- ^ Whether to drop or not
735
                                   -- unallocable nodes
736
              -> Result AllocNodes -- ^ The (monadic) result
737
genAllocNodes gl nl count drop_unalloc =
738
  let filter_fn = if drop_unalloc
739
                    then filter (Group.isAllocable .
740
                                 flip Container.find gl . Node.group)
741
                    else id
742
      all_nodes = filter_fn $ getOnline nl
743
      all_pairs = [(Node.idx p,
744
                    [Node.idx s | s <- all_nodes,
745
                                       Node.idx p /= Node.idx s,
746
                                       Node.group p == Node.group s]) |
747
                   p <- all_nodes]
748
  in case count of
749
       1 -> Ok (Left (map Node.idx all_nodes))
750
       2 -> Ok (Right (filter (not . null . snd) all_pairs))
751
       _ -> Bad "Unsupported number of nodes, only one or two  supported"
752

    
753
-- | Try to allocate an instance on the cluster.
754
tryAlloc :: (Monad m) =>
755
            Node.List         -- ^ The node list
756
         -> Instance.List     -- ^ The instance list
757
         -> Instance.Instance -- ^ The instance to allocate
758
         -> AllocNodes        -- ^ The allocation targets
759
         -> m AllocSolution   -- ^ Possible solution list
760
tryAlloc _  _ _    (Right []) = fail "Not enough online nodes"
761
tryAlloc nl _ inst (Right ok_pairs) =
762
  let psols = parMap rwhnf (\(p, ss) ->
763
                              foldl' (\cstate ->
764
                                        concatAllocs cstate .
765
                                        allocateOnPair nl inst p)
766
                              emptyAllocSolution ss) ok_pairs
767
      sols = foldl' sumAllocs emptyAllocSolution psols
768
  in return $ annotateSolution sols
769

    
770
tryAlloc _  _ _    (Left []) = fail "No online nodes"
771
tryAlloc nl _ inst (Left all_nodes) =
772
  let sols = foldl' (\cstate ->
773
                       concatAllocs cstate . allocateOnSingle nl inst
774
                    ) emptyAllocSolution all_nodes
775
  in return $ annotateSolution sols
776

    
777
-- | Given a group/result, describe it as a nice (list of) messages.
778
solutionDescription :: (Group.Group, Result AllocSolution)
779
                    -> [String]
780
solutionDescription (grp, result) =
781
  case result of
782
    Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution)
783
    Bad message -> [printf "Group %s: error %s" gname message]
784
  where gname = Group.name grp
785
        pol = allocPolicyToRaw (Group.allocPolicy grp)
786

    
787
-- | From a list of possibly bad and possibly empty solutions, filter
788
-- only the groups with a valid result. Note that the result will be
789
-- reversed compared to the original list.
790
filterMGResults :: [(Group.Group, Result AllocSolution)]
791
                -> [(Group.Group, AllocSolution)]
792
filterMGResults = foldl' fn []
793
  where unallocable = not . Group.isAllocable
794
        fn accu (grp, rasol) =
795
          case rasol of
796
            Bad _ -> accu
797
            Ok sol | isNothing (asSolution sol) -> accu
798
                   | unallocable grp -> accu
799
                   | otherwise -> (grp, sol):accu
800

    
801
-- | Sort multigroup results based on policy and score.
802
sortMGResults :: [(Group.Group, AllocSolution)]
803
              -> [(Group.Group, AllocSolution)]
804
sortMGResults sols =
805
  let extractScore (_, _, _, x) = x
806
      solScore (grp, sol) = (Group.allocPolicy grp,
807
                             (extractScore . fromJust . asSolution) sol)
808
  in sortBy (comparing solScore) sols
809

    
810
-- | Removes node groups which can't accommodate the instance
811
filterValidGroups :: [(Group.Group, (Node.List, Instance.List))]
812
                  -> Instance.Instance
813
                  -> ([(Group.Group, (Node.List, Instance.List))], [String])
814
filterValidGroups [] _ = ([], [])
815
filterValidGroups (ng:ngs) inst =
816
  let (valid_ngs, msgs) = filterValidGroups ngs inst
817
      hasNetwork nic = case Nic.network nic of
818
        Just net -> net `elem` Group.networks (fst ng)
819
        Nothing -> True
820
      hasRequiredNetworks = all hasNetwork (Instance.nics inst)
821
  in if hasRequiredNetworks
822
      then (ng:valid_ngs, msgs)
823
      else (valid_ngs,
824
            ("group " ++ Group.name (fst ng) ++
825
             " is not connected to a network required by instance " ++
826
             Instance.name inst):msgs)
827

    
828
-- | Finds the best group for an instance on a multi-group cluster.
829
--
830
-- Only solutions in @preferred@ and @last_resort@ groups will be
831
-- accepted as valid, and additionally if the allowed groups parameter
832
-- is not null then allocation will only be run for those group
833
-- indices.
834
findBestAllocGroup :: Group.List           -- ^ The group list
835
                   -> Node.List            -- ^ The node list
836
                   -> Instance.List        -- ^ The instance list
837
                   -> Maybe [Gdx]          -- ^ The allowed groups
838
                   -> Instance.Instance    -- ^ The instance to allocate
839
                   -> Int                  -- ^ Required number of nodes
840
                   -> Result (Group.Group, AllocSolution, [String])
841
findBestAllocGroup mggl mgnl mgil allowed_gdxs inst cnt =
842
  let groups_by_idx = splitCluster mgnl mgil
843
      groups = map (\(gid, d) -> (Container.find gid mggl, d)) groups_by_idx
844
      groups' = maybe groups
845
                (\gs -> filter ((`elem` gs) . Group.idx . fst) groups)
846
                allowed_gdxs
847
      (groups'', filter_group_msgs) = filterValidGroups groups' inst
848
      sols = map (\(gr, (nl, il)) ->
849
                   (gr, genAllocNodes mggl nl cnt False >>=
850
                        tryAlloc nl il inst))
851
             groups''::[(Group.Group, Result AllocSolution)]
852
      all_msgs = filter_group_msgs ++ concatMap solutionDescription sols
853
      goodSols = filterMGResults sols
854
      sortedSols = sortMGResults goodSols
855
  in case sortedSols of
856
       [] -> Bad $ if null groups'
857
                     then "no groups for evacuation: allowed groups was" ++
858
                          show allowed_gdxs ++ ", all groups: " ++
859
                          show (map fst groups)
860
                     else intercalate ", " all_msgs
861
       (final_group, final_sol):_ -> return (final_group, final_sol, all_msgs)
862

    
863
-- | Try to allocate an instance on a multi-group cluster.
864
tryMGAlloc :: Group.List           -- ^ The group list
865
           -> Node.List            -- ^ The node list
866
           -> Instance.List        -- ^ The instance list
867
           -> Instance.Instance    -- ^ The instance to allocate
868
           -> Int                  -- ^ Required number of nodes
869
           -> Result AllocSolution -- ^ Possible solution list
870
tryMGAlloc mggl mgnl mgil inst cnt = do
871
  (best_group, solution, all_msgs) <-
872
      findBestAllocGroup mggl mgnl mgil Nothing inst cnt
873
  let group_name = Group.name best_group
874
      selmsg = "Selected group: " ++ group_name
875
  return $ solution { asLog = selmsg:all_msgs }
876

    
877
-- | Calculate the new instance list after allocation solution.
878
updateIl :: Instance.List           -- ^ The original instance list
879
         -> Maybe Node.AllocElement -- ^ The result of the allocation attempt
880
         -> Instance.List           -- ^ The updated instance list
881
updateIl il Nothing = il
882
updateIl il (Just (_, xi, _, _)) = Container.add (Container.size il) xi il
883

    
884
-- | Extract the the new node list from the allocation solution.
885
extractNl :: Node.List               -- ^ The original node list
886
          -> Maybe Node.AllocElement -- ^ The result of the allocation attempt
887
          -> Node.List               -- ^ The new node list
888
extractNl nl Nothing = nl
889
extractNl _ (Just (xnl, _, _, _)) = xnl
890

    
891
-- | Try to allocate a list of instances on a multi-group cluster.
892
allocList :: Group.List                  -- ^ The group list
893
          -> Node.List                   -- ^ The node list
894
          -> Instance.List               -- ^ The instance list
895
          -> [(Instance.Instance, Int)]  -- ^ The instance to allocate
896
          -> AllocSolutionList           -- ^ Possible solution list
897
          -> Result (Node.List, Instance.List,
898
                     AllocSolutionList)  -- ^ The final solution list
899
allocList _  nl il [] result = Ok (nl, il, result)
900
allocList gl nl il ((xi, xicnt):xies) result = do
901
  ares <- tryMGAlloc gl nl il xi xicnt
902
  let sol = asSolution ares
903
      nl' = extractNl nl sol
904
      il' = updateIl il sol
905
  allocList gl nl' il' xies ((xi, ares):result)
906

    
907
-- | Function which fails if the requested mode is change secondary.
908
--
909
-- This is useful since except DRBD, no other disk template can
910
-- execute change secondary; thus, we can just call this function
911
-- instead of always checking for secondary mode. After the call to
912
-- this function, whatever mode we have is just a primary change.
913
failOnSecondaryChange :: (Monad m) => EvacMode -> DiskTemplate -> m ()
914
failOnSecondaryChange ChangeSecondary dt =
915
  fail $ "Instances with disk template '" ++ diskTemplateToRaw dt ++
916
         "' can't execute change secondary"
917
failOnSecondaryChange _ _ = return ()
918

    
919
-- | Run evacuation for a single instance.
920
--
921
-- /Note:/ this function should correctly execute both intra-group
922
-- evacuations (in all modes) and inter-group evacuations (in the
923
-- 'ChangeAll' mode). Of course, this requires that the correct list
924
-- of target nodes is passed.
925
nodeEvacInstance :: Node.List         -- ^ The node list (cluster-wide)
926
                 -> Instance.List     -- ^ Instance list (cluster-wide)
927
                 -> EvacMode          -- ^ The evacuation mode
928
                 -> Instance.Instance -- ^ The instance to be evacuated
929
                 -> Gdx               -- ^ The group we're targetting
930
                 -> [Ndx]             -- ^ The list of available nodes
931
                                      -- for allocation
932
                 -> Result (Node.List, Instance.List, [OpCodes.OpCode])
933
nodeEvacInstance nl il mode inst@(Instance.Instance
934
                                  {Instance.diskTemplate = dt@DTDiskless})
935
                 gdx avail_nodes =
936
                   failOnSecondaryChange mode dt >>
937
                   evacOneNodeOnly nl il inst gdx avail_nodes
938

    
939
nodeEvacInstance _ _ _ (Instance.Instance
940
                        {Instance.diskTemplate = DTPlain}) _ _ =
941
                  fail "Instances of type plain cannot be relocated"
942

    
943
nodeEvacInstance _ _ _ (Instance.Instance
944
                        {Instance.diskTemplate = DTFile}) _ _ =
945
                  fail "Instances of type file cannot be relocated"
946

    
947
nodeEvacInstance nl il mode inst@(Instance.Instance
948
                                  {Instance.diskTemplate = dt@DTSharedFile})
949
                 gdx avail_nodes =
950
                   failOnSecondaryChange mode dt >>
951
                   evacOneNodeOnly nl il inst gdx avail_nodes
952

    
953
nodeEvacInstance nl il mode inst@(Instance.Instance
954
                                  {Instance.diskTemplate = dt@DTBlock})
955
                 gdx avail_nodes =
956
                   failOnSecondaryChange mode dt >>
957
                   evacOneNodeOnly nl il inst gdx avail_nodes
958

    
959
nodeEvacInstance nl il mode inst@(Instance.Instance
960
                                  {Instance.diskTemplate = dt@DTRbd})
961
                 gdx avail_nodes =
962
                   failOnSecondaryChange mode dt >>
963
                   evacOneNodeOnly nl il inst gdx avail_nodes
964

    
965
nodeEvacInstance nl il mode inst@(Instance.Instance
966
                                  {Instance.diskTemplate = dt@DTExt})
967
                 gdx avail_nodes =
968
                   failOnSecondaryChange mode dt >>
969
                   evacOneNodeOnly nl il inst gdx avail_nodes
970

    
971
nodeEvacInstance nl il ChangePrimary
972
                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
973
                 _ _ =
974
  do
975
    (nl', inst', _, _) <- opToResult $ applyMove nl inst Failover
976
    let idx = Instance.idx inst
977
        il' = Container.add idx inst' il
978
        ops = iMoveToJob nl' il' idx Failover
979
    return (nl', il', ops)
980

    
981
nodeEvacInstance nl il ChangeSecondary
982
                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
983
                 gdx avail_nodes =
984
  evacOneNodeOnly nl il inst gdx avail_nodes
985

    
986
-- The algorithm for ChangeAll is as follows:
987
--
988
-- * generate all (primary, secondary) node pairs for the target groups
989
-- * for each pair, execute the needed moves (r:s, f, r:s) and compute
990
--   the final node list state and group score
991
-- * select the best choice via a foldl that uses the same Either
992
--   String solution as the ChangeSecondary mode
993
nodeEvacInstance nl il ChangeAll
994
                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
995
                 gdx avail_nodes =
996
  do
997
    let no_nodes = Left "no nodes available"
998
        node_pairs = [(p,s) | p <- avail_nodes, s <- avail_nodes, p /= s]
999
    (nl', il', ops, _) <-
1000
        annotateResult "Can't find any good nodes for relocation" .
1001
        eitherToResult $
1002
        foldl'
1003
        (\accu nodes -> case evacDrbdAllInner nl il inst gdx nodes of
1004
                          Bad msg ->
1005
                              case accu of
1006
                                Right _ -> accu
1007
                                -- we don't need more details (which
1008
                                -- nodes, etc.) as we only selected
1009
                                -- this group if we can allocate on
1010
                                -- it, hence failures will not
1011
                                -- propagate out of this fold loop
1012
                                Left _ -> Left $ "Allocation failed: " ++ msg
1013
                          Ok result@(_, _, _, new_cv) ->
1014
                              let new_accu = Right result in
1015
                              case accu of
1016
                                Left _ -> new_accu
1017
                                Right (_, _, _, old_cv) ->
1018
                                    if old_cv < new_cv
1019
                                    then accu
1020
                                    else new_accu
1021
        ) no_nodes node_pairs
1022

    
1023
    return (nl', il', ops)
1024

    
1025
-- | Generic function for changing one node of an instance.
1026
--
1027
-- This is similar to 'nodeEvacInstance' but will be used in a few of
1028
-- its sub-patterns. It folds the inner function 'evacOneNodeInner'
1029
-- over the list of available nodes, which results in the best choice
1030
-- for relocation.
1031
evacOneNodeOnly :: Node.List         -- ^ The node list (cluster-wide)
1032
                -> Instance.List     -- ^ Instance list (cluster-wide)
1033
                -> Instance.Instance -- ^ The instance to be evacuated
1034
                -> Gdx               -- ^ The group we're targetting
1035
                -> [Ndx]             -- ^ The list of available nodes
1036
                                      -- for allocation
1037
                -> Result (Node.List, Instance.List, [OpCodes.OpCode])
1038
evacOneNodeOnly nl il inst gdx avail_nodes = do
1039
  op_fn <- case Instance.mirrorType inst of
1040
             MirrorNone -> Bad "Can't relocate/evacuate non-mirrored instances"
1041
             MirrorInternal -> Ok ReplaceSecondary
1042
             MirrorExternal -> Ok FailoverToAny
1043
  (nl', inst', _, ndx) <- annotateResult "Can't find any good node" .
1044
                          eitherToResult $
1045
                          foldl' (evacOneNodeInner nl inst gdx op_fn)
1046
                          (Left "no nodes available") avail_nodes
1047
  let idx = Instance.idx inst
1048
      il' = Container.add idx inst' il
1049
      ops = iMoveToJob nl' il' idx (op_fn ndx)
1050
  return (nl', il', ops)
1051

    
1052
-- | Inner fold function for changing one node of an instance.
1053
--
1054
-- Depending on the instance disk template, this will either change
1055
-- the secondary (for DRBD) or the primary node (for shared
1056
-- storage). However, the operation is generic otherwise.
1057
--
1058
-- The running solution is either a @Left String@, which means we
1059
-- don't have yet a working solution, or a @Right (...)@, which
1060
-- represents a valid solution; it holds the modified node list, the
1061
-- modified instance (after evacuation), the score of that solution,
1062
-- and the new secondary node index.
1063
evacOneNodeInner :: Node.List         -- ^ Cluster node list
1064
                 -> Instance.Instance -- ^ Instance being evacuated
1065
                 -> Gdx               -- ^ The group index of the instance
1066
                 -> (Ndx -> IMove)    -- ^ Operation constructor
1067
                 -> EvacInnerState    -- ^ Current best solution
1068
                 -> Ndx               -- ^ Node we're evaluating as target
1069
                 -> EvacInnerState    -- ^ New best solution
1070
evacOneNodeInner nl inst gdx op_fn accu ndx =
1071
  case applyMove nl inst (op_fn ndx) of
1072
    Bad fm -> let fail_msg = "Node " ++ Container.nameOf nl ndx ++
1073
                             " failed: " ++ show fm
1074
              in either (const $ Left fail_msg) (const accu) accu
1075
    Ok (nl', inst', _, _) ->
1076
      let nodes = Container.elems nl'
1077
          -- The fromJust below is ugly (it can fail nastily), but
1078
          -- at this point we should have any internal mismatches,
1079
          -- and adding a monad here would be quite involved
1080
          grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
1081
          new_cv = compCVNodes grpnodes
1082
          new_accu = Right (nl', inst', new_cv, ndx)
1083
      in case accu of
1084
           Left _ -> new_accu
1085
           Right (_, _, old_cv, _) ->
1086
             if old_cv < new_cv
1087
               then accu
1088
               else new_accu
1089

    
1090
-- | Compute result of changing all nodes of a DRBD instance.
1091
--
1092
-- Given the target primary and secondary node (which might be in a
1093
-- different group or not), this function will 'execute' all the
1094
-- required steps and assuming all operations succceed, will return
1095
-- the modified node and instance lists, the opcodes needed for this
1096
-- and the new group score.
1097
evacDrbdAllInner :: Node.List         -- ^ Cluster node list
1098
                 -> Instance.List     -- ^ Cluster instance list
1099
                 -> Instance.Instance -- ^ The instance to be moved
1100
                 -> Gdx               -- ^ The target group index
1101
                                      -- (which can differ from the
1102
                                      -- current group of the
1103
                                      -- instance)
1104
                 -> (Ndx, Ndx)        -- ^ Tuple of new
1105
                                      -- primary\/secondary nodes
1106
                 -> Result (Node.List, Instance.List, [OpCodes.OpCode], Score)
1107
evacDrbdAllInner nl il inst gdx (t_pdx, t_sdx) = do
1108
  let primary = Container.find (Instance.pNode inst) nl
1109
      idx = Instance.idx inst
1110
  -- if the primary is offline, then we first failover
1111
  (nl1, inst1, ops1) <-
1112
    if Node.offline primary
1113
      then do
1114
        (nl', inst', _, _) <-
1115
          annotateResult "Failing over to the secondary" .
1116
          opToResult $ applyMove nl inst Failover
1117
        return (nl', inst', [Failover])
1118
      else return (nl, inst, [])
1119
  let (o1, o2, o3) = (ReplaceSecondary t_pdx,
1120
                      Failover,
1121
                      ReplaceSecondary t_sdx)
1122
  -- we now need to execute a replace secondary to the future
1123
  -- primary node
1124
  (nl2, inst2, _, _) <-
1125
    annotateResult "Changing secondary to new primary" .
1126
    opToResult $
1127
    applyMove nl1 inst1 o1
1128
  let ops2 = o1:ops1
1129
  -- we now execute another failover, the primary stays fixed now
1130
  (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" .
1131
                        opToResult $ applyMove nl2 inst2 o2
1132
  let ops3 = o2:ops2
1133
  -- and finally another replace secondary, to the final secondary
1134
  (nl4, inst4, _, _) <-
1135
    annotateResult "Changing secondary to final secondary" .
1136
    opToResult $
1137
    applyMove nl3 inst3 o3
1138
  let ops4 = o3:ops3
1139
      il' = Container.add idx inst4 il
1140
      ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
1141
  let nodes = Container.elems nl4
1142
      -- The fromJust below is ugly (it can fail nastily), but
1143
      -- at this point we should have any internal mismatches,
1144
      -- and adding a monad here would be quite involved
1145
      grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
1146
      new_cv = compCVNodes grpnodes
1147
  return (nl4, il', ops, new_cv)
1148

    
1149
-- | Computes the nodes in a given group which are available for
1150
-- allocation.
1151
availableGroupNodes :: [(Gdx, [Ndx])] -- ^ Group index/node index assoc list
1152
                    -> IntSet.IntSet  -- ^ Nodes that are excluded
1153
                    -> Gdx            -- ^ The group for which we
1154
                                      -- query the nodes
1155
                    -> Result [Ndx]   -- ^ List of available node indices
1156
availableGroupNodes group_nodes excl_ndx gdx = do
1157
  local_nodes <- maybe (Bad $ "Can't find group with index " ++ show gdx)
1158
                 Ok (lookup gdx group_nodes)
1159
  let avail_nodes = filter (not . flip IntSet.member excl_ndx) local_nodes
1160
  return avail_nodes
1161

    
1162
-- | Updates the evac solution with the results of an instance
1163
-- evacuation.
1164
updateEvacSolution :: (Node.List, Instance.List, EvacSolution)
1165
                   -> Idx
1166
                   -> Result (Node.List, Instance.List, [OpCodes.OpCode])
1167
                   -> (Node.List, Instance.List, EvacSolution)
1168
updateEvacSolution (nl, il, es) idx (Bad msg) =
1169
  (nl, il, es { esFailed = (idx, msg):esFailed es})
1170
updateEvacSolution (_, _, es) idx (Ok (nl, il, opcodes)) =
1171
  (nl, il, es { esMoved = new_elem:esMoved es
1172
              , esOpCodes = opcodes:esOpCodes es })
1173
    where inst = Container.find idx il
1174
          new_elem = (idx,
1175
                      instancePriGroup nl inst,
1176
                      Instance.allNodes inst)
1177

    
1178
-- | Node-evacuation IAllocator mode main function.
1179
tryNodeEvac :: Group.List    -- ^ The cluster groups
1180
            -> Node.List     -- ^ The node list (cluster-wide, not per group)
1181
            -> Instance.List -- ^ Instance list (cluster-wide)
1182
            -> EvacMode      -- ^ The evacuation mode
1183
            -> [Idx]         -- ^ List of instance (indices) to be evacuated
1184
            -> Result (Node.List, Instance.List, EvacSolution)
1185
tryNodeEvac _ ini_nl ini_il mode idxs =
1186
  let evac_ndx = nodesToEvacuate ini_il mode idxs
1187
      offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
1188
      excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline
1189
      group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
1190
                                           (Container.elems nl))) $
1191
                  splitCluster ini_nl ini_il
1192
      (fin_nl, fin_il, esol) =
1193
        foldl' (\state@(nl, il, _) inst ->
1194
                  let gdx = instancePriGroup nl inst
1195
                      pdx = Instance.pNode inst in
1196
                  updateEvacSolution state (Instance.idx inst) $
1197
                  availableGroupNodes group_ndx
1198
                    (IntSet.insert pdx excl_ndx) gdx >>=
1199
                      nodeEvacInstance nl il mode inst gdx
1200
               )
1201
        (ini_nl, ini_il, emptyEvacSolution)
1202
        (map (`Container.find` ini_il) idxs)
1203
  in return (fin_nl, fin_il, reverseEvacSolution esol)
1204

    
1205
-- | Change-group IAllocator mode main function.
1206
--
1207
-- This is very similar to 'tryNodeEvac', the only difference is that
1208
-- we don't choose as target group the current instance group, but
1209
-- instead:
1210
--
1211
--   1. at the start of the function, we compute which are the target
1212
--   groups; either no groups were passed in, in which case we choose
1213
--   all groups out of which we don't evacuate instance, or there were
1214
--   some groups passed, in which case we use those
1215
--
1216
--   2. for each instance, we use 'findBestAllocGroup' to choose the
1217
--   best group to hold the instance, and then we do what
1218
--   'tryNodeEvac' does, except for this group instead of the current
1219
--   instance group.
1220
--
1221
-- Note that the correct behaviour of this function relies on the
1222
-- function 'nodeEvacInstance' to be able to do correctly both
1223
-- intra-group and inter-group moves when passed the 'ChangeAll' mode.
1224
tryChangeGroup :: Group.List    -- ^ The cluster groups
1225
               -> Node.List     -- ^ The node list (cluster-wide)
1226
               -> Instance.List -- ^ Instance list (cluster-wide)
1227
               -> [Gdx]         -- ^ Target groups; if empty, any
1228
                                -- groups not being evacuated
1229
               -> [Idx]         -- ^ List of instance (indices) to be evacuated
1230
               -> Result (Node.List, Instance.List, EvacSolution)
1231
tryChangeGroup gl ini_nl ini_il gdxs idxs =
1232
  let evac_gdxs = nub $ map (instancePriGroup ini_nl .
1233
                             flip Container.find ini_il) idxs
1234
      target_gdxs = (if null gdxs
1235
                       then Container.keys gl
1236
                       else gdxs) \\ evac_gdxs
1237
      offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
1238
      excl_ndx = foldl' (flip IntSet.insert) IntSet.empty offline
1239
      group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
1240
                                           (Container.elems nl))) $
1241
                  splitCluster ini_nl ini_il
1242
      (fin_nl, fin_il, esol) =
1243
        foldl' (\state@(nl, il, _) inst ->
1244
                  let solution = do
1245
                        let ncnt = Instance.requiredNodes $
1246
                                   Instance.diskTemplate inst
1247
                        (grp, _, _) <- findBestAllocGroup gl nl il
1248
                                       (Just target_gdxs) inst ncnt
1249
                        let gdx = Group.idx grp
1250
                        av_nodes <- availableGroupNodes group_ndx
1251
                                    excl_ndx gdx
1252
                        nodeEvacInstance nl il ChangeAll inst gdx av_nodes
1253
                  in updateEvacSolution state (Instance.idx inst) solution
1254
               )
1255
        (ini_nl, ini_il, emptyEvacSolution)
1256
        (map (`Container.find` ini_il) idxs)
1257
  in return (fin_nl, fin_il, reverseEvacSolution esol)
1258

    
1259
-- | Standard-sized allocation method.
1260
--
1261
-- This places instances of the same size on the cluster until we're
1262
-- out of space. The result will be a list of identically-sized
1263
-- instances.
1264
iterateAlloc :: AllocMethod
1265
iterateAlloc nl il limit newinst allocnodes ixes cstats =
1266
  let depth = length ixes
1267
      newname = printf "new-%d" depth::String
1268
      newidx = Container.size il
1269
      newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
1270
      newlimit = fmap (flip (-) 1) limit
1271
  in case tryAlloc nl il newi2 allocnodes of
1272
       Bad s -> Bad s
1273
       Ok (AllocSolution { asFailures = errs, asSolution = sols3 }) ->
1274
         let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in
1275
         case sols3 of
1276
           Nothing -> newsol
1277
           Just (xnl, xi, _, _) ->
1278
             if limit == Just 0
1279
               then newsol
1280
               else iterateAlloc xnl (Container.add newidx xi il)
1281
                      newlimit newinst allocnodes (xi:ixes)
1282
                      (totalResources xnl:cstats)
1283

    
1284
-- | Predicate whether shrinking a single resource can lead to a valid
1285
-- allocation.
1286
sufficesShrinking :: (Instance.Instance -> AllocSolution) -> Instance.Instance
1287
                     -> FailMode  -> Maybe Instance.Instance
1288
sufficesShrinking allocFn inst fm =
1289
  case dropWhile (isNothing . asSolution . fst)
1290
       . takeWhile (liftA2 (||) (elem fm . asFailures . fst)
1291
                                (isJust . asSolution . fst))
1292
       . map (allocFn &&& id) $
1293
       iterateOk (`Instance.shrinkByType` fm) inst
1294
  of x:_ -> Just . snd $ x
1295
     _ -> Nothing
1296

    
1297
-- | Tiered allocation method.
1298
--
1299
-- This places instances on the cluster, and decreases the spec until
1300
-- we can allocate again. The result will be a list of decreasing
1301
-- instance specs.
1302
tieredAlloc :: AllocMethod
1303
tieredAlloc nl il limit newinst allocnodes ixes cstats =
1304
  case iterateAlloc nl il limit newinst allocnodes ixes cstats of
1305
    Bad s -> Bad s
1306
    Ok (errs, nl', il', ixes', cstats') ->
1307
      let newsol = Ok (errs, nl', il', ixes', cstats')
1308
          ixes_cnt = length ixes'
1309
          (stop, newlimit) = case limit of
1310
                               Nothing -> (False, Nothing)
1311
                               Just n -> (n <= ixes_cnt,
1312
                                            Just (n - ixes_cnt))
1313
          sortedErrs = map fst $ sortBy (comparing snd) errs
1314
          suffShrink = sufficesShrinking (fromMaybe emptyAllocSolution
1315
                                          . flip (tryAlloc nl' il') allocnodes)
1316
                       newinst
1317
          bigSteps = filter isJust . map suffShrink . reverse $ sortedErrs
1318
      in if stop then newsol else
1319
          case bigSteps of
1320
            Just newinst':_ -> tieredAlloc nl' il' newlimit
1321
                               newinst' allocnodes ixes' cstats'
1322
            _ -> case Instance.shrinkByType newinst . last $ sortedErrs of
1323
                   Bad _ -> newsol
1324
                   Ok newinst' -> tieredAlloc nl' il' newlimit
1325
                                  newinst' allocnodes ixes' cstats'
1326

    
1327
-- * Formatting functions
1328

    
1329
-- | Given the original and final nodes, computes the relocation description.
1330
computeMoves :: Instance.Instance -- ^ The instance to be moved
1331
             -> String -- ^ The instance name
1332
             -> IMove  -- ^ The move being performed
1333
             -> String -- ^ New primary
1334
             -> String -- ^ New secondary
1335
             -> (String, [String])
1336
                -- ^ Tuple of moves and commands list; moves is containing
1337
                -- either @/f/@ for failover or @/r:name/@ for replace
1338
                -- secondary, while the command list holds gnt-instance
1339
                -- commands (without that prefix), e.g \"@failover instance1@\"
1340
computeMoves i inam mv c d =
1341
  case mv of
1342
    Failover -> ("f", [mig])
1343
    FailoverToAny _ -> (printf "fa:%s" c, [mig_any])
1344
    FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
1345
    ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
1346
    ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
1347
    ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
1348
  where morf = if Instance.isRunning i then "migrate" else "failover"
1349
        mig = printf "%s -f %s" morf inam::String
1350
        mig_any = printf "%s -f -n %s %s" morf c inam::String
1351
        rep n = printf "replace-disks -n %s %s" n inam::String
1352

    
1353
-- | Converts a placement to string format.
1354
printSolutionLine :: Node.List     -- ^ The node list
1355
                  -> Instance.List -- ^ The instance list
1356
                  -> Int           -- ^ Maximum node name length
1357
                  -> Int           -- ^ Maximum instance name length
1358
                  -> Placement     -- ^ The current placement
1359
                  -> Int           -- ^ The index of the placement in
1360
                                   -- the solution
1361
                  -> (String, [String])
1362
printSolutionLine nl il nmlen imlen plc pos =
1363
  let pmlen = (2*nmlen + 1)
1364
      (i, p, s, mv, c) = plc
1365
      old_sec = Instance.sNode inst
1366
      inst = Container.find i il
1367
      inam = Instance.alias inst
1368
      npri = Node.alias $ Container.find p nl
1369
      nsec = Node.alias $ Container.find s nl
1370
      opri = Node.alias $ Container.find (Instance.pNode inst) nl
1371
      osec = Node.alias $ Container.find old_sec nl
1372
      (moves, cmds) =  computeMoves inst inam mv npri nsec
1373
      -- FIXME: this should check instead/also the disk template
1374
      ostr = if old_sec == Node.noSecondary
1375
               then printf "%s" opri::String
1376
               else printf "%s:%s" opri osec::String
1377
      nstr = if s == Node.noSecondary
1378
               then printf "%s" npri::String
1379
               else printf "%s:%s" npri nsec::String
1380
  in (printf "  %3d. %-*s %-*s => %-*s %12.8f a=%s"
1381
      pos imlen inam pmlen ostr pmlen nstr c moves,
1382
      cmds)
1383

    
1384
-- | Return the instance and involved nodes in an instance move.
1385
--
1386
-- Note that the output list length can vary, and is not required nor
1387
-- guaranteed to be of any specific length.
1388
involvedNodes :: Instance.List -- ^ Instance list, used for retrieving
1389
                               -- the instance from its index; note
1390
                               -- that this /must/ be the original
1391
                               -- instance list, so that we can
1392
                               -- retrieve the old nodes
1393
              -> Placement     -- ^ The placement we're investigating,
1394
                               -- containing the new nodes and
1395
                               -- instance index
1396
              -> [Ndx]         -- ^ Resulting list of node indices
1397
involvedNodes il plc =
1398
  let (i, np, ns, _, _) = plc
1399
      inst = Container.find i il
1400
  in nub $ [np, ns] ++ Instance.allNodes inst
1401

    
1402
-- | Inner function for splitJobs, that either appends the next job to
1403
-- the current jobset, or starts a new jobset.
1404
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
1405
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
1406
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
1407
  | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
1408
  | otherwise = ([n]:cjs, ndx)
1409

    
1410
-- | Break a list of moves into independent groups. Note that this
1411
-- will reverse the order of jobs.
1412
splitJobs :: [MoveJob] -> [JobSet]
1413
splitJobs = fst . foldl mergeJobs ([], [])
1414

    
1415
-- | Given a list of commands, prefix them with @gnt-instance@ and
1416
-- also beautify the display a little.
1417
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
1418
formatJob jsn jsl (sn, (_, _, _, cmds)) =
1419
  let out =
1420
        printf "  echo job %d/%d" jsn sn:
1421
        printf "  check":
1422
        map ("  gnt-instance " ++) cmds
1423
  in if sn == 1
1424
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
1425
       else out
1426

    
1427
-- | Given a list of commands, prefix them with @gnt-instance@ and
1428
-- also beautify the display a little.
1429
formatCmds :: [JobSet] -> String
1430
formatCmds =
1431
  unlines .
1432
  concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
1433
                           (zip [1..] js)) .
1434
  zip [1..]
1435

    
1436
-- | Print the node list.
1437
printNodes :: Node.List -> [String] -> String
1438
printNodes nl fs =
1439
  let fields = case fs of
1440
                 [] -> Node.defaultFields
1441
                 "+":rest -> Node.defaultFields ++ rest
1442
                 _ -> fs
1443
      snl = sortBy (comparing Node.idx) (Container.elems nl)
1444
      (header, isnum) = unzip $ map Node.showHeader fields
1445
  in printTable "" header (map (Node.list fields) snl) isnum
1446

    
1447
-- | Print the instance list.
1448
printInsts :: Node.List -> Instance.List -> String
1449
printInsts nl il =
1450
  let sil = sortBy (comparing Instance.idx) (Container.elems il)
1451
      helper inst = [ if Instance.isRunning inst then "R" else " "
1452
                    , Instance.name inst
1453
                    , Container.nameOf nl (Instance.pNode inst)
1454
                    , let sdx = Instance.sNode inst
1455
                      in if sdx == Node.noSecondary
1456
                           then  ""
1457
                           else Container.nameOf nl sdx
1458
                    , if Instance.autoBalance inst then "Y" else "N"
1459
                    , printf "%3d" $ Instance.vcpus inst
1460
                    , printf "%5d" $ Instance.mem inst
1461
                    , printf "%5d" $ Instance.dsk inst `div` 1024
1462
                    , printf "%5.3f" lC
1463
                    , printf "%5.3f" lM
1464
                    , printf "%5.3f" lD
1465
                    , printf "%5.3f" lN
1466
                    ]
1467
          where DynUtil lC lM lD lN = Instance.util inst
1468
      header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
1469
               , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
1470
      isnum = False:False:False:False:False:repeat True
1471
  in printTable "" header (map helper sil) isnum
1472

    
1473
-- | Shows statistics for a given node list.
1474
printStats :: String -> Node.List -> String
1475
printStats lp nl =
1476
  let dcvs = compDetailedCV $ Container.elems nl
1477
      (weights, names) = unzip detailedCVInfo
1478
      hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
1479
      header = [ "Field", "Value", "Weight" ]
1480
      formatted = map (\(w, h, val) ->
1481
                         [ h
1482
                         , printf "%.8f" val
1483
                         , printf "x%.2f" w
1484
                         ]) hd
1485
  in printTable lp header formatted $ False:repeat True
1486

    
1487
-- | Convert a placement into a list of OpCodes (basically a job).
1488
iMoveToJob :: Node.List        -- ^ The node list; only used for node
1489
                               -- names, so any version is good
1490
                               -- (before or after the operation)
1491
           -> Instance.List    -- ^ The instance list; also used for
1492
                               -- names only
1493
           -> Idx              -- ^ The index of the instance being
1494
                               -- moved
1495
           -> IMove            -- ^ The actual move to be described
1496
           -> [OpCodes.OpCode] -- ^ The list of opcodes equivalent to
1497
                               -- the given move
1498
iMoveToJob nl il idx move =
1499
  let inst = Container.find idx il
1500
      iname = Instance.name inst
1501
      lookNode  n = case mkNonEmpty (Container.nameOf nl n) of
1502
                      -- FIXME: convert htools codebase to non-empty strings
1503
                      Bad msg -> error $ "Empty node name for idx " ++
1504
                                 show n ++ ": " ++ msg ++ "??"
1505
                      Ok ne -> Just ne
1506
      opF = OpCodes.OpInstanceMigrate
1507
              { OpCodes.opInstanceName        = iname
1508
              , OpCodes.opMigrationMode       = Nothing -- default
1509
              , OpCodes.opOldLiveMode         = Nothing -- default as well
1510
              , OpCodes.opTargetNode          = Nothing -- this is drbd
1511
              , OpCodes.opAllowRuntimeChanges = False
1512
              , OpCodes.opIgnoreIpolicy       = False
1513
              , OpCodes.opMigrationCleanup    = False
1514
              , OpCodes.opIallocator          = Nothing
1515
              , OpCodes.opAllowFailover       = True }
1516
      opFA n = opF { OpCodes.opTargetNode = lookNode n } -- not drbd
1517
      opR n = OpCodes.OpInstanceReplaceDisks
1518
                { OpCodes.opInstanceName     = iname
1519
                , OpCodes.opEarlyRelease     = False
1520
                , OpCodes.opIgnoreIpolicy    = False
1521
                , OpCodes.opReplaceDisksMode = OpCodes.ReplaceNewSecondary
1522
                , OpCodes.opReplaceDisksList = []
1523
                , OpCodes.opRemoteNode       = lookNode n
1524
                , OpCodes.opIallocator       = Nothing
1525
                }
1526
  in case move of
1527
       Failover -> [ opF ]
1528
       FailoverToAny np -> [ opFA np ]
1529
       ReplacePrimary np -> [ opF, opR np, opF ]
1530
       ReplaceSecondary ns -> [ opR ns ]
1531
       ReplaceAndFailover np -> [ opR np, opF ]
1532
       FailoverAndReplace ns -> [ opF, opR ns ]
1533

    
1534
-- * Node group functions
1535

    
1536
-- | Computes the group of an instance.
1537
instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
1538
instanceGroup nl i =
1539
  let sidx = Instance.sNode i
1540
      pnode = Container.find (Instance.pNode i) nl
1541
      snode = if sidx == Node.noSecondary
1542
              then pnode
1543
              else Container.find sidx nl
1544
      pgroup = Node.group pnode
1545
      sgroup = Node.group snode
1546
  in if pgroup /= sgroup
1547
       then fail ("Instance placed accross two node groups, primary " ++
1548
                  show pgroup ++ ", secondary " ++ show sgroup)
1549
       else return pgroup
1550

    
1551
-- | Computes the group of an instance per the primary node.
1552
instancePriGroup :: Node.List -> Instance.Instance -> Gdx
1553
instancePriGroup nl i =
1554
  let pnode = Container.find (Instance.pNode i) nl
1555
  in  Node.group pnode
1556

    
1557
-- | Compute the list of badly allocated instances (split across node
1558
-- groups).
1559
findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
1560
findSplitInstances nl =
1561
  filter (not . isOk . instanceGroup nl) . Container.elems
1562

    
1563
-- | Splits a cluster into the component node groups.
1564
splitCluster :: Node.List -> Instance.List ->
1565
                [(Gdx, (Node.List, Instance.List))]
1566
splitCluster nl il =
1567
  let ngroups = Node.computeGroups (Container.elems nl)
1568
  in map (\(gdx, nodes) ->
1569
           let nidxs = map Node.idx nodes
1570
               nodes' = zip nidxs nodes
1571
               instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
1572
           in (gdx, (Container.fromList nodes', instances))) ngroups
1573

    
1574
-- | Compute the list of nodes that are to be evacuated, given a list
1575
-- of instances and an evacuation mode.
1576
nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list
1577
                -> EvacMode      -- ^ The evacuation mode we're using
1578
                -> [Idx]         -- ^ List of instance indices being evacuated
1579
                -> IntSet.IntSet -- ^ Set of node indices
1580
nodesToEvacuate il mode =
1581
  IntSet.delete Node.noSecondary .
1582
  foldl' (\ns idx ->
1583
            let i = Container.find idx il
1584
                pdx = Instance.pNode i
1585
                sdx = Instance.sNode i
1586
                dt = Instance.diskTemplate i
1587
                withSecondary = case dt of
1588
                                  DTDrbd8 -> IntSet.insert sdx ns
1589
                                  _ -> ns
1590
            in case mode of
1591
                 ChangePrimary   -> IntSet.insert pdx ns
1592
                 ChangeSecondary -> withSecondary
1593
                 ChangeAll       -> IntSet.insert pdx withSecondary
1594
         ) IntSet.empty