Statistics
| Branch: | Tag: | Revision:

root / htools / Ganeti / HTools / Cluster.hs @ 97da6b71

History | View | Annotate | Download (63.9 kB)

1 e4f08c46 Iustin Pop
{-| Implementation of cluster-wide logic.
2 e4f08c46 Iustin Pop
3 e4f08c46 Iustin Pop
This module holds all pure cluster-logic; I\/O related functionality
4 525bfb36 Iustin Pop
goes into the /Main/ module for the individual binaries.
5 e4f08c46 Iustin Pop
6 e4f08c46 Iustin Pop
-}
7 e4f08c46 Iustin Pop
8 e2fa2baf Iustin Pop
{-
9 e2fa2baf Iustin Pop
10 4bc33d60 Iustin Pop
Copyright (C) 2009, 2010, 2011 Google Inc.
11 e2fa2baf Iustin Pop
12 e2fa2baf Iustin Pop
This program is free software; you can redistribute it and/or modify
13 e2fa2baf Iustin Pop
it under the terms of the GNU General Public License as published by
14 e2fa2baf Iustin Pop
the Free Software Foundation; either version 2 of the License, or
15 e2fa2baf Iustin Pop
(at your option) any later version.
16 e2fa2baf Iustin Pop
17 e2fa2baf Iustin Pop
This program is distributed in the hope that it will be useful, but
18 e2fa2baf Iustin Pop
WITHOUT ANY WARRANTY; without even the implied warranty of
19 e2fa2baf Iustin Pop
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 e2fa2baf Iustin Pop
General Public License for more details.
21 e2fa2baf Iustin Pop
22 e2fa2baf Iustin Pop
You should have received a copy of the GNU General Public License
23 e2fa2baf Iustin Pop
along with this program; if not, write to the Free Software
24 e2fa2baf Iustin Pop
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
25 e2fa2baf Iustin Pop
02110-1301, USA.
26 e2fa2baf Iustin Pop
27 e2fa2baf Iustin Pop
-}
28 e2fa2baf Iustin Pop
29 669d7e3d Iustin Pop
module Ganeti.HTools.Cluster
30 e4f08c46 Iustin Pop
    (
31 e4f08c46 Iustin Pop
     -- * Types
32 85d0ddc3 Iustin Pop
      AllocSolution(..)
33 47eed3f4 Iustin Pop
    , EvacSolution(..)
34 e4f08c46 Iustin Pop
    , Table(..)
35 1a7eff0e Iustin Pop
    , CStats(..)
36 9b8fac3d Iustin Pop
    , AllocStats
37 e4f08c46 Iustin Pop
    -- * Generic functions
38 e4f08c46 Iustin Pop
    , totalResources
39 9b8fac3d Iustin Pop
    , computeAllocationDelta
40 e4f08c46 Iustin Pop
    -- * First phase functions
41 e4f08c46 Iustin Pop
    , computeBadItems
42 e4f08c46 Iustin Pop
    -- * Second phase functions
43 7dfaafb1 Iustin Pop
    , printSolutionLine
44 142538ff Iustin Pop
    , formatCmds
45 0e8ae201 Iustin Pop
    , involvedNodes
46 0e8ae201 Iustin Pop
    , splitJobs
47 507fda3f Iustin Pop
    -- * Display functions
48 507fda3f Iustin Pop
    , printNodes
49 507fda3f Iustin Pop
    , printInsts
50 e4f08c46 Iustin Pop
    -- * Balacing functions
51 e4f08c46 Iustin Pop
    , checkMove
52 5ad86777 Iustin Pop
    , doNextBalance
53 f25e5aac Iustin Pop
    , tryBalance
54 e4f08c46 Iustin Pop
    , compCV
55 9bb5721c Iustin Pop
    , compCVNodes
56 6bc39970 Iustin Pop
    , compDetailedCV
57 e4f08c46 Iustin Pop
    , printStats
58 6b20875c Iustin Pop
    , iMoveToJob
59 4a340313 Iustin Pop
    -- * IAllocator functions
60 6cb1649f Iustin Pop
    , genAllocNodes
61 dbba5246 Iustin Pop
    , tryAlloc
62 9b1584fc Iustin Pop
    , tryMGAlloc
63 dbba5246 Iustin Pop
    , tryReloc
64 12b0511d Iustin Pop
    , tryEvac
65 47eed3f4 Iustin Pop
    , tryNodeEvac
66 20b376ff Iustin Pop
    , tryChangeGroup
67 478df686 Iustin Pop
    , collapseFailures
68 3ce8009a Iustin Pop
    -- * Allocation functions
69 3ce8009a Iustin Pop
    , iterateAlloc
70 3ce8009a Iustin Pop
    , tieredAlloc
71 949397c8 Iustin Pop
     -- * Node group functions
72 32b8d9c0 Iustin Pop
    , instanceGroup
73 32b8d9c0 Iustin Pop
    , findSplitInstances
74 f4161783 Iustin Pop
    , splitCluster
75 e4f08c46 Iustin Pop
    ) where
76 e4f08c46 Iustin Pop
77 63a78055 Iustin Pop
import qualified Data.IntSet as IntSet
78 e4f08c46 Iustin Pop
import Data.List
79 db56cfc4 Iustin Pop
import Data.Maybe (fromJust)
80 5182e970 Iustin Pop
import Data.Ord (comparing)
81 e4f08c46 Iustin Pop
import Text.Printf (printf)
82 9d3fada5 Iustin Pop
import Control.Monad
83 e4f08c46 Iustin Pop
84 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Container as Container
85 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Instance as Instance
86 669d7e3d Iustin Pop
import qualified Ganeti.HTools.Node as Node
87 aec636b9 Iustin Pop
import qualified Ganeti.HTools.Group as Group
88 e4c5beaf Iustin Pop
import Ganeti.HTools.Types
89 669d7e3d Iustin Pop
import Ganeti.HTools.Utils
90 1adec4be Iustin Pop
import Ganeti.HTools.Compat
91 6b20875c Iustin Pop
import qualified Ganeti.OpCodes as OpCodes
92 e4f08c46 Iustin Pop
93 9188aeef Iustin Pop
-- * Types
94 9188aeef Iustin Pop
95 0c936d24 Iustin Pop
-- | Allocation\/relocation solution.
96 85d0ddc3 Iustin Pop
data AllocSolution = AllocSolution
97 85d0ddc3 Iustin Pop
  { asFailures  :: [FailMode]          -- ^ Failure counts
98 85d0ddc3 Iustin Pop
  , asAllocs    :: Int                 -- ^ Good allocation count
99 85d0ddc3 Iustin Pop
  , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
100 85d0ddc3 Iustin Pop
                                       -- of the list depends on the
101 85d0ddc3 Iustin Pop
                                       -- allocation/relocation mode
102 859fc11d Iustin Pop
  , asLog       :: [String]            -- ^ A list of informational messages
103 85d0ddc3 Iustin Pop
  }
104 85d0ddc3 Iustin Pop
105 47eed3f4 Iustin Pop
-- | Node evacuation/group change iallocator result type. This result
106 47eed3f4 Iustin Pop
-- type consists of actual opcodes (a restricted subset) that are
107 47eed3f4 Iustin Pop
-- transmitted back to Ganeti.
108 47eed3f4 Iustin Pop
data EvacSolution = EvacSolution
109 5440c877 Iustin Pop
    { esMoved   :: [(Idx, Gdx, [Ndx])]  -- ^ Instances moved successfully
110 5440c877 Iustin Pop
    , esFailed  :: [(Idx, String)]      -- ^ Instances which were not
111 47eed3f4 Iustin Pop
                                        -- relocated
112 47eed3f4 Iustin Pop
    , esOpCodes :: [[[OpCodes.OpCode]]] -- ^ List of lists of jobs
113 47eed3f4 Iustin Pop
    }
114 47eed3f4 Iustin Pop
115 40ee14bc Iustin Pop
-- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
116 40ee14bc Iustin Pop
type AllocResult = (FailStats, Node.List, Instance.List,
117 40ee14bc Iustin Pop
                    [Instance.Instance], [CStats])
118 40ee14bc Iustin Pop
119 6cb1649f Iustin Pop
-- | A type denoting the valid allocation mode/pairs.
120 525bfb36 Iustin Pop
--
121 6cb1649f Iustin Pop
-- For a one-node allocation, this will be a @Left ['Node.Node']@,
122 6cb1649f Iustin Pop
-- whereas for a two-node allocation, this will be a @Right
123 6cb1649f Iustin Pop
-- [('Node.Node', 'Node.Node')]@.
124 0d66ea67 Iustin Pop
type AllocNodes = Either [Ndx] [(Ndx, Ndx)]
125 6cb1649f Iustin Pop
126 525bfb36 Iustin Pop
-- | The empty solution we start with when computing allocations.
127 97936d51 Iustin Pop
emptyAllocSolution :: AllocSolution
128 97936d51 Iustin Pop
emptyAllocSolution = AllocSolution { asFailures = [], asAllocs = 0
129 97936d51 Iustin Pop
                                   , asSolutions = [], asLog = [] }
130 78694255 Iustin Pop
131 47eed3f4 Iustin Pop
-- | The empty evac solution.
132 47eed3f4 Iustin Pop
emptyEvacSolution :: EvacSolution
133 47eed3f4 Iustin Pop
emptyEvacSolution = EvacSolution { esMoved = []
134 47eed3f4 Iustin Pop
                                 , esFailed = []
135 47eed3f4 Iustin Pop
                                 , esOpCodes = []
136 47eed3f4 Iustin Pop
                                 }
137 47eed3f4 Iustin Pop
138 525bfb36 Iustin Pop
-- | The complete state for the balancing solution.
139 262a08a2 Iustin Pop
data Table = Table Node.List Instance.List Score [Placement]
140 6bc39970 Iustin Pop
             deriving (Show, Read)
141 e4f08c46 Iustin Pop
142 179c0828 Iustin Pop
-- | Cluster statistics data type.
143 7034694d Iustin Pop
data CStats = CStats { csFmem :: Integer -- ^ Cluster free mem
144 7034694d Iustin Pop
                     , csFdsk :: Integer -- ^ Cluster free disk
145 7034694d Iustin Pop
                     , csAmem :: Integer -- ^ Cluster allocatable mem
146 7034694d Iustin Pop
                     , csAdsk :: Integer -- ^ Cluster allocatable disk
147 7034694d Iustin Pop
                     , csAcpu :: Integer -- ^ Cluster allocatable cpus
148 7034694d Iustin Pop
                     , csMmem :: Integer -- ^ Max node allocatable mem
149 7034694d Iustin Pop
                     , csMdsk :: Integer -- ^ Max node allocatable disk
150 7034694d Iustin Pop
                     , csMcpu :: Integer -- ^ Max node allocatable cpu
151 7034694d Iustin Pop
                     , csImem :: Integer -- ^ Instance used mem
152 7034694d Iustin Pop
                     , csIdsk :: Integer -- ^ Instance used disk
153 7034694d Iustin Pop
                     , csIcpu :: Integer -- ^ Instance used cpu
154 7034694d Iustin Pop
                     , csTmem :: Double  -- ^ Cluster total mem
155 7034694d Iustin Pop
                     , csTdsk :: Double  -- ^ Cluster total disk
156 7034694d Iustin Pop
                     , csTcpu :: Double  -- ^ Cluster total cpus
157 7034694d Iustin Pop
                     , csVcpu :: Integer -- ^ Cluster virtual cpus (if
158 7034694d Iustin Pop
                                         -- node pCpu has been set,
159 7034694d Iustin Pop
                                         -- otherwise -1)
160 7034694d Iustin Pop
                     , csXmem :: Integer -- ^ Unnacounted for mem
161 7034694d Iustin Pop
                     , csNmem :: Integer -- ^ Node own memory
162 7034694d Iustin Pop
                     , csScore :: Score  -- ^ The cluster score
163 7034694d Iustin Pop
                     , csNinst :: Int    -- ^ The total number of instances
164 1a7eff0e Iustin Pop
                     }
165 6bc39970 Iustin Pop
            deriving (Show, Read)
166 1a7eff0e Iustin Pop
167 525bfb36 Iustin Pop
-- | Currently used, possibly to allocate, unallocable.
168 9b8fac3d Iustin Pop
type AllocStats = (RSpec, RSpec, RSpec)
169 9b8fac3d Iustin Pop
170 9188aeef Iustin Pop
-- * Utility functions
171 9188aeef Iustin Pop
172 e4f08c46 Iustin Pop
-- | Verifies the N+1 status and return the affected nodes.
173 e4f08c46 Iustin Pop
verifyN1 :: [Node.Node] -> [Node.Node]
174 9f6dcdea Iustin Pop
verifyN1 = filter Node.failN1
175 e4f08c46 Iustin Pop
176 9188aeef Iustin Pop
{-| Computes the pair of bad nodes and instances.
177 9188aeef Iustin Pop
178 9188aeef Iustin Pop
The bad node list is computed via a simple 'verifyN1' check, and the
179 9188aeef Iustin Pop
bad instance list is the list of primary and secondary instances of
180 9188aeef Iustin Pop
those nodes.
181 9188aeef Iustin Pop
182 9188aeef Iustin Pop
-}
183 9188aeef Iustin Pop
computeBadItems :: Node.List -> Instance.List ->
184 9188aeef Iustin Pop
                   ([Node.Node], [Instance.Instance])
185 9188aeef Iustin Pop
computeBadItems nl il =
186 dbba5246 Iustin Pop
  let bad_nodes = verifyN1 $ getOnline nl
187 5182e970 Iustin Pop
      bad_instances = map (`Container.find` il) .
188 9f6dcdea Iustin Pop
                      sort . nub $
189 2060348b Iustin Pop
                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
190 9188aeef Iustin Pop
  in
191 9188aeef Iustin Pop
    (bad_nodes, bad_instances)
192 9188aeef Iustin Pop
193 525bfb36 Iustin Pop
-- | Zero-initializer for the CStats type.
194 1a7eff0e Iustin Pop
emptyCStats :: CStats
195 86ecce4a Iustin Pop
emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
196 1a7eff0e Iustin Pop
197 525bfb36 Iustin Pop
-- | Update stats with data from a new node.
198 1a7eff0e Iustin Pop
updateCStats :: CStats -> Node.Node -> CStats
199 1a7eff0e Iustin Pop
updateCStats cs node =
200 f5b553da Iustin Pop
    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
201 f5b553da Iustin Pop
                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
202 f5b553da Iustin Pop
                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
203 f5b553da Iustin Pop
                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
204 f5b553da Iustin Pop
                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
205 86ecce4a Iustin Pop
                 csVcpu = x_vcpu,
206 f5b553da Iustin Pop
                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
207 8c4c6a8a Iustin Pop
               }
208 1a7eff0e Iustin Pop
            = cs
209 2060348b Iustin Pop
        inc_amem = Node.fMem node - Node.rMem node
210 1a7eff0e Iustin Pop
        inc_amem' = if inc_amem > 0 then inc_amem else 0
211 301789f4 Iustin Pop
        inc_adsk = Node.availDisk node
212 2060348b Iustin Pop
        inc_imem = truncate (Node.tMem node) - Node.nMem node
213 2060348b Iustin Pop
                   - Node.xMem node - Node.fMem node
214 2060348b Iustin Pop
        inc_icpu = Node.uCpu node
215 2060348b Iustin Pop
        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
216 86ecce4a Iustin Pop
        inc_vcpu = Node.hiCpu node
217 f52dadb2 Iustin Pop
        inc_acpu = Node.availCpu node
218 8c4c6a8a Iustin Pop
219 7034694d Iustin Pop
    in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
220 7034694d Iustin Pop
          , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
221 7034694d Iustin Pop
          , csAmem = x_amem + fromIntegral inc_amem'
222 7034694d Iustin Pop
          , csAdsk = x_adsk + fromIntegral inc_adsk
223 7034694d Iustin Pop
          , csAcpu = x_acpu + fromIntegral inc_acpu
224 7034694d Iustin Pop
          , csMmem = max x_mmem (fromIntegral inc_amem')
225 7034694d Iustin Pop
          , csMdsk = max x_mdsk (fromIntegral inc_adsk)
226 7034694d Iustin Pop
          , csMcpu = max x_mcpu (fromIntegral inc_acpu)
227 7034694d Iustin Pop
          , csImem = x_imem + fromIntegral inc_imem
228 7034694d Iustin Pop
          , csIdsk = x_idsk + fromIntegral inc_idsk
229 7034694d Iustin Pop
          , csIcpu = x_icpu + fromIntegral inc_icpu
230 f5b553da Iustin Pop
          , csTmem = x_tmem + Node.tMem node
231 f5b553da Iustin Pop
          , csTdsk = x_tdsk + Node.tDsk node
232 f5b553da Iustin Pop
          , csTcpu = x_tcpu + Node.tCpu node
233 7034694d Iustin Pop
          , csVcpu = x_vcpu + fromIntegral inc_vcpu
234 7034694d Iustin Pop
          , csXmem = x_xmem + fromIntegral (Node.xMem node)
235 7034694d Iustin Pop
          , csNmem = x_nmem + fromIntegral (Node.nMem node)
236 f5b553da Iustin Pop
          , csNinst = x_ninst + length (Node.pList node)
237 de4ac2c2 Iustin Pop
          }
238 1a7eff0e Iustin Pop
239 9188aeef Iustin Pop
-- | Compute the total free disk and memory in the cluster.
240 1a7eff0e Iustin Pop
totalResources :: Node.List -> CStats
241 de4ac2c2 Iustin Pop
totalResources nl =
242 de4ac2c2 Iustin Pop
    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
243 f5b553da Iustin Pop
    in cs { csScore = compCV nl }
244 9188aeef Iustin Pop
245 9b8fac3d Iustin Pop
-- | Compute the delta between two cluster state.
246 9b8fac3d Iustin Pop
--
247 9b8fac3d Iustin Pop
-- This is used when doing allocations, to understand better the
248 e2436511 Iustin Pop
-- available cluster resources. The return value is a triple of the
249 e2436511 Iustin Pop
-- current used values, the delta that was still allocated, and what
250 e2436511 Iustin Pop
-- was left unallocated.
251 9b8fac3d Iustin Pop
computeAllocationDelta :: CStats -> CStats -> AllocStats
252 9b8fac3d Iustin Pop
computeAllocationDelta cini cfin =
253 9b8fac3d Iustin Pop
    let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
254 9b8fac3d Iustin Pop
        CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
255 9b8fac3d Iustin Pop
                csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
256 7034694d Iustin Pop
        rini = RSpec (fromIntegral i_icpu) (fromIntegral i_imem)
257 7034694d Iustin Pop
               (fromIntegral i_idsk)
258 7034694d Iustin Pop
        rfin = RSpec (fromIntegral (f_icpu - i_icpu))
259 7034694d Iustin Pop
               (fromIntegral (f_imem - i_imem))
260 7034694d Iustin Pop
               (fromIntegral (f_idsk - i_idsk))
261 7034694d Iustin Pop
        un_cpu = fromIntegral (v_cpu - f_icpu)::Int
262 7034694d Iustin Pop
        runa = RSpec un_cpu (truncate t_mem - fromIntegral f_imem)
263 7034694d Iustin Pop
               (truncate t_dsk - fromIntegral f_idsk)
264 9b8fac3d Iustin Pop
    in (rini, rfin, runa)
265 9b8fac3d Iustin Pop
266 525bfb36 Iustin Pop
-- | The names and weights of the individual elements in the CV list.
267 8a3b30ca Iustin Pop
detailedCVInfo :: [(Double, String)]
268 8a3b30ca Iustin Pop
detailedCVInfo = [ (1,  "free_mem_cv")
269 8a3b30ca Iustin Pop
                 , (1,  "free_disk_cv")
270 8a3b30ca Iustin Pop
                 , (1,  "n1_cnt")
271 8a3b30ca Iustin Pop
                 , (1,  "reserved_mem_cv")
272 8a3b30ca Iustin Pop
                 , (4,  "offline_all_cnt")
273 8a3b30ca Iustin Pop
                 , (16, "offline_pri_cnt")
274 8a3b30ca Iustin Pop
                 , (1,  "vcpu_ratio_cv")
275 8a3b30ca Iustin Pop
                 , (1,  "cpu_load_cv")
276 8a3b30ca Iustin Pop
                 , (1,  "mem_load_cv")
277 8a3b30ca Iustin Pop
                 , (1,  "disk_load_cv")
278 8a3b30ca Iustin Pop
                 , (1,  "net_load_cv")
279 306cccd5 Iustin Pop
                 , (2,  "pri_tags_score")
280 8a3b30ca Iustin Pop
                 ]
281 8a3b30ca Iustin Pop
282 179c0828 Iustin Pop
-- | Holds the weights used by 'compCVNodes' for each metric.
283 8a3b30ca Iustin Pop
detailedCVWeights :: [Double]
284 8a3b30ca Iustin Pop
detailedCVWeights = map fst detailedCVInfo
285 fca250e9 Iustin Pop
286 9188aeef Iustin Pop
-- | Compute the mem and disk covariance.
287 9bb5721c Iustin Pop
compDetailedCV :: [Node.Node] -> [Double]
288 9bb5721c Iustin Pop
compDetailedCV all_nodes =
289 9188aeef Iustin Pop
    let
290 9188aeef Iustin Pop
        (offline, nodes) = partition Node.offline all_nodes
291 2060348b Iustin Pop
        mem_l = map Node.pMem nodes
292 2060348b Iustin Pop
        dsk_l = map Node.pDsk nodes
293 daee4bed Iustin Pop
        -- metric: memory covariance
294 4715711d Iustin Pop
        mem_cv = stdDev mem_l
295 daee4bed Iustin Pop
        -- metric: disk covariance
296 4715711d Iustin Pop
        dsk_cv = stdDev dsk_l
297 c3c7a0c1 Iustin Pop
        -- metric: count of instances living on N1 failing nodes
298 c3c7a0c1 Iustin Pop
        n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
299 c3c7a0c1 Iustin Pop
                                                   length (Node.pList n)) .
300 c3c7a0c1 Iustin Pop
                   filter Node.failN1 $ nodes :: Double
301 2060348b Iustin Pop
        res_l = map Node.pRem nodes
302 daee4bed Iustin Pop
        -- metric: reserved memory covariance
303 4715711d Iustin Pop
        res_cv = stdDev res_l
304 e4d31268 Iustin Pop
        -- offline instances metrics
305 e4d31268 Iustin Pop
        offline_ipri = sum . map (length . Node.pList) $ offline
306 e4d31268 Iustin Pop
        offline_isec = sum . map (length . Node.sList) $ offline
307 e4d31268 Iustin Pop
        -- metric: count of instances on offline nodes
308 e4d31268 Iustin Pop
        off_score = fromIntegral (offline_ipri + offline_isec)::Double
309 673f0f00 Iustin Pop
        -- metric: count of primary instances on offline nodes (this
310 673f0f00 Iustin Pop
        -- helps with evacuation/failover of primary instances on
311 673f0f00 Iustin Pop
        -- 2-node clusters with one node offline)
312 673f0f00 Iustin Pop
        off_pri_score = fromIntegral offline_ipri::Double
313 2060348b Iustin Pop
        cpu_l = map Node.pCpu nodes
314 daee4bed Iustin Pop
        -- metric: covariance of vcpu/pcpu ratio
315 4715711d Iustin Pop
        cpu_cv = stdDev cpu_l
316 daee4bed Iustin Pop
        -- metrics: covariance of cpu, memory, disk and network load
317 ee9724b9 Iustin Pop
        (c_load, m_load, d_load, n_load) = unzip4 $
318 ee9724b9 Iustin Pop
            map (\n ->
319 ee9724b9 Iustin Pop
                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
320 ee9724b9 Iustin Pop
                         DynUtil c2 m2 d2 n2 = Node.utilPool n
321 ee9724b9 Iustin Pop
                     in (c1/c2, m1/m2, d1/d2, n1/n2)
322 ee9724b9 Iustin Pop
                ) nodes
323 d844fe88 Iustin Pop
        -- metric: conflicting instance count
324 d844fe88 Iustin Pop
        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
325 d844fe88 Iustin Pop
        pri_tags_score = fromIntegral pri_tags_inst::Double
326 673f0f00 Iustin Pop
    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
327 4715711d Iustin Pop
       , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
328 d844fe88 Iustin Pop
       , pri_tags_score ]
329 9188aeef Iustin Pop
330 9188aeef Iustin Pop
-- | Compute the /total/ variance.
331 9bb5721c Iustin Pop
compCVNodes :: [Node.Node] -> Double
332 9bb5721c Iustin Pop
compCVNodes = sum . zipWith (*) detailedCVWeights . compDetailedCV
333 9bb5721c Iustin Pop
334 9bb5721c Iustin Pop
-- | Wrapper over 'compCVNodes' for callers that have a 'Node.List'.
335 9188aeef Iustin Pop
compCV :: Node.List -> Double
336 9bb5721c Iustin Pop
compCV = compCVNodes . Container.elems
337 9bb5721c Iustin Pop
338 525bfb36 Iustin Pop
-- | Compute online nodes from a 'Node.List'.
339 dbba5246 Iustin Pop
getOnline :: Node.List -> [Node.Node]
340 dbba5246 Iustin Pop
getOnline = filter (not . Node.offline) . Container.elems
341 dbba5246 Iustin Pop
342 525bfb36 Iustin Pop
-- * Balancing functions
343 9188aeef Iustin Pop
344 9188aeef Iustin Pop
-- | Compute best table. Note that the ordering of the arguments is important.
345 9188aeef Iustin Pop
compareTables :: Table -> Table -> Table
346 9188aeef Iustin Pop
compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
347 9188aeef Iustin Pop
    if a_cv > b_cv then b else a
348 9188aeef Iustin Pop
349 9188aeef Iustin Pop
-- | Applies an instance move to a given node list and instance.
350 262a08a2 Iustin Pop
applyMove :: Node.List -> Instance.Instance
351 8880d889 Iustin Pop
          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
352 00b51a14 Iustin Pop
-- Failover (f)
353 e4f08c46 Iustin Pop
applyMove nl inst Failover =
354 2060348b Iustin Pop
    let old_pdx = Instance.pNode inst
355 2060348b Iustin Pop
        old_sdx = Instance.sNode inst
356 e4f08c46 Iustin Pop
        old_p = Container.find old_pdx nl
357 e4f08c46 Iustin Pop
        old_s = Container.find old_sdx nl
358 e4f08c46 Iustin Pop
        int_p = Node.removePri old_p inst
359 e4f08c46 Iustin Pop
        int_s = Node.removeSec old_s inst
360 2cae47e9 Iustin Pop
        force_p = Node.offline old_p
361 b161386d Iustin Pop
        new_nl = do -- Maybe monad
362 2cae47e9 Iustin Pop
          new_p <- Node.addPriEx force_p int_s inst
363 b161386d Iustin Pop
          new_s <- Node.addSec int_p inst old_sdx
364 8880d889 Iustin Pop
          let new_inst = Instance.setBoth inst old_sdx old_pdx
365 8880d889 Iustin Pop
          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
366 8880d889 Iustin Pop
                  new_inst, old_sdx, old_pdx)
367 8880d889 Iustin Pop
    in new_nl
368 e4f08c46 Iustin Pop
369 00b51a14 Iustin Pop
-- Replace the primary (f:, r:np, f)
370 e4f08c46 Iustin Pop
applyMove nl inst (ReplacePrimary new_pdx) =
371 2060348b Iustin Pop
    let old_pdx = Instance.pNode inst
372 2060348b Iustin Pop
        old_sdx = Instance.sNode inst
373 e4f08c46 Iustin Pop
        old_p = Container.find old_pdx nl
374 e4f08c46 Iustin Pop
        old_s = Container.find old_sdx nl
375 e4f08c46 Iustin Pop
        tgt_n = Container.find new_pdx nl
376 e4f08c46 Iustin Pop
        int_p = Node.removePri old_p inst
377 e4f08c46 Iustin Pop
        int_s = Node.removeSec old_s inst
378 2cae47e9 Iustin Pop
        force_p = Node.offline old_p
379 b161386d Iustin Pop
        new_nl = do -- Maybe monad
380 70db354e Iustin Pop
          -- check that the current secondary can host the instance
381 70db354e Iustin Pop
          -- during the migration
382 2cae47e9 Iustin Pop
          tmp_s <- Node.addPriEx force_p int_s inst
383 70db354e Iustin Pop
          let tmp_s' = Node.removePri tmp_s inst
384 2cae47e9 Iustin Pop
          new_p <- Node.addPriEx force_p tgt_n inst
385 2cae47e9 Iustin Pop
          new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
386 8880d889 Iustin Pop
          let new_inst = Instance.setPri inst new_pdx
387 8880d889 Iustin Pop
          return (Container.add new_pdx new_p $
388 8880d889 Iustin Pop
                  Container.addTwo old_pdx int_p old_sdx new_s nl,
389 8880d889 Iustin Pop
                  new_inst, new_pdx, old_sdx)
390 8880d889 Iustin Pop
    in new_nl
391 e4f08c46 Iustin Pop
392 00b51a14 Iustin Pop
-- Replace the secondary (r:ns)
393 e4f08c46 Iustin Pop
applyMove nl inst (ReplaceSecondary new_sdx) =
394 2060348b Iustin Pop
    let old_pdx = Instance.pNode inst
395 2060348b Iustin Pop
        old_sdx = Instance.sNode inst
396 e4f08c46 Iustin Pop
        old_s = Container.find old_sdx nl
397 e4f08c46 Iustin Pop
        tgt_n = Container.find new_sdx nl
398 e4f08c46 Iustin Pop
        int_s = Node.removeSec old_s inst
399 2cae47e9 Iustin Pop
        force_s = Node.offline old_s
400 8880d889 Iustin Pop
        new_inst = Instance.setSec inst new_sdx
401 2cae47e9 Iustin Pop
        new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
402 8880d889 Iustin Pop
                 \new_s -> return (Container.addTwo new_sdx
403 8880d889 Iustin Pop
                                   new_s old_sdx int_s nl,
404 8880d889 Iustin Pop
                                   new_inst, old_pdx, new_sdx)
405 8880d889 Iustin Pop
    in new_nl
406 e4f08c46 Iustin Pop
407 00b51a14 Iustin Pop
-- Replace the secondary and failover (r:np, f)
408 79ac6b6f Iustin Pop
applyMove nl inst (ReplaceAndFailover new_pdx) =
409 2060348b Iustin Pop
    let old_pdx = Instance.pNode inst
410 2060348b Iustin Pop
        old_sdx = Instance.sNode inst
411 79ac6b6f Iustin Pop
        old_p = Container.find old_pdx nl
412 79ac6b6f Iustin Pop
        old_s = Container.find old_sdx nl
413 79ac6b6f Iustin Pop
        tgt_n = Container.find new_pdx nl
414 79ac6b6f Iustin Pop
        int_p = Node.removePri old_p inst
415 79ac6b6f Iustin Pop
        int_s = Node.removeSec old_s inst
416 2cae47e9 Iustin Pop
        force_s = Node.offline old_s
417 b161386d Iustin Pop
        new_nl = do -- Maybe monad
418 b161386d Iustin Pop
          new_p <- Node.addPri tgt_n inst
419 2cae47e9 Iustin Pop
          new_s <- Node.addSecEx force_s int_p inst new_pdx
420 8880d889 Iustin Pop
          let new_inst = Instance.setBoth inst new_pdx old_pdx
421 8880d889 Iustin Pop
          return (Container.add new_pdx new_p $
422 8880d889 Iustin Pop
                  Container.addTwo old_pdx new_s old_sdx int_s nl,
423 8880d889 Iustin Pop
                  new_inst, new_pdx, old_pdx)
424 8880d889 Iustin Pop
    in new_nl
425 79ac6b6f Iustin Pop
426 19493d33 Iustin Pop
-- Failver and replace the secondary (f, r:ns)
427 19493d33 Iustin Pop
applyMove nl inst (FailoverAndReplace new_sdx) =
428 2060348b Iustin Pop
    let old_pdx = Instance.pNode inst
429 2060348b Iustin Pop
        old_sdx = Instance.sNode inst
430 19493d33 Iustin Pop
        old_p = Container.find old_pdx nl
431 19493d33 Iustin Pop
        old_s = Container.find old_sdx nl
432 19493d33 Iustin Pop
        tgt_n = Container.find new_sdx nl
433 19493d33 Iustin Pop
        int_p = Node.removePri old_p inst
434 19493d33 Iustin Pop
        int_s = Node.removeSec old_s inst
435 2cae47e9 Iustin Pop
        force_p = Node.offline old_p
436 b161386d Iustin Pop
        new_nl = do -- Maybe monad
437 2cae47e9 Iustin Pop
          new_p <- Node.addPriEx force_p int_s inst
438 2cae47e9 Iustin Pop
          new_s <- Node.addSecEx force_p tgt_n inst old_sdx
439 8880d889 Iustin Pop
          let new_inst = Instance.setBoth inst old_sdx new_sdx
440 8880d889 Iustin Pop
          return (Container.add new_sdx new_s $
441 8880d889 Iustin Pop
                  Container.addTwo old_sdx new_p old_pdx int_p nl,
442 8880d889 Iustin Pop
                  new_inst, old_sdx, new_sdx)
443 8880d889 Iustin Pop
    in new_nl
444 19493d33 Iustin Pop
445 9188aeef Iustin Pop
-- | Tries to allocate an instance on one given node.
446 0d66ea67 Iustin Pop
allocateOnSingle :: Node.List -> Instance.Instance -> Ndx
447 1fe81531 Iustin Pop
                 -> OpResult Node.AllocElement
448 0d66ea67 Iustin Pop
allocateOnSingle nl inst new_pdx =
449 0d66ea67 Iustin Pop
    let p = Container.find new_pdx nl
450 8880d889 Iustin Pop
        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
451 7d3f4253 Iustin Pop
    in  Node.addPri p inst >>= \new_p -> do
452 7d3f4253 Iustin Pop
      let new_nl = Container.add new_pdx new_p nl
453 7d3f4253 Iustin Pop
          new_score = compCV nl
454 7d3f4253 Iustin Pop
      return (new_nl, new_inst, [new_p], new_score)
455 5e15f460 Iustin Pop
456 9188aeef Iustin Pop
-- | Tries to allocate an instance on a given pair of nodes.
457 0d66ea67 Iustin Pop
allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx
458 1fe81531 Iustin Pop
               -> OpResult Node.AllocElement
459 0d66ea67 Iustin Pop
allocateOnPair nl inst new_pdx new_sdx =
460 0d66ea67 Iustin Pop
    let tgt_p = Container.find new_pdx nl
461 0d66ea67 Iustin Pop
        tgt_s = Container.find new_sdx nl
462 7d3f4253 Iustin Pop
    in do
463 7d3f4253 Iustin Pop
      new_p <- Node.addPri tgt_p inst
464 7d3f4253 Iustin Pop
      new_s <- Node.addSec tgt_s inst new_pdx
465 7d3f4253 Iustin Pop
      let new_inst = Instance.setBoth inst new_pdx new_sdx
466 7d3f4253 Iustin Pop
          new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
467 7d3f4253 Iustin Pop
      return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
468 4a340313 Iustin Pop
469 9188aeef Iustin Pop
-- | Tries to perform an instance move and returns the best table
470 9188aeef Iustin Pop
-- between the original one and the new one.
471 e4f08c46 Iustin Pop
checkSingleStep :: Table -- ^ The original table
472 e4f08c46 Iustin Pop
                -> Instance.Instance -- ^ The instance to move
473 e4f08c46 Iustin Pop
                -> Table -- ^ The current best table
474 e4f08c46 Iustin Pop
                -> IMove -- ^ The move to apply
475 e4f08c46 Iustin Pop
                -> Table -- ^ The final best table
476 e4f08c46 Iustin Pop
checkSingleStep ini_tbl target cur_tbl move =
477 e4f08c46 Iustin Pop
    let
478 e4f08c46 Iustin Pop
        Table ini_nl ini_il _ ini_plc = ini_tbl
479 8880d889 Iustin Pop
        tmp_resu = applyMove ini_nl target move
480 e4f08c46 Iustin Pop
    in
481 8880d889 Iustin Pop
      case tmp_resu of
482 f2280553 Iustin Pop
        OpFail _ -> cur_tbl
483 3173c987 Iustin Pop
        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
484 f2280553 Iustin Pop
            let tgt_idx = Instance.idx target
485 f2280553 Iustin Pop
                upd_cvar = compCV upd_nl
486 f2280553 Iustin Pop
                upd_il = Container.add tgt_idx new_inst ini_il
487 3173c987 Iustin Pop
                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
488 f2280553 Iustin Pop
                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
489 f2280553 Iustin Pop
            in
490 f2280553 Iustin Pop
              compareTables cur_tbl upd_tbl
491 e4f08c46 Iustin Pop
492 c0501c69 Iustin Pop
-- | Given the status of the current secondary as a valid new node and
493 c0501c69 Iustin Pop
-- the current candidate target node, generate the possible moves for
494 c0501c69 Iustin Pop
-- a instance.
495 c0501c69 Iustin Pop
possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
496 e08424a8 Guido Trotter
              -> Bool      -- ^ Whether we can change the primary node
497 c0501c69 Iustin Pop
              -> Ndx       -- ^ Target node candidate
498 c0501c69 Iustin Pop
              -> [IMove]   -- ^ List of valid result moves
499 e08424a8 Guido Trotter
500 e08424a8 Guido Trotter
possibleMoves _ False tdx =
501 e08424a8 Guido Trotter
    [ReplaceSecondary tdx]
502 e08424a8 Guido Trotter
503 e08424a8 Guido Trotter
possibleMoves True True tdx =
504 40d4eba0 Iustin Pop
    [ReplaceSecondary tdx,
505 40d4eba0 Iustin Pop
     ReplaceAndFailover tdx,
506 40d4eba0 Iustin Pop
     ReplacePrimary tdx,
507 40d4eba0 Iustin Pop
     FailoverAndReplace tdx]
508 40d4eba0 Iustin Pop
509 e08424a8 Guido Trotter
possibleMoves False True tdx =
510 40d4eba0 Iustin Pop
    [ReplaceSecondary tdx,
511 40d4eba0 Iustin Pop
     ReplaceAndFailover tdx]
512 40d4eba0 Iustin Pop
513 40d4eba0 Iustin Pop
-- | Compute the best move for a given instance.
514 c0501c69 Iustin Pop
checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
515 c0501c69 Iustin Pop
                  -> Bool              -- ^ Whether disk moves are allowed
516 e08424a8 Guido Trotter
                  -> Bool              -- ^ Whether instance moves are allowed
517 c0501c69 Iustin Pop
                  -> Table             -- ^ Original table
518 c0501c69 Iustin Pop
                  -> Instance.Instance -- ^ Instance to move
519 c0501c69 Iustin Pop
                  -> Table             -- ^ Best new table for this instance
520 e08424a8 Guido Trotter
checkInstanceMove nodes_idx disk_moves inst_moves ini_tbl target =
521 4e25d1c2 Iustin Pop
    let
522 2060348b Iustin Pop
        opdx = Instance.pNode target
523 2060348b Iustin Pop
        osdx = Instance.sNode target
524 9dc6023f Iustin Pop
        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
525 e08424a8 Guido Trotter
        use_secondary = elem osdx nodes_idx && inst_moves
526 40d4eba0 Iustin Pop
        aft_failover = if use_secondary -- if allowed to failover
527 40d4eba0 Iustin Pop
                       then checkSingleStep ini_tbl target ini_tbl Failover
528 40d4eba0 Iustin Pop
                       else ini_tbl
529 c0501c69 Iustin Pop
        all_moves = if disk_moves
530 e08424a8 Guido Trotter
                    then concatMap
531 e08424a8 Guido Trotter
                         (possibleMoves use_secondary inst_moves) nodes
532 c0501c69 Iustin Pop
                    else []
533 4e25d1c2 Iustin Pop
    in
534 4e25d1c2 Iustin Pop
      -- iterate over the possible nodes for this instance
535 9dc6023f Iustin Pop
      foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
536 4e25d1c2 Iustin Pop
537 e4f08c46 Iustin Pop
-- | Compute the best next move.
538 608efcce Iustin Pop
checkMove :: [Ndx]               -- ^ Allowed target node indices
539 c0501c69 Iustin Pop
          -> Bool                -- ^ Whether disk moves are allowed
540 e08424a8 Guido Trotter
          -> Bool                -- ^ Whether instance moves are allowed
541 256810de Iustin Pop
          -> Table               -- ^ The current solution
542 e4f08c46 Iustin Pop
          -> [Instance.Instance] -- ^ List of instances still to move
543 256810de Iustin Pop
          -> Table               -- ^ The new solution
544 e08424a8 Guido Trotter
checkMove nodes_idx disk_moves inst_moves ini_tbl victims =
545 4e25d1c2 Iustin Pop
    let Table _ _ _ ini_plc = ini_tbl
546 a3eee4ad Iustin Pop
        -- we're using rwhnf from the Control.Parallel.Strategies
547 a3eee4ad Iustin Pop
        -- package; we don't need to use rnf as that would force too
548 a3eee4ad Iustin Pop
        -- much evaluation in single-threaded cases, and in
549 a3eee4ad Iustin Pop
        -- multi-threaded case the weak head normal form is enough to
550 a3eee4ad Iustin Pop
        -- spark the evaluation
551 e08424a8 Guido Trotter
        tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves
552 e08424a8 Guido Trotter
                               inst_moves ini_tbl)
553 a3eee4ad Iustin Pop
                 victims
554 4e25d1c2 Iustin Pop
        -- iterate over all instances, computing the best move
555 2a8e2dc9 Iustin Pop
        best_tbl = foldl' compareTables ini_tbl tables
556 aaaa0e43 Iustin Pop
        Table _ _ _ best_plc = best_tbl
557 a804261a Iustin Pop
    in if length best_plc == length ini_plc
558 a804261a Iustin Pop
       then ini_tbl -- no advancement
559 a804261a Iustin Pop
       else best_tbl
560 e4f08c46 Iustin Pop
561 525bfb36 Iustin Pop
-- | Check if we are allowed to go deeper in the balancing.
562 3fea6959 Iustin Pop
doNextBalance :: Table     -- ^ The starting table
563 3fea6959 Iustin Pop
              -> Int       -- ^ Remaining length
564 3fea6959 Iustin Pop
              -> Score     -- ^ Score at which to stop
565 3fea6959 Iustin Pop
              -> Bool      -- ^ The resulting table and commands
566 5ad86777 Iustin Pop
doNextBalance ini_tbl max_rounds min_score =
567 5ad86777 Iustin Pop
    let Table _ _ ini_cv ini_plc = ini_tbl
568 5ad86777 Iustin Pop
        ini_plc_len = length ini_plc
569 5ad86777 Iustin Pop
    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
570 5ad86777 Iustin Pop
571 525bfb36 Iustin Pop
-- | Run a balance move.
572 f25e5aac Iustin Pop
tryBalance :: Table       -- ^ The starting table
573 f25e5aac Iustin Pop
           -> Bool        -- ^ Allow disk moves
574 e08424a8 Guido Trotter
           -> Bool        -- ^ Allow instance moves
575 2e28ac32 Iustin Pop
           -> Bool        -- ^ Only evacuate moves
576 848b65c9 Iustin Pop
           -> Score       -- ^ Min gain threshold
577 848b65c9 Iustin Pop
           -> Score       -- ^ Min gain
578 f25e5aac Iustin Pop
           -> Maybe Table -- ^ The resulting table and commands
579 e08424a8 Guido Trotter
tryBalance ini_tbl disk_moves inst_moves evac_mode mg_limit min_gain =
580 5ad86777 Iustin Pop
    let Table ini_nl ini_il ini_cv _ = ini_tbl
581 5ad86777 Iustin Pop
        all_inst = Container.elems ini_il
582 2e28ac32 Iustin Pop
        all_inst' = if evac_mode
583 2e28ac32 Iustin Pop
                    then let bad_nodes = map Node.idx . filter Node.offline $
584 2e28ac32 Iustin Pop
                                         Container.elems ini_nl
585 77ecfa82 Iustin Pop
                         in filter (any (`elem` bad_nodes) . Instance.allNodes)
586 2e28ac32 Iustin Pop
                            all_inst
587 2e28ac32 Iustin Pop
                    else all_inst
588 c424cdc8 Iustin Pop
        reloc_inst = filter Instance.movable all_inst'
589 5ad86777 Iustin Pop
        node_idx = map Node.idx . filter (not . Node.offline) $
590 5ad86777 Iustin Pop
                   Container.elems ini_nl
591 e08424a8 Guido Trotter
        fin_tbl = checkMove node_idx disk_moves inst_moves ini_tbl reloc_inst
592 5ad86777 Iustin Pop
        (Table _ _ fin_cv _) = fin_tbl
593 f25e5aac Iustin Pop
    in
594 848b65c9 Iustin Pop
      if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
595 5ad86777 Iustin Pop
      then Just fin_tbl -- this round made success, return the new table
596 f25e5aac Iustin Pop
      else Nothing
597 f25e5aac Iustin Pop
598 478df686 Iustin Pop
-- * Allocation functions
599 478df686 Iustin Pop
600 525bfb36 Iustin Pop
-- | Build failure stats out of a list of failures.
601 478df686 Iustin Pop
collapseFailures :: [FailMode] -> FailStats
602 478df686 Iustin Pop
collapseFailures flst =
603 b4bae394 Iustin Pop
    map (\k -> (k, foldl' (\a e -> if e == k then a + 1 else a) 0 flst))
604 b4bae394 Iustin Pop
            [minBound..maxBound]
605 478df686 Iustin Pop
606 478df686 Iustin Pop
-- | Update current Allocation solution and failure stats with new
607 525bfb36 Iustin Pop
-- elements.
608 1fe81531 Iustin Pop
concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
609 85d0ddc3 Iustin Pop
concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
610 478df686 Iustin Pop
611 85d0ddc3 Iustin Pop
concatAllocs as (OpGood ns@(_, _, _, nscore)) =
612 7d3f4253 Iustin Pop
    let -- Choose the old or new solution, based on the cluster score
613 85d0ddc3 Iustin Pop
        cntok = asAllocs as
614 85d0ddc3 Iustin Pop
        osols = asSolutions as
615 478df686 Iustin Pop
        nsols = case osols of
616 a334d536 Iustin Pop
                  [] -> [ns]
617 a334d536 Iustin Pop
                  (_, _, _, oscore):[] ->
618 478df686 Iustin Pop
                      if oscore < nscore
619 478df686 Iustin Pop
                      then osols
620 a334d536 Iustin Pop
                      else [ns]
621 23f9ab76 Iustin Pop
                  -- FIXME: here we simply concat to lists with more
622 23f9ab76 Iustin Pop
                  -- than one element; we should instead abort, since
623 23f9ab76 Iustin Pop
                  -- this is not a valid usage of this function
624 a334d536 Iustin Pop
                  xs -> ns:xs
625 fbb95f28 Iustin Pop
        nsuc = cntok + 1
626 478df686 Iustin Pop
    -- Note: we force evaluation of nsols here in order to keep the
627 478df686 Iustin Pop
    -- memory profile low - we know that we will need nsols for sure
628 478df686 Iustin Pop
    -- in the next cycle, so we force evaluation of nsols, since the
629 478df686 Iustin Pop
    -- foldl' in the caller will only evaluate the tuple, but not the
630 7d11799b Iustin Pop
    -- elements of the tuple
631 85d0ddc3 Iustin Pop
    in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
632 dbba5246 Iustin Pop
633 525bfb36 Iustin Pop
-- | Given a solution, generates a reasonable description for it.
634 859fc11d Iustin Pop
describeSolution :: AllocSolution -> String
635 859fc11d Iustin Pop
describeSolution as =
636 859fc11d Iustin Pop
  let fcnt = asFailures as
637 859fc11d Iustin Pop
      sols = asSolutions as
638 859fc11d Iustin Pop
      freasons =
639 859fc11d Iustin Pop
        intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
640 859fc11d Iustin Pop
        filter ((> 0) . snd) . collapseFailures $ fcnt
641 859fc11d Iustin Pop
  in if null sols
642 859fc11d Iustin Pop
     then "No valid allocation solutions, failure reasons: " ++
643 859fc11d Iustin Pop
          (if null fcnt
644 859fc11d Iustin Pop
           then "unknown reasons"
645 859fc11d Iustin Pop
           else freasons)
646 859fc11d Iustin Pop
     else let (_, _, nodes, cv) = head sols
647 859fc11d Iustin Pop
          in printf ("score: %.8f, successes %d, failures %d (%s)" ++
648 859fc11d Iustin Pop
                     " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
649 859fc11d Iustin Pop
             (intercalate "/" . map Node.name $ nodes)
650 859fc11d Iustin Pop
651 525bfb36 Iustin Pop
-- | Annotates a solution with the appropriate string.
652 859fc11d Iustin Pop
annotateSolution :: AllocSolution -> AllocSolution
653 859fc11d Iustin Pop
annotateSolution as = as { asLog = describeSolution as : asLog as }
654 859fc11d Iustin Pop
655 47eed3f4 Iustin Pop
-- | Reverses an evacuation solution.
656 47eed3f4 Iustin Pop
--
657 47eed3f4 Iustin Pop
-- Rationale: we always concat the results to the top of the lists, so
658 47eed3f4 Iustin Pop
-- for proper jobset execution, we should reverse all lists.
659 47eed3f4 Iustin Pop
reverseEvacSolution :: EvacSolution -> EvacSolution
660 47eed3f4 Iustin Pop
reverseEvacSolution (EvacSolution f m o) =
661 47eed3f4 Iustin Pop
    EvacSolution (reverse f) (reverse m) (reverse o)
662 47eed3f4 Iustin Pop
663 6cb1649f Iustin Pop
-- | Generate the valid node allocation singles or pairs for a new instance.
664 6d0bc5ca Iustin Pop
genAllocNodes :: Group.List        -- ^ Group list
665 6d0bc5ca Iustin Pop
              -> Node.List         -- ^ The node map
666 6cb1649f Iustin Pop
              -> Int               -- ^ The number of nodes required
667 6d0bc5ca Iustin Pop
              -> Bool              -- ^ Whether to drop or not
668 6d0bc5ca Iustin Pop
                                   -- unallocable nodes
669 6cb1649f Iustin Pop
              -> Result AllocNodes -- ^ The (monadic) result
670 6d0bc5ca Iustin Pop
genAllocNodes gl nl count drop_unalloc =
671 6d0bc5ca Iustin Pop
    let filter_fn = if drop_unalloc
672 e4491427 Iustin Pop
                    then filter (Group.isAllocable .
673 e4491427 Iustin Pop
                                 flip Container.find gl . Node.group)
674 6d0bc5ca Iustin Pop
                    else id
675 6d0bc5ca Iustin Pop
        all_nodes = filter_fn $ getOnline nl
676 6cb1649f Iustin Pop
        all_pairs = liftM2 (,) all_nodes all_nodes
677 6cb1649f Iustin Pop
        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y &&
678 6cb1649f Iustin Pop
                                      Node.group x == Node.group y) all_pairs
679 6cb1649f Iustin Pop
    in case count of
680 0d66ea67 Iustin Pop
         1 -> Ok (Left (map Node.idx all_nodes))
681 0d66ea67 Iustin Pop
         2 -> Ok (Right (map (\(p, s) -> (Node.idx p, Node.idx s)) ok_pairs))
682 6cb1649f Iustin Pop
         _ -> Bad "Unsupported number of nodes, only one or two  supported"
683 6cb1649f Iustin Pop
684 dbba5246 Iustin Pop
-- | Try to allocate an instance on the cluster.
685 dbba5246 Iustin Pop
tryAlloc :: (Monad m) =>
686 dbba5246 Iustin Pop
            Node.List         -- ^ The node list
687 dbba5246 Iustin Pop
         -> Instance.List     -- ^ The instance list
688 dbba5246 Iustin Pop
         -> Instance.Instance -- ^ The instance to allocate
689 6cb1649f Iustin Pop
         -> AllocNodes        -- ^ The allocation targets
690 78694255 Iustin Pop
         -> m AllocSolution   -- ^ Possible solution list
691 6cb1649f Iustin Pop
tryAlloc nl _ inst (Right ok_pairs) =
692 6cb1649f Iustin Pop
    let sols = foldl' (\cstate (p, s) ->
693 478df686 Iustin Pop
                           concatAllocs cstate $ allocateOnPair nl inst p s
694 97936d51 Iustin Pop
                      ) emptyAllocSolution ok_pairs
695 859fc11d Iustin Pop
696 dec88196 Iustin Pop
    in if null ok_pairs -- means we have just one node
697 dec88196 Iustin Pop
       then fail "Not enough online nodes"
698 dec88196 Iustin Pop
       else return $ annotateSolution sols
699 dbba5246 Iustin Pop
700 6cb1649f Iustin Pop
tryAlloc nl _ inst (Left all_nodes) =
701 6cb1649f Iustin Pop
    let sols = foldl' (\cstate ->
702 2485487d Iustin Pop
                           concatAllocs cstate . allocateOnSingle nl inst
703 97936d51 Iustin Pop
                      ) emptyAllocSolution all_nodes
704 dec88196 Iustin Pop
    in if null all_nodes
705 dec88196 Iustin Pop
       then fail "No online nodes"
706 dec88196 Iustin Pop
       else return $ annotateSolution sols
707 dbba5246 Iustin Pop
708 525bfb36 Iustin Pop
-- | Given a group/result, describe it as a nice (list of) messages.
709 aec636b9 Iustin Pop
solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
710 aec636b9 Iustin Pop
solutionDescription gl (groupId, result) =
711 9b1584fc Iustin Pop
  case result of
712 73206d0a Iustin Pop
    Ok solution -> map (printf "Group %s (%s): %s" gname pol) (asLog solution)
713 aec636b9 Iustin Pop
    Bad message -> [printf "Group %s: error %s" gname message]
714 73206d0a Iustin Pop
  where grp = Container.find groupId gl
715 73206d0a Iustin Pop
        gname = Group.name grp
716 73206d0a Iustin Pop
        pol = apolToString (Group.allocPolicy grp)
717 9b1584fc Iustin Pop
718 9b1584fc Iustin Pop
-- | From a list of possibly bad and possibly empty solutions, filter
719 88253d03 Iustin Pop
-- only the groups with a valid result. Note that the result will be
720 525bfb36 Iustin Pop
-- reversed compared to the original list.
721 73206d0a Iustin Pop
filterMGResults :: Group.List
722 73206d0a Iustin Pop
                -> [(Gdx, Result AllocSolution)]
723 73206d0a Iustin Pop
                -> [(Gdx, AllocSolution)]
724 88253d03 Iustin Pop
filterMGResults gl = foldl' fn []
725 88253d03 Iustin Pop
    where unallocable = not . Group.isAllocable . flip Container.find gl
726 88253d03 Iustin Pop
          fn accu (gdx, rasol) =
727 88253d03 Iustin Pop
              case rasol of
728 88253d03 Iustin Pop
                Bad _ -> accu
729 88253d03 Iustin Pop
                Ok sol | null (asSolutions sol) -> accu
730 88253d03 Iustin Pop
                       | unallocable gdx -> accu
731 88253d03 Iustin Pop
                       | otherwise -> (gdx, sol):accu
732 9b1584fc Iustin Pop
733 525bfb36 Iustin Pop
-- | Sort multigroup results based on policy and score.
734 73206d0a Iustin Pop
sortMGResults :: Group.List
735 73206d0a Iustin Pop
             -> [(Gdx, AllocSolution)]
736 73206d0a Iustin Pop
             -> [(Gdx, AllocSolution)]
737 73206d0a Iustin Pop
sortMGResults gl sols =
738 2a8e2dc9 Iustin Pop
    let extractScore (_, _, _, x) = x
739 73206d0a Iustin Pop
        solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
740 73206d0a Iustin Pop
                               (extractScore . head . asSolutions) sol)
741 73206d0a Iustin Pop
    in sortBy (comparing solScore) sols
742 73206d0a Iustin Pop
743 8fd09137 Iustin Pop
-- | Finds the best group for an instance on a multi-group cluster.
744 d72ff6c3 Iustin Pop
--
745 d72ff6c3 Iustin Pop
-- Only solutions in @preferred@ and @last_resort@ groups will be
746 d72ff6c3 Iustin Pop
-- accepted as valid, and additionally if the allowed groups parameter
747 d72ff6c3 Iustin Pop
-- is not null then allocation will only be run for those group
748 d72ff6c3 Iustin Pop
-- indices.
749 8fd09137 Iustin Pop
findBestAllocGroup :: Group.List           -- ^ The group list
750 8fd09137 Iustin Pop
                   -> Node.List            -- ^ The node list
751 8fd09137 Iustin Pop
                   -> Instance.List        -- ^ The instance list
752 d72ff6c3 Iustin Pop
                   -> Maybe [Gdx]          -- ^ The allowed groups
753 8fd09137 Iustin Pop
                   -> Instance.Instance    -- ^ The instance to allocate
754 8fd09137 Iustin Pop
                   -> Int                  -- ^ Required number of nodes
755 8fd09137 Iustin Pop
                   -> Result (Gdx, AllocSolution, [String])
756 d72ff6c3 Iustin Pop
findBestAllocGroup mggl mgnl mgil allowed_gdxs inst cnt =
757 9b1584fc Iustin Pop
  let groups = splitCluster mgnl mgil
758 d72ff6c3 Iustin Pop
      groups' = maybe groups (\gs -> filter ((`elem` gs) . fst) groups)
759 d72ff6c3 Iustin Pop
                allowed_gdxs
760 9b1584fc Iustin Pop
      sols = map (\(gid, (nl, il)) ->
761 6d0bc5ca Iustin Pop
                   (gid, genAllocNodes mggl nl cnt False >>=
762 6d0bc5ca Iustin Pop
                       tryAlloc nl il inst))
763 d72ff6c3 Iustin Pop
             groups'::[(Gdx, Result AllocSolution)]
764 aec636b9 Iustin Pop
      all_msgs = concatMap (solutionDescription mggl) sols
765 73206d0a Iustin Pop
      goodSols = filterMGResults mggl sols
766 73206d0a Iustin Pop
      sortedSols = sortMGResults mggl goodSols
767 9b1584fc Iustin Pop
  in if null sortedSols
768 9b1584fc Iustin Pop
     then Bad $ intercalate ", " all_msgs
769 9b1584fc Iustin Pop
     else let (final_group, final_sol) = head sortedSols
770 8fd09137 Iustin Pop
          in return (final_group, final_sol, all_msgs)
771 8fd09137 Iustin Pop
772 8fd09137 Iustin Pop
-- | Try to allocate an instance on a multi-group cluster.
773 8fd09137 Iustin Pop
tryMGAlloc :: Group.List           -- ^ The group list
774 8fd09137 Iustin Pop
           -> Node.List            -- ^ The node list
775 8fd09137 Iustin Pop
           -> Instance.List        -- ^ The instance list
776 8fd09137 Iustin Pop
           -> Instance.Instance    -- ^ The instance to allocate
777 8fd09137 Iustin Pop
           -> Int                  -- ^ Required number of nodes
778 8fd09137 Iustin Pop
           -> Result AllocSolution -- ^ Possible solution list
779 8fd09137 Iustin Pop
tryMGAlloc mggl mgnl mgil inst cnt = do
780 8fd09137 Iustin Pop
  (best_group, solution, all_msgs) <-
781 d72ff6c3 Iustin Pop
      findBestAllocGroup mggl mgnl mgil Nothing inst cnt
782 8fd09137 Iustin Pop
  let group_name = Group.name $ Container.find best_group mggl
783 8fd09137 Iustin Pop
      selmsg = "Selected group: " ++ group_name
784 8fd09137 Iustin Pop
  return $ solution { asLog = selmsg:all_msgs }
785 9b1584fc Iustin Pop
786 9b1584fc Iustin Pop
-- | Try to relocate an instance on the cluster.
787 dbba5246 Iustin Pop
tryReloc :: (Monad m) =>
788 78694255 Iustin Pop
            Node.List       -- ^ The node list
789 78694255 Iustin Pop
         -> Instance.List   -- ^ The instance list
790 78694255 Iustin Pop
         -> Idx             -- ^ The index of the instance to move
791 478df686 Iustin Pop
         -> Int             -- ^ The number of nodes required
792 78694255 Iustin Pop
         -> [Ndx]           -- ^ Nodes which should not be used
793 78694255 Iustin Pop
         -> m AllocSolution -- ^ Solution list
794 dbba5246 Iustin Pop
tryReloc nl il xid 1 ex_idx =
795 dbba5246 Iustin Pop
    let all_nodes = getOnline nl
796 dbba5246 Iustin Pop
        inst = Container.find xid il
797 2060348b Iustin Pop
        ex_idx' = Instance.pNode inst:ex_idx
798 dbba5246 Iustin Pop
        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
799 dbba5246 Iustin Pop
        valid_idxes = map Node.idx valid_nodes
800 478df686 Iustin Pop
        sols1 = foldl' (\cstate x ->
801 fbb95f28 Iustin Pop
                            let em = do
802 478df686 Iustin Pop
                                  (mnl, i, _, _) <-
803 478df686 Iustin Pop
                                      applyMove nl inst (ReplaceSecondary x)
804 7d3f4253 Iustin Pop
                                  return (mnl, i, [Container.find x mnl],
805 7d3f4253 Iustin Pop
                                          compCV mnl)
806 fbb95f28 Iustin Pop
                            in concatAllocs cstate em
807 97936d51 Iustin Pop
                       ) emptyAllocSolution valid_idxes
808 dbba5246 Iustin Pop
    in return sols1
809 dbba5246 Iustin Pop
810 dbba5246 Iustin Pop
tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
811 9f6dcdea Iustin Pop
                                \destinations required (" ++ show reqn ++
812 dbba5246 Iustin Pop
                                                  "), only one supported"
813 e4f08c46 Iustin Pop
814 525bfb36 Iustin Pop
-- | Change an instance's secondary node.
815 2ca68e2b Iustin Pop
evacInstance :: (Monad m) =>
816 2ca68e2b Iustin Pop
                [Ndx]                      -- ^ Excluded nodes
817 2ca68e2b Iustin Pop
             -> Instance.List              -- ^ The current instance list
818 2ca68e2b Iustin Pop
             -> (Node.List, AllocSolution) -- ^ The current state
819 2ca68e2b Iustin Pop
             -> Idx                        -- ^ The instance to evacuate
820 2ca68e2b Iustin Pop
             -> m (Node.List, AllocSolution)
821 2ca68e2b Iustin Pop
evacInstance ex_ndx il (nl, old_as) idx = do
822 2ca68e2b Iustin Pop
  -- FIXME: hardcoded one node here
823 2ca68e2b Iustin Pop
824 2ca68e2b Iustin Pop
  -- Longer explanation: evacuation is currently hardcoded to DRBD
825 2ca68e2b Iustin Pop
  -- instances (which have one secondary); hence, even if the
826 2ca68e2b Iustin Pop
  -- IAllocator protocol can request N nodes for an instance, and all
827 2ca68e2b Iustin Pop
  -- the message parsing/loading pass this, this implementation only
828 2ca68e2b Iustin Pop
  -- supports one; this situation needs to be revisited if we ever
829 2ca68e2b Iustin Pop
  -- support more than one secondary, or if we change the storage
830 2ca68e2b Iustin Pop
  -- model
831 2ca68e2b Iustin Pop
  new_as <- tryReloc nl il idx 1 ex_ndx
832 2ca68e2b Iustin Pop
  case asSolutions new_as of
833 2ca68e2b Iustin Pop
    -- an individual relocation succeeded, we kind of compose the data
834 2ca68e2b Iustin Pop
    -- from the two solutions
835 2ca68e2b Iustin Pop
    csol@(nl', _, _, _):_ ->
836 2ca68e2b Iustin Pop
        return (nl', new_as { asSolutions = csol:asSolutions old_as })
837 2ca68e2b Iustin Pop
    -- this relocation failed, so we fail the entire evac
838 2ca68e2b Iustin Pop
    _ -> fail $ "Can't evacuate instance " ++
839 2ca68e2b Iustin Pop
         Instance.name (Container.find idx il) ++
840 2ca68e2b Iustin Pop
             ": " ++ describeSolution new_as
841 2ca68e2b Iustin Pop
842 3fea6959 Iustin Pop
-- | Try to evacuate a list of nodes.
843 12b0511d Iustin Pop
tryEvac :: (Monad m) =>
844 12b0511d Iustin Pop
            Node.List       -- ^ The node list
845 12b0511d Iustin Pop
         -> Instance.List   -- ^ The instance list
846 1bc47d38 Iustin Pop
         -> [Idx]           -- ^ Instances to be evacuated
847 1bc47d38 Iustin Pop
         -> [Ndx]           -- ^ Restricted nodes (the ones being evacuated)
848 12b0511d Iustin Pop
         -> m AllocSolution -- ^ Solution list
849 1bc47d38 Iustin Pop
tryEvac nl il idxs ex_ndx = do
850 97936d51 Iustin Pop
  (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptyAllocSolution) idxs
851 1bc47d38 Iustin Pop
  return sol
852 1bc47d38 Iustin Pop
853 47eed3f4 Iustin Pop
-- | Function which fails if the requested mode is change secondary.
854 47eed3f4 Iustin Pop
--
855 47eed3f4 Iustin Pop
-- This is useful since except DRBD, no other disk template can
856 47eed3f4 Iustin Pop
-- execute change secondary; thus, we can just call this function
857 47eed3f4 Iustin Pop
-- instead of always checking for secondary mode. After the call to
858 47eed3f4 Iustin Pop
-- this function, whatever mode we have is just a primary change.
859 47eed3f4 Iustin Pop
failOnSecondaryChange :: (Monad m) => EvacMode -> DiskTemplate -> m ()
860 47eed3f4 Iustin Pop
failOnSecondaryChange ChangeSecondary dt =
861 47eed3f4 Iustin Pop
    fail $ "Instances with disk template '" ++ dtToString dt ++
862 47eed3f4 Iustin Pop
         "' can't execute change secondary"
863 47eed3f4 Iustin Pop
failOnSecondaryChange _ _ = return ()
864 47eed3f4 Iustin Pop
865 47eed3f4 Iustin Pop
-- | Run evacuation for a single instance.
866 20b376ff Iustin Pop
--
867 20b376ff Iustin Pop
-- /Note:/ this function should correctly execute both intra-group
868 20b376ff Iustin Pop
-- evacuations (in all modes) and inter-group evacuations (in the
869 20b376ff Iustin Pop
-- 'ChangeAll' mode). Of course, this requires that the correct list
870 20b376ff Iustin Pop
-- of target nodes is passed.
871 47eed3f4 Iustin Pop
nodeEvacInstance :: Node.List         -- ^ The node list (cluster-wide)
872 47eed3f4 Iustin Pop
                 -> Instance.List     -- ^ Instance list (cluster-wide)
873 47eed3f4 Iustin Pop
                 -> EvacMode          -- ^ The evacuation mode
874 47eed3f4 Iustin Pop
                 -> Instance.Instance -- ^ The instance to be evacuated
875 a86fbf36 Iustin Pop
                 -> Gdx               -- ^ The group we're targetting
876 47eed3f4 Iustin Pop
                 -> [Ndx]             -- ^ The list of available nodes
877 47eed3f4 Iustin Pop
                                      -- for allocation
878 47eed3f4 Iustin Pop
                 -> Result (Node.List, Instance.List, [OpCodes.OpCode])
879 47eed3f4 Iustin Pop
nodeEvacInstance _ _ mode (Instance.Instance
880 a86fbf36 Iustin Pop
                           {Instance.diskTemplate = dt@DTDiskless}) _ _ =
881 47eed3f4 Iustin Pop
                  failOnSecondaryChange mode dt >>
882 47eed3f4 Iustin Pop
                  fail "Diskless relocations not implemented yet"
883 47eed3f4 Iustin Pop
884 47eed3f4 Iustin Pop
nodeEvacInstance _ _ _ (Instance.Instance
885 a86fbf36 Iustin Pop
                        {Instance.diskTemplate = DTPlain}) _ _ =
886 47eed3f4 Iustin Pop
                  fail "Instances of type plain cannot be relocated"
887 47eed3f4 Iustin Pop
888 47eed3f4 Iustin Pop
nodeEvacInstance _ _ _ (Instance.Instance
889 a86fbf36 Iustin Pop
                        {Instance.diskTemplate = DTFile}) _ _ =
890 47eed3f4 Iustin Pop
                  fail "Instances of type file cannot be relocated"
891 47eed3f4 Iustin Pop
892 47eed3f4 Iustin Pop
nodeEvacInstance _ _ mode  (Instance.Instance
893 a86fbf36 Iustin Pop
                            {Instance.diskTemplate = dt@DTSharedFile}) _ _ =
894 47eed3f4 Iustin Pop
                  failOnSecondaryChange mode dt >>
895 47eed3f4 Iustin Pop
                  fail "Shared file relocations not implemented yet"
896 47eed3f4 Iustin Pop
897 47eed3f4 Iustin Pop
nodeEvacInstance _ _ mode (Instance.Instance
898 a86fbf36 Iustin Pop
                           {Instance.diskTemplate = dt@DTBlock}) _ _ =
899 47eed3f4 Iustin Pop
                  failOnSecondaryChange mode dt >>
900 47eed3f4 Iustin Pop
                  fail "Block device relocations not implemented yet"
901 47eed3f4 Iustin Pop
902 bef83fd1 Iustin Pop
nodeEvacInstance nl il ChangePrimary
903 a86fbf36 Iustin Pop
                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
904 a86fbf36 Iustin Pop
                 _ _ =
905 bef83fd1 Iustin Pop
  do
906 bef83fd1 Iustin Pop
    (nl', inst', _, _) <- opToResult $ applyMove nl inst Failover
907 bef83fd1 Iustin Pop
    let idx = Instance.idx inst
908 bef83fd1 Iustin Pop
        il' = Container.add idx inst' il
909 bef83fd1 Iustin Pop
        ops = iMoveToJob nl' il' idx Failover
910 bef83fd1 Iustin Pop
    return (nl', il', ops)
911 bef83fd1 Iustin Pop
912 db56cfc4 Iustin Pop
nodeEvacInstance nl il ChangeSecondary
913 db56cfc4 Iustin Pop
                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
914 a86fbf36 Iustin Pop
                 gdx avail_nodes =
915 db56cfc4 Iustin Pop
  do
916 db56cfc4 Iustin Pop
    (nl', inst', _, ndx) <- annotateResult "Can't find any good node" $
917 db56cfc4 Iustin Pop
                            eitherToResult $
918 db56cfc4 Iustin Pop
                            foldl' (evacDrbdSecondaryInner nl inst gdx)
919 db56cfc4 Iustin Pop
                            (Left "no nodes available") avail_nodes
920 db56cfc4 Iustin Pop
    let idx = Instance.idx inst
921 db56cfc4 Iustin Pop
        il' = Container.add idx inst' il
922 db56cfc4 Iustin Pop
        ops = iMoveToJob nl' il' idx (ReplaceSecondary ndx)
923 db56cfc4 Iustin Pop
    return (nl', il', ops)
924 db56cfc4 Iustin Pop
925 97da6b71 Iustin Pop
-- The algorithm for ChangeAll is as follows:
926 97da6b71 Iustin Pop
--
927 97da6b71 Iustin Pop
-- * generate all (primary, secondary) node pairs for the target groups
928 97da6b71 Iustin Pop
-- * for each pair, execute the needed moves (r:s, f, r:s) and compute
929 97da6b71 Iustin Pop
--   the final node list state and group score
930 97da6b71 Iustin Pop
-- * select the best choice via a foldl that uses the same Either
931 97da6b71 Iustin Pop
--   String solution as the ChangeSecondary mode
932 d52d41de Iustin Pop
nodeEvacInstance nl il ChangeAll
933 d52d41de Iustin Pop
                 inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
934 a86fbf36 Iustin Pop
                 gdx avail_nodes =
935 d52d41de Iustin Pop
  do
936 97da6b71 Iustin Pop
    let no_nodes = Left "no nodes available"
937 97da6b71 Iustin Pop
        node_pairs = [(p,s) | p <- avail_nodes, s <- avail_nodes, p /= s]
938 97da6b71 Iustin Pop
    (nl', il', ops, _) <-
939 97da6b71 Iustin Pop
        annotateResult "Can't find any good nodes for relocation" $
940 d52d41de Iustin Pop
        eitherToResult $
941 97da6b71 Iustin Pop
        foldl'
942 97da6b71 Iustin Pop
        (\accu nodes -> case evacDrbdAllInner nl il inst gdx nodes of
943 97da6b71 Iustin Pop
                          Bad msg ->
944 97da6b71 Iustin Pop
                              case accu of
945 97da6b71 Iustin Pop
                                Right _ -> accu
946 97da6b71 Iustin Pop
                                -- we don't need more details (which
947 97da6b71 Iustin Pop
                                -- nodes, etc.) as we only selected
948 97da6b71 Iustin Pop
                                -- this group if we can allocate on
949 97da6b71 Iustin Pop
                                -- it, hence failures will not
950 97da6b71 Iustin Pop
                                -- propagate out of this fold loop
951 97da6b71 Iustin Pop
                                Left _ -> Left $ "Allocation failed: " ++ msg
952 97da6b71 Iustin Pop
                          Ok result@(_, _, _, new_cv) ->
953 97da6b71 Iustin Pop
                              let new_accu = Right result in
954 97da6b71 Iustin Pop
                              case accu of
955 97da6b71 Iustin Pop
                                Left _ -> new_accu
956 97da6b71 Iustin Pop
                                Right (_, _, _, old_cv) ->
957 97da6b71 Iustin Pop
                                    if old_cv < new_cv
958 97da6b71 Iustin Pop
                                    then accu
959 97da6b71 Iustin Pop
                                    else new_accu
960 97da6b71 Iustin Pop
        ) no_nodes node_pairs
961 97da6b71 Iustin Pop
962 97da6b71 Iustin Pop
    return (nl', il', ops)
963 47eed3f4 Iustin Pop
964 db56cfc4 Iustin Pop
-- | Inner fold function for changing secondary of a DRBD instance.
965 db56cfc4 Iustin Pop
--
966 97da6b71 Iustin Pop
-- The running solution is either a @Left String@, which means we
967 db56cfc4 Iustin Pop
-- don't have yet a working solution, or a @Right (...)@, which
968 db56cfc4 Iustin Pop
-- represents a valid solution; it holds the modified node list, the
969 db56cfc4 Iustin Pop
-- modified instance (after evacuation), the score of that solution,
970 db56cfc4 Iustin Pop
-- and the new secondary node index.
971 db56cfc4 Iustin Pop
evacDrbdSecondaryInner :: Node.List -- ^ Cluster node list
972 db56cfc4 Iustin Pop
                       -> Instance.Instance -- ^ Instance being evacuated
973 db56cfc4 Iustin Pop
                       -> Gdx -- ^ The group index of the instance
974 db56cfc4 Iustin Pop
                       -> Either String ( Node.List
975 db56cfc4 Iustin Pop
                                        , Instance.Instance
976 db56cfc4 Iustin Pop
                                        , Score
977 db56cfc4 Iustin Pop
                                        , Ndx)  -- ^ Current best solution
978 db56cfc4 Iustin Pop
                       -> Ndx  -- ^ Node we're evaluating as new secondary
979 db56cfc4 Iustin Pop
                       -> Either String ( Node.List
980 db56cfc4 Iustin Pop
                                        , Instance.Instance
981 db56cfc4 Iustin Pop
                                        , Score
982 db56cfc4 Iustin Pop
                                        , Ndx) -- ^ New best solution
983 db56cfc4 Iustin Pop
evacDrbdSecondaryInner nl inst gdx accu ndx =
984 db56cfc4 Iustin Pop
    case applyMove nl inst (ReplaceSecondary ndx) of
985 db56cfc4 Iustin Pop
      OpFail fm ->
986 db56cfc4 Iustin Pop
          case accu of
987 db56cfc4 Iustin Pop
            Right _ -> accu
988 db56cfc4 Iustin Pop
            Left _ -> Left $ "Node " ++ Container.nameOf nl ndx ++
989 db56cfc4 Iustin Pop
                      " failed: " ++ show fm
990 db56cfc4 Iustin Pop
      OpGood (nl', inst', _, _) ->
991 db56cfc4 Iustin Pop
          let nodes = Container.elems nl'
992 db56cfc4 Iustin Pop
              -- The fromJust below is ugly (it can fail nastily), but
993 db56cfc4 Iustin Pop
              -- at this point we should have any internal mismatches,
994 db56cfc4 Iustin Pop
              -- and adding a monad here would be quite involved
995 1b0a6356 Iustin Pop
              grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
996 db56cfc4 Iustin Pop
              new_cv = compCVNodes grpnodes
997 db56cfc4 Iustin Pop
              new_accu = Right (nl', inst', new_cv, ndx)
998 db56cfc4 Iustin Pop
          in case accu of
999 db56cfc4 Iustin Pop
               Left _ -> new_accu
1000 db56cfc4 Iustin Pop
               Right (_, _, old_cv, _) ->
1001 db56cfc4 Iustin Pop
                   if old_cv < new_cv
1002 db56cfc4 Iustin Pop
                   then accu
1003 db56cfc4 Iustin Pop
                   else new_accu
1004 db56cfc4 Iustin Pop
1005 97da6b71 Iustin Pop
-- | Compute result of changing all nodes of a DRBD instance.
1006 97da6b71 Iustin Pop
--
1007 97da6b71 Iustin Pop
-- Given the target primary and secondary node (which might be in a
1008 97da6b71 Iustin Pop
-- different group or not), this function will 'execute' all the
1009 97da6b71 Iustin Pop
-- required steps and assuming all operations succceed, will return
1010 97da6b71 Iustin Pop
-- the modified node and instance lists, the opcodes needed for this
1011 97da6b71 Iustin Pop
-- and the new group score.
1012 97da6b71 Iustin Pop
evacDrbdAllInner :: Node.List         -- ^ Cluster node list
1013 97da6b71 Iustin Pop
                 -> Instance.List     -- ^ Cluster instance list
1014 97da6b71 Iustin Pop
                 -> Instance.Instance -- ^ The instance to be moved
1015 97da6b71 Iustin Pop
                 -> Gdx               -- ^ The target group index
1016 97da6b71 Iustin Pop
                                      -- (which can differ from the
1017 97da6b71 Iustin Pop
                                      -- current group of the
1018 97da6b71 Iustin Pop
                                      -- instance)
1019 97da6b71 Iustin Pop
                 -> (Ndx, Ndx)        -- ^ Tuple of new
1020 97da6b71 Iustin Pop
                                      -- primary\/secondary nodes
1021 97da6b71 Iustin Pop
                 -> Result (Node.List, Instance.List, [OpCodes.OpCode], Score)
1022 97da6b71 Iustin Pop
evacDrbdAllInner nl il inst gdx (t_pdx, t_sdx) =
1023 97da6b71 Iustin Pop
  do
1024 97da6b71 Iustin Pop
    let primary = Container.find (Instance.pNode inst) nl
1025 97da6b71 Iustin Pop
        idx = Instance.idx inst
1026 97da6b71 Iustin Pop
    -- if the primary is offline, then we first failover
1027 97da6b71 Iustin Pop
    (nl1, inst1, ops1) <-
1028 97da6b71 Iustin Pop
        if Node.offline primary
1029 97da6b71 Iustin Pop
        then do
1030 97da6b71 Iustin Pop
          (nl', inst', _, _) <-
1031 97da6b71 Iustin Pop
              annotateResult "Failing over to the secondary" $
1032 97da6b71 Iustin Pop
              opToResult $ applyMove nl inst Failover
1033 97da6b71 Iustin Pop
          return (nl', inst', [Failover])
1034 97da6b71 Iustin Pop
        else return (nl, inst, [])
1035 97da6b71 Iustin Pop
    let (o1, o2, o3) = (ReplaceSecondary t_pdx,
1036 97da6b71 Iustin Pop
                        Failover,
1037 97da6b71 Iustin Pop
                        ReplaceSecondary t_sdx)
1038 97da6b71 Iustin Pop
    -- we now need to execute a replace secondary to the future
1039 97da6b71 Iustin Pop
    -- primary node
1040 97da6b71 Iustin Pop
    (nl2, inst2, _, _) <-
1041 97da6b71 Iustin Pop
        annotateResult "Changing secondary to new primary" $
1042 97da6b71 Iustin Pop
        opToResult $
1043 97da6b71 Iustin Pop
        applyMove nl1 inst1 o1
1044 97da6b71 Iustin Pop
    let ops2 = o1:ops1
1045 97da6b71 Iustin Pop
    -- we now execute another failover, the primary stays fixed now
1046 97da6b71 Iustin Pop
    (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
1047 97da6b71 Iustin Pop
                          opToResult $ applyMove nl2 inst2 o2
1048 97da6b71 Iustin Pop
    let ops3 = o2:ops2
1049 97da6b71 Iustin Pop
    -- and finally another replace secondary, to the final secondary
1050 97da6b71 Iustin Pop
    (nl4, inst4, _, _) <-
1051 97da6b71 Iustin Pop
        annotateResult "Changing secondary to final secondary" $
1052 97da6b71 Iustin Pop
        opToResult $
1053 97da6b71 Iustin Pop
        applyMove nl3 inst3 o3
1054 97da6b71 Iustin Pop
    let ops4 = o3:ops3
1055 97da6b71 Iustin Pop
        il' = Container.add idx inst4 il
1056 97da6b71 Iustin Pop
        ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
1057 97da6b71 Iustin Pop
    let nodes = Container.elems nl4
1058 97da6b71 Iustin Pop
        -- The fromJust below is ugly (it can fail nastily), but
1059 97da6b71 Iustin Pop
        -- at this point we should have any internal mismatches,
1060 97da6b71 Iustin Pop
        -- and adding a monad here would be quite involved
1061 97da6b71 Iustin Pop
        grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
1062 97da6b71 Iustin Pop
        new_cv = compCVNodes grpnodes
1063 97da6b71 Iustin Pop
    return (nl4, il', ops, new_cv)
1064 97da6b71 Iustin Pop
1065 c9a9b853 Iustin Pop
-- | Computes the nodes in a given group which are available for
1066 c9a9b853 Iustin Pop
-- allocation.
1067 c9a9b853 Iustin Pop
availableGroupNodes :: [(Gdx, [Ndx])] -- ^ Group index/node index assoc list
1068 c9a9b853 Iustin Pop
                    -> IntSet.IntSet  -- ^ Nodes that are excluded
1069 c9a9b853 Iustin Pop
                    -> Gdx            -- ^ The group for which we
1070 c9a9b853 Iustin Pop
                                      -- query the nodes
1071 c9a9b853 Iustin Pop
                    -> Result [Ndx]   -- ^ List of available node indices
1072 c9a9b853 Iustin Pop
availableGroupNodes group_nodes excl_ndx gdx = do
1073 47eed3f4 Iustin Pop
  local_nodes <- maybe (Bad $ "Can't find group with index " ++ show gdx)
1074 47eed3f4 Iustin Pop
                 Ok (lookup gdx group_nodes)
1075 47eed3f4 Iustin Pop
  let avail_nodes = filter (not . flip IntSet.member excl_ndx) local_nodes
1076 47eed3f4 Iustin Pop
  return avail_nodes
1077 47eed3f4 Iustin Pop
1078 47eed3f4 Iustin Pop
-- | Updates the evac solution with the results of an instance
1079 47eed3f4 Iustin Pop
-- evacuation.
1080 47eed3f4 Iustin Pop
updateEvacSolution :: (Node.List, Instance.List, EvacSolution)
1081 5440c877 Iustin Pop
                   -> Idx
1082 47eed3f4 Iustin Pop
                   -> Result (Node.List, Instance.List, [OpCodes.OpCode])
1083 47eed3f4 Iustin Pop
                   -> (Node.List, Instance.List, EvacSolution)
1084 5440c877 Iustin Pop
updateEvacSolution (nl, il, es) idx (Bad msg) =
1085 5440c877 Iustin Pop
    (nl, il, es { esFailed = (idx, msg):esFailed es})
1086 5440c877 Iustin Pop
updateEvacSolution (_, _, es) idx (Ok (nl, il, opcodes)) =
1087 5440c877 Iustin Pop
    (nl, il, es { esMoved = new_elem:esMoved es
1088 47eed3f4 Iustin Pop
                , esOpCodes = [opcodes]:esOpCodes es })
1089 5440c877 Iustin Pop
     where inst = Container.find idx il
1090 5440c877 Iustin Pop
           new_elem = (idx,
1091 5440c877 Iustin Pop
                       instancePriGroup nl inst,
1092 5440c877 Iustin Pop
                       Instance.allNodes inst)
1093 47eed3f4 Iustin Pop
1094 47eed3f4 Iustin Pop
-- | Node-evacuation IAllocator mode main function.
1095 47eed3f4 Iustin Pop
tryNodeEvac :: Group.List    -- ^ The cluster groups
1096 47eed3f4 Iustin Pop
            -> Node.List     -- ^ The node list (cluster-wide, not per group)
1097 47eed3f4 Iustin Pop
            -> Instance.List -- ^ Instance list (cluster-wide)
1098 47eed3f4 Iustin Pop
            -> EvacMode      -- ^ The evacuation mode
1099 47eed3f4 Iustin Pop
            -> [Idx]         -- ^ List of instance (indices) to be evacuated
1100 4036f63a Iustin Pop
            -> Result (Node.List, Instance.List, EvacSolution)
1101 47eed3f4 Iustin Pop
tryNodeEvac _ ini_nl ini_il mode idxs =
1102 47eed3f4 Iustin Pop
    let evac_ndx = nodesToEvacuate ini_il mode idxs
1103 47eed3f4 Iustin Pop
        offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
1104 47eed3f4 Iustin Pop
        excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline
1105 47eed3f4 Iustin Pop
        group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
1106 47eed3f4 Iustin Pop
                                             (Container.elems nl))) $
1107 47eed3f4 Iustin Pop
                      splitCluster ini_nl ini_il
1108 4036f63a Iustin Pop
        (fin_nl, fin_il, esol) =
1109 47eed3f4 Iustin Pop
            foldl' (\state@(nl, il, _) inst ->
1110 a86fbf36 Iustin Pop
                        let gdx = instancePriGroup nl inst in
1111 5440c877 Iustin Pop
                        updateEvacSolution state (Instance.idx inst) $
1112 c9a9b853 Iustin Pop
                        availableGroupNodes group_ndx
1113 a86fbf36 Iustin Pop
                          excl_ndx gdx >>=
1114 a86fbf36 Iustin Pop
                        nodeEvacInstance nl il mode inst gdx
1115 47eed3f4 Iustin Pop
                   )
1116 47eed3f4 Iustin Pop
            (ini_nl, ini_il, emptyEvacSolution)
1117 47eed3f4 Iustin Pop
            (map (`Container.find` ini_il) idxs)
1118 4036f63a Iustin Pop
    in return (fin_nl, fin_il, reverseEvacSolution esol)
1119 47eed3f4 Iustin Pop
1120 20b376ff Iustin Pop
-- | Change-group IAllocator mode main function.
1121 20b376ff Iustin Pop
--
1122 20b376ff Iustin Pop
-- This is very similar to 'tryNodeEvac', the only difference is that
1123 20b376ff Iustin Pop
-- we don't choose as target group the current instance group, but
1124 20b376ff Iustin Pop
-- instead:
1125 20b376ff Iustin Pop
--
1126 20b376ff Iustin Pop
--   1. at the start of the function, we compute which are the target
1127 20b376ff Iustin Pop
--   groups; either no groups were passed in, in which case we choose
1128 20b376ff Iustin Pop
--   all groups out of which we don't evacuate instance, or there were
1129 20b376ff Iustin Pop
--   some groups passed, in which case we use those
1130 20b376ff Iustin Pop
--
1131 20b376ff Iustin Pop
--   2. for each instance, we use 'findBestAllocGroup' to choose the
1132 20b376ff Iustin Pop
--   best group to hold the instance, and then we do what
1133 20b376ff Iustin Pop
--   'tryNodeEvac' does, except for this group instead of the current
1134 20b376ff Iustin Pop
--   instance group.
1135 20b376ff Iustin Pop
--
1136 20b376ff Iustin Pop
-- Note that the correct behaviour of this function relies on the
1137 20b376ff Iustin Pop
-- function 'nodeEvacInstance' to be able to do correctly both
1138 20b376ff Iustin Pop
-- intra-group and inter-group moves when passed the 'ChangeAll' mode.
1139 20b376ff Iustin Pop
tryChangeGroup :: Group.List    -- ^ The cluster groups
1140 20b376ff Iustin Pop
               -> Node.List     -- ^ The node list (cluster-wide)
1141 20b376ff Iustin Pop
               -> Instance.List -- ^ Instance list (cluster-wide)
1142 20b376ff Iustin Pop
               -> [Gdx]         -- ^ Target groups; if empty, any
1143 20b376ff Iustin Pop
                                -- groups not being evacuated
1144 20b376ff Iustin Pop
               -> [Idx]         -- ^ List of instance (indices) to be evacuated
1145 4036f63a Iustin Pop
               -> Result (Node.List, Instance.List, EvacSolution)
1146 20b376ff Iustin Pop
tryChangeGroup gl ini_nl ini_il gdxs idxs =
1147 20b376ff Iustin Pop
    let evac_gdxs = nub $ map (instancePriGroup ini_nl .
1148 20b376ff Iustin Pop
                               flip Container.find ini_il) idxs
1149 20b376ff Iustin Pop
        target_gdxs = (if null gdxs
1150 20b376ff Iustin Pop
                       then Container.keys gl
1151 20b376ff Iustin Pop
                       else gdxs) \\ evac_gdxs
1152 20b376ff Iustin Pop
        offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
1153 20b376ff Iustin Pop
        excl_ndx = foldl' (flip IntSet.insert) IntSet.empty offline
1154 20b376ff Iustin Pop
        group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
1155 20b376ff Iustin Pop
                                             (Container.elems nl))) $
1156 20b376ff Iustin Pop
                      splitCluster ini_nl ini_il
1157 4036f63a Iustin Pop
        (fin_nl, fin_il, esol) =
1158 20b376ff Iustin Pop
            foldl' (\state@(nl, il, _) inst ->
1159 20b376ff Iustin Pop
                        let solution = do
1160 20b376ff Iustin Pop
                              let ncnt = Instance.requiredNodes $
1161 20b376ff Iustin Pop
                                         Instance.diskTemplate inst
1162 20b376ff Iustin Pop
                              (gdx, _, _) <- findBestAllocGroup gl nl il
1163 20b376ff Iustin Pop
                                             (Just target_gdxs) inst ncnt
1164 20b376ff Iustin Pop
                              av_nodes <- availableGroupNodes group_ndx
1165 20b376ff Iustin Pop
                                          excl_ndx gdx
1166 a86fbf36 Iustin Pop
                              nodeEvacInstance nl il ChangeAll inst
1167 a86fbf36 Iustin Pop
                                       gdx av_nodes
1168 5440c877 Iustin Pop
                        in updateEvacSolution state
1169 5440c877 Iustin Pop
                               (Instance.idx inst) solution
1170 20b376ff Iustin Pop
                   )
1171 20b376ff Iustin Pop
            (ini_nl, ini_il, emptyEvacSolution)
1172 20b376ff Iustin Pop
            (map (`Container.find` ini_il) idxs)
1173 4036f63a Iustin Pop
    in return (fin_nl, fin_il, reverseEvacSolution esol)
1174 20b376ff Iustin Pop
1175 525bfb36 Iustin Pop
-- | Recursively place instances on the cluster until we're out of space.
1176 3ce8009a Iustin Pop
iterateAlloc :: Node.List
1177 3ce8009a Iustin Pop
             -> Instance.List
1178 8f48f67d Iustin Pop
             -> Maybe Int
1179 3ce8009a Iustin Pop
             -> Instance.Instance
1180 41b5c85a Iustin Pop
             -> AllocNodes
1181 3ce8009a Iustin Pop
             -> [Instance.Instance]
1182 d5ccec02 Iustin Pop
             -> [CStats]
1183 40ee14bc Iustin Pop
             -> Result AllocResult
1184 8f48f67d Iustin Pop
iterateAlloc nl il limit newinst allocnodes ixes cstats =
1185 3ce8009a Iustin Pop
      let depth = length ixes
1186 3ce8009a Iustin Pop
          newname = printf "new-%d" depth::String
1187 3ce8009a Iustin Pop
          newidx = length (Container.elems il) + depth
1188 3ce8009a Iustin Pop
          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
1189 8f48f67d Iustin Pop
          newlimit = fmap (flip (-) 1) limit
1190 41b5c85a Iustin Pop
      in case tryAlloc nl il newi2 allocnodes of
1191 3ce8009a Iustin Pop
           Bad s -> Bad s
1192 85d0ddc3 Iustin Pop
           Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
1193 8f48f67d Iustin Pop
               let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in
1194 3ce8009a Iustin Pop
               case sols3 of
1195 8f48f67d Iustin Pop
                 [] -> newsol
1196 a334d536 Iustin Pop
                 (xnl, xi, _, _):[] ->
1197 8f48f67d Iustin Pop
                     if limit == Just 0
1198 8f48f67d Iustin Pop
                     then newsol
1199 8f48f67d Iustin Pop
                     else iterateAlloc xnl (Container.add newidx xi il)
1200 8f48f67d Iustin Pop
                          newlimit newinst allocnodes (xi:ixes)
1201 8f48f67d Iustin Pop
                          (totalResources xnl:cstats)
1202 3ce8009a Iustin Pop
                 _ -> Bad "Internal error: multiple solutions for single\
1203 3ce8009a Iustin Pop
                          \ allocation"
1204 3ce8009a Iustin Pop
1205 525bfb36 Iustin Pop
-- | The core of the tiered allocation mode.
1206 3ce8009a Iustin Pop
tieredAlloc :: Node.List
1207 3ce8009a Iustin Pop
            -> Instance.List
1208 8f48f67d Iustin Pop
            -> Maybe Int
1209 3ce8009a Iustin Pop
            -> Instance.Instance
1210 41b5c85a Iustin Pop
            -> AllocNodes
1211 3ce8009a Iustin Pop
            -> [Instance.Instance]
1212 d5ccec02 Iustin Pop
            -> [CStats]
1213 40ee14bc Iustin Pop
            -> Result AllocResult
1214 8f48f67d Iustin Pop
tieredAlloc nl il limit newinst allocnodes ixes cstats =
1215 8f48f67d Iustin Pop
    case iterateAlloc nl il limit newinst allocnodes ixes cstats of
1216 3ce8009a Iustin Pop
      Bad s -> Bad s
1217 d5ccec02 Iustin Pop
      Ok (errs, nl', il', ixes', cstats') ->
1218 8f48f67d Iustin Pop
          let newsol = Ok (errs, nl', il', ixes', cstats')
1219 8f48f67d Iustin Pop
              ixes_cnt = length ixes'
1220 8f48f67d Iustin Pop
              (stop, newlimit) = case limit of
1221 8f48f67d Iustin Pop
                                   Nothing -> (False, Nothing)
1222 8f48f67d Iustin Pop
                                   Just n -> (n <= ixes_cnt,
1223 8f48f67d Iustin Pop
                                              Just (n - ixes_cnt)) in
1224 8f48f67d Iustin Pop
          if stop then newsol else
1225 3ce8009a Iustin Pop
          case Instance.shrinkByType newinst . fst . last $
1226 3ce8009a Iustin Pop
               sortBy (comparing snd) errs of
1227 8f48f67d Iustin Pop
            Bad _ -> newsol
1228 8f48f67d Iustin Pop
            Ok newinst' -> tieredAlloc nl' il' newlimit
1229 8f48f67d Iustin Pop
                           newinst' allocnodes ixes' cstats'
1230 3ce8009a Iustin Pop
1231 9188aeef Iustin Pop
-- * Formatting functions
1232 e4f08c46 Iustin Pop
1233 e4f08c46 Iustin Pop
-- | Given the original and final nodes, computes the relocation description.
1234 c9926b22 Iustin Pop
computeMoves :: Instance.Instance -- ^ The instance to be moved
1235 c9926b22 Iustin Pop
             -> String -- ^ The instance name
1236 668c03b3 Iustin Pop
             -> IMove  -- ^ The move being performed
1237 e4f08c46 Iustin Pop
             -> String -- ^ New primary
1238 e4f08c46 Iustin Pop
             -> String -- ^ New secondary
1239 e4f08c46 Iustin Pop
             -> (String, [String])
1240 e4f08c46 Iustin Pop
                -- ^ Tuple of moves and commands list; moves is containing
1241 e4f08c46 Iustin Pop
                -- either @/f/@ for failover or @/r:name/@ for replace
1242 e4f08c46 Iustin Pop
                -- secondary, while the command list holds gnt-instance
1243 e4f08c46 Iustin Pop
                -- commands (without that prefix), e.g \"@failover instance1@\"
1244 668c03b3 Iustin Pop
computeMoves i inam mv c d =
1245 668c03b3 Iustin Pop
    case mv of
1246 668c03b3 Iustin Pop
      Failover -> ("f", [mig])
1247 668c03b3 Iustin Pop
      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
1248 668c03b3 Iustin Pop
      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
1249 668c03b3 Iustin Pop
      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
1250 668c03b3 Iustin Pop
      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
1251 c9926b22 Iustin Pop
    where morf = if Instance.running i then "migrate" else "failover"
1252 c9926b22 Iustin Pop
          mig = printf "%s -f %s" morf inam::String
1253 c9926b22 Iustin Pop
          rep n = printf "replace-disks -n %s %s" n inam
1254 e4f08c46 Iustin Pop
1255 9188aeef Iustin Pop
-- | Converts a placement to string format.
1256 9188aeef Iustin Pop
printSolutionLine :: Node.List     -- ^ The node list
1257 9188aeef Iustin Pop
                  -> Instance.List -- ^ The instance list
1258 9188aeef Iustin Pop
                  -> Int           -- ^ Maximum node name length
1259 9188aeef Iustin Pop
                  -> Int           -- ^ Maximum instance name length
1260 9188aeef Iustin Pop
                  -> Placement     -- ^ The current placement
1261 9188aeef Iustin Pop
                  -> Int           -- ^ The index of the placement in
1262 9188aeef Iustin Pop
                                   -- the solution
1263 db1bcfe8 Iustin Pop
                  -> (String, [String])
1264 db1bcfe8 Iustin Pop
printSolutionLine nl il nmlen imlen plc pos =
1265 ca8258d9 Iustin Pop
    let
1266 ca8258d9 Iustin Pop
        pmlen = (2*nmlen + 1)
1267 668c03b3 Iustin Pop
        (i, p, s, mv, c) = plc
1268 ca8258d9 Iustin Pop
        inst = Container.find i il
1269 14c972c7 Iustin Pop
        inam = Instance.alias inst
1270 14c972c7 Iustin Pop
        npri = Node.alias $ Container.find p nl
1271 14c972c7 Iustin Pop
        nsec = Node.alias $ Container.find s nl
1272 14c972c7 Iustin Pop
        opri = Node.alias $ Container.find (Instance.pNode inst) nl
1273 14c972c7 Iustin Pop
        osec = Node.alias $ Container.find (Instance.sNode inst) nl
1274 668c03b3 Iustin Pop
        (moves, cmds) =  computeMoves inst inam mv npri nsec
1275 9f6dcdea Iustin Pop
        ostr = printf "%s:%s" opri osec::String
1276 9f6dcdea Iustin Pop
        nstr = printf "%s:%s" npri nsec::String
1277 ca8258d9 Iustin Pop
    in
1278 ab271fc1 Iustin Pop
      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
1279 ab271fc1 Iustin Pop
       pos imlen inam pmlen ostr
1280 ca8258d9 Iustin Pop
       pmlen nstr c moves,
1281 ca8258d9 Iustin Pop
       cmds)
1282 ca8258d9 Iustin Pop
1283 0e8ae201 Iustin Pop
-- | Return the instance and involved nodes in an instance move.
1284 77ecfa82 Iustin Pop
--
1285 77ecfa82 Iustin Pop
-- Note that the output list length can vary, and is not required nor
1286 77ecfa82 Iustin Pop
-- guaranteed to be of any specific length.
1287 77ecfa82 Iustin Pop
involvedNodes :: Instance.List -- ^ Instance list, used for retrieving
1288 77ecfa82 Iustin Pop
                               -- the instance from its index; note
1289 77ecfa82 Iustin Pop
                               -- that this /must/ be the original
1290 77ecfa82 Iustin Pop
                               -- instance list, so that we can
1291 77ecfa82 Iustin Pop
                               -- retrieve the old nodes
1292 77ecfa82 Iustin Pop
              -> Placement     -- ^ The placement we're investigating,
1293 77ecfa82 Iustin Pop
                               -- containing the new nodes and
1294 77ecfa82 Iustin Pop
                               -- instance index
1295 77ecfa82 Iustin Pop
              -> [Ndx]         -- ^ Resulting list of node indices
1296 0e8ae201 Iustin Pop
involvedNodes il plc =
1297 3173c987 Iustin Pop
    let (i, np, ns, _, _) = plc
1298 0e8ae201 Iustin Pop
        inst = Container.find i il
1299 77ecfa82 Iustin Pop
    in nub $ [np, ns] ++ Instance.allNodes inst
1300 0e8ae201 Iustin Pop
1301 0e8ae201 Iustin Pop
-- | Inner function for splitJobs, that either appends the next job to
1302 0e8ae201 Iustin Pop
-- the current jobset, or starts a new jobset.
1303 0e8ae201 Iustin Pop
mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
1304 924f9c16 Iustin Pop
mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
1305 924f9c16 Iustin Pop
mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
1306 0e8ae201 Iustin Pop
    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
1307 0e8ae201 Iustin Pop
    | otherwise = ([n]:cjs, ndx)
1308 0e8ae201 Iustin Pop
1309 0e8ae201 Iustin Pop
-- | Break a list of moves into independent groups. Note that this
1310 0e8ae201 Iustin Pop
-- will reverse the order of jobs.
1311 0e8ae201 Iustin Pop
splitJobs :: [MoveJob] -> [JobSet]
1312 0e8ae201 Iustin Pop
splitJobs = fst . foldl mergeJobs ([], [])
1313 0e8ae201 Iustin Pop
1314 0e8ae201 Iustin Pop
-- | Given a list of commands, prefix them with @gnt-instance@ and
1315 0e8ae201 Iustin Pop
-- also beautify the display a little.
1316 0e8ae201 Iustin Pop
formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
1317 924f9c16 Iustin Pop
formatJob jsn jsl (sn, (_, _, _, cmds)) =
1318 0e8ae201 Iustin Pop
    let out =
1319 0e8ae201 Iustin Pop
            printf "  echo job %d/%d" jsn sn:
1320 0e8ae201 Iustin Pop
            printf "  check":
1321 0e8ae201 Iustin Pop
            map ("  gnt-instance " ++) cmds
1322 0e8ae201 Iustin Pop
    in if sn == 1
1323 0e8ae201 Iustin Pop
       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
1324 0e8ae201 Iustin Pop
       else out
1325 0e8ae201 Iustin Pop
1326 9188aeef Iustin Pop
-- | Given a list of commands, prefix them with @gnt-instance@ and
1327 9188aeef Iustin Pop
-- also beautify the display a little.
1328 0e8ae201 Iustin Pop
formatCmds :: [JobSet] -> String
1329 9f6dcdea Iustin Pop
formatCmds =
1330 9f6dcdea Iustin Pop
    unlines .
1331 0e8ae201 Iustin Pop
    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
1332 0e8ae201 Iustin Pop
                             (zip [1..] js)) .
1333 9f6dcdea Iustin Pop
    zip [1..]
1334 142538ff Iustin Pop
1335 e4f08c46 Iustin Pop
-- | Print the node list.
1336 e98fb766 Iustin Pop
printNodes :: Node.List -> [String] -> String
1337 e98fb766 Iustin Pop
printNodes nl fs =
1338 6dfa04fd Iustin Pop
    let fields = case fs of
1339 6dfa04fd Iustin Pop
          [] -> Node.defaultFields
1340 6dfa04fd Iustin Pop
          "+":rest -> Node.defaultFields ++ rest
1341 6dfa04fd Iustin Pop
          _ -> fs
1342 5182e970 Iustin Pop
        snl = sortBy (comparing Node.idx) (Container.elems nl)
1343 e98fb766 Iustin Pop
        (header, isnum) = unzip $ map Node.showHeader fields
1344 c5f7412e Iustin Pop
    in unlines . map ((:) ' ' .  intercalate " ") $
1345 e98fb766 Iustin Pop
       formatTable (header:map (Node.list fields) snl) isnum
1346 e4f08c46 Iustin Pop
1347 507fda3f Iustin Pop
-- | Print the instance list.
1348 507fda3f Iustin Pop
printInsts :: Node.List -> Instance.List -> String
1349 507fda3f Iustin Pop
printInsts nl il =
1350 5182e970 Iustin Pop
    let sil = sortBy (comparing Instance.idx) (Container.elems il)
1351 30ff0c73 Iustin Pop
        helper inst = [ if Instance.running inst then "R" else " "
1352 30ff0c73 Iustin Pop
                      , Instance.name inst
1353 30ff0c73 Iustin Pop
                      , Container.nameOf nl (Instance.pNode inst)
1354 5182e970 Iustin Pop
                      , let sdx = Instance.sNode inst
1355 5182e970 Iustin Pop
                        in if sdx == Node.noSecondary
1356 5182e970 Iustin Pop
                           then  ""
1357 5182e970 Iustin Pop
                           else Container.nameOf nl sdx
1358 0e09422b Iustin Pop
                      , if Instance.autoBalance inst then "Y" else "N"
1359 30ff0c73 Iustin Pop
                      , printf "%3d" $ Instance.vcpus inst
1360 30ff0c73 Iustin Pop
                      , printf "%5d" $ Instance.mem inst
1361 30ff0c73 Iustin Pop
                      , printf "%5d" $ Instance.dsk inst `div` 1024
1362 30ff0c73 Iustin Pop
                      , printf "%5.3f" lC
1363 30ff0c73 Iustin Pop
                      , printf "%5.3f" lM
1364 30ff0c73 Iustin Pop
                      , printf "%5.3f" lD
1365 30ff0c73 Iustin Pop
                      , printf "%5.3f" lN
1366 30ff0c73 Iustin Pop
                      ]
1367 30ff0c73 Iustin Pop
            where DynUtil lC lM lD lN = Instance.util inst
1368 93439b1c Iustin Pop
        header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
1369 93439b1c Iustin Pop
                 , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
1370 93439b1c Iustin Pop
        isnum = False:False:False:False:False:repeat True
1371 c5f7412e Iustin Pop
    in unlines . map ((:) ' ' . intercalate " ") $
1372 c5f7412e Iustin Pop
       formatTable (header:map helper sil) isnum
1373 507fda3f Iustin Pop
1374 9188aeef Iustin Pop
-- | Shows statistics for a given node list.
1375 262a08a2 Iustin Pop
printStats :: Node.List -> String
1376 e4f08c46 Iustin Pop
printStats nl =
1377 9bb5721c Iustin Pop
    let dcvs = compDetailedCV $ Container.elems nl
1378 8a3b30ca Iustin Pop
        (weights, names) = unzip detailedCVInfo
1379 8a3b30ca Iustin Pop
        hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
1380 8a3b30ca Iustin Pop
        formatted = map (\(w, header, val) ->
1381 8a3b30ca Iustin Pop
                             printf "%s=%.8f(x%.2f)" header val w::String) hd
1382 fca250e9 Iustin Pop
    in intercalate ", " formatted
1383 6b20875c Iustin Pop
1384 6b20875c Iustin Pop
-- | Convert a placement into a list of OpCodes (basically a job).
1385 179c0828 Iustin Pop
iMoveToJob :: Node.List        -- ^ The node list; only used for node
1386 179c0828 Iustin Pop
                               -- names, so any version is good
1387 179c0828 Iustin Pop
                               -- (before or after the operation)
1388 179c0828 Iustin Pop
           -> Instance.List    -- ^ The instance list; also used for
1389 179c0828 Iustin Pop
                               -- names only
1390 179c0828 Iustin Pop
           -> Idx              -- ^ The index of the instance being
1391 179c0828 Iustin Pop
                               -- moved
1392 179c0828 Iustin Pop
           -> IMove            -- ^ The actual move to be described
1393 179c0828 Iustin Pop
           -> [OpCodes.OpCode] -- ^ The list of opcodes equivalent to
1394 179c0828 Iustin Pop
                               -- the given move
1395 3e4480e0 Iustin Pop
iMoveToJob nl il idx move =
1396 dfbbd43a Iustin Pop
    let inst = Container.find idx il
1397 3e4480e0 Iustin Pop
        iname = Instance.name inst
1398 3e4480e0 Iustin Pop
        lookNode  = Just . Container.nameOf nl
1399 bbe9758d Iustin Pop
        opF = OpCodes.OpInstanceMigrate iname True False True Nothing
1400 10028866 René Nussbaumer
        opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
1401 6b20875c Iustin Pop
                OpCodes.ReplaceNewSecondary [] Nothing
1402 6b20875c Iustin Pop
    in case move of
1403 6b20875c Iustin Pop
         Failover -> [ opF ]
1404 6b20875c Iustin Pop
         ReplacePrimary np -> [ opF, opR np, opF ]
1405 6b20875c Iustin Pop
         ReplaceSecondary ns -> [ opR ns ]
1406 6b20875c Iustin Pop
         ReplaceAndFailover np -> [ opR np, opF ]
1407 6b20875c Iustin Pop
         FailoverAndReplace ns -> [ opF, opR ns ]
1408 32b8d9c0 Iustin Pop
1409 949397c8 Iustin Pop
-- * Node group functions
1410 949397c8 Iustin Pop
1411 525bfb36 Iustin Pop
-- | Computes the group of an instance.
1412 10ef6b4e Iustin Pop
instanceGroup :: Node.List -> Instance.Instance -> Result Gdx
1413 32b8d9c0 Iustin Pop
instanceGroup nl i =
1414 32b8d9c0 Iustin Pop
  let sidx = Instance.sNode i
1415 32b8d9c0 Iustin Pop
      pnode = Container.find (Instance.pNode i) nl
1416 32b8d9c0 Iustin Pop
      snode = if sidx == Node.noSecondary
1417 32b8d9c0 Iustin Pop
              then pnode
1418 32b8d9c0 Iustin Pop
              else Container.find sidx nl
1419 10ef6b4e Iustin Pop
      pgroup = Node.group pnode
1420 10ef6b4e Iustin Pop
      sgroup = Node.group snode
1421 10ef6b4e Iustin Pop
  in if pgroup /= sgroup
1422 10ef6b4e Iustin Pop
     then fail ("Instance placed accross two node groups, primary " ++
1423 10ef6b4e Iustin Pop
                show pgroup ++ ", secondary " ++ show sgroup)
1424 10ef6b4e Iustin Pop
     else return pgroup
1425 32b8d9c0 Iustin Pop
1426 525bfb36 Iustin Pop
-- | Computes the group of an instance per the primary node.
1427 4bc33d60 Iustin Pop
instancePriGroup :: Node.List -> Instance.Instance -> Gdx
1428 4bc33d60 Iustin Pop
instancePriGroup nl i =
1429 4bc33d60 Iustin Pop
  let pnode = Container.find (Instance.pNode i) nl
1430 4bc33d60 Iustin Pop
  in  Node.group pnode
1431 4bc33d60 Iustin Pop
1432 32b8d9c0 Iustin Pop
-- | Compute the list of badly allocated instances (split across node
1433 525bfb36 Iustin Pop
-- groups).
1434 32b8d9c0 Iustin Pop
findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
1435 2a8e2dc9 Iustin Pop
findSplitInstances nl =
1436 2a8e2dc9 Iustin Pop
  filter (not . isOk . instanceGroup nl) . Container.elems
1437 f4161783 Iustin Pop
1438 525bfb36 Iustin Pop
-- | Splits a cluster into the component node groups.
1439 f4161783 Iustin Pop
splitCluster :: Node.List -> Instance.List ->
1440 10ef6b4e Iustin Pop
                [(Gdx, (Node.List, Instance.List))]
1441 f4161783 Iustin Pop
splitCluster nl il =
1442 f4161783 Iustin Pop
  let ngroups = Node.computeGroups (Container.elems nl)
1443 f4161783 Iustin Pop
  in map (\(guuid, nodes) ->
1444 f4161783 Iustin Pop
           let nidxs = map Node.idx nodes
1445 f4161783 Iustin Pop
               nodes' = zip nidxs nodes
1446 f4161783 Iustin Pop
               instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
1447 cb0c77ff Iustin Pop
           in (guuid, (Container.fromList nodes', instances))) ngroups
1448 1f4ae205 Iustin Pop
1449 63a78055 Iustin Pop
-- | Compute the list of nodes that are to be evacuated, given a list
1450 63a78055 Iustin Pop
-- of instances and an evacuation mode.
1451 63a78055 Iustin Pop
nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list
1452 63a78055 Iustin Pop
                -> EvacMode      -- ^ The evacuation mode we're using
1453 63a78055 Iustin Pop
                -> [Idx]         -- ^ List of instance indices being evacuated
1454 63a78055 Iustin Pop
                -> IntSet.IntSet -- ^ Set of node indices
1455 63a78055 Iustin Pop
nodesToEvacuate il mode =
1456 63a78055 Iustin Pop
    IntSet.delete Node.noSecondary .
1457 63a78055 Iustin Pop
    foldl' (\ns idx ->
1458 63a78055 Iustin Pop
                let i = Container.find idx il
1459 63a78055 Iustin Pop
                    pdx = Instance.pNode i
1460 63a78055 Iustin Pop
                    sdx = Instance.sNode i
1461 63a78055 Iustin Pop
                    dt = Instance.diskTemplate i
1462 63a78055 Iustin Pop
                    withSecondary = case dt of
1463 63a78055 Iustin Pop
                                      DTDrbd8 -> IntSet.insert sdx ns
1464 63a78055 Iustin Pop
                                      _ -> ns
1465 63a78055 Iustin Pop
                in case mode of
1466 63a78055 Iustin Pop
                     ChangePrimary   -> IntSet.insert pdx ns
1467 63a78055 Iustin Pop
                     ChangeSecondary -> withSecondary
1468 63a78055 Iustin Pop
                     ChangeAll       -> IntSet.insert pdx withSecondary
1469 63a78055 Iustin Pop
           ) IntSet.empty