code.grnet.gr Git - ganeti-local/blob - Ganeti/HTools/Cluster.hs

   1 {-| Implementation of cluster-wide logic.
   2
   3 This module holds all pure cluster-logic; I\/O related functionality
   4 goes into the "Main" module for the individual binaries.
   5
   6 -}
   7
   8 {-
   9
  10 Copyright (C) 2009 Google Inc.
  11
  12 This program is free software; you can redistribute it and/or modify
  13 it under the terms of the GNU General Public License as published by
  14 the Free Software Foundation; either version 2 of the License, or
  15 (at your option) any later version.
  16
  17 This program is distributed in the hope that it will be useful, but
  18 WITHOUT ANY WARRANTY; without even the implied warranty of
  19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20 General Public License for more details.
  21
  22 You should have received a copy of the GNU General Public License
  23 along with this program; if not, write to the Free Software
  24 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  25 02110-1301, USA.
  26
  27 -}
  28
  29 module Ganeti.HTools.Cluster
  30     (
  31      -- * Types
  32       AllocSolution
  33     , Table(..)
  34     , CStats(..)
  35     , AllocStats
  36     -- * Generic functions
  37     , totalResources
  38     , computeAllocationDelta
  39     -- * First phase functions
  40     , computeBadItems
  41     -- * Second phase functions
  42     , printSolution
  43     , printSolutionLine
  44     , formatCmds
  45     , involvedNodes
  46     , splitJobs
  47     -- * Display functions
  48     , printNodes
  49     , printInsts
  50     -- * Balacing functions
  51     , checkMove
  52     , doNextBalance
  53     , tryBalance
  54     , compCV
  55     , printStats
  56     , iMoveToJob
  57     -- * IAllocator functions
  58     , tryAlloc
  59     , tryReloc
  60     , tryEvac
  61     , collapseFailures
  62     ) where
  63
  64 import Data.List
  65 import Data.Ord (comparing)
  66 import Text.Printf (printf)
  67 import Control.Monad
  68
  69 import qualified Ganeti.HTools.Container as Container
  70 import qualified Ganeti.HTools.Instance as Instance
  71 import qualified Ganeti.HTools.Node as Node
  72 import Ganeti.HTools.Types
  73 import Ganeti.HTools.Utils
  74 import qualified Ganeti.OpCodes as OpCodes
  75
  76 -- * Types
  77
  78 -- | Allocation\/relocation solution.
  79 type AllocSolution = ([FailMode], Int, [(Score, Node.AllocElement)])
  80
  81 -- | The complete state for the balancing solution
  82 data Table = Table Node.List Instance.List Score [Placement]
  83              deriving (Show)
  84
  85 data CStats = CStats { csFmem :: Int    -- ^ Cluster free mem
  86                      , csFdsk :: Int    -- ^ Cluster free disk
  87                      , csAmem :: Int    -- ^ Cluster allocatable mem
  88                      , csAdsk :: Int    -- ^ Cluster allocatable disk
  89                      , csAcpu :: Int    -- ^ Cluster allocatable cpus
  90                      , csMmem :: Int    -- ^ Max node allocatable mem
  91                      , csMdsk :: Int    -- ^ Max node allocatable disk
  92                      , csMcpu :: Int    -- ^ Max node allocatable cpu
  93                      , csImem :: Int    -- ^ Instance used mem
  94                      , csIdsk :: Int    -- ^ Instance used disk
  95                      , csIcpu :: Int    -- ^ Instance used cpu
  96                      , csTmem :: Double -- ^ Cluster total mem
  97                      , csTdsk :: Double -- ^ Cluster total disk
  98                      , csTcpu :: Double -- ^ Cluster total cpus
  99                      , csVcpu :: Int    -- ^ Cluster virtual cpus (if
 100                                         -- node pCpu has been set,
 101                                         -- otherwise -1)
 102                      , csXmem :: Int    -- ^ Unnacounted for mem
 103                      , csNmem :: Int    -- ^ Node own memory
 104                      , csScore :: Score -- ^ The cluster score
 105                      , csNinst :: Int   -- ^ The total number of instances
 106                      }
 107
 108 -- | Currently used, possibly to allocate, unallocable
 109 type AllocStats = (RSpec, RSpec, RSpec)
 110
 111 -- * Utility functions
 112
 113 -- | Verifies the N+1 status and return the affected nodes.
 114 verifyN1 :: [Node.Node] -> [Node.Node]
 115 verifyN1 = filter Node.failN1
 116
 117 {-| Computes the pair of bad nodes and instances.
 118
 119 The bad node list is computed via a simple 'verifyN1' check, and the
 120 bad instance list is the list of primary and secondary instances of
 121 those nodes.
 122
 123 -}
 124 computeBadItems :: Node.List -> Instance.List ->
 125                    ([Node.Node], [Instance.Instance])
 126 computeBadItems nl il =
 127   let bad_nodes = verifyN1 $ getOnline nl
 128       bad_instances = map (`Container.find` il) .
 129                       sort . nub $
 130                       concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
 131   in
 132     (bad_nodes, bad_instances)
 133
 134 -- | Zero-initializer for the CStats type
 135 emptyCStats :: CStats
 136 emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 137
 138 -- | Update stats with data from a new node
 139 updateCStats :: CStats -> Node.Node -> CStats
 140 updateCStats cs node =
 141     let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
 142                  csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
 143                  csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
 144                  csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
 145                  csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
 146                  csVcpu = x_vcpu,
 147                  csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
 148                }
 149             = cs
 150         inc_amem = Node.fMem node - Node.rMem node
 151         inc_amem' = if inc_amem > 0 then inc_amem else 0
 152         inc_adsk = Node.availDisk node
 153         inc_imem = truncate (Node.tMem node) - Node.nMem node
 154                    - Node.xMem node - Node.fMem node
 155         inc_icpu = Node.uCpu node
 156         inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
 157         inc_vcpu = Node.hiCpu node
 158
 159     in cs { csFmem = x_fmem + Node.fMem node
 160           , csFdsk = x_fdsk + Node.fDsk node
 161           , csAmem = x_amem + inc_amem'
 162           , csAdsk = x_adsk + inc_adsk
 163           , csAcpu = x_acpu
 164           , csMmem = max x_mmem inc_amem'
 165           , csMdsk = max x_mdsk inc_adsk
 166           , csMcpu = x_mcpu
 167           , csImem = x_imem + inc_imem
 168           , csIdsk = x_idsk + inc_idsk
 169           , csIcpu = x_icpu + inc_icpu
 170           , csTmem = x_tmem + Node.tMem node
 171           , csTdsk = x_tdsk + Node.tDsk node
 172           , csTcpu = x_tcpu + Node.tCpu node
 173           , csVcpu = if inc_vcpu == Node.noLimitInt
 174                      then Node.noLimitInt
 175                      else x_vcpu + inc_vcpu
 176           , csXmem = x_xmem + Node.xMem node
 177           , csNmem = x_nmem + Node.nMem node
 178           , csNinst = x_ninst + length (Node.pList node)
 179           }
 180
 181 -- | Compute the total free disk and memory in the cluster.
 182 totalResources :: Node.List -> CStats
 183 totalResources nl =
 184     let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
 185     in cs { csScore = compCV nl }
 186
 187 -- | Compute the delta between two cluster state.
 188 --
 189 -- This is used when doing allocations, to understand better the
 190 -- available cluster resources. The return value is a triple of the
 191 -- current used values, the delta that was still allocated, and what
 192 -- was left unallocated.
 193 computeAllocationDelta :: CStats -> CStats -> AllocStats
 194 computeAllocationDelta cini cfin =
 195     let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
 196         CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
 197                 csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
 198         rini = RSpec i_icpu i_imem i_idsk
 199         rfin = RSpec (f_icpu - i_icpu) (f_imem - i_imem) (f_idsk - i_idsk)
 200         un_cpu = if v_cpu == Node.noLimitInt
 201                  then Node.noLimitInt
 202                  else v_cpu - f_icpu
 203         runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
 204     in (rini, rfin, runa)
 205
 206 -- | The names of the individual elements in the CV list
 207 detailedCVNames :: [String]
 208 detailedCVNames = [ "free_mem_cv"
 209                   , "free_disk_cv"
 210                   , "n1_cnt"
 211                   , "reserved_mem_cv"
 212                   , "offline_all_cnt"
 213                   , "offline_pri_cnt"
 214                   , "vcpu_ratio_cv"
 215                   , "cpu_load_cv"
 216                   , "mem_load_cv"
 217                   , "disk_load_cv"
 218                   , "net_load_cv"
 219                   , "pri_tags_score"
 220                   ]
 221
 222 -- | Compute the mem and disk covariance.
 223 compDetailedCV :: Node.List -> [Double]
 224 compDetailedCV nl =
 225     let
 226         all_nodes = Container.elems nl
 227         (offline, nodes) = partition Node.offline all_nodes
 228         mem_l = map Node.pMem nodes
 229         dsk_l = map Node.pDsk nodes
 230         -- metric: memory covariance
 231         mem_cv = varianceCoeff mem_l
 232         -- metric: disk covariance
 233         dsk_cv = varianceCoeff dsk_l
 234         n1_l = length $ filter Node.failN1 nodes
 235         -- metric: count of failN1 nodes
 236         n1_score = fromIntegral n1_l::Double
 237         res_l = map Node.pRem nodes
 238         -- metric: reserved memory covariance
 239         res_cv = varianceCoeff res_l
 240         -- offline instances metrics
 241         offline_ipri = sum . map (length . Node.pList) $ offline
 242         offline_isec = sum . map (length . Node.sList) $ offline
 243         -- metric: count of instances on offline nodes
 244         off_score = fromIntegral (offline_ipri + offline_isec)::Double
 245         -- metric: count of primary instances on offline nodes (this
 246         -- helps with evacuation/failover of primary instances on
 247         -- 2-node clusters with one node offline)
 248         off_pri_score = fromIntegral offline_ipri::Double
 249         cpu_l = map Node.pCpu nodes
 250         -- metric: covariance of vcpu/pcpu ratio
 251         cpu_cv = varianceCoeff cpu_l
 252         -- metrics: covariance of cpu, memory, disk and network load
 253         (c_load, m_load, d_load, n_load) = unzip4 $
 254             map (\n ->
 255                      let DynUtil c1 m1 d1 n1 = Node.utilLoad n
 256                          DynUtil c2 m2 d2 n2 = Node.utilPool n
 257                      in (c1/c2, m1/m2, d1/d2, n1/n2)
 258                 ) nodes
 259         -- metric: conflicting instance count
 260         pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
 261         pri_tags_score = fromIntegral pri_tags_inst::Double
 262     in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
 263        , varianceCoeff c_load, varianceCoeff m_load
 264        , varianceCoeff d_load, varianceCoeff n_load
 265        , pri_tags_score ]
 266
 267 -- | Compute the /total/ variance.
 268 compCV :: Node.List -> Double
 269 compCV = sum . compDetailedCV
 270
 271 -- | Compute online nodes from a Node.List
 272 getOnline :: Node.List -> [Node.Node]
 273 getOnline = filter (not . Node.offline) . Container.elems
 274
 275 -- * hbal functions
 276
 277 -- | Compute best table. Note that the ordering of the arguments is important.
 278 compareTables :: Table -> Table -> Table
 279 compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
 280     if a_cv > b_cv then b else a
 281
 282 -- | Applies an instance move to a given node list and instance.
 283 applyMove :: Node.List -> Instance.Instance
 284           -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
 285 -- Failover (f)
 286 applyMove nl inst Failover =
 287     let old_pdx = Instance.pNode inst
 288         old_sdx = Instance.sNode inst
 289         old_p = Container.find old_pdx nl
 290         old_s = Container.find old_sdx nl
 291         int_p = Node.removePri old_p inst
 292         int_s = Node.removeSec old_s inst
 293         new_nl = do -- Maybe monad
 294           new_p <- Node.addPri int_s inst
 295           new_s <- Node.addSec int_p inst old_sdx
 296           let new_inst = Instance.setBoth inst old_sdx old_pdx
 297           return (Container.addTwo old_pdx new_s old_sdx new_p nl,
 298                   new_inst, old_sdx, old_pdx)
 299     in new_nl
 300
 301 -- Replace the primary (f:, r:np, f)
 302 applyMove nl inst (ReplacePrimary new_pdx) =
 303     let old_pdx = Instance.pNode inst
 304         old_sdx = Instance.sNode inst
 305         old_p = Container.find old_pdx nl
 306         old_s = Container.find old_sdx nl
 307         tgt_n = Container.find new_pdx nl
 308         int_p = Node.removePri old_p inst
 309         int_s = Node.removeSec old_s inst
 310         new_nl = do -- Maybe monad
 311           -- check that the current secondary can host the instance
 312           -- during the migration
 313           tmp_s <- Node.addPri int_s inst
 314           let tmp_s' = Node.removePri tmp_s inst
 315           new_p <- Node.addPri tgt_n inst
 316           new_s <- Node.addSec tmp_s' inst new_pdx
 317           let new_inst = Instance.setPri inst new_pdx
 318           return (Container.add new_pdx new_p $
 319                   Container.addTwo old_pdx int_p old_sdx new_s nl,
 320                   new_inst, new_pdx, old_sdx)
 321     in new_nl
 322
 323 -- Replace the secondary (r:ns)
 324 applyMove nl inst (ReplaceSecondary new_sdx) =
 325     let old_pdx = Instance.pNode inst
 326         old_sdx = Instance.sNode inst
 327         old_s = Container.find old_sdx nl
 328         tgt_n = Container.find new_sdx nl
 329         int_s = Node.removeSec old_s inst
 330         new_inst = Instance.setSec inst new_sdx
 331         new_nl = Node.addSec tgt_n inst old_pdx >>=
 332                  \new_s -> return (Container.addTwo new_sdx
 333                                    new_s old_sdx int_s nl,
 334                                    new_inst, old_pdx, new_sdx)
 335     in new_nl
 336
 337 -- Replace the secondary and failover (r:np, f)
 338 applyMove nl inst (ReplaceAndFailover new_pdx) =
 339     let old_pdx = Instance.pNode inst
 340         old_sdx = Instance.sNode inst
 341         old_p = Container.find old_pdx nl
 342         old_s = Container.find old_sdx nl
 343         tgt_n = Container.find new_pdx nl
 344         int_p = Node.removePri old_p inst
 345         int_s = Node.removeSec old_s inst
 346         new_nl = do -- Maybe monad
 347           new_p <- Node.addPri tgt_n inst
 348           new_s <- Node.addSec int_p inst new_pdx
 349           let new_inst = Instance.setBoth inst new_pdx old_pdx
 350           return (Container.add new_pdx new_p $
 351                   Container.addTwo old_pdx new_s old_sdx int_s nl,
 352                   new_inst, new_pdx, old_pdx)
 353     in new_nl
 354
 355 -- Failver and replace the secondary (f, r:ns)
 356 applyMove nl inst (FailoverAndReplace new_sdx) =
 357     let old_pdx = Instance.pNode inst
 358         old_sdx = Instance.sNode inst
 359         old_p = Container.find old_pdx nl
 360         old_s = Container.find old_sdx nl
 361         tgt_n = Container.find new_sdx nl
 362         int_p = Node.removePri old_p inst
 363         int_s = Node.removeSec old_s inst
 364         new_nl = do -- Maybe monad
 365           new_p <- Node.addPri int_s inst
 366           new_s <- Node.addSec tgt_n inst old_sdx
 367           let new_inst = Instance.setBoth inst old_sdx new_sdx
 368           return (Container.add new_sdx new_s $
 369                   Container.addTwo old_sdx new_p old_pdx int_p nl,
 370                   new_inst, old_sdx, new_sdx)
 371     in new_nl
 372
 373 -- | Tries to allocate an instance on one given node.
 374 allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
 375                  -> OpResult Node.AllocElement
 376 allocateOnSingle nl inst p =
 377     let new_pdx = Node.idx p
 378         new_inst = Instance.setBoth inst new_pdx Node.noSecondary
 379         new_nl = Node.addPri p inst >>= \new_p ->
 380                  return (Container.add new_pdx new_p nl, new_inst, [new_p])
 381     in new_nl
 382
 383 -- | Tries to allocate an instance on a given pair of nodes.
 384 allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
 385                -> OpResult Node.AllocElement
 386 allocateOnPair nl inst tgt_p tgt_s =
 387     let new_pdx = Node.idx tgt_p
 388         new_sdx = Node.idx tgt_s
 389         new_nl = do -- Maybe monad
 390           new_p <- Node.addPri tgt_p inst
 391           new_s <- Node.addSec tgt_s inst new_pdx
 392           let new_inst = Instance.setBoth inst new_pdx new_sdx
 393           return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst,
 394                  [new_p, new_s])
 395     in new_nl
 396
 397 -- | Tries to perform an instance move and returns the best table
 398 -- between the original one and the new one.
 399 checkSingleStep :: Table -- ^ The original table
 400                 -> Instance.Instance -- ^ The instance to move
 401                 -> Table -- ^ The current best table
 402                 -> IMove -- ^ The move to apply
 403                 -> Table -- ^ The final best table
 404 checkSingleStep ini_tbl target cur_tbl move =
 405     let
 406         Table ini_nl ini_il _ ini_plc = ini_tbl
 407         tmp_resu = applyMove ini_nl target move
 408     in
 409       case tmp_resu of
 410         OpFail _ -> cur_tbl
 411         OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
 412             let tgt_idx = Instance.idx target
 413                 upd_cvar = compCV upd_nl
 414                 upd_il = Container.add tgt_idx new_inst ini_il
 415                 upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
 416                 upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
 417             in
 418               compareTables cur_tbl upd_tbl
 419
 420 -- | Given the status of the current secondary as a valid new node and
 421 -- the current candidate target node, generate the possible moves for
 422 -- a instance.
 423 possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
 424               -> Ndx       -- ^ Target node candidate
 425               -> [IMove]   -- ^ List of valid result moves
 426 possibleMoves True tdx =
 427     [ReplaceSecondary tdx,
 428      ReplaceAndFailover tdx,
 429      ReplacePrimary tdx,
 430      FailoverAndReplace tdx]
 431
 432 possibleMoves False tdx =
 433     [ReplaceSecondary tdx,
 434      ReplaceAndFailover tdx]
 435
 436 -- | Compute the best move for a given instance.
 437 checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
 438                   -> Bool              -- ^ Whether disk moves are allowed
 439                   -> Table             -- ^ Original table
 440                   -> Instance.Instance -- ^ Instance to move
 441                   -> Table             -- ^ Best new table for this instance
 442 checkInstanceMove nodes_idx disk_moves ini_tbl target =
 443     let
 444         opdx = Instance.pNode target
 445         osdx = Instance.sNode target
 446         nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
 447         use_secondary = elem osdx nodes_idx
 448         aft_failover = if use_secondary -- if allowed to failover
 449                        then checkSingleStep ini_tbl target ini_tbl Failover
 450                        else ini_tbl
 451         all_moves = if disk_moves
 452                     then concatMap (possibleMoves use_secondary) nodes
 453                     else []
 454     in
 455       -- iterate over the possible nodes for this instance
 456       foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
 457
 458 -- | Compute the best next move.
 459 checkMove :: [Ndx]               -- ^ Allowed target node indices
 460           -> Bool                -- ^ Whether disk moves are allowed
 461           -> Table               -- ^ The current solution
 462           -> [Instance.Instance] -- ^ List of instances still to move
 463           -> Table               -- ^ The new solution
 464 checkMove nodes_idx disk_moves ini_tbl victims =
 465     let Table _ _ _ ini_plc = ini_tbl
 466         -- iterate over all instances, computing the best move
 467         best_tbl =
 468             foldl'
 469             (\ step_tbl em ->
 470                  compareTables step_tbl $
 471                  checkInstanceMove nodes_idx disk_moves ini_tbl em)
 472             ini_tbl victims
 473         Table _ _ _ best_plc = best_tbl
 474     in if length best_plc == length ini_plc
 475        then ini_tbl -- no advancement
 476        else best_tbl
 477
 478 -- | Check if we are allowed to go deeper in the balancing
 479
 480 doNextBalance :: Table       -- ^ The starting table
 481               -> Int         -- ^ Remaining length
 482               -> Score       -- ^ Score at which to stop
 483               -> Bool -- ^ The resulting table and commands
 484 doNextBalance ini_tbl max_rounds min_score =
 485     let Table _ _ ini_cv ini_plc = ini_tbl
 486         ini_plc_len = length ini_plc
 487     in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
 488
 489 -- | Run a balance move
 490
 491 tryBalance :: Table       -- ^ The starting table
 492            -> Bool        -- ^ Allow disk moves
 493            -> Bool        -- ^ Only evacuate moves
 494            -> Maybe Table -- ^ The resulting table and commands
 495 tryBalance ini_tbl disk_moves evac_mode =
 496     let Table ini_nl ini_il ini_cv _ = ini_tbl
 497         all_inst = Container.elems ini_il
 498         all_inst' = if evac_mode
 499                     then let bad_nodes = map Node.idx . filter Node.offline $
 500                                          Container.elems ini_nl
 501                          in filter (\e -> Instance.sNode e `elem` bad_nodes ||
 502                                           Instance.pNode e `elem` bad_nodes)
 503                             all_inst
 504                     else all_inst
 505         reloc_inst = filter Instance.movable all_inst'
 506         node_idx = map Node.idx . filter (not . Node.offline) $
 507                    Container.elems ini_nl
 508         fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
 509         (Table _ _ fin_cv _) = fin_tbl
 510     in
 511       if fin_cv < ini_cv
 512       then Just fin_tbl -- this round made success, return the new table
 513       else Nothing
 514
 515 -- * Allocation functions
 516
 517 -- | Build failure stats out of a list of failures
 518 collapseFailures :: [FailMode] -> FailStats
 519 collapseFailures flst =
 520     map (\k -> (k, length $ filter (k ==) flst)) [minBound..maxBound]
 521
 522 -- | Update current Allocation solution and failure stats with new
 523 -- elements
 524 concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
 525 concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols)
 526
 527 concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) =
 528     let nscore = compCV nl
 529         -- Choose the old or new solution, based on the cluster score
 530         nsols = case osols of
 531                   [] -> [(nscore, ns)]
 532                   (oscore, _):[] ->
 533                       if oscore < nscore
 534                       then osols
 535                       else [(nscore, ns)]
 536                   -- FIXME: here we simply concat to lists with more
 537                   -- than one element; we should instead abort, since
 538                   -- this is not a valid usage of this function
 539                   xs -> (nscore, ns):xs
 540         nsuc = cntok + 1
 541     -- Note: we force evaluation of nsols here in order to keep the
 542     -- memory profile low - we know that we will need nsols for sure
 543     -- in the next cycle, so we force evaluation of nsols, since the
 544     -- foldl' in the caller will only evaluate the tuple, but not the
 545     -- elements of the tuple
 546     in nsols `seq` nsuc `seq` (flst, nsuc, nsols)
 547
 548 -- | Try to allocate an instance on the cluster.
 549 tryAlloc :: (Monad m) =>
 550             Node.List         -- ^ The node list
 551          -> Instance.List     -- ^ The instance list
 552          -> Instance.Instance -- ^ The instance to allocate
 553          -> Int               -- ^ Required number of nodes
 554          -> m AllocSolution   -- ^ Possible solution list
 555 tryAlloc nl _ inst 2 =
 556     let all_nodes = getOnline nl
 557         all_pairs = liftM2 (,) all_nodes all_nodes
 558         ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
 559         sols = foldl' (\cstate (p, s) ->
 560                            concatAllocs cstate $ allocateOnPair nl inst p s
 561                       ) ([], 0, []) ok_pairs
 562     in return sols
 563
 564 tryAlloc nl _ inst 1 =
 565     let all_nodes = getOnline nl
 566         sols = foldl' (\cstate ->
 567                            concatAllocs cstate . allocateOnSingle nl inst
 568                       ) ([], 0, []) all_nodes
 569     in return sols
 570
 571 tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
 572                              \destinations required (" ++ show reqn ++
 573                                                "), only two supported"
 574
 575 -- | Try to allocate an instance on the cluster.
 576 tryReloc :: (Monad m) =>
 577             Node.List       -- ^ The node list
 578          -> Instance.List   -- ^ The instance list
 579          -> Idx             -- ^ The index of the instance to move
 580          -> Int             -- ^ The number of nodes required
 581          -> [Ndx]           -- ^ Nodes which should not be used
 582          -> m AllocSolution -- ^ Solution list
 583 tryReloc nl il xid 1 ex_idx =
 584     let all_nodes = getOnline nl
 585         inst = Container.find xid il
 586         ex_idx' = Instance.pNode inst:ex_idx
 587         valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
 588         valid_idxes = map Node.idx valid_nodes
 589         sols1 = foldl' (\cstate x ->
 590                             let em = do
 591                                   (mnl, i, _, _) <-
 592                                       applyMove nl inst (ReplaceSecondary x)
 593                                   return (mnl, i, [Container.find x mnl])
 594                             in concatAllocs cstate em
 595                        ) ([], 0, []) valid_idxes
 596     in return sols1
 597
 598 tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
 599                                 \destinations required (" ++ show reqn ++
 600                                                   "), only one supported"
 601
 602 -- | Try to allocate an instance on the cluster.
 603 tryEvac :: (Monad m) =>
 604             Node.List       -- ^ The node list
 605          -> Instance.List   -- ^ The instance list
 606          -> [Ndx]           -- ^ Nodes to be evacuated
 607          -> m AllocSolution -- ^ Solution list
 608 tryEvac nl il ex_ndx =
 609     let ex_nodes = map (`Container.find` nl) ex_ndx
 610         all_insts = nub . concatMap Node.sList $ ex_nodes
 611     in do
 612       (_, sol) <- foldM (\(nl', (_, _, rsols)) idx -> do
 613                            -- FIXME: hardcoded one node here
 614                            (fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx
 615                            case aes of
 616                              csol@(_, (nl'', _, _)):_ ->
 617                                  return (nl'', (fm, cs, csol:rsols))
 618                              _ -> fail $ "Can't evacuate instance " ++
 619                                   show idx
 620                         ) (nl, ([], 0, [])) all_insts
 621       return sol
 622
 623 -- * Formatting functions
 624
 625 -- | Given the original and final nodes, computes the relocation description.
 626 computeMoves :: Instance.Instance -- ^ The instance to be moved
 627              -> String -- ^ The instance name
 628              -> IMove  -- ^ The move being performed
 629              -> String -- ^ New primary
 630              -> String -- ^ New secondary
 631              -> (String, [String])
 632                 -- ^ Tuple of moves and commands list; moves is containing
 633                 -- either @/f/@ for failover or @/r:name/@ for replace
 634                 -- secondary, while the command list holds gnt-instance
 635                 -- commands (without that prefix), e.g \"@failover instance1@\"
 636 computeMoves i inam mv c d =
 637     case mv of
 638       Failover -> ("f", [mig])
 639       FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
 640       ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
 641       ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
 642       ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
 643     where morf = if Instance.running i then "migrate" else "failover"
 644           mig = printf "%s -f %s" morf inam::String
 645           rep n = printf "replace-disks -n %s %s" n inam
 646
 647 -- | Converts a placement to string format.
 648 printSolutionLine :: Node.List     -- ^ The node list
 649                   -> Instance.List -- ^ The instance list
 650                   -> Int           -- ^ Maximum node name length
 651                   -> Int           -- ^ Maximum instance name length
 652                   -> Placement     -- ^ The current placement
 653                   -> Int           -- ^ The index of the placement in
 654                                    -- the solution
 655                   -> (String, [String])
 656 printSolutionLine nl il nmlen imlen plc pos =
 657     let
 658         pmlen = (2*nmlen + 1)
 659         (i, p, s, mv, c) = plc
 660         inst = Container.find i il
 661         inam = Instance.name inst
 662         npri = Container.nameOf nl p
 663         nsec = Container.nameOf nl s
 664         opri = Container.nameOf nl $ Instance.pNode inst
 665         osec = Container.nameOf nl $ Instance.sNode inst
 666         (moves, cmds) =  computeMoves inst inam mv npri nsec
 667         ostr = printf "%s:%s" opri osec::String
 668         nstr = printf "%s:%s" npri nsec::String
 669     in
 670       (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
 671        pos imlen inam pmlen ostr
 672        pmlen nstr c moves,
 673        cmds)
 674
 675 -- | Return the instance and involved nodes in an instance move.
 676 involvedNodes :: Instance.List -> Placement -> [Ndx]
 677 involvedNodes il plc =
 678     let (i, np, ns, _, _) = plc
 679         inst = Container.find i il
 680         op = Instance.pNode inst
 681         os = Instance.sNode inst
 682     in nub [np, ns, op, os]
 683
 684 -- | Inner function for splitJobs, that either appends the next job to
 685 -- the current jobset, or starts a new jobset.
 686 mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
 687 mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
 688 mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
 689     | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
 690     | otherwise = ([n]:cjs, ndx)
 691
 692 -- | Break a list of moves into independent groups. Note that this
 693 -- will reverse the order of jobs.
 694 splitJobs :: [MoveJob] -> [JobSet]
 695 splitJobs = fst . foldl mergeJobs ([], [])
 696
 697 -- | Given a list of commands, prefix them with @gnt-instance@ and
 698 -- also beautify the display a little.
 699 formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
 700 formatJob jsn jsl (sn, (_, _, _, cmds)) =
 701     let out =
 702             printf "  echo job %d/%d" jsn sn:
 703             printf "  check":
 704             map ("  gnt-instance " ++) cmds
 705     in if sn == 1
 706        then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
 707        else out
 708
 709 -- | Given a list of commands, prefix them with @gnt-instance@ and
 710 -- also beautify the display a little.
 711 formatCmds :: [JobSet] -> String
 712 formatCmds =
 713     unlines .
 714     concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
 715                              (zip [1..] js)) .
 716     zip [1..]
 717
 718 -- | Converts a solution to string format.
 719 printSolution :: Node.List
 720               -> Instance.List
 721               -> [Placement]
 722               -> ([String], [[String]])
 723 printSolution nl il sol =
 724     let
 725         nmlen = Container.maxNameLen nl
 726         imlen = Container.maxNameLen il
 727     in
 728       unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..]
 729
 730 -- | Print the node list.
 731 printNodes :: Node.List -> [String] -> String
 732 printNodes nl fs =
 733     let fields = if null fs
 734                  then Node.defaultFields
 735                  else fs
 736         snl = sortBy (comparing Node.idx) (Container.elems nl)
 737         (header, isnum) = unzip $ map Node.showHeader fields
 738     in unlines . map ((:) ' ' .  intercalate " ") $
 739        formatTable (header:map (Node.list fields) snl) isnum
 740
 741 -- | Print the instance list.
 742 printInsts :: Node.List -> Instance.List -> String
 743 printInsts nl il =
 744     let sil = sortBy (comparing Instance.idx) (Container.elems il)
 745         helper inst = [ if Instance.running inst then "R" else " "
 746                       , Instance.name inst
 747                       , Container.nameOf nl (Instance.pNode inst)
 748                       , let sdx = Instance.sNode inst
 749                         in if sdx == Node.noSecondary
 750                            then  ""
 751                            else Container.nameOf nl sdx
 752                       , printf "%3d" $ Instance.vcpus inst
 753                       , printf "%5d" $ Instance.mem inst
 754                       , printf "%5d" $ Instance.dsk inst `div` 1024
 755                       , printf "%5.3f" lC
 756                       , printf "%5.3f" lM
 757                       , printf "%5.3f" lD
 758                       , printf "%5.3f" lN
 759                       ]
 760             where DynUtil lC lM lD lN = Instance.util inst
 761         header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
 762                  , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
 763         isnum = False:False:False:False:repeat True
 764     in unlines . map ((:) ' ' . intercalate " ") $
 765        formatTable (header:map helper sil) isnum
 766
 767 -- | Shows statistics for a given node list.
 768 printStats :: Node.List -> String
 769 printStats nl =
 770     let dcvs = compDetailedCV nl
 771         hd = zip (detailedCVNames ++ repeat "unknown") dcvs
 772         formatted = map (\(header, val) ->
 773                              printf "%s=%.8f" header val::String) hd
 774     in intercalate ", " formatted
 775
 776 -- | Convert a placement into a list of OpCodes (basically a job).
 777 iMoveToJob :: String -> Node.List -> Instance.List
 778           -> Idx -> IMove -> [OpCodes.OpCode]
 779 iMoveToJob csf nl il idx move =
 780     let inst = Container.find idx il
 781         iname = Instance.name inst ++ csf
 782         lookNode n = Just (Container.nameOf nl n ++ csf)
 783         opF = if Instance.running inst
 784               then OpCodes.OpMigrateInstance iname True False
 785               else OpCodes.OpFailoverInstance iname False
 786         opR n = OpCodes.OpReplaceDisks iname (lookNode n)
 787                 OpCodes.ReplaceNewSecondary [] Nothing
 788     in case move of
 789          Failover -> [ opF ]
 790          ReplacePrimary np -> [ opF, opR np, opF ]
 791          ReplaceSecondary ns -> [ opR ns ]
 792          ReplaceAndFailover np -> [ opR np, opF ]
 793          FailoverAndReplace ns -> [ opF, opR ns ]