Remove custom OpResult type/monad

[ganeti-local] / htools / Ganeti / HTools / Cluster.hs
diff --git a/htools/Ganeti/HTools/Cluster.hs b/htools/Ganeti/HTools/Cluster.hs

index 97a0561..82d4757 100644 (file)
--- a/htools/Ganeti/HTools/Cluster.hs
+++ b/htools/Ganeti/HTools/Cluster.hs
@@ -7,7 +7,7 @@ goes into the /Main/ module for the individual binaries.
  
  {-
  
-Copyright (C) 2009, 2010, 2011 Google Inc.
+Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
  
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -27,106 +27,110 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  -}
  
  module Ganeti.HTools.Cluster
-    (
-     -- * Types
-      AllocSolution(..)
-    , EvacSolution(..)
-    , Table(..)
-    , CStats(..)
-    , AllocStats
-    -- * Generic functions
-    , totalResources
-    , computeAllocationDelta
-    -- * First phase functions
-    , computeBadItems
-    -- * Second phase functions
-    , printSolutionLine
-    , formatCmds
-    , involvedNodes
-    , splitJobs
-    -- * Display functions
-    , printNodes
-    , printInsts
-    -- * Balacing functions
-    , checkMove
-    , doNextBalance
-    , tryBalance
-    , compCV
-    , compCVNodes
-    , compDetailedCV
-    , printStats
-    , iMoveToJob
-    -- * IAllocator functions
-    , genAllocNodes
-    , tryAlloc
-    , tryMGAlloc
-    , tryReloc
-    , tryEvac
-    , tryNodeEvac
-    , tryChangeGroup
-    , collapseFailures
-    -- * Allocation functions
-    , iterateAlloc
-    , tieredAlloc
-     -- * Node group functions
-    , instanceGroup
-    , findSplitInstances
-    , splitCluster
-    ) where
+  (
+    -- * Types
+    AllocSolution(..)
+  , EvacSolution(..)
+  , Table(..)
+  , CStats(..)
+  , AllocResult
+  , AllocMethod
+  , AllocSolutionList
+  -- * Generic functions
+  , totalResources
+  , computeAllocationDelta
+  -- * First phase functions
+  , computeBadItems
+  -- * Second phase functions
+  , printSolutionLine
+  , formatCmds
+  , involvedNodes
+  , splitJobs
+  -- * Display functions
+  , printNodes
+  , printInsts
+  -- * Balacing functions
+  , checkMove
+  , doNextBalance
+  , tryBalance
+  , compCV
+  , compCVNodes
+  , compDetailedCV
+  , printStats
+  , iMoveToJob
+  -- * IAllocator functions
+  , genAllocNodes
+  , tryAlloc
+  , tryMGAlloc
+  , tryNodeEvac
+  , tryChangeGroup
+  , collapseFailures
+  , allocList
+  -- * Allocation functions
+  , iterateAlloc
+  , tieredAlloc
+  -- * Node group functions
+  , instanceGroup
+  , findSplitInstances
+  , splitCluster
+  ) where
  
  import qualified Data.IntSet as IntSet
  import Data.List
-import Data.Maybe (fromJust)
+import Data.Maybe (fromJust, isNothing)
  import Data.Ord (comparing)
  import Text.Printf (printf)
-import Control.Monad
  
+import Ganeti.BasicTypes
  import qualified Ganeti.HTools.Container as Container
  import qualified Ganeti.HTools.Instance as Instance
  import qualified Ganeti.HTools.Node as Node
  import qualified Ganeti.HTools.Group as Group
  import Ganeti.HTools.Types
-import Ganeti.HTools.Utils
-import Ganeti.HTools.Compat
+import Ganeti.Compat
  import qualified Ganeti.OpCodes as OpCodes
+import Ganeti.Utils
  
  -- * Types
  
  -- | Allocation\/relocation solution.
  data AllocSolution = AllocSolution
-  { asFailures  :: [FailMode]          -- ^ Failure counts
-  , asAllocs    :: Int                 -- ^ Good allocation count
-  , asSolutions :: [Node.AllocElement] -- ^ The actual result, length
-                                       -- of the list depends on the
-                                       -- allocation/relocation mode
-  , asLog       :: [String]            -- ^ A list of informational messages
+  { asFailures :: [FailMode]              -- ^ Failure counts
+  , asAllocs   :: Int                     -- ^ Good allocation count
+  , asSolution :: Maybe Node.AllocElement -- ^ The actual allocation result
+  , asLog      :: [String]                -- ^ Informational messages
    }
  
  -- | Node evacuation/group change iallocator result type. This result
  -- type consists of actual opcodes (a restricted subset) that are
  -- transmitted back to Ganeti.
  data EvacSolution = EvacSolution
-    { esMoved   :: [(Idx, Gdx, [Ndx])]  -- ^ Instances moved successfully
-    , esFailed  :: [(Idx, String)]      -- ^ Instances which were not
-                                        -- relocated
-    , esOpCodes :: [[[OpCodes.OpCode]]] -- ^ List of lists of jobs
-    }
+  { esMoved   :: [(Idx, Gdx, [Ndx])]  -- ^ Instances moved successfully
+  , esFailed  :: [(Idx, String)]      -- ^ Instances which were not
+                                      -- relocated
+  , esOpCodes :: [[OpCodes.OpCode]]   -- ^ List of jobs
+  } deriving (Show)
  
  -- | Allocation results, as used in 'iterateAlloc' and 'tieredAlloc'.
  type AllocResult = (FailStats, Node.List, Instance.List,
                      [Instance.Instance], [CStats])
  
+-- | Type alias for easier handling.
+type AllocSolutionList = [(Instance.Instance, AllocSolution)]
+
  -- | A type denoting the valid allocation mode/pairs.
  --
--- For a one-node allocation, this will be a @Left ['Node.Node']@,
--- whereas for a two-node allocation, this will be a @Right
--- [('Node.Node', 'Node.Node')]@.
-type AllocNodes = Either [Ndx] [(Ndx, Ndx)]
+-- For a one-node allocation, this will be a @Left ['Ndx']@, whereas
+-- for a two-node allocation, this will be a @Right [('Ndx',
+-- ['Ndx'])]@. In the latter case, the list is basically an
+-- association list, grouped by primary node and holding the potential
+-- secondary nodes in the sub-list.
+type AllocNodes = Either [Ndx] [(Ndx, [Ndx])]
  
  -- | The empty solution we start with when computing allocations.
  emptyAllocSolution :: AllocSolution
  emptyAllocSolution = AllocSolution { asFailures = [], asAllocs = 0
-                                   , asSolutions = [], asLog = [] }
+                                   , asSolution = Nothing, asLog = [] }
  
  -- | The empty evac solution.
  emptyEvacSolution :: EvacSolution
@@ -140,32 +144,43 @@ data Table = Table Node.List Instance.List Score [Placement]
               deriving (Show, Read)
  
  -- | Cluster statistics data type.
-data CStats = CStats { csFmem :: Integer -- ^ Cluster free mem
-                     , csFdsk :: Integer -- ^ Cluster free disk
-                     , csAmem :: Integer -- ^ Cluster allocatable mem
-                     , csAdsk :: Integer -- ^ Cluster allocatable disk
-                     , csAcpu :: Integer -- ^ Cluster allocatable cpus
-                     , csMmem :: Integer -- ^ Max node allocatable mem
-                     , csMdsk :: Integer -- ^ Max node allocatable disk
-                     , csMcpu :: Integer -- ^ Max node allocatable cpu
-                     , csImem :: Integer -- ^ Instance used mem
-                     , csIdsk :: Integer -- ^ Instance used disk
-                     , csIcpu :: Integer -- ^ Instance used cpu
-                     , csTmem :: Double  -- ^ Cluster total mem
-                     , csTdsk :: Double  -- ^ Cluster total disk
-                     , csTcpu :: Double  -- ^ Cluster total cpus
-                     , csVcpu :: Integer -- ^ Cluster virtual cpus (if
-                                         -- node pCpu has been set,
-                                         -- otherwise -1)
-                     , csXmem :: Integer -- ^ Unnacounted for mem
-                     , csNmem :: Integer -- ^ Node own memory
-                     , csScore :: Score  -- ^ The cluster score
-                     , csNinst :: Int    -- ^ The total number of instances
-                     }
-            deriving (Show, Read)
-
--- | Currently used, possibly to allocate, unallocable.
-type AllocStats = (RSpec, RSpec, RSpec)
+data CStats = CStats
+  { csFmem :: Integer -- ^ Cluster free mem
+  , csFdsk :: Integer -- ^ Cluster free disk
+  , csAmem :: Integer -- ^ Cluster allocatable mem
+  , csAdsk :: Integer -- ^ Cluster allocatable disk
+  , csAcpu :: Integer -- ^ Cluster allocatable cpus
+  , csMmem :: Integer -- ^ Max node allocatable mem
+  , csMdsk :: Integer -- ^ Max node allocatable disk
+  , csMcpu :: Integer -- ^ Max node allocatable cpu
+  , csImem :: Integer -- ^ Instance used mem
+  , csIdsk :: Integer -- ^ Instance used disk
+  , csIcpu :: Integer -- ^ Instance used cpu
+  , csTmem :: Double  -- ^ Cluster total mem
+  , csTdsk :: Double  -- ^ Cluster total disk
+  , csTcpu :: Double  -- ^ Cluster total cpus
+  , csVcpu :: Integer -- ^ Cluster total virtual cpus
+  , csNcpu :: Double  -- ^ Equivalent to 'csIcpu' but in terms of
+                      -- physical CPUs, i.e. normalised used phys CPUs
+  , csXmem :: Integer -- ^ Unnacounted for mem
+  , csNmem :: Integer -- ^ Node own memory
+  , csScore :: Score  -- ^ The cluster score
+  , csNinst :: Int    -- ^ The total number of instances
+  } deriving (Show, Read)
+
+-- | A simple type for allocation functions.
+type AllocMethod =  Node.List           -- ^ Node list
+                 -> Instance.List       -- ^ Instance list
+                 -> Maybe Int           -- ^ Optional allocation limit
+                 -> Instance.Instance   -- ^ Instance spec for allocation
+                 -> AllocNodes          -- ^ Which nodes we should allocate on
+                 -> [Instance.Instance] -- ^ Allocated instances
+                 -> [CStats]            -- ^ Running cluster stats
+                 -> Result AllocResult  -- ^ Allocation result
+
+-- | A simple type for the running solution of evacuations.
+type EvacInnerState =
+  Either String (Node.List, Instance.Instance, Score, Ndx)
  
  -- * Utility functions
  
@@ -190,57 +205,71 @@ computeBadItems nl il =
    in
      (bad_nodes, bad_instances)
  
+-- | Extracts the node pairs for an instance. This can fail if the
+-- instance is single-homed. FIXME: this needs to be improved,
+-- together with the general enhancement for handling non-DRBD moves.
+instanceNodes :: Node.List -> Instance.Instance ->
+                 (Ndx, Ndx, Node.Node, Node.Node)
+instanceNodes nl inst =
+  let old_pdx = Instance.pNode inst
+      old_sdx = Instance.sNode inst
+      old_p = Container.find old_pdx nl
+      old_s = Container.find old_sdx nl
+  in (old_pdx, old_sdx, old_p, old_s)
+
  -- | Zero-initializer for the CStats type.
  emptyCStats :: CStats
-emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  
  -- | Update stats with data from a new node.
  updateCStats :: CStats -> Node.Node -> CStats
  updateCStats cs node =
-    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
-                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
-                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
-                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
-                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
-                 csVcpu = x_vcpu,
-                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
-               }
-            = cs
-        inc_amem = Node.fMem node - Node.rMem node
-        inc_amem' = if inc_amem > 0 then inc_amem else 0
-        inc_adsk = Node.availDisk node
-        inc_imem = truncate (Node.tMem node) - Node.nMem node
-                   - Node.xMem node - Node.fMem node
-        inc_icpu = Node.uCpu node
-        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
-        inc_vcpu = Node.hiCpu node
-        inc_acpu = Node.availCpu node
-
-    in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
-          , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
-          , csAmem = x_amem + fromIntegral inc_amem'
-          , csAdsk = x_adsk + fromIntegral inc_adsk
-          , csAcpu = x_acpu + fromIntegral inc_acpu
-          , csMmem = max x_mmem (fromIntegral inc_amem')
-          , csMdsk = max x_mdsk (fromIntegral inc_adsk)
-          , csMcpu = max x_mcpu (fromIntegral inc_acpu)
-          , csImem = x_imem + fromIntegral inc_imem
-          , csIdsk = x_idsk + fromIntegral inc_idsk
-          , csIcpu = x_icpu + fromIntegral inc_icpu
-          , csTmem = x_tmem + Node.tMem node
-          , csTdsk = x_tdsk + Node.tDsk node
-          , csTcpu = x_tcpu + Node.tCpu node
-          , csVcpu = x_vcpu + fromIntegral inc_vcpu
-          , csXmem = x_xmem + fromIntegral (Node.xMem node)
-          , csNmem = x_nmem + fromIntegral (Node.nMem node)
-          , csNinst = x_ninst + length (Node.pList node)
-          }
+  let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
+               csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
+               csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
+               csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
+               csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
+               csVcpu = x_vcpu, csNcpu = x_ncpu,
+               csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
+             }
+        = cs
+      inc_amem = Node.fMem node - Node.rMem node
+      inc_amem' = if inc_amem > 0 then inc_amem else 0
+      inc_adsk = Node.availDisk node
+      inc_imem = truncate (Node.tMem node) - Node.nMem node
+                 - Node.xMem node - Node.fMem node
+      inc_icpu = Node.uCpu node
+      inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
+      inc_vcpu = Node.hiCpu node
+      inc_acpu = Node.availCpu node
+      inc_ncpu = fromIntegral (Node.uCpu node) /
+                 iPolicyVcpuRatio (Node.iPolicy node)
+  in cs { csFmem = x_fmem + fromIntegral (Node.fMem node)
+        , csFdsk = x_fdsk + fromIntegral (Node.fDsk node)
+        , csAmem = x_amem + fromIntegral inc_amem'
+        , csAdsk = x_adsk + fromIntegral inc_adsk
+        , csAcpu = x_acpu + fromIntegral inc_acpu
+        , csMmem = max x_mmem (fromIntegral inc_amem')
+        , csMdsk = max x_mdsk (fromIntegral inc_adsk)
+        , csMcpu = max x_mcpu (fromIntegral inc_acpu)
+        , csImem = x_imem + fromIntegral inc_imem
+        , csIdsk = x_idsk + fromIntegral inc_idsk
+        , csIcpu = x_icpu + fromIntegral inc_icpu
+        , csTmem = x_tmem + Node.tMem node
+        , csTdsk = x_tdsk + Node.tDsk node
+        , csTcpu = x_tcpu + Node.tCpu node
+        , csVcpu = x_vcpu + fromIntegral inc_vcpu
+        , csNcpu = x_ncpu + inc_ncpu
+        , csXmem = x_xmem + fromIntegral (Node.xMem node)
+        , csNmem = x_nmem + fromIntegral (Node.nMem node)
+        , csNinst = x_ninst + length (Node.pList node)
+        }
  
  -- | Compute the total free disk and memory in the cluster.
  totalResources :: Node.List -> CStats
  totalResources nl =
-    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
-    in cs { csScore = compCV nl }
+  let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
+  in cs { csScore = compCV nl }
  
  -- | Compute the delta between two cluster state.
  --
@@ -250,18 +279,27 @@ totalResources nl =
  -- was left unallocated.
  computeAllocationDelta :: CStats -> CStats -> AllocStats
  computeAllocationDelta cini cfin =
-    let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu} = cini
-        CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
-                csTmem = t_mem, csTdsk = t_dsk, csVcpu = v_cpu } = cfin
-        rini = RSpec (fromIntegral i_icpu) (fromIntegral i_imem)
-               (fromIntegral i_idsk)
-        rfin = RSpec (fromIntegral (f_icpu - i_icpu))
-               (fromIntegral (f_imem - i_imem))
-               (fromIntegral (f_idsk - i_idsk))
-        un_cpu = fromIntegral (v_cpu - f_icpu)::Int
-        runa = RSpec un_cpu (truncate t_mem - fromIntegral f_imem)
-               (truncate t_dsk - fromIntegral f_idsk)
-    in (rini, rfin, runa)
+  let CStats {csImem = i_imem, csIdsk = i_idsk, csIcpu = i_icpu,
+              csNcpu = i_ncpu } = cini
+      CStats {csImem = f_imem, csIdsk = f_idsk, csIcpu = f_icpu,
+              csTmem = t_mem, csTdsk = t_dsk, csVcpu = f_vcpu,
+              csNcpu = f_ncpu, csTcpu = f_tcpu } = cfin
+      rini = AllocInfo { allocInfoVCpus = fromIntegral i_icpu
+                       , allocInfoNCpus = i_ncpu
+                       , allocInfoMem   = fromIntegral i_imem
+                       , allocInfoDisk  = fromIntegral i_idsk
+                       }
+      rfin = AllocInfo { allocInfoVCpus = fromIntegral (f_icpu - i_icpu)
+                       , allocInfoNCpus = f_ncpu - i_ncpu
+                       , allocInfoMem   = fromIntegral (f_imem - i_imem)
+                       , allocInfoDisk  = fromIntegral (f_idsk - i_idsk)
+                       }
+      runa = AllocInfo { allocInfoVCpus = fromIntegral (f_vcpu - f_icpu)
+                       , allocInfoNCpus = f_tcpu - f_ncpu
+                       , allocInfoMem   = truncate t_mem - fromIntegral f_imem
+                       , allocInfoDisk  = truncate t_dsk - fromIntegral f_idsk
+                       }
+  in (rini, rfin, runa)
  
  -- | The names and weights of the individual elements in the CV list.
  detailedCVInfo :: [(Double, String)]
@@ -277,6 +315,7 @@ detailedCVInfo = [ (1,  "free_mem_cv")
                   , (1,  "disk_load_cv")
                   , (1,  "net_load_cv")
                   , (2,  "pri_tags_score")
+                 , (1,  "spindles_cv")
                   ]
  
  -- | Holds the weights used by 'compCVNodes' for each metric.
@@ -286,46 +325,46 @@ detailedCVWeights = map fst detailedCVInfo
  -- | Compute the mem and disk covariance.
  compDetailedCV :: [Node.Node] -> [Double]
  compDetailedCV all_nodes =
-    let
-        (offline, nodes) = partition Node.offline all_nodes
-        mem_l = map Node.pMem nodes
-        dsk_l = map Node.pDsk nodes
-        -- metric: memory covariance
-        mem_cv = stdDev mem_l
-        -- metric: disk covariance
-        dsk_cv = stdDev dsk_l
-        -- metric: count of instances living on N1 failing nodes
-        n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
-                                                   length (Node.pList n)) .
-                   filter Node.failN1 $ nodes :: Double
-        res_l = map Node.pRem nodes
-        -- metric: reserved memory covariance
-        res_cv = stdDev res_l
-        -- offline instances metrics
-        offline_ipri = sum . map (length . Node.pList) $ offline
-        offline_isec = sum . map (length . Node.sList) $ offline
-        -- metric: count of instances on offline nodes
-        off_score = fromIntegral (offline_ipri + offline_isec)::Double
-        -- metric: count of primary instances on offline nodes (this
-        -- helps with evacuation/failover of primary instances on
-        -- 2-node clusters with one node offline)
-        off_pri_score = fromIntegral offline_ipri::Double
-        cpu_l = map Node.pCpu nodes
-        -- metric: covariance of vcpu/pcpu ratio
-        cpu_cv = stdDev cpu_l
-        -- metrics: covariance of cpu, memory, disk and network load
-        (c_load, m_load, d_load, n_load) = unzip4 $
-            map (\n ->
-                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
-                         DynUtil c2 m2 d2 n2 = Node.utilPool n
-                     in (c1/c2, m1/m2, d1/d2, n1/n2)
-                ) nodes
-        -- metric: conflicting instance count
-        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
-        pri_tags_score = fromIntegral pri_tags_inst::Double
-    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
-       , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
-       , pri_tags_score ]
+  let (offline, nodes) = partition Node.offline all_nodes
+      mem_l = map Node.pMem nodes
+      dsk_l = map Node.pDsk nodes
+      -- metric: memory covariance
+      mem_cv = stdDev mem_l
+      -- metric: disk covariance
+      dsk_cv = stdDev dsk_l
+      -- metric: count of instances living on N1 failing nodes
+      n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
+                                                 length (Node.pList n)) .
+                 filter Node.failN1 $ nodes :: Double
+      res_l = map Node.pRem nodes
+      -- metric: reserved memory covariance
+      res_cv = stdDev res_l
+      -- offline instances metrics
+      offline_ipri = sum . map (length . Node.pList) $ offline
+      offline_isec = sum . map (length . Node.sList) $ offline
+      -- metric: count of instances on offline nodes
+      off_score = fromIntegral (offline_ipri + offline_isec)::Double
+      -- metric: count of primary instances on offline nodes (this
+      -- helps with evacuation/failover of primary instances on
+      -- 2-node clusters with one node offline)
+      off_pri_score = fromIntegral offline_ipri::Double
+      cpu_l = map Node.pCpu nodes
+      -- metric: covariance of vcpu/pcpu ratio
+      cpu_cv = stdDev cpu_l
+      -- metrics: covariance of cpu, memory, disk and network load
+      (c_load, m_load, d_load, n_load) =
+        unzip4 $ map (\n ->
+                      let DynUtil c1 m1 d1 n1 = Node.utilLoad n
+                          DynUtil c2 m2 d2 n2 = Node.utilPool n
+                      in (c1/c2, m1/m2, d1/d2, n1/n2)) nodes
+      -- metric: conflicting instance count
+      pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
+      pri_tags_score = fromIntegral pri_tags_inst::Double
+      -- metric: spindles %
+      spindles_cv = map (\n -> Node.instSpindles n / Node.hiSpindles n) nodes
+  in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
+     , stdDev c_load, stdDev m_load , stdDev d_load, stdDev n_load
+     , pri_tags_score, stdDev spindles_cv ]
  
  -- | Compute the /total/ variance.
  compCVNodes :: [Node.Node] -> Double
@@ -344,127 +383,128 @@ getOnline = filter (not . Node.offline) . Container.elems
  -- | Compute best table. Note that the ordering of the arguments is important.
  compareTables :: Table -> Table -> Table
  compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
-    if a_cv > b_cv then b else a
+  if a_cv > b_cv then b else a
  
  -- | Applies an instance move to a given node list and instance.
  applyMove :: Node.List -> Instance.Instance
            -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
  -- Failover (f)
  applyMove nl inst Failover =
-    let old_pdx = Instance.pNode inst
-        old_sdx = Instance.sNode inst
-        old_p = Container.find old_pdx nl
-        old_s = Container.find old_sdx nl
-        int_p = Node.removePri old_p inst
-        int_s = Node.removeSec old_s inst
-        force_p = Node.offline old_p
-        new_nl = do -- Maybe monad
-          new_p <- Node.addPriEx force_p int_s inst
-          new_s <- Node.addSec int_p inst old_sdx
-          let new_inst = Instance.setBoth inst old_sdx old_pdx
-          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
-                  new_inst, old_sdx, old_pdx)
-    in new_nl
+  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
+      int_p = Node.removePri old_p inst
+      int_s = Node.removeSec old_s inst
+      new_nl = do -- Maybe monad
+        new_p <- Node.addPriEx (Node.offline old_p) int_s inst
+        new_s <- Node.addSec int_p inst old_sdx
+        let new_inst = Instance.setBoth inst old_sdx old_pdx
+        return (Container.addTwo old_pdx new_s old_sdx new_p nl,
+                new_inst, old_sdx, old_pdx)
+  in new_nl
+
+-- Failover to any (fa)
+applyMove nl inst (FailoverToAny new_pdx) = do
+  let (old_pdx, old_sdx, old_pnode, _) = instanceNodes nl inst
+      new_pnode = Container.find new_pdx nl
+      force_failover = Node.offline old_pnode
+  new_pnode' <- Node.addPriEx force_failover new_pnode inst
+  let old_pnode' = Node.removePri old_pnode inst
+      inst' = Instance.setPri inst new_pdx
+      nl' = Container.addTwo old_pdx old_pnode' new_pdx new_pnode' nl
+  return (nl', inst', new_pdx, old_sdx)
  
  -- Replace the primary (f:, r:np, f)
  applyMove nl inst (ReplacePrimary new_pdx) =
-    let old_pdx = Instance.pNode inst
-        old_sdx = Instance.sNode inst
-        old_p = Container.find old_pdx nl
-        old_s = Container.find old_sdx nl
-        tgt_n = Container.find new_pdx nl
-        int_p = Node.removePri old_p inst
-        int_s = Node.removeSec old_s inst
-        force_p = Node.offline old_p
-        new_nl = do -- Maybe monad
-          -- check that the current secondary can host the instance
-          -- during the migration
-          tmp_s <- Node.addPriEx force_p int_s inst
-          let tmp_s' = Node.removePri tmp_s inst
-          new_p <- Node.addPriEx force_p tgt_n inst
-          new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
-          let new_inst = Instance.setPri inst new_pdx
-          return (Container.add new_pdx new_p $
-                  Container.addTwo old_pdx int_p old_sdx new_s nl,
-                  new_inst, new_pdx, old_sdx)
-    in new_nl
+  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
+      tgt_n = Container.find new_pdx nl
+      int_p = Node.removePri old_p inst
+      int_s = Node.removeSec old_s inst
+      force_p = Node.offline old_p
+      new_nl = do -- Maybe monad
+                  -- check that the current secondary can host the instance
+                  -- during the migration
+        tmp_s <- Node.addPriEx force_p int_s inst
+        let tmp_s' = Node.removePri tmp_s inst
+        new_p <- Node.addPriEx force_p tgt_n inst
+        new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
+        let new_inst = Instance.setPri inst new_pdx
+        return (Container.add new_pdx new_p $
+                Container.addTwo old_pdx int_p old_sdx new_s nl,
+                new_inst, new_pdx, old_sdx)
+  in new_nl
  
  -- Replace the secondary (r:ns)
  applyMove nl inst (ReplaceSecondary new_sdx) =
-    let old_pdx = Instance.pNode inst
-        old_sdx = Instance.sNode inst
-        old_s = Container.find old_sdx nl
-        tgt_n = Container.find new_sdx nl
-        int_s = Node.removeSec old_s inst
-        force_s = Node.offline old_s
-        new_inst = Instance.setSec inst new_sdx
-        new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
-                 \new_s -> return (Container.addTwo new_sdx
-                                   new_s old_sdx int_s nl,
-                                   new_inst, old_pdx, new_sdx)
-    in new_nl
+  let old_pdx = Instance.pNode inst
+      old_sdx = Instance.sNode inst
+      old_s = Container.find old_sdx nl
+      tgt_n = Container.find new_sdx nl
+      int_s = Node.removeSec old_s inst
+      force_s = Node.offline old_s
+      new_inst = Instance.setSec inst new_sdx
+      new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
+               \new_s -> return (Container.addTwo new_sdx
+                                 new_s old_sdx int_s nl,
+                                 new_inst, old_pdx, new_sdx)
+  in new_nl
  
  -- Replace the secondary and failover (r:np, f)
  applyMove nl inst (ReplaceAndFailover new_pdx) =
-    let old_pdx = Instance.pNode inst
-        old_sdx = Instance.sNode inst
-        old_p = Container.find old_pdx nl
-        old_s = Container.find old_sdx nl
-        tgt_n = Container.find new_pdx nl
-        int_p = Node.removePri old_p inst
-        int_s = Node.removeSec old_s inst
-        force_s = Node.offline old_s
-        new_nl = do -- Maybe monad
-          new_p <- Node.addPri tgt_n inst
-          new_s <- Node.addSecEx force_s int_p inst new_pdx
-          let new_inst = Instance.setBoth inst new_pdx old_pdx
-          return (Container.add new_pdx new_p $
-                  Container.addTwo old_pdx new_s old_sdx int_s nl,
-                  new_inst, new_pdx, old_pdx)
-    in new_nl
+  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
+      tgt_n = Container.find new_pdx nl
+      int_p = Node.removePri old_p inst
+      int_s = Node.removeSec old_s inst
+      force_s = Node.offline old_s
+      new_nl = do -- Maybe monad
+        new_p <- Node.addPri tgt_n inst
+        new_s <- Node.addSecEx force_s int_p inst new_pdx
+        let new_inst = Instance.setBoth inst new_pdx old_pdx
+        return (Container.add new_pdx new_p $
+                Container.addTwo old_pdx new_s old_sdx int_s nl,
+                new_inst, new_pdx, old_pdx)
+  in new_nl
  
  -- Failver and replace the secondary (f, r:ns)
  applyMove nl inst (FailoverAndReplace new_sdx) =
-    let old_pdx = Instance.pNode inst
-        old_sdx = Instance.sNode inst
-        old_p = Container.find old_pdx nl
-        old_s = Container.find old_sdx nl
-        tgt_n = Container.find new_sdx nl
-        int_p = Node.removePri old_p inst
-        int_s = Node.removeSec old_s inst
-        force_p = Node.offline old_p
-        new_nl = do -- Maybe monad
-          new_p <- Node.addPriEx force_p int_s inst
-          new_s <- Node.addSecEx force_p tgt_n inst old_sdx
-          let new_inst = Instance.setBoth inst old_sdx new_sdx
-          return (Container.add new_sdx new_s $
-                  Container.addTwo old_sdx new_p old_pdx int_p nl,
-                  new_inst, old_sdx, new_sdx)
-    in new_nl
+  let (old_pdx, old_sdx, old_p, old_s) = instanceNodes nl inst
+      tgt_n = Container.find new_sdx nl
+      int_p = Node.removePri old_p inst
+      int_s = Node.removeSec old_s inst
+      force_p = Node.offline old_p
+      new_nl = do -- Maybe monad
+        new_p <- Node.addPriEx force_p int_s inst
+        new_s <- Node.addSecEx force_p tgt_n inst old_sdx
+        let new_inst = Instance.setBoth inst old_sdx new_sdx
+        return (Container.add new_sdx new_s $
+                Container.addTwo old_sdx new_p old_pdx int_p nl,
+                new_inst, old_sdx, new_sdx)
+  in new_nl
  
  -- | Tries to allocate an instance on one given node.
  allocateOnSingle :: Node.List -> Instance.Instance -> Ndx
                   -> OpResult Node.AllocElement
  allocateOnSingle nl inst new_pdx =
-    let p = Container.find new_pdx nl
-        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
-    in  Node.addPri p inst >>= \new_p -> do
-      let new_nl = Container.add new_pdx new_p nl
-          new_score = compCV nl
-      return (new_nl, new_inst, [new_p], new_score)
+  let p = Container.find new_pdx nl
+      new_inst = Instance.setBoth inst new_pdx Node.noSecondary
+  in do
+    Instance.instMatchesPolicy inst (Node.iPolicy p)
+    new_p <- Node.addPri p inst
+    let new_nl = Container.add new_pdx new_p nl
+        new_score = compCV new_nl
+    return (new_nl, new_inst, [new_p], new_score)
  
  -- | Tries to allocate an instance on a given pair of nodes.
  allocateOnPair :: Node.List -> Instance.Instance -> Ndx -> Ndx
                 -> OpResult Node.AllocElement
  allocateOnPair nl inst new_pdx new_sdx =
-    let tgt_p = Container.find new_pdx nl
-        tgt_s = Container.find new_sdx nl
-    in do
-      new_p <- Node.addPri tgt_p inst
-      new_s <- Node.addSec tgt_s inst new_pdx
-      let new_inst = Instance.setBoth inst new_pdx new_sdx
-          new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
-      return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
+  let tgt_p = Container.find new_pdx nl
+      tgt_s = Container.find new_sdx nl
+  in do
+    Instance.instMatchesPolicy inst (Node.iPolicy tgt_p)
+    new_p <- Node.addPri tgt_p inst
+    new_s <- Node.addSec tgt_s inst new_pdx
+    let new_inst = Instance.setBoth inst new_pdx new_sdx
+        new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
+    return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
  
  -- | Tries to perform an instance move and returns the best table
  -- between the original one and the new one.
@@ -474,41 +514,48 @@ checkSingleStep :: Table -- ^ The original table
                  -> IMove -- ^ The move to apply
                  -> Table -- ^ The final best table
  checkSingleStep ini_tbl target cur_tbl move =
-    let
-        Table ini_nl ini_il _ ini_plc = ini_tbl
-        tmp_resu = applyMove ini_nl target move
-    in
-      case tmp_resu of
-        OpFail _ -> cur_tbl
-        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
-            let tgt_idx = Instance.idx target
-                upd_cvar = compCV upd_nl
-                upd_il = Container.add tgt_idx new_inst ini_il
-                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
-                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
-            in
-              compareTables cur_tbl upd_tbl
+  let Table ini_nl ini_il _ ini_plc = ini_tbl
+      tmp_resu = applyMove ini_nl target move
+  in case tmp_resu of
+       Bad _ -> cur_tbl
+       Ok (upd_nl, new_inst, pri_idx, sec_idx) ->
+         let tgt_idx = Instance.idx target
+             upd_cvar = compCV upd_nl
+             upd_il = Container.add tgt_idx new_inst ini_il
+             upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
+             upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
+         in compareTables cur_tbl upd_tbl
  
  -- | Given the status of the current secondary as a valid new node and
  -- the current candidate target node, generate the possible moves for
  -- a instance.
-possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
-              -> Bool      -- ^ Whether we can change the primary node
-              -> Ndx       -- ^ Target node candidate
-              -> [IMove]   -- ^ List of valid result moves
+possibleMoves :: MirrorType -- ^ The mirroring type of the instance
+              -> Bool       -- ^ Whether the secondary node is a valid new node
+              -> Bool       -- ^ Whether we can change the primary node
+              -> Ndx        -- ^ Target node candidate
+              -> [IMove]    -- ^ List of valid result moves
+
+possibleMoves MirrorNone _ _ _ = []
  
-possibleMoves _ False tdx =
-    [ReplaceSecondary tdx]
+possibleMoves MirrorExternal _ False _ = []
  
-possibleMoves True True tdx =
-    [ReplaceSecondary tdx,
-     ReplaceAndFailover tdx,
-     ReplacePrimary tdx,
-     FailoverAndReplace tdx]
+possibleMoves MirrorExternal _ True tdx =
+  [ FailoverToAny tdx ]
  
-possibleMoves False True tdx =
-    [ReplaceSecondary tdx,
-     ReplaceAndFailover tdx]
+possibleMoves MirrorInternal _ False tdx =
+  [ ReplaceSecondary tdx ]
+
+possibleMoves MirrorInternal True True tdx =
+  [ ReplaceSecondary tdx
+  , ReplaceAndFailover tdx
+  , ReplacePrimary tdx
+  , FailoverAndReplace tdx
+  ]
+
+possibleMoves MirrorInternal False True tdx =
+  [ ReplaceSecondary tdx
+  , ReplaceAndFailover tdx
+  ]
  
  -- | Compute the best move for a given instance.
  checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
@@ -518,18 +565,21 @@ checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
                    -> Instance.Instance -- ^ Instance to move
                    -> Table             -- ^ Best new table for this instance
  checkInstanceMove nodes_idx disk_moves inst_moves ini_tbl target =
-    let
-        opdx = Instance.pNode target
-        osdx = Instance.sNode target
-        nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
-        use_secondary = elem osdx nodes_idx && inst_moves
-        aft_failover = if use_secondary -- if allowed to failover
+  let opdx = Instance.pNode target
+      osdx = Instance.sNode target
+      bad_nodes = [opdx, osdx]
+      nodes = filter (`notElem` bad_nodes) nodes_idx
+      mir_type = Instance.mirrorType target
+      use_secondary = elem osdx nodes_idx && inst_moves
+      aft_failover = if mir_type == MirrorInternal && use_secondary
+                       -- if drbd and allowed to failover
                         then checkSingleStep ini_tbl target ini_tbl Failover
                         else ini_tbl
-        all_moves = if disk_moves
-                    then concatMap
-                         (possibleMoves use_secondary inst_moves) nodes
-                    else []
+      all_moves =
+        if disk_moves
+          then concatMap (possibleMoves mir_type use_secondary inst_moves)
+               nodes
+          else []
      in
        -- iterate over the possible nodes for this instance
        foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
@@ -542,19 +592,19 @@ checkMove :: [Ndx]               -- ^ Allowed target node indices
            -> [Instance.Instance] -- ^ List of instances still to move
            -> Table               -- ^ The new solution
  checkMove nodes_idx disk_moves inst_moves ini_tbl victims =
-    let Table _ _ _ ini_plc = ini_tbl
-        -- we're using rwhnf from the Control.Parallel.Strategies
-        -- package; we don't need to use rnf as that would force too
-        -- much evaluation in single-threaded cases, and in
-        -- multi-threaded case the weak head normal form is enough to
-        -- spark the evaluation
-        tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves
-                               inst_moves ini_tbl)
-                 victims
-        -- iterate over all instances, computing the best move
-        best_tbl = foldl' compareTables ini_tbl tables
-        Table _ _ _ best_plc = best_tbl
-    in if length best_plc == length ini_plc
+  let Table _ _ _ ini_plc = ini_tbl
+      -- we're using rwhnf from the Control.Parallel.Strategies
+      -- package; we don't need to use rnf as that would force too
+      -- much evaluation in single-threaded cases, and in
+      -- multi-threaded case the weak head normal form is enough to
+      -- spark the evaluation
+      tables = parMap rwhnf (checkInstanceMove nodes_idx disk_moves
+                             inst_moves ini_tbl)
+               victims
+      -- iterate over all instances, computing the best move
+      best_tbl = foldl' compareTables ini_tbl tables
+      Table _ _ _ best_plc = best_tbl
+  in if length best_plc == length ini_plc
         then ini_tbl -- no advancement
         else best_tbl
  
@@ -564,9 +614,9 @@ doNextBalance :: Table     -- ^ The starting table
                -> Score     -- ^ Score at which to stop
                -> Bool      -- ^ The resulting table and commands
  doNextBalance ini_tbl max_rounds min_score =
-    let Table _ _ ini_cv ini_plc = ini_tbl
-        ini_plc_len = length ini_plc
-    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
+  let Table _ _ ini_cv ini_plc = ini_tbl
+      ini_plc_len = length ini_plc
+  in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
  
  -- | Run a balance move.
  tryBalance :: Table       -- ^ The starting table
@@ -579,16 +629,15 @@ tryBalance :: Table       -- ^ The starting table
  tryBalance ini_tbl disk_moves inst_moves evac_mode mg_limit min_gain =
      let Table ini_nl ini_il ini_cv _ = ini_tbl
          all_inst = Container.elems ini_il
+        all_nodes = Container.elems ini_nl
+        (offline_nodes, online_nodes) = partition Node.offline all_nodes
          all_inst' = if evac_mode
-                    then let bad_nodes = map Node.idx . filter Node.offline $
-                                         Container.elems ini_nl
-                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
-                                          Instance.pNode e `elem` bad_nodes)
-                            all_inst
-                    else all_inst
+                      then let bad_nodes = map Node.idx offline_nodes
+                           in filter (any (`elem` bad_nodes) .
+                                          Instance.allNodes) all_inst
+                      else all_inst
          reloc_inst = filter Instance.movable all_inst'
-        node_idx = map Node.idx . filter (not . Node.offline) $
-                   Container.elems ini_nl
+        node_idx = map Node.idx online_nodes
          fin_tbl = checkMove node_idx disk_moves inst_moves ini_tbl reloc_inst
          (Table _ _ fin_cv _) = fin_tbl
      in
@@ -604,50 +653,61 @@ collapseFailures flst =
      map (\k -> (k, foldl' (\a e -> if e == k then a + 1 else a) 0 flst))
              [minBound..maxBound]
  
+-- | Compares two Maybe AllocElement and chooses the besst score.
+bestAllocElement :: Maybe Node.AllocElement
+                 -> Maybe Node.AllocElement
+                 -> Maybe Node.AllocElement
+bestAllocElement a Nothing = a
+bestAllocElement Nothing b = b
+bestAllocElement a@(Just (_, _, _, ascore)) b@(Just (_, _, _, bscore)) =
+  if ascore < bscore then a else b
+
  -- | Update current Allocation solution and failure stats with new
  -- elements.
  concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
-concatAllocs as (OpFail reason) = as { asFailures = reason : asFailures as }
-
-concatAllocs as (OpGood ns@(_, _, _, nscore)) =
-    let -- Choose the old or new solution, based on the cluster score
-        cntok = asAllocs as
-        osols = asSolutions as
-        nsols = case osols of
-                  [] -> [ns]
-                  (_, _, _, oscore):[] ->
-                      if oscore < nscore
-                      then osols
-                      else [ns]
-                  -- FIXME: here we simply concat to lists with more
-                  -- than one element; we should instead abort, since
-                  -- this is not a valid usage of this function
-                  xs -> ns:xs
-        nsuc = cntok + 1
+concatAllocs as (Bad reason) = as { asFailures = reason : asFailures as }
+
+concatAllocs as (Ok ns) =
+  let -- Choose the old or new solution, based on the cluster score
+    cntok = asAllocs as
+    osols = asSolution as
+    nsols = bestAllocElement osols (Just ns)
+    nsuc = cntok + 1
      -- Note: we force evaluation of nsols here in order to keep the
      -- memory profile low - we know that we will need nsols for sure
      -- in the next cycle, so we force evaluation of nsols, since the
      -- foldl' in the caller will only evaluate the tuple, but not the
      -- elements of the tuple
-    in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolutions = nsols }
+  in nsols `seq` nsuc `seq` as { asAllocs = nsuc, asSolution = nsols }
+
+-- | Sums two 'AllocSolution' structures.
+sumAllocs :: AllocSolution -> AllocSolution -> AllocSolution
+sumAllocs (AllocSolution aFails aAllocs aSols aLog)
+          (AllocSolution bFails bAllocs bSols bLog) =
+  -- note: we add b first, since usually it will be smaller; when
+  -- fold'ing, a will grow and grow whereas b is the per-group
+  -- result, hence smaller
+  let nFails  = bFails ++ aFails
+      nAllocs = aAllocs + bAllocs
+      nSols   = bestAllocElement aSols bSols
+      nLog    = bLog ++ aLog
+  in AllocSolution nFails nAllocs nSols nLog
  
  -- | Given a solution, generates a reasonable description for it.
  describeSolution :: AllocSolution -> String
  describeSolution as =
    let fcnt = asFailures as
-      sols = asSolutions as
+      sols = asSolution as
        freasons =
          intercalate ", " . map (\(a, b) -> printf "%s: %d" (show a) b) .
          filter ((> 0) . snd) . collapseFailures $ fcnt
-  in if null sols
-     then "No valid allocation solutions, failure reasons: " ++
-          (if null fcnt
-           then "unknown reasons"
-           else freasons)
-     else let (_, _, nodes, cv) = head sols
-          in printf ("score: %.8f, successes %d, failures %d (%s)" ++
-                     " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
-             (intercalate "/" . map Node.name $ nodes)
+  in case sols of
+     Nothing -> "No valid allocation solutions, failure reasons: " ++
+                (if null fcnt then "unknown reasons" else freasons)
+     Just (_, _, nodes, cv) ->
+         printf ("score: %.8f, successes %d, failures %d (%s)" ++
+                 " for node(s) %s") cv (asAllocs as) (length fcnt) freasons
+               (intercalate "/" . map Node.name $ nodes)
  
  -- | Annotates a solution with the appropriate string.
  annotateSolution :: AllocSolution -> AllocSolution
@@ -659,7 +719,7 @@ annotateSolution as = as { asLog = describeSolution as : asLog as }
  -- for proper jobset execution, we should reverse all lists.
  reverseEvacSolution :: EvacSolution -> EvacSolution
  reverseEvacSolution (EvacSolution f m o) =
-    EvacSolution (reverse f) (reverse m) (reverse o)
+  EvacSolution (reverse f) (reverse m) (reverse o)
  
  -- | Generate the valid node allocation singles or pairs for a new instance.
  genAllocNodes :: Group.List        -- ^ Group list
@@ -669,18 +729,20 @@ genAllocNodes :: Group.List        -- ^ Group list
                                     -- unallocable nodes
                -> Result AllocNodes -- ^ The (monadic) result
  genAllocNodes gl nl count drop_unalloc =
-    let filter_fn = if drop_unalloc
+  let filter_fn = if drop_unalloc
                      then filter (Group.isAllocable .
                                   flip Container.find gl . Node.group)
                      else id
-        all_nodes = filter_fn $ getOnline nl
-        all_pairs = liftM2 (,) all_nodes all_nodes
-        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y &&
-                                      Node.group x == Node.group y) all_pairs
-    in case count of
-         1 -> Ok (Left (map Node.idx all_nodes))
-         2 -> Ok (Right (map (\(p, s) -> (Node.idx p, Node.idx s)) ok_pairs))
-         _ -> Bad "Unsupported number of nodes, only one or two  supported"
+      all_nodes = filter_fn $ getOnline nl
+      all_pairs = [(Node.idx p,
+                    [Node.idx s | s <- all_nodes,
+                                       Node.idx p /= Node.idx s,
+                                       Node.group p == Node.group s]) |
+                   p <- all_nodes]
+  in case count of
+       1 -> Ok (Left (map Node.idx all_nodes))
+       2 -> Ok (Right (filter (not . null . snd) all_pairs))
+       _ -> Bad "Unsupported number of nodes, only one or two  supported"
  
  -- | Try to allocate an instance on the cluster.
  tryAlloc :: (Monad m) =>
@@ -689,22 +751,22 @@ tryAlloc :: (Monad m) =>
           -> Instance.Instance -- ^ The instance to allocate
           -> AllocNodes        -- ^ The allocation targets
           -> m AllocSolution   -- ^ Possible solution list
+tryAlloc _  _ _    (Right []) = fail "Not enough online nodes"
  tryAlloc nl _ inst (Right ok_pairs) =
-    let sols = foldl' (\cstate (p, s) ->
-                           concatAllocs cstate $ allocateOnPair nl inst p s
-                      ) emptyAllocSolution ok_pairs
-
-    in if null ok_pairs -- means we have just one node
-       then fail "Not enough online nodes"
-       else return $ annotateSolution sols
-
+  let psols = parMap rwhnf (\(p, ss) ->
+                              foldl' (\cstate ->
+                                        concatAllocs cstate .
+                                        allocateOnPair nl inst p)
+                              emptyAllocSolution ss) ok_pairs
+      sols = foldl' sumAllocs emptyAllocSolution psols
+  in return $ annotateSolution sols
+
+tryAlloc _  _ _    (Left []) = fail "No online nodes"
  tryAlloc nl _ inst (Left all_nodes) =
-    let sols = foldl' (\cstate ->
-                           concatAllocs cstate . allocateOnSingle nl inst
-                      ) emptyAllocSolution all_nodes
-    in if null all_nodes
-       then fail "No online nodes"
-       else return $ annotateSolution sols
+  let sols = foldl' (\cstate ->
+                       concatAllocs cstate . allocateOnSingle nl inst
+                    ) emptyAllocSolution all_nodes
+  in return $ annotateSolution sols
  
  -- | Given a group/result, describe it as a nice (list of) messages.
  solutionDescription :: Group.List -> (Gdx, Result AllocSolution) -> [String]
@@ -714,7 +776,7 @@ solutionDescription gl (groupId, result) =
      Bad message -> [printf "Group %s: error %s" gname message]
    where grp = Container.find groupId gl
          gname = Group.name grp
-        pol = apolToString (Group.allocPolicy grp)
+        pol = allocPolicyToRaw (Group.allocPolicy grp)
  
  -- | From a list of possibly bad and possibly empty solutions, filter
  -- only the groups with a valid result. Note that the result will be
@@ -723,23 +785,23 @@ filterMGResults :: Group.List
                  -> [(Gdx, Result AllocSolution)]
                  -> [(Gdx, AllocSolution)]
  filterMGResults gl = foldl' fn []
-    where unallocable = not . Group.isAllocable . flip Container.find gl
-          fn accu (gdx, rasol) =
-              case rasol of
-                Bad _ -> accu
-                Ok sol | null (asSolutions sol) -> accu
-                       | unallocable gdx -> accu
-                       | otherwise -> (gdx, sol):accu
+  where unallocable = not . Group.isAllocable . flip Container.find gl
+        fn accu (gdx, rasol) =
+          case rasol of
+            Bad _ -> accu
+            Ok sol | isNothing (asSolution sol) -> accu
+                   | unallocable gdx -> accu
+                   | otherwise -> (gdx, sol):accu
  
  -- | Sort multigroup results based on policy and score.
  sortMGResults :: Group.List
               -> [(Gdx, AllocSolution)]
               -> [(Gdx, AllocSolution)]
  sortMGResults gl sols =
-    let extractScore (_, _, _, x) = x
-        solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
-                               (extractScore . head . asSolutions) sol)
-    in sortBy (comparing solScore) sols
+  let extractScore (_, _, _, x) = x
+      solScore (gdx, sol) = (Group.allocPolicy (Container.find gdx gl),
+                             (extractScore . fromJust . asSolution) sol)
+  in sortBy (comparing solScore) sols
  
  -- | Finds the best group for an instance on a multi-group cluster.
  --
@@ -766,9 +828,13 @@ findBestAllocGroup mggl mgnl mgil allowed_gdxs inst cnt =
        goodSols = filterMGResults mggl sols
        sortedSols = sortMGResults mggl goodSols
    in if null sortedSols
-     then Bad $ intercalate ", " all_msgs
-     else let (final_group, final_sol) = head sortedSols
-          in return (final_group, final_sol, all_msgs)
+       then if null groups'
+              then Bad $ "no groups for evacuation: allowed groups was" ++
+                     show allowed_gdxs ++ ", all groups: " ++
+                     show (map fst groups)
+              else Bad $ intercalate ", " all_msgs
+       else let (final_group, final_sol) = head sortedSols
+            in return (final_group, final_sol, all_msgs)
  
  -- | Try to allocate an instance on a multi-group cluster.
  tryMGAlloc :: Group.List           -- ^ The group list
@@ -784,72 +850,35 @@ tryMGAlloc mggl mgnl mgil inst cnt = do
        selmsg = "Selected group: " ++ group_name
    return $ solution { asLog = selmsg:all_msgs }
  
--- | Try to relocate an instance on the cluster.
-tryReloc :: (Monad m) =>
-            Node.List       -- ^ The node list
-         -> Instance.List   -- ^ The instance list
-         -> Idx             -- ^ The index of the instance to move
-         -> Int             -- ^ The number of nodes required
-         -> [Ndx]           -- ^ Nodes which should not be used
-         -> m AllocSolution -- ^ Solution list
-tryReloc nl il xid 1 ex_idx =
-    let all_nodes = getOnline nl
-        inst = Container.find xid il
-        ex_idx' = Instance.pNode inst:ex_idx
-        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
-        valid_idxes = map Node.idx valid_nodes
-        sols1 = foldl' (\cstate x ->
-                            let em = do
-                                  (mnl, i, _, _) <-
-                                      applyMove nl inst (ReplaceSecondary x)
-                                  return (mnl, i, [Container.find x mnl],
-                                          compCV mnl)
-                            in concatAllocs cstate em
-                       ) emptyAllocSolution valid_idxes
-    in return sols1
-
-tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
-                                \destinations required (" ++ show reqn ++
-                                                  "), only one supported"
-
--- | Change an instance's secondary node.
-evacInstance :: (Monad m) =>
-                [Ndx]                      -- ^ Excluded nodes
-             -> Instance.List              -- ^ The current instance list
-             -> (Node.List, AllocSolution) -- ^ The current state
-             -> Idx                        -- ^ The instance to evacuate
-             -> m (Node.List, AllocSolution)
-evacInstance ex_ndx il (nl, old_as) idx = do
-  -- FIXME: hardcoded one node here
-
-  -- Longer explanation: evacuation is currently hardcoded to DRBD
-  -- instances (which have one secondary); hence, even if the
-  -- IAllocator protocol can request N nodes for an instance, and all
-  -- the message parsing/loading pass this, this implementation only
-  -- supports one; this situation needs to be revisited if we ever
-  -- support more than one secondary, or if we change the storage
-  -- model
-  new_as <- tryReloc nl il idx 1 ex_ndx
-  case asSolutions new_as of
-    -- an individual relocation succeeded, we kind of compose the data
-    -- from the two solutions
-    csol@(nl', _, _, _):_ ->
-        return (nl', new_as { asSolutions = csol:asSolutions old_as })
-    -- this relocation failed, so we fail the entire evac
-    _ -> fail $ "Can't evacuate instance " ++
-         Instance.name (Container.find idx il) ++
-             ": " ++ describeSolution new_as
-
--- | Try to evacuate a list of nodes.
-tryEvac :: (Monad m) =>
-            Node.List       -- ^ The node list
-         -> Instance.List   -- ^ The instance list
-         -> [Idx]           -- ^ Instances to be evacuated
-         -> [Ndx]           -- ^ Restricted nodes (the ones being evacuated)
-         -> m AllocSolution -- ^ Solution list
-tryEvac nl il idxs ex_ndx = do
-  (_, sol) <- foldM (evacInstance ex_ndx il) (nl, emptyAllocSolution) idxs
-  return sol
+-- | Calculate the new instance list after allocation solution.
+updateIl :: Instance.List           -- ^ The original instance list
+         -> Maybe Node.AllocElement -- ^ The result of the allocation attempt
+         -> Instance.List           -- ^ The updated instance list
+updateIl il Nothing = il
+updateIl il (Just (_, xi, _, _)) = Container.add (Container.size il) xi il
+
+-- | Extract the the new node list from the allocation solution.
+extractNl :: Node.List               -- ^ The original node list
+          -> Maybe Node.AllocElement -- ^ The result of the allocation attempt
+          -> Node.List               -- ^ The new node list
+extractNl nl Nothing = nl
+extractNl _ (Just (xnl, _, _, _)) = xnl
+
+-- | Try to allocate a list of instances on a multi-group cluster.
+allocList :: Group.List                  -- ^ The group list
+          -> Node.List                   -- ^ The node list
+          -> Instance.List               -- ^ The instance list
+          -> [(Instance.Instance, Int)]  -- ^ The instance to allocate
+          -> AllocSolutionList           -- ^ Possible solution list
+          -> Result (Node.List, Instance.List,
+                     AllocSolutionList)  -- ^ The final solution list
+allocList _  nl il [] result = Ok (nl, il, result)
+allocList gl nl il ((xi, xicnt):xies) result = do
+  ares <- tryMGAlloc gl nl il xi xicnt
+  let sol = asSolution ares
+      nl' = extractNl nl sol
+      il' = updateIl il sol
+  allocList gl nl' il' xies ((xi, ares):result)
  
  -- | Function which fails if the requested mode is change secondary.
  --
@@ -859,7 +888,7 @@ tryEvac nl il idxs ex_ndx = do
  -- this function, whatever mode we have is just a primary change.
  failOnSecondaryChange :: (Monad m) => EvacMode -> DiskTemplate -> m ()
  failOnSecondaryChange ChangeSecondary dt =
-    fail $ "Instances with disk template '" ++ dtToString dt ++
+  fail $ "Instances with disk template '" ++ diskTemplateToRaw dt ++
           "' can't execute change secondary"
  failOnSecondaryChange _ _ = return ()
  
@@ -877,10 +906,11 @@ nodeEvacInstance :: Node.List         -- ^ The node list (cluster-wide)
                   -> [Ndx]             -- ^ The list of available nodes
                                        -- for allocation
                   -> Result (Node.List, Instance.List, [OpCodes.OpCode])
-nodeEvacInstance _ _ mode (Instance.Instance
-                           {Instance.diskTemplate = dt@DTDiskless}) _ _ =
-                  failOnSecondaryChange mode dt >>
-                  fail "Diskless relocations not implemented yet"
+nodeEvacInstance nl il mode inst@(Instance.Instance
+                                  {Instance.diskTemplate = dt@DTDiskless})
+                 gdx avail_nodes =
+                   failOnSecondaryChange mode dt >>
+                   evacOneNodeOnly nl il inst gdx avail_nodes
  
  nodeEvacInstance _ _ _ (Instance.Instance
                          {Instance.diskTemplate = DTPlain}) _ _ =
@@ -890,15 +920,23 @@ nodeEvacInstance _ _ _ (Instance.Instance
                          {Instance.diskTemplate = DTFile}) _ _ =
                    fail "Instances of type file cannot be relocated"
  
-nodeEvacInstance _ _ mode  (Instance.Instance
-                            {Instance.diskTemplate = dt@DTSharedFile}) _ _ =
-                  failOnSecondaryChange mode dt >>
-                  fail "Shared file relocations not implemented yet"
+nodeEvacInstance nl il mode inst@(Instance.Instance
+                                  {Instance.diskTemplate = dt@DTSharedFile})
+                 gdx avail_nodes =
+                   failOnSecondaryChange mode dt >>
+                   evacOneNodeOnly nl il inst gdx avail_nodes
  
-nodeEvacInstance _ _ mode (Instance.Instance
-                           {Instance.diskTemplate = dt@DTBlock}) _ _ =
-                  failOnSecondaryChange mode dt >>
-                  fail "Block device relocations not implemented yet"
+nodeEvacInstance nl il mode inst@(Instance.Instance
+                                  {Instance.diskTemplate = dt@DTBlock})
+                 gdx avail_nodes =
+                   failOnSecondaryChange mode dt >>
+                   evacOneNodeOnly nl il inst gdx avail_nodes
+
+nodeEvacInstance nl il mode inst@(Instance.Instance
+                                  {Instance.diskTemplate = dt@DTRbd})
+                 gdx avail_nodes =
+                   failOnSecondaryChange mode dt >>
+                   evacOneNodeOnly nl il inst gdx avail_nodes
  
  nodeEvacInstance nl il ChangePrimary
                   inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
@@ -913,96 +951,170 @@ nodeEvacInstance nl il ChangePrimary
  nodeEvacInstance nl il ChangeSecondary
                   inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
                   gdx avail_nodes =
-  do
-    (nl', inst', _, ndx) <- annotateResult "Can't find any good node" $
-                            eitherToResult $
-                            foldl' (evacDrbdSecondaryInner nl inst gdx)
-                            (Left "no nodes available") avail_nodes
-    let idx = Instance.idx inst
-        il' = Container.add idx inst' il
-        ops = iMoveToJob nl' il' idx (ReplaceSecondary ndx)
-    return (nl', il', ops)
+  evacOneNodeOnly nl il inst gdx avail_nodes
  
+-- The algorithm for ChangeAll is as follows:
+--
+-- * generate all (primary, secondary) node pairs for the target groups
+-- * for each pair, execute the needed moves (r:s, f, r:s) and compute
+--   the final node list state and group score
+-- * select the best choice via a foldl that uses the same Either
+--   String solution as the ChangeSecondary mode
  nodeEvacInstance nl il ChangeAll
                   inst@(Instance.Instance {Instance.diskTemplate = DTDrbd8})
                   gdx avail_nodes =
    do
-    let primary = Container.find (Instance.pNode inst) nl
-        idx = Instance.idx inst
-        no_nodes = Left "no nodes available"
-    -- if the primary is offline, then we first failover
-    (nl1, inst1, ops1) <-
-        if Node.offline primary
-        then do
-          (nl', inst', _, _) <-
-              annotateResult "Failing over to the secondary" $
-              opToResult $ applyMove nl inst Failover
-          return (nl', inst', [Failover])
-        else return (nl, inst, [])
-    -- we now need to execute a replace secondary to the future
-    -- primary node
-    (nl2, inst2, _, new_pdx) <- annotateResult "Searching for a new primary" $
-                                eitherToResult $
-                                foldl' (evacDrbdSecondaryInner nl1 inst1 gdx)
-                                no_nodes avail_nodes
-    let ops2 = ReplaceSecondary new_pdx:ops1
-    -- since we chose the new primary, we remove it from the list of
-    -- available nodes
-    let avail_nodes_sec = new_pdx `delete` avail_nodes
-    -- we now execute another failover, the primary stays fixed now
-    (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" $
-                          opToResult $ applyMove nl2 inst2 Failover
-    let ops3 = Failover:ops2
-    -- and finally another replace secondary, to the final secondary
-    (nl4, inst4, _, new_sdx) <-
-        annotateResult "Searching for a new secondary" $
+    let no_nodes = Left "no nodes available"
+        node_pairs = [(p,s) | p <- avail_nodes, s <- avail_nodes, p /= s]
+    (nl', il', ops, _) <-
+        annotateResult "Can't find any good nodes for relocation" .
          eitherToResult $
-        foldl' (evacDrbdSecondaryInner nl3 inst3 gdx) no_nodes avail_nodes_sec
-    let ops4 = ReplaceSecondary new_sdx:ops3
-        il' = Container.add idx inst4 il
-        ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
-    return (nl4, il', ops)
+        foldl'
+        (\accu nodes -> case evacDrbdAllInner nl il inst gdx nodes of
+                          Bad msg ->
+                              case accu of
+                                Right _ -> accu
+                                -- we don't need more details (which
+                                -- nodes, etc.) as we only selected
+                                -- this group if we can allocate on
+                                -- it, hence failures will not
+                                -- propagate out of this fold loop
+                                Left _ -> Left $ "Allocation failed: " ++ msg
+                          Ok result@(_, _, _, new_cv) ->
+                              let new_accu = Right result in
+                              case accu of
+                                Left _ -> new_accu
+                                Right (_, _, _, old_cv) ->
+                                    if old_cv < new_cv
+                                    then accu
+                                    else new_accu
+        ) no_nodes node_pairs
  
--- | Inner fold function for changing secondary of a DRBD instance.
+    return (nl', il', ops)
+
+-- | Generic function for changing one node of an instance.
+--
+-- This is similar to 'nodeEvacInstance' but will be used in a few of
+-- its sub-patterns. It folds the inner function 'evacOneNodeInner'
+-- over the list of available nodes, which results in the best choice
+-- for relocation.
+evacOneNodeOnly :: Node.List         -- ^ The node list (cluster-wide)
+                -> Instance.List     -- ^ Instance list (cluster-wide)
+                -> Instance.Instance -- ^ The instance to be evacuated
+                -> Gdx               -- ^ The group we're targetting
+                -> [Ndx]             -- ^ The list of available nodes
+                                      -- for allocation
+                -> Result (Node.List, Instance.List, [OpCodes.OpCode])
+evacOneNodeOnly nl il inst gdx avail_nodes = do
+  op_fn <- case Instance.mirrorType inst of
+             MirrorNone -> Bad "Can't relocate/evacuate non-mirrored instances"
+             MirrorInternal -> Ok ReplaceSecondary
+             MirrorExternal -> Ok FailoverToAny
+  (nl', inst', _, ndx) <- annotateResult "Can't find any good node" .
+                          eitherToResult $
+                          foldl' (evacOneNodeInner nl inst gdx op_fn)
+                          (Left "no nodes available") avail_nodes
+  let idx = Instance.idx inst
+      il' = Container.add idx inst' il
+      ops = iMoveToJob nl' il' idx (op_fn ndx)
+  return (nl', il', ops)
+
+-- | Inner fold function for changing one node of an instance.
+--
+-- Depending on the instance disk template, this will either change
+-- the secondary (for DRBD) or the primary node (for shared
+-- storage). However, the operation is generic otherwise.
  --
--- The "running" solution is either a @Left String@, which means we
+-- The running solution is either a @Left String@, which means we
  -- don't have yet a working solution, or a @Right (...)@, which
  -- represents a valid solution; it holds the modified node list, the
  -- modified instance (after evacuation), the score of that solution,
  -- and the new secondary node index.
-evacDrbdSecondaryInner :: Node.List -- ^ Cluster node list
-                       -> Instance.Instance -- ^ Instance being evacuated
-                       -> Gdx -- ^ The group index of the instance
-                       -> Either String ( Node.List
-                                        , Instance.Instance
-                                        , Score
-                                        , Ndx)  -- ^ Current best solution
-                       -> Ndx  -- ^ Node we're evaluating as new secondary
-                       -> Either String ( Node.List
-                                        , Instance.Instance
-                                        , Score
-                                        , Ndx) -- ^ New best solution
-evacDrbdSecondaryInner nl inst gdx accu ndx =
-    case applyMove nl inst (ReplaceSecondary ndx) of
-      OpFail fm ->
-          case accu of
-            Right _ -> accu
-            Left _ -> Left $ "Node " ++ Container.nameOf nl ndx ++
-                      " failed: " ++ show fm
-      OpGood (nl', inst', _, _) ->
-          let nodes = Container.elems nl'
-              -- The fromJust below is ugly (it can fail nastily), but
-              -- at this point we should have any internal mismatches,
-              -- and adding a monad here would be quite involved
-              grpnodes = fromJust (gdx `lookup` (Node.computeGroups nodes))
-              new_cv = compCVNodes grpnodes
-              new_accu = Right (nl', inst', new_cv, ndx)
-          in case accu of
-               Left _ -> new_accu
-               Right (_, _, old_cv, _) ->
-                   if old_cv < new_cv
-                   then accu
-                   else new_accu
+evacOneNodeInner :: Node.List         -- ^ Cluster node list
+                 -> Instance.Instance -- ^ Instance being evacuated
+                 -> Gdx               -- ^ The group index of the instance
+                 -> (Ndx -> IMove)    -- ^ Operation constructor
+                 -> EvacInnerState    -- ^ Current best solution
+                 -> Ndx               -- ^ Node we're evaluating as target
+                 -> EvacInnerState    -- ^ New best solution
+evacOneNodeInner nl inst gdx op_fn accu ndx =
+  case applyMove nl inst (op_fn ndx) of
+    Bad fm -> let fail_msg = "Node " ++ Container.nameOf nl ndx ++
+                             " failed: " ++ show fm
+              in either (const $ Left fail_msg) (const accu) accu
+    Ok (nl', inst', _, _) ->
+      let nodes = Container.elems nl'
+          -- The fromJust below is ugly (it can fail nastily), but
+          -- at this point we should have any internal mismatches,
+          -- and adding a monad here would be quite involved
+          grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
+          new_cv = compCVNodes grpnodes
+          new_accu = Right (nl', inst', new_cv, ndx)
+      in case accu of
+           Left _ -> new_accu
+           Right (_, _, old_cv, _) ->
+             if old_cv < new_cv
+               then accu
+               else new_accu
+
+-- | Compute result of changing all nodes of a DRBD instance.
+--
+-- Given the target primary and secondary node (which might be in a
+-- different group or not), this function will 'execute' all the
+-- required steps and assuming all operations succceed, will return
+-- the modified node and instance lists, the opcodes needed for this
+-- and the new group score.
+evacDrbdAllInner :: Node.List         -- ^ Cluster node list
+                 -> Instance.List     -- ^ Cluster instance list
+                 -> Instance.Instance -- ^ The instance to be moved
+                 -> Gdx               -- ^ The target group index
+                                      -- (which can differ from the
+                                      -- current group of the
+                                      -- instance)
+                 -> (Ndx, Ndx)        -- ^ Tuple of new
+                                      -- primary\/secondary nodes
+                 -> Result (Node.List, Instance.List, [OpCodes.OpCode], Score)
+evacDrbdAllInner nl il inst gdx (t_pdx, t_sdx) = do
+  let primary = Container.find (Instance.pNode inst) nl
+      idx = Instance.idx inst
+  -- if the primary is offline, then we first failover
+  (nl1, inst1, ops1) <-
+    if Node.offline primary
+      then do
+        (nl', inst', _, _) <-
+          annotateResult "Failing over to the secondary" .
+          opToResult $ applyMove nl inst Failover
+        return (nl', inst', [Failover])
+      else return (nl, inst, [])
+  let (o1, o2, o3) = (ReplaceSecondary t_pdx,
+                      Failover,
+                      ReplaceSecondary t_sdx)
+  -- we now need to execute a replace secondary to the future
+  -- primary node
+  (nl2, inst2, _, _) <-
+    annotateResult "Changing secondary to new primary" .
+    opToResult $
+    applyMove nl1 inst1 o1
+  let ops2 = o1:ops1
+  -- we now execute another failover, the primary stays fixed now
+  (nl3, inst3, _, _) <- annotateResult "Failing over to new primary" .
+                        opToResult $ applyMove nl2 inst2 o2
+  let ops3 = o2:ops2
+  -- and finally another replace secondary, to the final secondary
+  (nl4, inst4, _, _) <-
+    annotateResult "Changing secondary to final secondary" .
+    opToResult $
+    applyMove nl3 inst3 o3
+  let ops4 = o3:ops3
+      il' = Container.add idx inst4 il
+      ops = concatMap (iMoveToJob nl4 il' idx) $ reverse ops4
+  let nodes = Container.elems nl4
+      -- The fromJust below is ugly (it can fail nastily), but
+      -- at this point we should have any internal mismatches,
+      -- and adding a monad here would be quite involved
+      grpnodes = fromJust (gdx `lookup` Node.computeGroups nodes)
+      new_cv = compCVNodes grpnodes
+  return (nl4, il', ops, new_cv)
  
  -- | Computes the nodes in a given group which are available for
  -- allocation.
@@ -1024,14 +1136,14 @@ updateEvacSolution :: (Node.List, Instance.List, EvacSolution)
                     -> Result (Node.List, Instance.List, [OpCodes.OpCode])
                     -> (Node.List, Instance.List, EvacSolution)
  updateEvacSolution (nl, il, es) idx (Bad msg) =
-    (nl, il, es { esFailed = (idx, msg):esFailed es})
+  (nl, il, es { esFailed = (idx, msg):esFailed es})
  updateEvacSolution (_, _, es) idx (Ok (nl, il, opcodes)) =
-    (nl, il, es { esMoved = new_elem:esMoved es
-                , esOpCodes = [opcodes]:esOpCodes es })
-     where inst = Container.find idx il
-           new_elem = (idx,
-                       instancePriGroup nl inst,
-                       Instance.allNodes inst)
+  (nl, il, es { esMoved = new_elem:esMoved es
+              , esOpCodes = opcodes:esOpCodes es })
+    where inst = Container.find idx il
+          new_elem = (idx,
+                      instancePriGroup nl inst,
+                      Instance.allNodes inst)
  
  -- | Node-evacuation IAllocator mode main function.
  tryNodeEvac :: Group.List    -- ^ The cluster groups
@@ -1041,23 +1153,24 @@ tryNodeEvac :: Group.List    -- ^ The cluster groups
              -> [Idx]         -- ^ List of instance (indices) to be evacuated
              -> Result (Node.List, Instance.List, EvacSolution)
  tryNodeEvac _ ini_nl ini_il mode idxs =
-    let evac_ndx = nodesToEvacuate ini_il mode idxs
-        offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
-        excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline
-        group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
-                                             (Container.elems nl))) $
-                      splitCluster ini_nl ini_il
-        (fin_nl, fin_il, esol) =
-            foldl' (\state@(nl, il, _) inst ->
-                        let gdx = instancePriGroup nl inst in
-                        updateEvacSolution state (Instance.idx inst) $
-                        availableGroupNodes group_ndx
-                          excl_ndx gdx >>=
-                        nodeEvacInstance nl il mode inst gdx
-                   )
-            (ini_nl, ini_il, emptyEvacSolution)
-            (map (`Container.find` ini_il) idxs)
-    in return (fin_nl, fin_il, reverseEvacSolution esol)
+  let evac_ndx = nodesToEvacuate ini_il mode idxs
+      offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
+      excl_ndx = foldl' (flip IntSet.insert) evac_ndx offline
+      group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
+                                           (Container.elems nl))) $
+                  splitCluster ini_nl ini_il
+      (fin_nl, fin_il, esol) =
+        foldl' (\state@(nl, il, _) inst ->
+                  let gdx = instancePriGroup nl inst
+                      pdx = Instance.pNode inst in
+                  updateEvacSolution state (Instance.idx inst) $
+                  availableGroupNodes group_ndx
+                    (IntSet.insert pdx excl_ndx) gdx >>=
+                      nodeEvacInstance nl il mode inst gdx
+               )
+        (ini_nl, ini_il, emptyEvacSolution)
+        (map (`Container.find` ini_il) idxs)
+  in return (fin_nl, fin_il, reverseEvacSolution esol)
  
  -- | Change-group IAllocator mode main function.
  --
@@ -1086,84 +1199,74 @@ tryChangeGroup :: Group.List    -- ^ The cluster groups
                 -> [Idx]         -- ^ List of instance (indices) to be evacuated
                 -> Result (Node.List, Instance.List, EvacSolution)
  tryChangeGroup gl ini_nl ini_il gdxs idxs =
-    let evac_gdxs = nub $ map (instancePriGroup ini_nl .
-                               flip Container.find ini_il) idxs
-        target_gdxs = (if null gdxs
+  let evac_gdxs = nub $ map (instancePriGroup ini_nl .
+                             flip Container.find ini_il) idxs
+      target_gdxs = (if null gdxs
                         then Container.keys gl
                         else gdxs) \\ evac_gdxs
-        offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
-        excl_ndx = foldl' (flip IntSet.insert) IntSet.empty offline
-        group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
-                                             (Container.elems nl))) $
-                      splitCluster ini_nl ini_il
-        (fin_nl, fin_il, esol) =
-            foldl' (\state@(nl, il, _) inst ->
-                        let solution = do
-                              let ncnt = Instance.requiredNodes $
-                                         Instance.diskTemplate inst
-                              (gdx, _, _) <- findBestAllocGroup gl nl il
-                                             (Just target_gdxs) inst ncnt
-                              av_nodes <- availableGroupNodes group_ndx
-                                          excl_ndx gdx
-                              nodeEvacInstance nl il ChangeAll inst
-                                       gdx av_nodes
-                        in updateEvacSolution state
-                               (Instance.idx inst) solution
-                   )
-            (ini_nl, ini_il, emptyEvacSolution)
-            (map (`Container.find` ini_il) idxs)
-    in return (fin_nl, fin_il, reverseEvacSolution esol)
-
--- | Recursively place instances on the cluster until we're out of space.
-iterateAlloc :: Node.List
-             -> Instance.List
-             -> Maybe Int
-             -> Instance.Instance
-             -> AllocNodes
-             -> [Instance.Instance]
-             -> [CStats]
-             -> Result AllocResult
+      offline = map Node.idx . filter Node.offline $ Container.elems ini_nl
+      excl_ndx = foldl' (flip IntSet.insert) IntSet.empty offline
+      group_ndx = map (\(gdx, (nl, _)) -> (gdx, map Node.idx
+                                           (Container.elems nl))) $
+                  splitCluster ini_nl ini_il
+      (fin_nl, fin_il, esol) =
+        foldl' (\state@(nl, il, _) inst ->
+                  let solution = do
+                        let ncnt = Instance.requiredNodes $
+                                   Instance.diskTemplate inst
+                        (gdx, _, _) <- findBestAllocGroup gl nl il
+                                       (Just target_gdxs) inst ncnt
+                        av_nodes <- availableGroupNodes group_ndx
+                                    excl_ndx gdx
+                        nodeEvacInstance nl il ChangeAll inst gdx av_nodes
+                  in updateEvacSolution state (Instance.idx inst) solution
+               )
+        (ini_nl, ini_il, emptyEvacSolution)
+        (map (`Container.find` ini_il) idxs)
+  in return (fin_nl, fin_il, reverseEvacSolution esol)
+
+-- | Standard-sized allocation method.
+--
+-- This places instances of the same size on the cluster until we're
+-- out of space. The result will be a list of identically-sized
+-- instances.
+iterateAlloc :: AllocMethod
  iterateAlloc nl il limit newinst allocnodes ixes cstats =
-      let depth = length ixes
-          newname = printf "new-%d" depth::String
-          newidx = length (Container.elems il) + depth
-          newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
-          newlimit = fmap (flip (-) 1) limit
-      in case tryAlloc nl il newi2 allocnodes of
-           Bad s -> Bad s
-           Ok (AllocSolution { asFailures = errs, asSolutions = sols3 }) ->
-               let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in
-               case sols3 of
-                 [] -> newsol
-                 (xnl, xi, _, _):[] ->
-                     if limit == Just 0
-                     then newsol
-                     else iterateAlloc xnl (Container.add newidx xi il)
-                          newlimit newinst allocnodes (xi:ixes)
-                          (totalResources xnl:cstats)
-                 _ -> Bad "Internal error: multiple solutions for single\
-                          \ allocation"
-
--- | The core of the tiered allocation mode.
-tieredAlloc :: Node.List
-            -> Instance.List
-            -> Maybe Int
-            -> Instance.Instance
-            -> AllocNodes
-            -> [Instance.Instance]
-            -> [CStats]
-            -> Result AllocResult
+  let depth = length ixes
+      newname = printf "new-%d" depth::String
+      newidx = Container.size il
+      newi2 = Instance.setIdx (Instance.setName newinst newname) newidx
+      newlimit = fmap (flip (-) 1) limit
+  in case tryAlloc nl il newi2 allocnodes of
+       Bad s -> Bad s
+       Ok (AllocSolution { asFailures = errs, asSolution = sols3 }) ->
+         let newsol = Ok (collapseFailures errs, nl, il, ixes, cstats) in
+         case sols3 of
+           Nothing -> newsol
+           Just (xnl, xi, _, _) ->
+             if limit == Just 0
+               then newsol
+               else iterateAlloc xnl (Container.add newidx xi il)
+                      newlimit newinst allocnodes (xi:ixes)
+                      (totalResources xnl:cstats)
+
+-- | Tiered allocation method.
+--
+-- This places instances on the cluster, and decreases the spec until
+-- we can allocate again. The result will be a list of decreasing
+-- instance specs.
+tieredAlloc :: AllocMethod
  tieredAlloc nl il limit newinst allocnodes ixes cstats =
-    case iterateAlloc nl il limit newinst allocnodes ixes cstats of
-      Bad s -> Bad s
-      Ok (errs, nl', il', ixes', cstats') ->
-          let newsol = Ok (errs, nl', il', ixes', cstats')
-              ixes_cnt = length ixes'
-              (stop, newlimit) = case limit of
-                                   Nothing -> (False, Nothing)
-                                   Just n -> (n <= ixes_cnt,
-                                              Just (n - ixes_cnt)) in
-          if stop then newsol else
+  case iterateAlloc nl il limit newinst allocnodes ixes cstats of
+    Bad s -> Bad s
+    Ok (errs, nl', il', ixes', cstats') ->
+      let newsol = Ok (errs, nl', il', ixes', cstats')
+          ixes_cnt = length ixes'
+          (stop, newlimit) = case limit of
+                               Nothing -> (False, Nothing)
+                               Just n -> (n <= ixes_cnt,
+                                            Just (n - ixes_cnt)) in
+      if stop then newsol else
            case Instance.shrinkByType newinst . fst . last $
                 sortBy (comparing snd) errs of
              Bad _ -> newsol
@@ -1184,15 +1287,17 @@ computeMoves :: Instance.Instance -- ^ The instance to be moved
                  -- secondary, while the command list holds gnt-instance
                  -- commands (without that prefix), e.g \"@failover instance1@\"
  computeMoves i inam mv c d =
-    case mv of
-      Failover -> ("f", [mig])
-      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
-      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
-      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
-      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
-    where morf = if Instance.running i then "migrate" else "failover"
-          mig = printf "%s -f %s" morf inam::String
-          rep n = printf "replace-disks -n %s %s" n inam
+  case mv of
+    Failover -> ("f", [mig])
+    FailoverToAny _ -> (printf "fa:%s" c, [mig_any])
+    FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
+    ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
+    ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
+    ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
+  where morf = if Instance.isRunning i then "migrate" else "failover"
+        mig = printf "%s -f %s" morf inam::String
+        mig_any = printf "%s -f -n %s %s" morf c inam::String
+        rep n = printf "replace-disks -n %s %s" n inam::String
  
  -- | Converts a placement to string format.
  printSolutionLine :: Node.List     -- ^ The node list
@@ -1204,40 +1309,52 @@ printSolutionLine :: Node.List     -- ^ The node list
                                     -- the solution
                    -> (String, [String])
  printSolutionLine nl il nmlen imlen plc pos =
-    let
-        pmlen = (2*nmlen + 1)
-        (i, p, s, mv, c) = plc
-        inst = Container.find i il
-        inam = Instance.alias inst
-        npri = Node.alias $ Container.find p nl
-        nsec = Node.alias $ Container.find s nl
-        opri = Node.alias $ Container.find (Instance.pNode inst) nl
-        osec = Node.alias $ Container.find (Instance.sNode inst) nl
-        (moves, cmds) =  computeMoves inst inam mv npri nsec
-        ostr = printf "%s:%s" opri osec::String
-        nstr = printf "%s:%s" npri nsec::String
-    in
-      (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
-       pos imlen inam pmlen ostr
-       pmlen nstr c moves,
-       cmds)
+  let pmlen = (2*nmlen + 1)
+      (i, p, s, mv, c) = plc
+      old_sec = Instance.sNode inst
+      inst = Container.find i il
+      inam = Instance.alias inst
+      npri = Node.alias $ Container.find p nl
+      nsec = Node.alias $ Container.find s nl
+      opri = Node.alias $ Container.find (Instance.pNode inst) nl
+      osec = Node.alias $ Container.find old_sec nl
+      (moves, cmds) =  computeMoves inst inam mv npri nsec
+      -- FIXME: this should check instead/also the disk template
+      ostr = if old_sec == Node.noSecondary
+               then printf "%s" opri::String
+               else printf "%s:%s" opri osec::String
+      nstr = if s == Node.noSecondary
+               then printf "%s" npri::String
+               else printf "%s:%s" npri nsec::String
+  in (printf "  %3d. %-*s %-*s => %-*s %12.8f a=%s"
+      pos imlen inam pmlen ostr pmlen nstr c moves,
+      cmds)
  
  -- | Return the instance and involved nodes in an instance move.
-involvedNodes :: Instance.List -> Placement -> [Ndx]
+--
+-- Note that the output list length can vary, and is not required nor
+-- guaranteed to be of any specific length.
+involvedNodes :: Instance.List -- ^ Instance list, used for retrieving
+                               -- the instance from its index; note
+                               -- that this /must/ be the original
+                               -- instance list, so that we can
+                               -- retrieve the old nodes
+              -> Placement     -- ^ The placement we're investigating,
+                               -- containing the new nodes and
+                               -- instance index
+              -> [Ndx]         -- ^ Resulting list of node indices
  involvedNodes il plc =
-    let (i, np, ns, _, _) = plc
-        inst = Container.find i il
-        op = Instance.pNode inst
-        os = Instance.sNode inst
-    in nub [np, ns, op, os]
+  let (i, np, ns, _, _) = plc
+      inst = Container.find i il
+  in nub $ [np, ns] ++ Instance.allNodes inst
  
  -- | Inner function for splitJobs, that either appends the next job to
  -- the current jobset, or starts a new jobset.
  mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
  mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
  mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
-    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
-    | otherwise = ([n]:cjs, ndx)
+  | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
+  | otherwise = ([n]:cjs, ndx)
  
  -- | Break a list of moves into independent groups. Note that this
  -- will reverse the order of jobs.
@@ -1248,11 +1365,11 @@ splitJobs = fst . foldl mergeJobs ([], [])
  -- also beautify the display a little.
  formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
  formatJob jsn jsl (sn, (_, _, _, cmds)) =
-    let out =
-            printf "  echo job %d/%d" jsn sn:
-            printf "  check":
-            map ("  gnt-instance " ++) cmds
-    in if sn == 1
+  let out =
+        printf "  echo job %d/%d" jsn sn:
+        printf "  check":
+        map ("  gnt-instance " ++) cmds
+  in if sn == 1
         then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
         else out
  
@@ -1260,59 +1377,61 @@ formatJob jsn jsl (sn, (_, _, _, cmds)) =
  -- also beautify the display a little.
  formatCmds :: [JobSet] -> String
  formatCmds =
-    unlines .
-    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
-                             (zip [1..] js)) .
-    zip [1..]
+  unlines .
+  concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
+                           (zip [1..] js)) .
+  zip [1..]
  
  -- | Print the node list.
  printNodes :: Node.List -> [String] -> String
  printNodes nl fs =
-    let fields = case fs of
-          [] -> Node.defaultFields
-          "+":rest -> Node.defaultFields ++ rest
-          _ -> fs
-        snl = sortBy (comparing Node.idx) (Container.elems nl)
-        (header, isnum) = unzip $ map Node.showHeader fields
-    in unlines . map ((:) ' ' .  intercalate " ") $
-       formatTable (header:map (Node.list fields) snl) isnum
+  let fields = case fs of
+                 [] -> Node.defaultFields
+                 "+":rest -> Node.defaultFields ++ rest
+                 _ -> fs
+      snl = sortBy (comparing Node.idx) (Container.elems nl)
+      (header, isnum) = unzip $ map Node.showHeader fields
+  in printTable "" header (map (Node.list fields) snl) isnum
  
  -- | Print the instance list.
  printInsts :: Node.List -> Instance.List -> String
  printInsts nl il =
-    let sil = sortBy (comparing Instance.idx) (Container.elems il)
-        helper inst = [ if Instance.running inst then "R" else " "
-                      , Instance.name inst
-                      , Container.nameOf nl (Instance.pNode inst)
-                      , let sdx = Instance.sNode inst
-                        in if sdx == Node.noSecondary
+  let sil = sortBy (comparing Instance.idx) (Container.elems il)
+      helper inst = [ if Instance.isRunning inst then "R" else " "
+                    , Instance.name inst
+                    , Container.nameOf nl (Instance.pNode inst)
+                    , let sdx = Instance.sNode inst
+                      in if sdx == Node.noSecondary
                             then  ""
                             else Container.nameOf nl sdx
-                      , if Instance.autoBalance inst then "Y" else "N"
-                      , printf "%3d" $ Instance.vcpus inst
-                      , printf "%5d" $ Instance.mem inst
-                      , printf "%5d" $ Instance.dsk inst `div` 1024
-                      , printf "%5.3f" lC
-                      , printf "%5.3f" lM
-                      , printf "%5.3f" lD
-                      , printf "%5.3f" lN
-                      ]
-            where DynUtil lC lM lD lN = Instance.util inst
-        header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
-                 , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
-        isnum = False:False:False:False:False:repeat True
-    in unlines . map ((:) ' ' . intercalate " ") $
-       formatTable (header:map helper sil) isnum
+                    , if Instance.autoBalance inst then "Y" else "N"
+                    , printf "%3d" $ Instance.vcpus inst
+                    , printf "%5d" $ Instance.mem inst
+                    , printf "%5d" $ Instance.dsk inst `div` 1024
+                    , printf "%5.3f" lC
+                    , printf "%5.3f" lM
+                    , printf "%5.3f" lD
+                    , printf "%5.3f" lN
+                    ]
+          where DynUtil lC lM lD lN = Instance.util inst
+      header = [ "F", "Name", "Pri_node", "Sec_node", "Auto_bal"
+               , "vcpu", "mem" , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
+      isnum = False:False:False:False:False:repeat True
+  in printTable "" header (map helper sil) isnum
  
  -- | Shows statistics for a given node list.
-printStats :: Node.List -> String
-printStats nl =
-    let dcvs = compDetailedCV $ Container.elems nl
-        (weights, names) = unzip detailedCVInfo
-        hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
-        formatted = map (\(w, header, val) ->
-                             printf "%s=%.8f(x%.2f)" header val w::String) hd
-    in intercalate ", " formatted
+printStats :: String -> Node.List -> String
+printStats lp nl =
+  let dcvs = compDetailedCV $ Container.elems nl
+      (weights, names) = unzip detailedCVInfo
+      hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
+      header = [ "Field", "Value", "Weight" ]
+      formatted = map (\(w, h, val) ->
+                         [ h
+                         , printf "%.8f" val
+                         , printf "x%.2f" w
+                         ]) hd
+  in printTable lp header formatted $ False:repeat True
  
  -- | Convert a placement into a list of OpCodes (basically a job).
  iMoveToJob :: Node.List        -- ^ The node list; only used for node
@@ -1326,18 +1445,20 @@ iMoveToJob :: Node.List        -- ^ The node list; only used for node
             -> [OpCodes.OpCode] -- ^ The list of opcodes equivalent to
                                 -- the given move
  iMoveToJob nl il idx move =
-    let inst = Container.find idx il
-        iname = Instance.name inst
-        lookNode  = Just . Container.nameOf nl
-        opF = OpCodes.OpInstanceMigrate iname True False True
-        opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
-                OpCodes.ReplaceNewSecondary [] Nothing
-    in case move of
-         Failover -> [ opF ]
-         ReplacePrimary np -> [ opF, opR np, opF ]
-         ReplaceSecondary ns -> [ opR ns ]
-         ReplaceAndFailover np -> [ opR np, opF ]
-         FailoverAndReplace ns -> [ opF, opR ns ]
+  let inst = Container.find idx il
+      iname = Instance.name inst
+      lookNode  = Just . Container.nameOf nl
+      opF = OpCodes.OpInstanceMigrate iname True False True Nothing
+      opFA n = OpCodes.OpInstanceMigrate iname True False True (lookNode n)
+      opR n = OpCodes.OpInstanceReplaceDisks iname (lookNode n)
+              OpCodes.ReplaceNewSecondary [] Nothing
+  in case move of
+       Failover -> [ opF ]
+       FailoverToAny np -> [ opFA np ]
+       ReplacePrimary np -> [ opF, opR np, opF ]
+       ReplaceSecondary ns -> [ opR ns ]
+       ReplaceAndFailover np -> [ opR np, opF ]
+       FailoverAndReplace ns -> [ opF, opR ns ]
  
  -- * Node group functions
  
@@ -1352,9 +1473,9 @@ instanceGroup nl i =
        pgroup = Node.group pnode
        sgroup = Node.group snode
    in if pgroup /= sgroup
-     then fail ("Instance placed accross two node groups, primary " ++
-                show pgroup ++ ", secondary " ++ show sgroup)
-     else return pgroup
+       then fail ("Instance placed accross two node groups, primary " ++
+                  show pgroup ++ ", secondary " ++ show sgroup)
+       else return pgroup
  
  -- | Computes the group of an instance per the primary node.
  instancePriGroup :: Node.List -> Instance.Instance -> Gdx
@@ -1386,17 +1507,17 @@ nodesToEvacuate :: Instance.List -- ^ The cluster-wide instance list
                  -> [Idx]         -- ^ List of instance indices being evacuated
                  -> IntSet.IntSet -- ^ Set of node indices
  nodesToEvacuate il mode =
-    IntSet.delete Node.noSecondary .
-    foldl' (\ns idx ->
-                let i = Container.find idx il
-                    pdx = Instance.pNode i
-                    sdx = Instance.sNode i
-                    dt = Instance.diskTemplate i
-                    withSecondary = case dt of
-                                      DTDrbd8 -> IntSet.insert sdx ns
-                                      _ -> ns
-                in case mode of
-                     ChangePrimary   -> IntSet.insert pdx ns
-                     ChangeSecondary -> withSecondary
-                     ChangeAll       -> IntSet.insert pdx withSecondary
-           ) IntSet.empty
+  IntSet.delete Node.noSecondary .
+  foldl' (\ns idx ->
+            let i = Container.find idx il
+                pdx = Instance.pNode i
+                sdx = Instance.sNode i
+                dt = Instance.diskTemplate i
+                withSecondary = case dt of
+                                  DTDrbd8 -> IntSet.insert sdx ns
+                                  _ -> ns
+            in case mode of
+                 ChangePrimary   -> IntSet.insert pdx ns
+                 ChangeSecondary -> withSecondary
+                 ChangeAll       -> IntSet.insert pdx withSecondary
+         ) IntSet.empty