Add a tryEvac function

[ganeti-local] / Ganeti / HTools / Cluster.hs
diff --git a/Ganeti/HTools/Cluster.hs b/Ganeti/HTools/Cluster.hs

index 31b9e4e..42f7a6c 100644 (file)
--- a/Ganeti/HTools/Cluster.hs
+++ b/Ganeti/HTools/Cluster.hs
@@ -5,414 +5,365 @@ goes into the "Main" module for the individual binaries.
  
  -}
  
  
  -}
  
+{-
+
+Copyright (C) 2009 Google Inc.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA.
+
+-}
+
  module Ganeti.HTools.Cluster
      (
       -- * Types
  module Ganeti.HTools.Cluster
      (
       -- * Types
-     NodeList
-    , InstanceList
-    , NameList
-    , Placement
-    , Solution(..)
+      AllocSolution
      , Table(..)
      , Table(..)
-    , Removal
+    , CStats(..)
      -- * Generic functions
      , totalResources
      -- * First phase functions
      , computeBadItems
      -- * Second phase functions
      -- * Generic functions
      , totalResources
      -- * First phase functions
      , computeBadItems
      -- * Second phase functions
-    , computeSolution
-    , applySolution
      , printSolution
      , printSolutionLine
      , formatCmds
      , printSolution
      , printSolutionLine
      , formatCmds
+    , involvedNodes
+    , splitJobs
+    -- * Display functions
      , printNodes
      , printNodes
+    , printInsts
      -- * Balacing functions
      , checkMove
      -- * Balacing functions
      , checkMove
+    , doNextBalance
+    , tryBalance
      , compCV
      , printStats
      , compCV
      , printStats
-    -- * Loading functions
-    , loadData
+    , iMoveToJob
+    -- * IAllocator functions
+    , tryAlloc
+    , tryReloc
+    , tryEvac
+    , collapseFailures
      ) where
  
  import Data.List
      ) where
  
  import Data.List
-import Data.Maybe (isNothing, fromJust)
  import Text.Printf (printf)
  import Data.Function
  import Text.Printf (printf)
  import Data.Function
+import Control.Monad
  
  import qualified Ganeti.HTools.Container as Container
  import qualified Ganeti.HTools.Instance as Instance
  import qualified Ganeti.HTools.Node as Node
  
  import qualified Ganeti.HTools.Container as Container
  import qualified Ganeti.HTools.Instance as Instance
  import qualified Ganeti.HTools.Node as Node
+import Ganeti.HTools.Types
  import Ganeti.HTools.Utils
  import Ganeti.HTools.Utils
+import qualified Ganeti.OpCodes as OpCodes
  
  
-type NodeList = Container.Container Node.Node
-type InstanceList = Container.Container Instance.Instance
--- | The type used to hold idx-to-name mappings
-type NameList = [(Int, String)]
--- | A separate name for the cluster score type
-type Score = Double
-
--- | The description of an instance placement.
-type Placement = (Int, Int, Int, Score)
+-- * Types
  
  
-{- | A cluster solution described as the solution delta and the list
-of placements.
-
--}
-data Solution = Solution Int [Placement]
-                deriving (Eq, Ord, Show)
-
--- | Returns the delta of a solution or -1 for Nothing
-solutionDelta :: Maybe Solution -> Int
-solutionDelta sol = case sol of
-                      Just (Solution d _) -> d
-                      _ -> -1
-
--- | A removal set.
-data Removal = Removal NodeList [Instance.Instance]
-
--- | An instance move definition
-data IMove = Failover                -- ^ Failover the instance (f)
-           | ReplacePrimary Int      -- ^ Replace primary (f, r:np, f)
-           | ReplaceSecondary Int    -- ^ Replace secondary (r:ns)
-           | ReplaceAndFailover Int  -- ^ Replace secondary, failover (r:np, f)
-           | FailoverAndReplace Int  -- ^ Failover, replace secondary (f, r:ns)
-             deriving (Show)
+-- | Allocation\/relocation solution.
+type AllocSolution = ([FailMode], Int, [(Score, Node.AllocElement)])
  
  -- | The complete state for the balancing solution
  
  -- | The complete state for the balancing solution
-data Table = Table NodeList InstanceList Score [Placement]
+data Table = Table Node.List Instance.List Score [Placement]
               deriving (Show)
  
               deriving (Show)
  
--- General functions
-
--- | Cap the removal list if needed.
-capRemovals :: [a] -> Int -> [a]
-capRemovals removals max_removals =
-    if max_removals > 0 then
-        take max_removals removals
-    else
-        removals
-
--- | Check if the given node list fails the N+1 check.
-verifyN1Check :: [Node.Node] -> Bool
-verifyN1Check nl = any Node.failN1 nl
+data CStats = CStats { csFmem :: Int    -- ^ Cluster free mem
+                     , csFdsk :: Int    -- ^ Cluster free disk
+                     , csAmem :: Int    -- ^ Cluster allocatable mem
+                     , csAdsk :: Int    -- ^ Cluster allocatable disk
+                     , csAcpu :: Int    -- ^ Cluster allocatable cpus
+                     , csMmem :: Int    -- ^ Max node allocatable mem
+                     , csMdsk :: Int    -- ^ Max node allocatable disk
+                     , csMcpu :: Int    -- ^ Max node allocatable cpu
+                     , csImem :: Int    -- ^ Instance used mem
+                     , csIdsk :: Int    -- ^ Instance used disk
+                     , csIcpu :: Int    -- ^ Instance used cpu
+                     , csTmem :: Double -- ^ Cluster total mem
+                     , csTdsk :: Double -- ^ Cluster total disk
+                     , csTcpu :: Double -- ^ Cluster total cpus
+                     , csXmem :: Int    -- ^ Unnacounted for mem
+                     , csNmem :: Int    -- ^ Node own memory
+                     , csScore :: Score -- ^ The cluster score
+                     , csNinst :: Int   -- ^ The total number of instances
+                     }
+
+-- * Utility functions
  
  -- | Verifies the N+1 status and return the affected nodes.
  verifyN1 :: [Node.Node] -> [Node.Node]
  
  -- | Verifies the N+1 status and return the affected nodes.
  verifyN1 :: [Node.Node] -> [Node.Node]
-verifyN1 nl = filter Node.failN1 nl
-
-{-| Add an instance and return the new node and instance maps. -}
-addInstance :: NodeList -> Instance.Instance ->
-               Node.Node -> Node.Node -> Maybe NodeList
-addInstance nl idata pri sec =
-  let pdx = Node.idx pri
-      sdx = Node.idx sec
-  in do
-      pnode <- Node.addPri pri idata
-      snode <- Node.addSec sec idata pdx
-      new_nl <- return $ Container.addTwo sdx snode
-                         pdx pnode nl
-      return new_nl
-
--- | Remove an instance and return the new node and instance maps.
-removeInstance :: NodeList -> Instance.Instance -> NodeList
-removeInstance nl idata =
-  let pnode = Instance.pnode idata
-      snode = Instance.snode idata
-      pn = Container.find pnode nl
-      sn = Container.find snode nl
-      new_nl = Container.addTwo
-               pnode (Node.removePri pn idata)
-               snode (Node.removeSec sn idata) nl in
-  new_nl
-
--- | Remove an instance and return the new node map.
-removeInstances :: NodeList -> [Instance.Instance] -> NodeList
-removeInstances = foldl' removeInstance
-
--- | Compute the total free disk and memory in the cluster.
-totalResources :: Container.Container Node.Node -> (Int, Int)
-totalResources nl =
-    foldl'
-    (\ (mem, dsk) node -> (mem + (Node.f_mem node),
-                           dsk + (Node.f_dsk node)))
-    (0, 0) (Container.elems nl)
-
-{- | Compute a new version of a cluster given a solution.
-
-This is not used for computing the solutions, but for applying a
-(known-good) solution to the original cluster for final display.
-
-It first removes the relocated instances after which it places them on
-their new nodes.
-
- -}
-applySolution :: NodeList -> InstanceList -> [Placement] -> NodeList
-applySolution nl il sol =
-    let odxes = map (\ (a, b, c, _) -> (Container.find a il,
-                                        Node.idx (Container.find b nl),
-                                        Node.idx (Container.find c nl))
-                    ) sol
-        idxes = (\ (x, _, _) -> x) (unzip3 odxes)
-        nc = removeInstances nl idxes
-    in
-      foldl' (\ nz (a, b, c) ->
-                 let new_p = Container.find b nz
-                     new_s = Container.find c nz in
-                 fromJust (addInstance nz a new_p new_s)
-           ) nc odxes
-
+verifyN1 = filter Node.failN1
  
  
--- First phase functions
-
-{- | Given a list 1,2,3..n build a list of pairs [(1, [2..n]), (2,
-    [3..n]), ...]
-
--}
-genParts :: [a] -> Int -> [(a, [a])]
-genParts l count =
-    case l of
-      [] -> []
-      x:xs ->
-          if length l < count then
-              []
-          else
-              (x, xs) : (genParts xs count)
-
--- | Generates combinations of count items from the names list.
-genNames :: Int -> [b] -> [[b]]
-genNames count1 names1 =
-  let aux_fn count names current =
-          case count of
-            0 -> [current]
-            _ ->
-                concatMap
-                (\ (x, xs) -> aux_fn (count - 1) xs (x:current))
-                (genParts names count)
-  in
-    aux_fn count1 names1 []
-
-{- | Computes the pair of bad nodes and instances.
+{-| Computes the pair of bad nodes and instances.
  
  The bad node list is computed via a simple 'verifyN1' check, and the
  bad instance list is the list of primary and secondary instances of
  those nodes.
  
  -}
  
  The bad node list is computed via a simple 'verifyN1' check, and the
  bad instance list is the list of primary and secondary instances of
  those nodes.
  
  -}
-computeBadItems :: NodeList -> InstanceList ->
+computeBadItems :: Node.List -> Instance.List ->
                     ([Node.Node], [Instance.Instance])
  computeBadItems nl il =
                     ([Node.Node], [Instance.Instance])
  computeBadItems nl il =
-  let bad_nodes = verifyN1 $ Container.elems nl
-      bad_instances = map (\idx -> Container.find idx il) $
-                      sort $ nub $ concat $
-                      map (\ n -> (Node.slist n) ++ (Node.plist n)) bad_nodes
+  let bad_nodes = verifyN1 $ getOnline nl
+      bad_instances = map (\idx -> Container.find idx il) .
+                      sort . nub $
+                      concatMap (\ n -> Node.sList n ++ Node.pList n) bad_nodes
    in
      (bad_nodes, bad_instances)
  
    in
      (bad_nodes, bad_instances)
  
+-- | Zero-initializer for the CStats type
+emptyCStats :: CStats
+emptyCStats = CStats 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+-- | Update stats with data from a new node
+updateCStats :: CStats -> Node.Node -> CStats
+updateCStats cs node =
+    let CStats { csFmem = x_fmem, csFdsk = x_fdsk,
+                 csAmem = x_amem, csAcpu = x_acpu, csAdsk = x_adsk,
+                 csMmem = x_mmem, csMdsk = x_mdsk, csMcpu = x_mcpu,
+                 csImem = x_imem, csIdsk = x_idsk, csIcpu = x_icpu,
+                 csTmem = x_tmem, csTdsk = x_tdsk, csTcpu = x_tcpu,
+                 csXmem = x_xmem, csNmem = x_nmem, csNinst = x_ninst
+               }
+            = cs
+        inc_amem = Node.fMem node - Node.rMem node
+        inc_amem' = if inc_amem > 0 then inc_amem else 0
+        inc_adsk = Node.availDisk node
+        inc_imem = truncate (Node.tMem node) - Node.nMem node
+                   - Node.xMem node - Node.fMem node
+        inc_icpu = Node.uCpu node
+        inc_idsk = truncate (Node.tDsk node) - Node.fDsk node
+
+    in cs { csFmem = x_fmem + Node.fMem node
+          , csFdsk = x_fdsk + Node.fDsk node
+          , csAmem = x_amem + inc_amem'
+          , csAdsk = x_adsk + inc_adsk
+          , csAcpu = x_acpu
+          , csMmem = max x_mmem inc_amem'
+          , csMdsk = max x_mdsk inc_adsk
+          , csMcpu = x_mcpu
+          , csImem = x_imem + inc_imem
+          , csIdsk = x_idsk + inc_idsk
+          , csIcpu = x_icpu + inc_icpu
+          , csTmem = x_tmem + Node.tMem node
+          , csTdsk = x_tdsk + Node.tDsk node
+          , csTcpu = x_tcpu + Node.tCpu node
+          , csXmem = x_xmem + Node.xMem node
+          , csNmem = x_nmem + Node.nMem node
+          , csNinst = x_ninst + length (Node.pList node)
+          }
  
  
-{- | Checks if removal of instances results in N+1 pass.
-
-Note: the check removal cannot optimize by scanning only the affected
-nodes, since the cluster is known to be not healthy; only the check
-placement can make this shortcut.
+-- | Compute the total free disk and memory in the cluster.
+totalResources :: Node.List -> CStats
+totalResources nl =
+    let cs = foldl' updateCStats emptyCStats . Container.elems $ nl
+    in cs { csScore = compCV nl }
+
+-- | The names of the individual elements in the CV list
+detailedCVNames :: [String]
+detailedCVNames = [ "free_mem_cv"
+                  , "free_disk_cv"
+                  , "n1_cnt"
+                  , "reserved_mem_cv"
+                  , "offline_all_cnt"
+                  , "offline_pri_cnt"
+                  , "vcpu_ratio_cv"
+                  , "cpu_load_cv"
+                  , "mem_load_cv"
+                  , "disk_load_cv"
+                  , "net_load_cv"
+                  , "pri_tags_score"
+                  ]
  
  
--}
-checkRemoval :: NodeList -> [Instance.Instance] -> Maybe Removal
-checkRemoval nl victims =
-  let nx = removeInstances nl victims
-      failN1 = verifyN1Check (Container.elems nx)
-  in
-    if failN1 then
-      Nothing
-    else
-      Just $ Removal nx victims
-
-
--- | Computes the removals list for a given depth
-computeRemovals :: NodeList
-                 -> [Instance.Instance]
-                 -> Int
-                 -> [Maybe Removal]
-computeRemovals nl bad_instances depth =
-    map (checkRemoval nl) $ genNames depth bad_instances
-
--- Second phase functions
-
--- | Single-node relocation cost
-nodeDelta :: Int -> Int -> Int -> Int
-nodeDelta i p s =
-    if i == p || i == s then
-        0
-    else
-        1
-
-{-| Compute best solution.
-
-    This function compares two solutions, choosing the minimum valid
-    solution.
--}
-compareSolutions :: Maybe Solution -> Maybe Solution -> Maybe Solution
-compareSolutions a b = case (a, b) of
-  (Nothing, x) -> x
-  (x, Nothing) -> x
-  (x, y) -> min x y
+-- | Compute the mem and disk covariance.
+compDetailedCV :: Node.List -> [Double]
+compDetailedCV nl =
+    let
+        all_nodes = Container.elems nl
+        (offline, nodes) = partition Node.offline all_nodes
+        mem_l = map Node.pMem nodes
+        dsk_l = map Node.pDsk nodes
+        -- metric: memory covariance
+        mem_cv = varianceCoeff mem_l
+        -- metric: disk covariance
+        dsk_cv = varianceCoeff dsk_l
+        n1_l = length $ filter Node.failN1 nodes
+        -- metric: count of failN1 nodes
+        n1_score = fromIntegral n1_l::Double
+        res_l = map Node.pRem nodes
+        -- metric: reserved memory covariance
+        res_cv = varianceCoeff res_l
+        -- offline instances metrics
+        offline_ipri = sum . map (length . Node.pList) $ offline
+        offline_isec = sum . map (length . Node.sList) $ offline
+        -- metric: count of instances on offline nodes
+        off_score = fromIntegral (offline_ipri + offline_isec)::Double
+        -- metric: count of primary instances on offline nodes (this
+        -- helps with evacuation/failover of primary instances on
+        -- 2-node clusters with one node offline)
+        off_pri_score = fromIntegral offline_ipri::Double
+        cpu_l = map Node.pCpu nodes
+        -- metric: covariance of vcpu/pcpu ratio
+        cpu_cv = varianceCoeff cpu_l
+        -- metrics: covariance of cpu, memory, disk and network load
+        (c_load, m_load, d_load, n_load) = unzip4 $
+            map (\n ->
+                     let DynUtil c1 m1 d1 n1 = Node.utilLoad n
+                         DynUtil c2 m2 d2 n2 = Node.utilPool n
+                     in (c1/c2, m1/m2, d1/d2, n1/n2)
+                ) nodes
+        -- metric: conflicting instance count
+        pri_tags_inst = sum $ map Node.conflictingPrimaries nodes
+        pri_tags_score = fromIntegral pri_tags_inst::Double
+    in [ mem_cv, dsk_cv, n1_score, res_cv, off_score, off_pri_score, cpu_cv
+       , varianceCoeff c_load, varianceCoeff m_load
+       , varianceCoeff d_load, varianceCoeff n_load
+       , pri_tags_score ]
+
+-- | Compute the /total/ variance.
+compCV :: Node.List -> Double
+compCV = sum . compDetailedCV
+
+-- | Compute online nodes from a Node.List
+getOnline :: Node.List -> [Node.Node]
+getOnline = filter (not . Node.offline) . Container.elems
+
+-- * hbal functions
  
  -- | Compute best table. Note that the ordering of the arguments is important.
  compareTables :: Table -> Table -> Table
  compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
      if a_cv > b_cv then b else a
  
  
  -- | Compute best table. Note that the ordering of the arguments is important.
  compareTables :: Table -> Table -> Table
  compareTables a@(Table _ _ a_cv _) b@(Table _ _ b_cv _ ) =
      if a_cv > b_cv then b else a
  
--- | Check if a given delta is worse then an existing solution.
-tooHighDelta :: Maybe Solution -> Int -> Int -> Bool
-tooHighDelta sol new_delta max_delta =
-    if new_delta > max_delta && max_delta >=0 then
-        True
-    else
-        case sol of
-          Nothing -> False
-          Just (Solution old_delta _) -> old_delta <= new_delta
-
-{-| Check if placement of instances still keeps the cluster N+1 compliant.
-
-    This is the workhorse of the allocation algorithm: given the
-    current node and instance maps, the list of instances to be
-    placed, and the current solution, this will return all possible
-    solution by recursing until all target instances are placed.
-
--}
-checkPlacement :: NodeList            -- ^ The current node list
-               -> [Instance.Instance] -- ^ List of instances still to place
-               -> [Placement]         -- ^ Partial solution until now
-               -> Int                 -- ^ The delta of the partial solution
-               -> Maybe Solution      -- ^ The previous solution
-               -> Int                 -- ^ Abort if the we go above this delta
-               -> Maybe Solution      -- ^ The new solution
-checkPlacement nl victims current current_delta prev_sol max_delta =
-  let target = head victims
-      opdx = Instance.pnode target
-      osdx = Instance.snode target
-      vtail = tail victims
-      have_tail = (length vtail) > 0
-      nodes = Container.elems nl
-      iidx = Instance.idx target
-  in
-    foldl'
-    (\ accu_p pri ->
-         let
-             pri_idx = Node.idx pri
-             upri_delta = current_delta + nodeDelta pri_idx opdx osdx
-             new_pri = Node.addPri pri target
-             fail_delta1 = tooHighDelta accu_p upri_delta max_delta
-         in
-           if fail_delta1 || isNothing(new_pri) then accu_p
-           else let pri_nl = Container.add pri_idx (fromJust new_pri) nl in
-                foldl'
-                (\ accu sec ->
-                     let
-                         sec_idx = Node.idx sec
-                         upd_delta = upri_delta +
-                                     nodeDelta sec_idx opdx osdx
-                         fail_delta2 = tooHighDelta accu upd_delta max_delta
-                         new_sec = Node.addSec sec target pri_idx
-                     in
-                       if sec_idx == pri_idx || fail_delta2 ||
-                          isNothing new_sec then accu
-                       else let
-                           nx = Container.add sec_idx (fromJust new_sec) pri_nl
-                           upd_cv = compCV nx
-                           plc = (iidx, pri_idx, sec_idx, upd_cv)
-                           c2 = plc:current
-                           result =
-                               if have_tail then
-                                   checkPlacement nx vtail c2 upd_delta
-                                                  accu max_delta
-                               else
-                                   Just (Solution upd_delta c2)
-                      in compareSolutions accu result
-                ) accu_p nodes
-    ) prev_sol nodes
-
--- | Apply a move
-applyMove :: NodeList -> Instance.Instance
-          -> IMove -> (Maybe NodeList, Instance.Instance, Int, Int)
+-- | Applies an instance move to a given node list and instance.
+applyMove :: Node.List -> Instance.Instance
+          -> IMove -> OpResult (Node.List, Instance.Instance, Ndx, Ndx)
  -- Failover (f)
  applyMove nl inst Failover =
  -- Failover (f)
  applyMove nl inst Failover =
-    let old_pdx = Instance.pnode inst
-        old_sdx = Instance.snode inst
+    let old_pdx = Instance.pNode inst
+        old_sdx = Instance.sNode inst
          old_p = Container.find old_pdx nl
          old_s = Container.find old_sdx nl
          int_p = Node.removePri old_p inst
          int_s = Node.removeSec old_s inst
          old_p = Container.find old_pdx nl
          old_s = Container.find old_sdx nl
          int_p = Node.removePri old_p inst
          int_s = Node.removeSec old_s inst
-        new_p = Node.addPri int_s inst
-        new_s = Node.addSec int_p inst old_sdx
-        new_nl = if isNothing(new_p) || isNothing(new_s) then Nothing
-                 else Just $ Container.addTwo old_pdx (fromJust new_s)
-                      old_sdx (fromJust new_p) nl
-    in (new_nl, Instance.setBoth inst old_sdx old_pdx, old_sdx, old_pdx)
+        new_nl = do -- Maybe monad
+          new_p <- Node.addPri int_s inst
+          new_s <- Node.addSec int_p inst old_sdx
+          let new_inst = Instance.setBoth inst old_sdx old_pdx
+          return (Container.addTwo old_pdx new_s old_sdx new_p nl,
+                  new_inst, old_sdx, old_pdx)
+    in new_nl
  
  -- Replace the primary (f:, r:np, f)
  applyMove nl inst (ReplacePrimary new_pdx) =
  
  -- Replace the primary (f:, r:np, f)
  applyMove nl inst (ReplacePrimary new_pdx) =
-    let old_pdx = Instance.pnode inst
-        old_sdx = Instance.snode inst
+    let old_pdx = Instance.pNode inst
+        old_sdx = Instance.sNode inst
          old_p = Container.find old_pdx nl
          old_s = Container.find old_sdx nl
          tgt_n = Container.find new_pdx nl
          int_p = Node.removePri old_p inst
          int_s = Node.removeSec old_s inst
          old_p = Container.find old_pdx nl
          old_s = Container.find old_sdx nl
          tgt_n = Container.find new_pdx nl
          int_p = Node.removePri old_p inst
          int_s = Node.removeSec old_s inst
-        new_p = Node.addPri tgt_n inst
-        new_s = Node.addSec int_s inst new_pdx
-        new_nl = if isNothing(new_p) || isNothing(new_s) then Nothing
-                 else Just $ Container.add new_pdx (fromJust new_p) $
-                      Container.addTwo old_pdx int_p
-                               old_sdx (fromJust new_s) nl
-    in (new_nl, Instance.setPri inst new_pdx, new_pdx, old_sdx)
+        new_nl = do -- Maybe monad
+          -- check that the current secondary can host the instance
+          -- during the migration
+          tmp_s <- Node.addPri int_s inst
+          let tmp_s' = Node.removePri tmp_s inst
+          new_p <- Node.addPri tgt_n inst
+          new_s <- Node.addSec tmp_s' inst new_pdx
+          let new_inst = Instance.setPri inst new_pdx
+          return (Container.add new_pdx new_p $
+                  Container.addTwo old_pdx int_p old_sdx new_s nl,
+                  new_inst, new_pdx, old_sdx)
+    in new_nl
  
  -- Replace the secondary (r:ns)
  applyMove nl inst (ReplaceSecondary new_sdx) =
  
  -- Replace the secondary (r:ns)
  applyMove nl inst (ReplaceSecondary new_sdx) =
-    let old_pdx = Instance.pnode inst
-        old_sdx = Instance.snode inst
+    let old_pdx = Instance.pNode inst
+        old_sdx = Instance.sNode inst
          old_s = Container.find old_sdx nl
          tgt_n = Container.find new_sdx nl
          int_s = Node.removeSec old_s inst
          old_s = Container.find old_sdx nl
          tgt_n = Container.find new_sdx nl
          int_s = Node.removeSec old_s inst
-        new_s = Node.addSec tgt_n inst old_pdx
-        new_nl = if isNothing(new_s) then Nothing
-                 else Just $ Container.addTwo new_sdx (fromJust new_s)
-                      old_sdx int_s nl
-    in (new_nl, Instance.setSec inst new_sdx, old_pdx, new_sdx)
+        new_inst = Instance.setSec inst new_sdx
+        new_nl = Node.addSec tgt_n inst old_pdx >>=
+                 \new_s -> return (Container.addTwo new_sdx
+                                   new_s old_sdx int_s nl,
+                                   new_inst, old_pdx, new_sdx)
+    in new_nl
  
  -- Replace the secondary and failover (r:np, f)
  applyMove nl inst (ReplaceAndFailover new_pdx) =
  
  -- Replace the secondary and failover (r:np, f)
  applyMove nl inst (ReplaceAndFailover new_pdx) =
-    let old_pdx = Instance.pnode inst
-        old_sdx = Instance.snode inst
+    let old_pdx = Instance.pNode inst
+        old_sdx = Instance.sNode inst
          old_p = Container.find old_pdx nl
          old_s = Container.find old_sdx nl
          tgt_n = Container.find new_pdx nl
          int_p = Node.removePri old_p inst
          int_s = Node.removeSec old_s inst
          old_p = Container.find old_pdx nl
          old_s = Container.find old_sdx nl
          tgt_n = Container.find new_pdx nl
          int_p = Node.removePri old_p inst
          int_s = Node.removeSec old_s inst
-        new_p = Node.addPri tgt_n inst
-        new_s = Node.addSec int_p inst new_pdx
-        new_nl = if isNothing(new_p) || isNothing(new_s) then Nothing
-                 else Just $ Container.add new_pdx (fromJust new_p) $
-                      Container.addTwo old_pdx (fromJust new_s)
-                               old_sdx int_s nl
-    in (new_nl, Instance.setBoth inst new_pdx old_pdx, new_pdx, old_pdx)
+        new_nl = do -- Maybe monad
+          new_p <- Node.addPri tgt_n inst
+          new_s <- Node.addSec int_p inst new_pdx
+          let new_inst = Instance.setBoth inst new_pdx old_pdx
+          return (Container.add new_pdx new_p $
+                  Container.addTwo old_pdx new_s old_sdx int_s nl,
+                  new_inst, new_pdx, old_pdx)
+    in new_nl
  
  -- Failver and replace the secondary (f, r:ns)
  applyMove nl inst (FailoverAndReplace new_sdx) =
  
  -- Failver and replace the secondary (f, r:ns)
  applyMove nl inst (FailoverAndReplace new_sdx) =
-    let old_pdx = Instance.pnode inst
-        old_sdx = Instance.snode inst
+    let old_pdx = Instance.pNode inst
+        old_sdx = Instance.sNode inst
          old_p = Container.find old_pdx nl
          old_s = Container.find old_sdx nl
          tgt_n = Container.find new_sdx nl
          int_p = Node.removePri old_p inst
          int_s = Node.removeSec old_s inst
          old_p = Container.find old_pdx nl
          old_s = Container.find old_sdx nl
          tgt_n = Container.find new_sdx nl
          int_p = Node.removePri old_p inst
          int_s = Node.removeSec old_s inst
-        new_p = Node.addPri int_s inst
-        new_s = Node.addSec tgt_n inst old_sdx
-        new_nl = if isNothing(new_p) || isNothing(new_s) then Nothing
-                 else Just $ Container.add new_sdx (fromJust new_s) $
-                      Container.addTwo old_sdx (fromJust new_p)
-                               old_pdx int_p nl
-    in (new_nl, Instance.setBoth inst old_sdx new_sdx, old_sdx, new_sdx)
-
+        new_nl = do -- Maybe monad
+          new_p <- Node.addPri int_s inst
+          new_s <- Node.addSec tgt_n inst old_sdx
+          let new_inst = Instance.setBoth inst old_sdx new_sdx
+          return (Container.add new_sdx new_s $
+                  Container.addTwo old_sdx new_p old_pdx int_p nl,
+                  new_inst, old_sdx, new_sdx)
+    in new_nl
+
+-- | Tries to allocate an instance on one given node.
+allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
+                 -> OpResult Node.AllocElement
+allocateOnSingle nl inst p =
+    let new_pdx = Node.idx p
+        new_inst = Instance.setBoth inst new_pdx Node.noSecondary
+        new_nl = Node.addPri p inst >>= \new_p ->
+                 return (Container.add new_pdx new_p nl, new_inst, [new_p])
+    in new_nl
+
+-- | Tries to allocate an instance on a given pair of nodes.
+allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
+               -> OpResult Node.AllocElement
+allocateOnPair nl inst tgt_p tgt_s =
+    let new_pdx = Node.idx tgt_p
+        new_sdx = Node.idx tgt_s
+        new_nl = do -- Maybe monad
+          new_p <- Node.addPri tgt_p inst
+          new_s <- Node.addSec tgt_s inst new_pdx
+          let new_inst = Instance.setBoth inst new_pdx new_sdx
+          return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst,
+                 [new_p, new_s])
+    in new_nl
+
+-- | Tries to perform an instance move and returns the best table
+-- between the original one and the new one.
  checkSingleStep :: Table -- ^ The original table
                  -> Instance.Instance -- ^ The instance to move
                  -> Table -- ^ The current best table
  checkSingleStep :: Table -- ^ The original table
                  -> Instance.Instance -- ^ The instance to move
                  -> Table -- ^ The current best table
@@ -421,23 +372,25 @@ checkSingleStep :: Table -- ^ The original table
  checkSingleStep ini_tbl target cur_tbl move =
      let
          Table ini_nl ini_il _ ini_plc = ini_tbl
  checkSingleStep ini_tbl target cur_tbl move =
      let
          Table ini_nl ini_il _ ini_plc = ini_tbl
-        (tmp_nl, new_inst, pri_idx, sec_idx) = applyMove ini_nl target move
+        tmp_resu = applyMove ini_nl target move
      in
      in
-      if isNothing tmp_nl then cur_tbl
-      else
-          let tgt_idx = Instance.idx target
-              upd_nl = fromJust tmp_nl
-              upd_cvar = compCV upd_nl
-              upd_il = Container.add tgt_idx new_inst ini_il
-              upd_plc = (tgt_idx, pri_idx, sec_idx, upd_cvar):ini_plc
-              upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
-          in
-            compareTables cur_tbl upd_tbl
-
--- | Given the status of the current secondary as a valid new node
--- and the current candidate target node,
--- generate the possible moves for a instance.
-possibleMoves :: Bool -> Int -> [IMove]
+      case tmp_resu of
+        OpFail _ -> cur_tbl
+        OpGood (upd_nl, new_inst, pri_idx, sec_idx) ->
+            let tgt_idx = Instance.idx target
+                upd_cvar = compCV upd_nl
+                upd_il = Container.add tgt_idx new_inst ini_il
+                upd_plc = (tgt_idx, pri_idx, sec_idx, move, upd_cvar):ini_plc
+                upd_tbl = Table upd_nl upd_il upd_cvar upd_plc
+            in
+              compareTables cur_tbl upd_tbl
+
+-- | Given the status of the current secondary as a valid new node and
+-- the current candidate target node, generate the possible moves for
+-- a instance.
+possibleMoves :: Bool      -- ^ Whether the secondary node is a valid new node
+              -> Ndx       -- ^ Target node candidate
+              -> [IMove]   -- ^ List of valid result moves
  possibleMoves True tdx =
      [ReplaceSecondary tdx,
       ReplaceAndFailover tdx,
  possibleMoves True tdx =
      [ReplaceSecondary tdx,
       ReplaceAndFailover tdx,
@@ -449,102 +402,199 @@ possibleMoves False tdx =
       ReplaceAndFailover tdx]
  
  -- | Compute the best move for a given instance.
       ReplaceAndFailover tdx]
  
  -- | Compute the best move for a given instance.
-checkInstanceMove :: [Int]             -- Allowed target node indices
-                  -> Table             -- Original table
-                  -> Instance.Instance -- Instance to move
-                  -> Table             -- Best new table for this instance
-checkInstanceMove nodes_idx ini_tbl target =
+checkInstanceMove :: [Ndx]             -- ^ Allowed target node indices
+                  -> Bool              -- ^ Whether disk moves are allowed
+                  -> Table             -- ^ Original table
+                  -> Instance.Instance -- ^ Instance to move
+                  -> Table             -- ^ Best new table for this instance
+checkInstanceMove nodes_idx disk_moves ini_tbl target =
      let
      let
-        opdx = Instance.pnode target
-        osdx = Instance.snode target
+        opdx = Instance.pNode target
+        osdx = Instance.sNode target
          nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
          use_secondary = elem osdx nodes_idx
          aft_failover = if use_secondary -- if allowed to failover
                         then checkSingleStep ini_tbl target ini_tbl Failover
                         else ini_tbl
          nodes = filter (\idx -> idx /= opdx && idx /= osdx) nodes_idx
          use_secondary = elem osdx nodes_idx
          aft_failover = if use_secondary -- if allowed to failover
                         then checkSingleStep ini_tbl target ini_tbl Failover
                         else ini_tbl
-        all_moves = concatMap (possibleMoves use_secondary) nodes
+        all_moves = if disk_moves
+                    then concatMap (possibleMoves use_secondary) nodes
+                    else []
      in
        -- iterate over the possible nodes for this instance
        foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
  
  -- | Compute the best next move.
      in
        -- iterate over the possible nodes for this instance
        foldl' (checkSingleStep ini_tbl target) aft_failover all_moves
  
  -- | Compute the best next move.
-checkMove :: [Int]               -- ^ Allowed target node indices
+checkMove :: [Ndx]               -- ^ Allowed target node indices
+          -> Bool                -- ^ Whether disk moves are allowed
            -> Table               -- ^ The current solution
            -> [Instance.Instance] -- ^ List of instances still to move
            -> Table               -- ^ The new solution
            -> Table               -- ^ The current solution
            -> [Instance.Instance] -- ^ List of instances still to move
            -> Table               -- ^ The new solution
-checkMove nodes_idx ini_tbl victims =
+checkMove nodes_idx disk_moves ini_tbl victims =
      let Table _ _ _ ini_plc = ini_tbl
          -- iterate over all instances, computing the best move
          best_tbl =
              foldl'
      let Table _ _ _ ini_plc = ini_tbl
          -- iterate over all instances, computing the best move
          best_tbl =
              foldl'
-            (\ step_tbl elem -> compareTables step_tbl $
-                                checkInstanceMove nodes_idx ini_tbl elem)
+            (\ step_tbl em ->
+                 compareTables step_tbl $
+                 checkInstanceMove nodes_idx disk_moves ini_tbl em)
              ini_tbl victims
          Table _ _ _ best_plc = best_tbl
              ini_tbl victims
          Table _ _ _ best_plc = best_tbl
+    in if length best_plc == length ini_plc
+       then ini_tbl -- no advancement
+       else best_tbl
+
+-- | Check if we are allowed to go deeper in the balancing
+
+doNextBalance :: Table       -- ^ The starting table
+              -> Int         -- ^ Remaining length
+              -> Score       -- ^ Score at which to stop
+              -> Bool -- ^ The resulting table and commands
+doNextBalance ini_tbl max_rounds min_score =
+    let Table _ _ ini_cv ini_plc = ini_tbl
+        ini_plc_len = length ini_plc
+    in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
+
+-- | Run a balance move
+
+tryBalance :: Table       -- ^ The starting table
+           -> Bool        -- ^ Allow disk moves
+           -> Bool        -- ^ Only evacuate moves
+           -> Maybe Table -- ^ The resulting table and commands
+tryBalance ini_tbl disk_moves evac_mode =
+    let Table ini_nl ini_il ini_cv _ = ini_tbl
+        all_inst = Container.elems ini_il
+        all_inst' = if evac_mode
+                    then let bad_nodes = map Node.idx . filter Node.offline $
+                                         Container.elems ini_nl
+                         in filter (\e -> Instance.sNode e `elem` bad_nodes ||
+                                          Instance.pNode e `elem` bad_nodes)
+                            all_inst
+                    else all_inst
+        reloc_inst = filter (\e -> Instance.sNode e /= Node.noSecondary)
+                     all_inst'
+        node_idx = map Node.idx . filter (not . Node.offline) $
+                   Container.elems ini_nl
+        fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
+        (Table _ _ fin_cv _) = fin_tbl
      in
      in
-      if length best_plc == length ini_plc then -- no advancement
-          ini_tbl
-      else
-          best_tbl
-
-{- | Auxiliary function for solution computation.
-
-We write this in an explicit recursive fashion in order to control
-early-abort in case we have met the min delta. We can't use foldr
-instead of explicit recursion since we need the accumulator for the
-abort decision.
-
--}
-advanceSolution :: [Maybe Removal] -- ^ The removal to process
-                -> Int             -- ^ Minimum delta parameter
-                -> Int             -- ^ Maximum delta parameter
-                -> Maybe Solution  -- ^ Current best solution
-                -> Maybe Solution  -- ^ New best solution
-advanceSolution [] _ _ sol = sol
-advanceSolution (Nothing:xs) m n sol = advanceSolution xs m n sol
-advanceSolution ((Just (Removal nx removed)):xs) min_d max_d prev_sol =
-    let new_sol = checkPlacement nx removed [] 0 prev_sol max_d
-        new_delta = solutionDelta $! new_sol
-    in
-      if new_delta >= 0 && new_delta <= min_d then
-          new_sol
-      else
-          advanceSolution xs min_d max_d new_sol
-
--- | Computes the placement solution.
-solutionFromRemovals :: [Maybe Removal] -- ^ The list of (possible) removals
-                     -> Int             -- ^ Minimum delta parameter
-                     -> Int             -- ^ Maximum delta parameter
-                     -> Maybe Solution  -- ^ The best solution found
-solutionFromRemovals removals min_delta max_delta =
-    advanceSolution removals min_delta max_delta Nothing
-
-{- | Computes the solution at the given depth.
-
-This is a wrapper over both computeRemovals and
-solutionFromRemovals. In case we have no solution, we return Nothing.
-
--}
-computeSolution :: NodeList        -- ^ The original node data
-                -> [Instance.Instance] -- ^ The list of /bad/ instances
-                -> Int             -- ^ The /depth/ of removals
-                -> Int             -- ^ Maximum number of removals to process
-                -> Int             -- ^ Minimum delta parameter
-                -> Int             -- ^ Maximum delta parameter
-                -> Maybe Solution  -- ^ The best solution found (or Nothing)
-computeSolution nl bad_instances depth max_removals min_delta max_delta =
-  let
-      removals = computeRemovals nl bad_instances depth
-      removals' = capRemovals removals max_removals
-  in
-    solutionFromRemovals removals' min_delta max_delta
-
--- Solution display functions (pure)
+      if fin_cv < ini_cv
+      then Just fin_tbl -- this round made success, return the new table
+      else Nothing
+
+-- * Allocation functions
+
+-- | Build failure stats out of a list of failures
+collapseFailures :: [FailMode] -> FailStats
+collapseFailures flst =
+    map (\k -> (k, length $ filter ((==) k) flst)) [minBound..maxBound]
+
+-- | Update current Allocation solution and failure stats with new
+-- elements
+concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
+concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols)
+
+concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) =
+    let nscore = compCV nl
+        -- Choose the old or new solution, based on the cluster score
+        nsols = case osols of
+                  [] -> [(nscore, ns)]
+                  (oscore, _):[] ->
+                      if oscore < nscore
+                      then osols
+                      else [(nscore, ns)]
+                  -- FIXME: here we simply concat to lists with more
+                  -- than one element; we should instead abort, since
+                  -- this is not a valid usage of this function
+                  xs -> (nscore, ns):xs
+        nsuc = cntok + 1
+    -- Note: we force evaluation of nsols here in order to keep the
+    -- memory profile low - we know that we will need nsols for sure
+    -- in the next cycle, so we force evaluation of nsols, since the
+    -- foldl' in the caller will only evaluate the tuple, but not the
+    -- elements of the tuple
+    in nsols `seq` nsuc `seq` (flst, nsuc, nsols)
+
+-- | Try to allocate an instance on the cluster.
+tryAlloc :: (Monad m) =>
+            Node.List         -- ^ The node list
+         -> Instance.List     -- ^ The instance list
+         -> Instance.Instance -- ^ The instance to allocate
+         -> Int               -- ^ Required number of nodes
+         -> m AllocSolution   -- ^ Possible solution list
+tryAlloc nl _ inst 2 =
+    let all_nodes = getOnline nl
+        all_pairs = liftM2 (,) all_nodes all_nodes
+        ok_pairs = filter (\(x, y) -> Node.idx x /= Node.idx y) all_pairs
+        sols = foldl' (\cstate (p, s) ->
+                           concatAllocs cstate $ allocateOnPair nl inst p s
+                      ) ([], 0, []) ok_pairs
+    in return sols
+
+tryAlloc nl _ inst 1 =
+    let all_nodes = getOnline nl
+        sols = foldl' (\cstate ->
+                           concatAllocs cstate . allocateOnSingle nl inst
+                      ) ([], 0, []) all_nodes
+    in return sols
+
+tryAlloc _ _ _ reqn = fail $ "Unsupported number of allocation \
+                             \destinations required (" ++ show reqn ++
+                                               "), only two supported"
+
+-- | Try to allocate an instance on the cluster.
+tryReloc :: (Monad m) =>
+            Node.List       -- ^ The node list
+         -> Instance.List   -- ^ The instance list
+         -> Idx             -- ^ The index of the instance to move
+         -> Int             -- ^ The number of nodes required
+         -> [Ndx]           -- ^ Nodes which should not be used
+         -> m AllocSolution -- ^ Solution list
+tryReloc nl il xid 1 ex_idx =
+    let all_nodes = getOnline nl
+        inst = Container.find xid il
+        ex_idx' = Instance.pNode inst:ex_idx
+        valid_nodes = filter (not . flip elem ex_idx' . Node.idx) all_nodes
+        valid_idxes = map Node.idx valid_nodes
+        sols1 = foldl' (\cstate x ->
+                            let em = do
+                                  (mnl, i, _, _) <-
+                                      applyMove nl inst (ReplaceSecondary x)
+                                  return (mnl, i, [Container.find x mnl])
+                            in concatAllocs cstate em
+                       ) ([], 0, []) valid_idxes
+    in return sols1
+
+tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
+                                \destinations required (" ++ show reqn ++
+                                                  "), only one supported"
+
+-- | Try to allocate an instance on the cluster.
+tryEvac :: (Monad m) =>
+            Node.List       -- ^ The node list
+         -> Instance.List   -- ^ The instance list
+         -> [Ndx]           -- ^ Nodes to be evacuated
+         -> m AllocSolution -- ^ Solution list
+tryEvac nl il ex_ndx =
+    let ex_nodes = map (flip Container.find nl) ex_ndx
+        all_insts = nub . concat . map Node.sList $ ex_nodes
+    in do
+      (_, sol) <- foldM (\(nl', (_, _, rsols)) idx -> do
+                           -- FIXME: hardcoded one node here
+                           (fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx
+                           case aes of
+                             csol@(_, (nl'', _, _)):_ ->
+                                 return (nl'', (fm, cs, csol:rsols))
+                             _ -> fail $ "Can't evacuate instance " ++
+                                  show idx
+                        ) (nl, ([], 0, [])) all_insts
+      return sol
+
+-- * Formatting functions
  
  -- | Given the original and final nodes, computes the relocation description.
  
  -- | Given the original and final nodes, computes the relocation description.
-computeMoves :: String -- ^ The instance name
-             -> String -- ^ Original primary
-             -> String -- ^ Original secondary
+computeMoves :: Instance.Instance -- ^ The instance to be moved
+             -> String -- ^ The instance name
+             -> IMove  -- ^ The move being performed
               -> String -- ^ New primary
               -> String -- ^ New secondary
               -> (String, [String])
               -> String -- ^ New primary
               -> String -- ^ New secondary
               -> (String, [String])
@@ -552,226 +602,161 @@ computeMoves :: String -- ^ The instance name
                  -- either @/f/@ for failover or @/r:name/@ for replace
                  -- secondary, while the command list holds gnt-instance
                  -- commands (without that prefix), e.g \"@failover instance1@\"
                  -- either @/f/@ for failover or @/r:name/@ for replace
                  -- secondary, while the command list holds gnt-instance
                  -- commands (without that prefix), e.g \"@failover instance1@\"
-computeMoves i a b c d =
-    if c == a then {- Same primary -}
-        if d == b then {- Same sec??! -}
-            ("-", [])
-        else {- Change of secondary -}
-            (printf "r:%s" d,
-             [printf "replace-disks -n %s %s" d i])
-    else
-        if c == b then {- Failover and ... -}
-            if d == a then {- that's all -}
-                ("f", [printf "migrate %s" i])
-            else
-                (printf "f r:%s" d,
-                 [printf "migrate %s" i,
-                  printf "replace-disks -n %s %s" d i])
-        else
-            if d == a then {- ... and keep primary as secondary -}
-                (printf "r:%s f" c,
-                 [printf "replace-disks -n %s %s" c i,
-                  printf "migrate %s" i])
-            else
-                if d == b then {- ... keep same secondary -}
-                    (printf "f r:%s f" c,
-                     [printf "migrate %s" i,
-                      printf "replace-disks -n %s %s" c i,
-                      printf "migrate %s" i])
-
-                else {- Nothing in common -}
-                    (printf "r:%s f r:%s" c d,
-                     [printf "replace-disks -n %s %s" c i,
-                      printf "migrate %s" i,
-                      printf "replace-disks -n %s %s" d i])
-
-{-| Converts a placement to string format -}
-printSolutionLine :: InstanceList
-              -> NameList
-              -> NameList
-              -> Int
-              -> Int
-              -> Placement
-              -> Int
-              -> (String, [String])
-printSolutionLine il ktn kti nmlen imlen plc pos =
+computeMoves i inam mv c d =
+    case mv of
+      Failover -> ("f", [mig])
+      FailoverAndReplace _ -> (printf "f r:%s" d, [mig, rep d])
+      ReplaceSecondary _ -> (printf "r:%s" d, [rep d])
+      ReplaceAndFailover _ -> (printf "r:%s f" c, [rep c, mig])
+      ReplacePrimary _ -> (printf "f r:%s f" c, [mig, rep c, mig])
+    where morf = if Instance.running i then "migrate" else "failover"
+          mig = printf "%s -f %s" morf inam::String
+          rep n = printf "replace-disks -n %s %s" n inam
+
+-- | Converts a placement to string format.
+printSolutionLine :: Node.List     -- ^ The node list
+                  -> Instance.List -- ^ The instance list
+                  -> Int           -- ^ Maximum node name length
+                  -> Int           -- ^ Maximum instance name length
+                  -> Placement     -- ^ The current placement
+                  -> Int           -- ^ The index of the placement in
+                                   -- the solution
+                  -> (String, [String])
+printSolutionLine nl il nmlen imlen plc pos =
      let
          pmlen = (2*nmlen + 1)
      let
          pmlen = (2*nmlen + 1)
-        (i, p, s, c) = plc
+        (i, p, s, mv, c) = plc
          inst = Container.find i il
          inst = Container.find i il
-        inam = fromJust $ lookup (Instance.idx inst) kti
-        npri = fromJust $ lookup p ktn
-        nsec = fromJust $ lookup s ktn
-        opri = fromJust $ lookup (Instance.pnode inst) ktn
-        osec = fromJust $ lookup (Instance.snode inst) ktn
-        (moves, cmds) =  computeMoves inam opri osec npri nsec
-        ostr = (printf "%s:%s" opri osec)::String
-        nstr = (printf "%s:%s" npri nsec)::String
+        inam = Instance.name inst
+        npri = Container.nameOf nl p
+        nsec = Container.nameOf nl s
+        opri = Container.nameOf nl $ Instance.pNode inst
+        osec = Container.nameOf nl $ Instance.sNode inst
+        (moves, cmds) =  computeMoves inst inam mv npri nsec
+        ostr = printf "%s:%s" opri osec::String
+        nstr = printf "%s:%s" npri nsec::String
      in
        (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
         pos imlen inam pmlen ostr
         pmlen nstr c moves,
         cmds)
  
      in
        (printf "  %3d. %-*s %-*s => %-*s %.8f a=%s"
         pos imlen inam pmlen ostr
         pmlen nstr c moves,
         cmds)
  
-formatCmds :: [[String]] -> String
-formatCmds cmd_strs =
-    unlines $ map ("  echo " ++) $
-    concat $ map (\(a, b) ->
-        (printf "step %d" (a::Int)):(map ("gnt-instance " ++) b)) $
-        zip [1..] cmd_strs
-
-{-| Converts a solution to string format -}
-printSolution :: InstanceList
-              -> NameList
-              -> NameList
+-- | Return the instance and involved nodes in an instance move.
+involvedNodes :: Instance.List -> Placement -> [Ndx]
+involvedNodes il plc =
+    let (i, np, ns, _, _) = plc
+        inst = Container.find i il
+        op = Instance.pNode inst
+        os = Instance.sNode inst
+    in nub [np, ns, op, os]
+
+-- | Inner function for splitJobs, that either appends the next job to
+-- the current jobset, or starts a new jobset.
+mergeJobs :: ([JobSet], [Ndx]) -> MoveJob -> ([JobSet], [Ndx])
+mergeJobs ([], _) n@(ndx, _, _, _) = ([[n]], ndx)
+mergeJobs (cjs@(j:js), nbuf) n@(ndx, _, _, _)
+    | null (ndx `intersect` nbuf) = ((n:j):js, ndx ++ nbuf)
+    | otherwise = ([n]:cjs, ndx)
+
+-- | Break a list of moves into independent groups. Note that this
+-- will reverse the order of jobs.
+splitJobs :: [MoveJob] -> [JobSet]
+splitJobs = fst . foldl mergeJobs ([], [])
+
+-- | Given a list of commands, prefix them with @gnt-instance@ and
+-- also beautify the display a little.
+formatJob :: Int -> Int -> (Int, MoveJob) -> [String]
+formatJob jsn jsl (sn, (_, _, _, cmds)) =
+    let out =
+            printf "  echo job %d/%d" jsn sn:
+            printf "  check":
+            map ("  gnt-instance " ++) cmds
+    in if sn == 1
+       then ["", printf "echo jobset %d, %d jobs" jsn jsl] ++ out
+       else out
+
+-- | Given a list of commands, prefix them with @gnt-instance@ and
+-- also beautify the display a little.
+formatCmds :: [JobSet] -> String
+formatCmds =
+    unlines .
+    concatMap (\(jsn, js) -> concatMap (formatJob jsn (length js))
+                             (zip [1..] js)) .
+    zip [1..]
+
+-- | Converts a solution to string format.
+printSolution :: Node.List
+              -> Instance.List
                -> [Placement]
                -> ([String], [[String]])
                -> [Placement]
                -> ([String], [[String]])
-printSolution il ktn kti sol =
+printSolution nl il sol =
      let
      let
-        mlen_fn = maximum . (map length) . snd . unzip
-        imlen = mlen_fn kti
-        nmlen = mlen_fn ktn
+        nmlen = Container.maxNameLen nl
+        imlen = Container.maxNameLen il
      in
      in
-      unzip $ map (uncurry $ printSolutionLine il ktn kti nmlen imlen) $
-            zip sol [1..]
+      unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..]
  
  -- | Print the node list.
  
  -- | Print the node list.
-printNodes :: NameList -> NodeList -> String
-printNodes ktn nl =
-    let snl = sortBy (compare `on` Node.idx) (Container.elems nl)
-        snl' = map (\ n -> ((fromJust $ lookup (Node.idx n) ktn), n)) snl
-        m_name = maximum . (map length) . fst . unzip $ snl'
-        helper = Node.list m_name
-        header = printf "%2s %-*s %5s %5s %5s %5s %5s %3s %3s %7s %7s"
-                 " F" m_name "Name" "t_mem" "f_mem" "r_mem"
-                 "t_dsk" "f_dsk"
-                 "pri" "sec" "p_fmem" "p_fdsk"
-    in unlines $ (header:map (uncurry helper) snl')
-
--- | Compute the mem and disk covariance.
-compDetailedCV :: NodeList -> (Double, Double, Double, Double, Double)
-compDetailedCV nl =
-    let
-        all_nodes = Container.elems nl
-        (offline, nodes) = partition Node.offline all_nodes
-        mem_l = map Node.p_mem nodes
-        dsk_l = map Node.p_dsk nodes
-        mem_cv = varianceCoeff mem_l
-        dsk_cv = varianceCoeff dsk_l
-        n1_l = length $ filter Node.failN1 nodes
-        n1_score = (fromIntegral n1_l) / (fromIntegral $ length nodes)
-        res_l = map Node.p_rem nodes
-        res_cv = varianceCoeff res_l
-        offline_inst = sum . map (\n -> (length . Node.plist $ n) +
-                                        (length . Node.slist $ n)) $ offline
-        online_inst = sum . map (\n -> (length . Node.plist $ n) +
-                                       (length . Node.slist $ n)) $ nodes
-        off_score = (fromIntegral offline_inst) /
-                    (fromIntegral $ online_inst + offline_inst)
-    in (mem_cv, dsk_cv, n1_score, res_cv, off_score)
-
--- | Compute the 'total' variance.
-compCV :: NodeList -> Double
-compCV nl =
-    let (mem_cv, dsk_cv, n1_score, res_cv, off_score) = compDetailedCV nl
-    in mem_cv + dsk_cv + n1_score + res_cv + off_score
-
-printStats :: NodeList -> String
+printNodes :: Node.List -> [String] -> String
+printNodes nl fs =
+    let fields = if null fs
+                 then Node.defaultFields
+                 else fs
+        snl = sortBy (compare `on` Node.idx) (Container.elems nl)
+        (header, isnum) = unzip $ map Node.showHeader fields
+    in unlines . map ((:) ' ' .  intercalate " ") $
+       formatTable (header:map (Node.list fields) snl) isnum
+
+-- | Print the instance list.
+printInsts :: Node.List -> Instance.List -> String
+printInsts nl il =
+    let sil = sortBy (compare `on` Instance.idx) (Container.elems il)
+        helper inst = [ if Instance.running inst then "R" else " "
+                      , Instance.name inst
+                      , Container.nameOf nl (Instance.pNode inst)
+                      , (let sdx = Instance.sNode inst
+                         in if sdx == Node.noSecondary
+                            then  ""
+                            else Container.nameOf nl sdx)
+                      , printf "%3d" $ Instance.vcpus inst
+                      , printf "%5d" $ Instance.mem inst
+                      , printf "%5d" $ Instance.dsk inst `div` 1024
+                      , printf "%5.3f" lC
+                      , printf "%5.3f" lM
+                      , printf "%5.3f" lD
+                      , printf "%5.3f" lN
+                      ]
+            where DynUtil lC lM lD lN = Instance.util inst
+        header = [ "F", "Name", "Pri_node", "Sec_node", "vcpu", "mem"
+                 , "dsk", "lCpu", "lMem", "lDsk", "lNet" ]
+        isnum = False:False:False:False:repeat True
+    in unlines . map ((:) ' ' . intercalate " ") $
+       formatTable (header:map helper sil) isnum
+
+-- | Shows statistics for a given node list.
+printStats :: Node.List -> String
  printStats nl =
  printStats nl =
-    let (mem_cv, dsk_cv, n1_score, res_cv, off_score) = compDetailedCV nl
-    in printf "f_mem=%.8f, r_mem=%.8f, f_dsk=%.8f, n1=%.3f, uf=%.3f"
-       mem_cv res_cv dsk_cv n1_score off_score
-
--- Balancing functions
-
--- Loading functions
-
-{- | Convert newline and delimiter-separated text.
-
-This function converts a text in tabular format as generated by
-@gnt-instance list@ and @gnt-node list@ to a list of objects using a
-supplied conversion function.
-
--}
-loadTabular :: String -> ([String] -> (String, a))
-            -> (a -> Int -> a) -> ([(String, Int)], [(Int, a)])
-loadTabular text_data convert_fn set_fn =
-    let lines_data = lines text_data
-        rows = map (sepSplit '|') lines_data
-        kerows = (map convert_fn rows)
-        idxrows = map (\ (idx, (k, v)) -> ((k, idx), (idx, set_fn v idx)))
-                  (zip [0..] kerows)
-    in unzip idxrows
-
--- | For each instance, add its index to its primary and secondary nodes
-fixNodes :: [(Int, Node.Node)]
-         -> [(Int, Instance.Instance)]
-         -> [(Int, Node.Node)]
-fixNodes nl il =
-    foldl' (\accu (idx, inst) ->
-                let
-                    assocEqual = (\ (i, _) (j, _) -> i == j)
-                    pdx = Instance.pnode inst
-                    sdx = Instance.snode inst
-                    pold = fromJust $ lookup pdx accu
-                    sold = fromJust $ lookup sdx accu
-                    pnew = Node.setPri pold idx
-                    snew = Node.setSec sold idx
-                    ac1 = deleteBy assocEqual (pdx, pold) accu
-                    ac2 = deleteBy assocEqual (sdx, sold) ac1
-                    ac3 = (pdx, pnew):(sdx, snew):ac2
-                in ac3) nl il
-
--- | Compute the longest common suffix of a NameList list that
--- | starts with a dot
-longestDomain :: NameList -> String
-longestDomain [] = ""
-longestDomain ((_,x):xs) =
-    let
-        onlyStrings = snd $ unzip xs
-    in
-      foldr (\ suffix accu -> if all (isSuffixOf suffix) onlyStrings
-                              then suffix
-                              else accu)
-      "" $ filter (isPrefixOf ".") (tails x)
-
--- | Remove tails from the (Int, String) lists
-stripSuffix :: String -> NameList -> NameList
-stripSuffix suffix lst =
-    let sflen = length suffix in
-    map (\ (key, name) -> (key, take ((length name) - sflen) name)) lst
-
-{-| Initializer function that loads the data from a node and list file
-    and massages it into the correct format. -}
-loadData :: String -- ^ Node data in text format
-         -> String -- ^ Instance data in text format
-         -> (Container.Container Node.Node,
-             Container.Container Instance.Instance,
-             String, NameList, NameList)
-loadData ndata idata =
-    let
-    {- node file: name t_mem f_mem t_disk f_disk -}
-        (ktn, nl) = loadTabular ndata
-                    (\ (name:tm:fm:td:fd:[]) ->
-                         (name,
-                          Node.create (read tm) (read fm) (read td) (read fd)))
-                    Node.setIdx
-    {- instance file: name mem disk pnode snode -}
-        (kti, il) = loadTabular idata
-                    (\ (name:mem:dsk:pnode:snode:[]) ->
-                         (name,
-                          Instance.create (read mem) (read dsk)
-                              (fromJust $ lookup pnode ktn)
-                              (fromJust $ lookup snode ktn)))
-                    Instance.setIdx
-        nl2 = fixNodes nl il
-        il3 = Container.fromAssocList il
-        nl3 = Container.fromAssocList
-             (map (\ (k, v) -> (k, Node.buildPeers v il3 (length nl2))) nl2)
-        xtn = swapPairs ktn
-        xti = swapPairs kti
-        common_suffix = longestDomain (xti ++ xtn)
-        stn = stripSuffix common_suffix xtn
-        sti = stripSuffix common_suffix xti
-    in
-      (nl3, il3, common_suffix, stn, sti)
+    let dcvs = compDetailedCV nl
+        hd = zip (detailedCVNames ++ repeat "unknown") dcvs
+        formatted = map (\(header, val) ->
+                             printf "%s=%.8f" header val::String) hd
+    in intercalate ", " formatted
+
+-- | Convert a placement into a list of OpCodes (basically a job).
+iMoveToJob :: String -> Node.List -> Instance.List
+          -> Idx -> IMove -> [OpCodes.OpCode]
+iMoveToJob csf nl il idx move =
+    let inst = Container.find idx il
+        iname = Instance.name inst ++ csf
+        lookNode n = Just (Container.nameOf nl n ++ csf)
+        opF = if Instance.running inst
+              then OpCodes.OpMigrateInstance iname True False
+              else OpCodes.OpFailoverInstance iname False
+        opR n = OpCodes.OpReplaceDisks iname (lookNode n)
+                OpCodes.ReplaceNewSecondary [] Nothing
+    in case move of
+         Failover -> [ opF ]
+         ReplacePrimary np -> [ opF, opR np, opF ]
+         ReplaceSecondary ns -> [ opR ns ]
+         ReplaceAndFailover np -> [ opR np, opF ]
+         FailoverAndReplace ns -> [ opF, opR ns ]