Cleanup AllocSolution after AllocElement changes

[ganeti-local] / Ganeti / HTools / Cluster.hs
diff --git a/Ganeti/HTools/Cluster.hs b/Ganeti/HTools/Cluster.hs

index 614b250..8ba4edc 100644 (file)
--- a/Ganeti/HTools/Cluster.hs
+++ b/Ganeti/HTools/Cluster.hs
@@ -7,7 +7,7 @@ goes into the "Main" module for the individual binaries.
  
  {-
  
-Copyright (C) 2009 Google Inc.
+Copyright (C) 2009, 2010 Google Inc.
  
  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -39,7 +39,6 @@ module Ganeti.HTools.Cluster
      -- * First phase functions
      , computeBadItems
      -- * Second phase functions
-    , printSolution
      , printSolutionLine
      , formatCmds
      , involvedNodes
@@ -62,6 +61,9 @@ module Ganeti.HTools.Cluster
      -- * Allocation functions
      , iterateAlloc
      , tieredAlloc
+    , instanceGroup
+    , findSplitInstances
+    , splitCluster
      ) where
  
  import Data.List
@@ -79,7 +81,7 @@ import qualified Ganeti.OpCodes as OpCodes
  -- * Types
  
  -- | Allocation\/relocation solution.
-type AllocSolution = ([FailMode], Int, [(Score, Node.AllocElement)])
+type AllocSolution = ([FailMode], Int, [Node.AllocElement])
  
  -- | The complete state for the balancing solution
  data Table = Table Node.List Instance.List Score [Placement]
@@ -203,21 +205,24 @@ computeAllocationDelta cini cfin =
          runa = RSpec un_cpu (truncate t_mem - f_imem) (truncate t_dsk - f_idsk)
      in (rini, rfin, runa)
  
--- | The names of the individual elements in the CV list
-detailedCVNames :: [String]
-detailedCVNames = [ "free_mem_cv"
-                  , "free_disk_cv"
-                  , "n1_cnt"
-                  , "reserved_mem_cv"
-                  , "offline_all_cnt"
-                  , "offline_pri_cnt"
-                  , "vcpu_ratio_cv"
-                  , "cpu_load_cv"
-                  , "mem_load_cv"
-                  , "disk_load_cv"
-                  , "net_load_cv"
-                  , "pri_tags_score"
-                  ]
+-- | The names and weights of the individual elements in the CV list
+detailedCVInfo :: [(Double, String)]
+detailedCVInfo = [ (1,  "free_mem_cv")
+                 , (1,  "free_disk_cv")
+                 , (1,  "n1_cnt")
+                 , (1,  "reserved_mem_cv")
+                 , (4,  "offline_all_cnt")
+                 , (16, "offline_pri_cnt")
+                 , (1,  "vcpu_ratio_cv")
+                 , (1,  "cpu_load_cv")
+                 , (1,  "mem_load_cv")
+                 , (1,  "disk_load_cv")
+                 , (1,  "net_load_cv")
+                 , (2,  "pri_tags_score")
+                 ]
+
+detailedCVWeights :: [Double]
+detailedCVWeights = map fst detailedCVInfo
  
  -- | Compute the mem and disk covariance.
  compDetailedCV :: Node.List -> [Double]
@@ -231,9 +236,10 @@ compDetailedCV nl =
          mem_cv = varianceCoeff mem_l
          -- metric: disk covariance
          dsk_cv = varianceCoeff dsk_l
-        n1_l = length $ filter Node.failN1 nodes
-        -- metric: count of failN1 nodes
-        n1_score = fromIntegral n1_l::Double
+        -- metric: count of instances living on N1 failing nodes
+        n1_score = fromIntegral . sum . map (\n -> length (Node.sList n) +
+                                                   length (Node.pList n)) .
+                   filter Node.failN1 $ nodes :: Double
          res_l = map Node.pRem nodes
          -- metric: reserved memory covariance
          res_cv = varianceCoeff res_l
@@ -266,7 +272,7 @@ compDetailedCV nl =
  
  -- | Compute the /total/ variance.
  compCV :: Node.List -> Double
-compCV = sum . compDetailedCV
+compCV = sum . zipWith (*) detailedCVWeights . compDetailedCV
  
  -- | Compute online nodes from a Node.List
  getOnline :: Node.List -> [Node.Node]
@@ -290,8 +296,9 @@ applyMove nl inst Failover =
          old_s = Container.find old_sdx nl
          int_p = Node.removePri old_p inst
          int_s = Node.removeSec old_s inst
+        force_p = Node.offline old_p
          new_nl = do -- Maybe monad
-          new_p <- Node.addPri int_s inst
+          new_p <- Node.addPriEx force_p int_s inst
            new_s <- Node.addSec int_p inst old_sdx
            let new_inst = Instance.setBoth inst old_sdx old_pdx
            return (Container.addTwo old_pdx new_s old_sdx new_p nl,
@@ -307,13 +314,14 @@ applyMove nl inst (ReplacePrimary new_pdx) =
          tgt_n = Container.find new_pdx nl
          int_p = Node.removePri old_p inst
          int_s = Node.removeSec old_s inst
+        force_p = Node.offline old_p
          new_nl = do -- Maybe monad
            -- check that the current secondary can host the instance
            -- during the migration
-          tmp_s <- Node.addPri int_s inst
+          tmp_s <- Node.addPriEx force_p int_s inst
            let tmp_s' = Node.removePri tmp_s inst
-          new_p <- Node.addPri tgt_n inst
-          new_s <- Node.addSec tmp_s' inst new_pdx
+          new_p <- Node.addPriEx force_p tgt_n inst
+          new_s <- Node.addSecEx force_p tmp_s' inst new_pdx
            let new_inst = Instance.setPri inst new_pdx
            return (Container.add new_pdx new_p $
                    Container.addTwo old_pdx int_p old_sdx new_s nl,
@@ -327,8 +335,9 @@ applyMove nl inst (ReplaceSecondary new_sdx) =
          old_s = Container.find old_sdx nl
          tgt_n = Container.find new_sdx nl
          int_s = Node.removeSec old_s inst
+        force_s = Node.offline old_s
          new_inst = Instance.setSec inst new_sdx
-        new_nl = Node.addSec tgt_n inst old_pdx >>=
+        new_nl = Node.addSecEx force_s tgt_n inst old_pdx >>=
                   \new_s -> return (Container.addTwo new_sdx
                                     new_s old_sdx int_s nl,
                                     new_inst, old_pdx, new_sdx)
@@ -343,9 +352,10 @@ applyMove nl inst (ReplaceAndFailover new_pdx) =
          tgt_n = Container.find new_pdx nl
          int_p = Node.removePri old_p inst
          int_s = Node.removeSec old_s inst
+        force_s = Node.offline old_s
          new_nl = do -- Maybe monad
            new_p <- Node.addPri tgt_n inst
-          new_s <- Node.addSec int_p inst new_pdx
+          new_s <- Node.addSecEx force_s int_p inst new_pdx
            let new_inst = Instance.setBoth inst new_pdx old_pdx
            return (Container.add new_pdx new_p $
                    Container.addTwo old_pdx new_s old_sdx int_s nl,
@@ -361,9 +371,10 @@ applyMove nl inst (FailoverAndReplace new_sdx) =
          tgt_n = Container.find new_sdx nl
          int_p = Node.removePri old_p inst
          int_s = Node.removeSec old_s inst
+        force_p = Node.offline old_p
          new_nl = do -- Maybe monad
-          new_p <- Node.addPri int_s inst
-          new_s <- Node.addSec tgt_n inst old_sdx
+          new_p <- Node.addPriEx force_p int_s inst
+          new_s <- Node.addSecEx force_p tgt_n inst old_sdx
            let new_inst = Instance.setBoth inst old_sdx new_sdx
            return (Container.add new_sdx new_s $
                    Container.addTwo old_sdx new_p old_pdx int_p nl,
@@ -376,9 +387,10 @@ allocateOnSingle :: Node.List -> Instance.Instance -> Node.Node
  allocateOnSingle nl inst p =
      let new_pdx = Node.idx p
          new_inst = Instance.setBoth inst new_pdx Node.noSecondary
-        new_nl = Node.addPri p inst >>= \new_p ->
-                 return (Container.add new_pdx new_p nl, new_inst, [new_p])
-    in new_nl
+    in  Node.addPri p inst >>= \new_p -> do
+      let new_nl = Container.add new_pdx new_p nl
+          new_score = compCV nl
+      return (new_nl, new_inst, [new_p], new_score)
  
  -- | Tries to allocate an instance on a given pair of nodes.
  allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
@@ -386,13 +398,12 @@ allocateOnPair :: Node.List -> Instance.Instance -> Node.Node -> Node.Node
  allocateOnPair nl inst tgt_p tgt_s =
      let new_pdx = Node.idx tgt_p
          new_sdx = Node.idx tgt_s
-        new_nl = do -- Maybe monad
-          new_p <- Node.addPri tgt_p inst
-          new_s <- Node.addSec tgt_s inst new_pdx
-          let new_inst = Instance.setBoth inst new_pdx new_sdx
-          return (Container.addTwo new_pdx new_p new_sdx new_s nl, new_inst,
-                 [new_p, new_s])
-    in new_nl
+    in do
+      new_p <- Node.addPri tgt_p inst
+      new_s <- Node.addSec tgt_s inst new_pdx
+      let new_inst = Instance.setBoth inst new_pdx new_sdx
+          new_nl = Container.addTwo new_pdx new_p new_sdx new_s nl
+      return (new_nl, new_inst, [new_p, new_s], compCV new_nl)
  
  -- | Tries to perform an instance move and returns the best table
  -- between the original one and the new one.
@@ -476,23 +487,23 @@ checkMove nodes_idx disk_moves ini_tbl victims =
         else best_tbl
  
  -- | Check if we are allowed to go deeper in the balancing
-
-doNextBalance :: Table       -- ^ The starting table
-              -> Int         -- ^ Remaining length
-              -> Score       -- ^ Score at which to stop
-              -> Bool -- ^ The resulting table and commands
+doNextBalance :: Table     -- ^ The starting table
+              -> Int       -- ^ Remaining length
+              -> Score     -- ^ Score at which to stop
+              -> Bool      -- ^ The resulting table and commands
  doNextBalance ini_tbl max_rounds min_score =
      let Table _ _ ini_cv ini_plc = ini_tbl
          ini_plc_len = length ini_plc
      in (max_rounds < 0 || ini_plc_len < max_rounds) && ini_cv > min_score
  
  -- | Run a balance move
-
  tryBalance :: Table       -- ^ The starting table
             -> Bool        -- ^ Allow disk moves
             -> Bool        -- ^ Only evacuate moves
+           -> Score       -- ^ Min gain threshold
+           -> Score       -- ^ Min gain
             -> Maybe Table -- ^ The resulting table and commands
-tryBalance ini_tbl disk_moves evac_mode =
+tryBalance ini_tbl disk_moves evac_mode mg_limit min_gain =
      let Table ini_nl ini_il ini_cv _ = ini_tbl
          all_inst = Container.elems ini_il
          all_inst' = if evac_mode
@@ -508,7 +519,7 @@ tryBalance ini_tbl disk_moves evac_mode =
          fin_tbl = checkMove node_idx disk_moves ini_tbl reloc_inst
          (Table _ _ fin_cv _) = fin_tbl
      in
-      if fin_cv < ini_cv
+      if fin_cv < ini_cv && (ini_cv > mg_limit || ini_cv - fin_cv >= min_gain)
        then Just fin_tbl -- this round made success, return the new table
        else Nothing
  
@@ -524,19 +535,18 @@ collapseFailures flst =
  concatAllocs :: AllocSolution -> OpResult Node.AllocElement -> AllocSolution
  concatAllocs (flst, cntok, sols) (OpFail reason) = (reason:flst, cntok, sols)
  
-concatAllocs (flst, cntok, osols) (OpGood ns@(nl, _, _)) =
-    let nscore = compCV nl
-        -- Choose the old or new solution, based on the cluster score
+concatAllocs (flst, cntok, osols) (OpGood ns@(_, _, _, nscore)) =
+    let -- Choose the old or new solution, based on the cluster score
          nsols = case osols of
-                  [] -> [(nscore, ns)]
-                  (oscore, _):[] ->
+                  [] -> [ns]
+                  (_, _, _, oscore):[] ->
                        if oscore < nscore
                        then osols
-                      else [(nscore, ns)]
+                      else [ns]
                    -- FIXME: here we simply concat to lists with more
                    -- than one element; we should instead abort, since
                    -- this is not a valid usage of this function
-                  xs -> (nscore, ns):xs
+                  xs -> ns:xs
          nsuc = cntok + 1
      -- Note: we force evaluation of nsols here in order to keep the
      -- memory profile low - we know that we will need nsols for sure
@@ -590,7 +600,8 @@ tryReloc nl il xid 1 ex_idx =
                              let em = do
                                    (mnl, i, _, _) <-
                                        applyMove nl inst (ReplaceSecondary x)
-                                  return (mnl, i, [Container.find x mnl])
+                                  return (mnl, i, [Container.find x mnl],
+                                          compCV mnl)
                              in concatAllocs cstate em
                         ) ([], 0, []) valid_idxes
      in return sols1
@@ -599,7 +610,7 @@ tryReloc _ _ _ reqn _  = fail $ "Unsupported number of relocation \
                                  \destinations required (" ++ show reqn ++
                                                    "), only one supported"
  
--- | Try to allocate an instance on the cluster.
+-- | Try to evacuate a list of nodes.
  tryEvac :: (Monad m) =>
              Node.List       -- ^ The node list
           -> Instance.List   -- ^ The instance list
@@ -613,10 +624,10 @@ tryEvac nl il ex_ndx =
                             -- FIXME: hardcoded one node here
                             (fm, cs, aes) <- tryReloc nl' il idx 1 ex_ndx
                             case aes of
-                             csol@(_, (nl'', _, _)):_ ->
+                             csol@(nl'', _, _, _):_ ->
                                   return (nl'', (fm, cs, csol:rsols))
                               _ -> fail $ "Can't evacuate instance " ++
-                                  show idx
+                                  Instance.name (Container.find idx il)
                          ) (nl, ([], 0, [])) all_insts
        return sol
  
@@ -626,7 +637,8 @@ iterateAlloc :: Node.List
               -> Instance.Instance
               -> Int
               -> [Instance.Instance]
-             -> Result (FailStats, Node.List, [Instance.Instance])
+             -> Result (FailStats, Node.List, Instance.List,
+                        [Instance.Instance])
  iterateAlloc nl il newinst nreq ixes =
        let depth = length ixes
            newname = printf "new-%d" depth::String
@@ -636,9 +648,10 @@ iterateAlloc nl il newinst nreq ixes =
             Bad s -> Bad s
             Ok (errs, _, sols3) ->
                 case sols3 of
-                 [] -> Ok (collapseFailures errs, nl, ixes)
-                 (_, (xnl, xi, _)):[] ->
-                     iterateAlloc xnl il newinst nreq $! (xi:ixes)
+                 [] -> Ok (collapseFailures errs, nl, il, ixes)
+                 (xnl, xi, _, _):[] ->
+                     iterateAlloc xnl (Container.add newidx xi il)
+                                  newinst nreq $! (xi:ixes)
                   _ -> Bad "Internal error: multiple solutions for single\
                            \ allocation"
  
@@ -647,16 +660,17 @@ tieredAlloc :: Node.List
              -> Instance.Instance
              -> Int
              -> [Instance.Instance]
-            -> Result (FailStats, Node.List, [Instance.Instance])
+            -> Result (FailStats, Node.List, Instance.List,
+                       [Instance.Instance])
  tieredAlloc nl il newinst nreq ixes =
      case iterateAlloc nl il newinst nreq ixes of
        Bad s -> Bad s
-      Ok (errs, nl', ixes') ->
+      Ok (errs, nl', il', ixes') ->
            case Instance.shrinkByType newinst . fst . last $
                 sortBy (comparing snd) errs of
-            Bad _ -> Ok (errs, nl', ixes')
+            Bad _ -> Ok (errs, nl', il', ixes')
              Ok newinst' ->
-                tieredAlloc nl' il newinst' nreq ixes'
+                tieredAlloc nl' il' newinst' nreq ixes'
  
  -- * Formatting functions
  
@@ -696,11 +710,11 @@ printSolutionLine nl il nmlen imlen plc pos =
          pmlen = (2*nmlen + 1)
          (i, p, s, mv, c) = plc
          inst = Container.find i il
-        inam = Instance.name inst
-        npri = Container.nameOf nl p
-        nsec = Container.nameOf nl s
-        opri = Container.nameOf nl $ Instance.pNode inst
-        osec = Container.nameOf nl $ Instance.sNode inst
+        inam = Instance.alias inst
+        npri = Node.alias $ Container.find p nl
+        nsec = Node.alias $ Container.find s nl
+        opri = Node.alias $ Container.find (Instance.pNode inst) nl
+        osec = Node.alias $ Container.find (Instance.sNode inst) nl
          (moves, cmds) =  computeMoves inst inam mv npri nsec
          ostr = printf "%s:%s" opri osec::String
          nstr = printf "%s:%s" npri nsec::String
@@ -753,24 +767,13 @@ formatCmds =
                               (zip [1..] js)) .
      zip [1..]
  
--- | Converts a solution to string format.
-printSolution :: Node.List
-              -> Instance.List
-              -> [Placement]
-              -> ([String], [[String]])
-printSolution nl il sol =
-    let
-        nmlen = Container.maxNameLen nl
-        imlen = Container.maxNameLen il
-    in
-      unzip $ zipWith (printSolutionLine nl il nmlen imlen) sol [1..]
-
  -- | Print the node list.
  printNodes :: Node.List -> [String] -> String
  printNodes nl fs =
-    let fields = if null fs
-                 then Node.defaultFields
-                 else fs
+    let fields = case fs of
+          [] -> Node.defaultFields
+          "+":rest -> Node.defaultFields ++ rest
+          _ -> fs
          snl = sortBy (comparing Node.idx) (Container.elems nl)
          (header, isnum) = unzip $ map Node.showHeader fields
      in unlines . map ((:) ' ' .  intercalate " ") $
@@ -806,9 +809,10 @@ printInsts nl il =
  printStats :: Node.List -> String
  printStats nl =
      let dcvs = compDetailedCV nl
-        hd = zip (detailedCVNames ++ repeat "unknown") dcvs
-        formatted = map (\(header, val) ->
-                             printf "%s=%.8f" header val::String) hd
+        (weights, names) = unzip detailedCVInfo
+        hd = zip3 (weights ++ repeat 1) (names ++ repeat "unknown") dcvs
+        formatted = map (\(w, header, val) ->
+                             printf "%s=%.8f(x%.2f)" header val w::String) hd
      in intercalate ", " formatted
  
  -- | Convert a placement into a list of OpCodes (basically a job).
@@ -829,3 +833,35 @@ iMoveToJob nl il idx move =
           ReplaceSecondary ns -> [ opR ns ]
           ReplaceAndFailover np -> [ opR np, opF ]
           FailoverAndReplace ns -> [ opF, opR ns ]
+
+-- | Computes the group of an instance
+instanceGroup :: Node.List -> Instance.Instance -> Result GroupID
+instanceGroup nl i =
+  let sidx = Instance.sNode i
+      pnode = Container.find (Instance.pNode i) nl
+      snode = if sidx == Node.noSecondary
+              then pnode
+              else Container.find sidx nl
+      puuid = Node.group pnode
+      suuid = Node.group snode
+  in if puuid /= suuid
+     then fail ("Instance placed accross two node groups, primary " ++ puuid ++
+                ", secondary " ++ suuid)
+     else return puuid
+
+-- | Compute the list of badly allocated instances (split across node
+-- groups)
+findSplitInstances :: Node.List -> Instance.List -> [Instance.Instance]
+findSplitInstances nl il =
+  filter (not . isOk . instanceGroup nl) (Container.elems il)
+
+-- | Splits a cluster into the component node groups
+splitCluster :: Node.List -> Instance.List ->
+                [(GroupID, (Node.List, Instance.List))]
+splitCluster nl il =
+  let ngroups = Node.computeGroups (Container.elems nl)
+  in map (\(guuid, nodes) ->
+           let nidxs = map Node.idx nodes
+               nodes' = zip nidxs nodes
+               instances = Container.filter ((`elem` nidxs) . Instance.pNode) il
+           in (guuid, (Container.fromAssocList nodes', instances))) ngroups