{-
-Copyright (C) 2009 Google Inc.
+Copyright (C) 2009, 2010 Google Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-- ** Finalization after data loading
, buildPeers
, setIdx
- , setName
+ , setAlias
, setOffline
, setXmem
, setFmem
, setSec
, setMdsk
, setMcpu
+ -- * Tag maps
+ , addTags
+ , delTags
+ , rejectAddTags
-- * Instance (re)location
, removePri
, removeSec
, addPri
+ , addPriEx
, addSec
+ , addSecEx
-- * Stats
, availDisk
+ , availMem
+ , availCpu
+ , conflictingPrimaries
-- * Formatting
+ , defaultFields
+ , showHeader
+ , showField
, list
-- * Misc stuff
, AssocList
+ , AllocElement
, noSecondary
+ , computeGroups
) where
-import Data.List
+import Data.List hiding (group)
+import qualified Data.Map as Map
+import qualified Data.Foldable as Foldable
+import Data.Ord (comparing)
import Text.Printf (printf)
import qualified Ganeti.HTools.Container as Container
import qualified Ganeti.HTools.Instance as Instance
-import qualified Ganeti.HTools.PeerMap as PeerMap
+import qualified Ganeti.HTools.PeerMap as P
import qualified Ganeti.HTools.Types as T
-- * Type declarations
+-- | The tag map type
+type TagMap = Map.Map String Int
+
-- | The node type.
-data Node = Node { name :: String -- ^ The node name
- , tMem :: Double -- ^ Total memory (MiB)
- , nMem :: Int -- ^ Node memory (MiB)
- , fMem :: Int -- ^ Free memory (MiB)
- , xMem :: Int -- ^ Unaccounted memory (MiB)
- , tDsk :: Double -- ^ Total disk space (MiB)
- , fDsk :: Int -- ^ Free disk space (MiB)
- , tCpu :: Double -- ^ Total CPU count
- , uCpu :: Int -- ^ Used VCPU count
- , pList :: [T.Idx]-- ^ List of primary instance indices
- , sList :: [T.Idx]-- ^ List of secondary instance indices
- , idx :: T.Ndx -- ^ Internal index for book-keeping
- , peers :: PeerMap.PeerMap -- ^ Pnode to instance mapping
- , failN1 :: Bool -- ^ Whether the node has failed n1
- , rMem :: Int -- ^ Maximum memory needed for
- -- failover by primaries of this node
- , pMem :: Double -- ^ Percent of free memory
- , pDsk :: Double -- ^ Percent of free disk
- , pRem :: Double -- ^ Percent of reserved memory
- , pCpu :: Double -- ^ Ratio of virtual to physical CPUs
- , mDsk :: Double -- ^ Minimum free disk ratio
- , mCpu :: Double -- ^ Max ratio of virt-to-phys CPUs
- , loDsk :: Int -- ^ Autocomputed from mDsk low disk
- -- threshold
- , hiCpu :: Int -- ^ Autocomputed from mCpu high cpu
- -- threshold
- , offline :: Bool -- ^ Whether the node should not be used
- -- for allocations and skipped from
- -- score computations
- , utilPool :: T.DynUtil -- ^ Total utilisation capacity
- , utilLoad :: T.DynUtil -- ^ Sum of instance utilisation
- } deriving (Show)
+data Node = Node
+ { name :: String -- ^ The node name
+ , alias :: String -- ^ The shortened name (for display purposes)
+ , tMem :: Double -- ^ Total memory (MiB)
+ , nMem :: Int -- ^ Node memory (MiB)
+ , fMem :: Int -- ^ Free memory (MiB)
+ , xMem :: Int -- ^ Unaccounted memory (MiB)
+ , tDsk :: Double -- ^ Total disk space (MiB)
+ , fDsk :: Int -- ^ Free disk space (MiB)
+ , tCpu :: Double -- ^ Total CPU count
+ , uCpu :: Int -- ^ Used VCPU count
+ , pList :: [T.Idx] -- ^ List of primary instance indices
+ , sList :: [T.Idx] -- ^ List of secondary instance indices
+ , idx :: T.Ndx -- ^ Internal index for book-keeping
+ , peers :: P.PeerMap -- ^ Pnode to instance mapping
+ , failN1 :: Bool -- ^ Whether the node has failed n1
+ , rMem :: Int -- ^ Maximum memory needed for failover by
+ -- primaries of this node
+ , pMem :: Double -- ^ Percent of free memory
+ , pDsk :: Double -- ^ Percent of free disk
+ , pRem :: Double -- ^ Percent of reserved memory
+ , pCpu :: Double -- ^ Ratio of virtual to physical CPUs
+ , mDsk :: Double -- ^ Minimum free disk ratio
+ , mCpu :: Double -- ^ Max ratio of virt-to-phys CPUs
+ , loDsk :: Int -- ^ Autocomputed from mDsk low disk
+ -- threshold
+ , hiCpu :: Int -- ^ Autocomputed from mCpu high cpu
+ -- threshold
+ , offline :: Bool -- ^ Whether the node should not be used
+ -- for allocations and skipped from score
+ -- computations
+ , utilPool :: T.DynUtil -- ^ Total utilisation capacity
+ , utilLoad :: T.DynUtil -- ^ Sum of instance utilisation
+ , pTags :: TagMap -- ^ Map of primary instance tags and their count
+ , group :: T.Gdx -- ^ The node's group (index)
+ } deriving (Show, Eq)
instance T.Element Node where
nameOf = name
idxOf = idx
- setName = setName
+ setAlias = setAlias
setIdx = setIdx
+ allNames n = [name n, alias n]
-- | A simple name for the int, node association list.
type AssocList = [(T.Ndx, Node)]
-- | A simple name for a node map.
type List = Container.Container Node
+-- | A simple name for an allocation element (here just for logistic
+-- reasons)
+type AllocElement = (List, Instance.Instance, [Node], T.Score)
+
-- | Constant node index for a non-moveable instance.
noSecondary :: T.Ndx
noSecondary = -1
--- | No limit value
-noLimit :: Double
-noLimit = -1
+-- * Helper functions
+
+-- | Add a tag to a tagmap
+addTag :: TagMap -> String -> TagMap
+addTag t s = Map.insertWith (+) s 1 t
+
+-- | Add multiple tags
+addTags :: TagMap -> [String] -> TagMap
+addTags = foldl' addTag
+
+-- | Adjust or delete a tag from a tagmap
+delTag :: TagMap -> String -> TagMap
+delTag t s = Map.update (\v -> if v > 1
+ then Just (v-1)
+ else Nothing)
+ s t
+
+-- | Remove multiple tags
+delTags :: TagMap -> [String] -> TagMap
+delTags = foldl' delTag
--- | No limit int value
-noLimitInt :: Int
-noLimitInt = -1
+-- | Check if we can add a list of tags to a tagmap
+rejectAddTags :: TagMap -> [String] -> Bool
+rejectAddTags t = any (`Map.member` t)
+
+-- | Check how many primary instances have conflicting tags. The
+-- algorithm to compute this is to sum the count of all tags, then
+-- subtract the size of the tag map (since each tag has at least one,
+-- non-conflicting instance); this is equivalent to summing the
+-- values in the tag map minus one.
+conflictingPrimaries :: Node -> Int
+conflictingPrimaries (Node { pTags = t }) = Foldable.sum t - Map.size t
-- * Initialization functions
-- The index and the peers maps are empty, and will be need to be
-- update later via the 'setIdx' and 'buildPeers' functions.
create :: String -> Double -> Int -> Int -> Double
- -> Int -> Double -> Bool -> Node
+ -> Int -> Double -> Bool -> T.Gdx -> Node
create name_init mem_t_init mem_n_init mem_f_init
- dsk_t_init dsk_f_init cpu_t_init offline_init =
- Node { name = name_init
+ dsk_t_init dsk_f_init cpu_t_init offline_init group_init =
+ Node { name = name_init
+ , alias = name_init
, tMem = mem_t_init
, nMem = mem_n_init
, fMem = mem_f_init
, sList = []
, failN1 = True
, idx = -1
- , peers = PeerMap.empty
+ , peers = P.empty
, rMem = 0
, pMem = fromIntegral mem_f_init / mem_t_init
, pDsk = fromIntegral dsk_f_init / dsk_t_init
, pCpu = 0
, offline = offline_init
, xMem = 0
- , mDsk = noLimit
- , mCpu = noLimit
- , loDsk = noLimitInt
- , hiCpu = noLimitInt
+ , mDsk = T.defReservedDiskRatio
+ , mCpu = T.defVcpuRatio
+ , loDsk = mDskToloDsk T.defReservedDiskRatio dsk_t_init
+ , hiCpu = mCpuTohiCpu T.defVcpuRatio cpu_t_init
, utilPool = T.baseUtil
, utilLoad = T.zeroUtil
+ , pTags = Map.empty
+ , group = group_init
}
+-- | Conversion formula from mDsk\/tDsk to loDsk
+mDskToloDsk :: Double -> Double -> Int
+mDskToloDsk mval tdsk = floor (mval * tdsk)
+
+-- | Conversion formula from mCpu\/tCpu to hiCpu
+mCpuTohiCpu :: Double -> Double -> Int
+mCpuTohiCpu mval tcpu = floor (mval * tcpu)
+
-- | Changes the index.
--
-- This is used only during the building of the data structures.
setIdx :: Node -> T.Ndx -> Node
setIdx t i = t {idx = i}
--- | Changes the name.
+-- | Changes the alias.
--
-- This is used only during the building of the data structures.
-setName :: Node -> String -> Node
-setName t s = t {name = s}
+setAlias :: Node -> String -> Node
+setAlias t s = t { alias = s }
-- | Sets the offline attribute.
setOffline :: Node -> Bool -> Node
-- | Sets the max disk usage ratio
setMdsk :: Node -> Double -> Node
-setMdsk t val = t { mDsk = val,
- loDsk = if val == noLimit
- then noLimitInt
- else floor (val * tDsk t) }
+setMdsk t val = t { mDsk = val, loDsk = mDskToloDsk val (tDsk t) }
-- | Sets the max cpu usage ratio
setMcpu :: Node -> Double -> Node
-setMcpu t val = t { mCpu = val, hiCpu = floor (val * tCpu t) }
+setMcpu t val = t { mCpu = val, hiCpu = mCpuTohiCpu val (tCpu t) }
-- | Computes the maximum reserved memory for peers from a peer map.
-computeMaxRes :: PeerMap.PeerMap -> PeerMap.Elem
-computeMaxRes = PeerMap.maxElem
+computeMaxRes :: P.PeerMap -> P.Elem
+computeMaxRes = P.maxElem
-- | Builds the peer map for a given node.
buildPeers :: Node -> Instance.List -> Node
(\i_idx -> let inst = Container.find i_idx il
in (Instance.pNode inst, Instance.mem inst))
(sList t)
- pmap = PeerMap.accumArray (+) mdata
+ pmap = P.accumArray (+) mdata
new_rmem = computeMaxRes pmap
new_failN1 = fMem t <= new_rmem
new_prem = fromIntegral new_rmem / tMem t
in t {peers=pmap, failN1 = new_failN1, rMem = new_rmem, pRem = new_prem}
-- | Assigns an instance to a node as primary and update the used VCPU
--- count and utilisation data.
+-- count, utilisation data and tags map.
setPri :: Node -> Instance.Instance -> Node
setPri t inst = t { pList = Instance.idx inst:pList t
, uCpu = new_count
, pCpu = fromIntegral new_count / tCpu t
, utilLoad = utilLoad t `T.addUtil` Instance.util inst
+ , pTags = addTags (pTags t) (Instance.tags inst)
}
where new_count = uCpu t + Instance.vcpus inst
new_load = utilLoad t `T.subUtil` Instance.util inst
in t { pList = new_plist, fMem = new_mem, fDsk = new_dsk
, failN1 = new_failn1, pMem = new_mp, pDsk = new_dp
- , uCpu = new_ucpu, pCpu = new_rcpu, utilLoad = new_load }
+ , uCpu = new_ucpu, pCpu = new_rcpu, utilLoad = new_load
+ , pTags = delTags (pTags t) (Instance.tags inst) }
-- | Removes a secondary instance.
removeSec :: Node -> Instance.Instance -> Node
new_slist = delete iname (sList t)
new_dsk = fDsk t + Instance.dsk inst
old_peers = peers t
- old_peem = PeerMap.find pnode old_peers
+ old_peem = P.find pnode old_peers
new_peem = old_peem - Instance.mem inst
- new_peers = PeerMap.add pnode new_peem old_peers
+ new_peers = if new_peem > 0
+ then P.add pnode new_peem old_peers
+ else P.remove pnode old_peers
old_rmem = rMem t
new_rmem = if old_peem < old_rmem
then old_rmem
, failN1 = new_failn1, rMem = new_rmem, pDsk = new_dp
, pRem = new_prem, utilLoad = new_load }
--- | Adds a primary instance.
+-- | Adds a primary instance (basic version).
addPri :: Node -> Instance.Instance -> T.OpResult Node
-addPri t inst =
+addPri = addPriEx False
+
+-- | Adds a primary instance (extended version).
+addPriEx :: Bool -- ^ Whether to override the N+1 and
+ -- other /soft/ checks, useful if we
+ -- come from a worse status
+ -- (e.g. offline)
+ -> Node -- ^ The target node
+ -> Instance.Instance -- ^ The instance to add
+ -> T.OpResult Node -- ^ The result of the operation,
+ -- either the new version of the node
+ -- or a failure mode
+addPriEx force t inst =
let iname = Instance.idx inst
new_mem = fMem t - Instance.mem inst
new_dsk = fDsk t - Instance.dsk inst
new_dp = fromIntegral new_dsk / tDsk t
l_cpu = mCpu t
new_load = utilLoad t `T.addUtil` Instance.util inst
- in if new_mem <= 0 then T.OpFail T.FailMem
- else if new_dsk <= 0 || mDsk t > new_dp then T.OpFail T.FailDisk
- else if new_failn1 && not (failN1 t) then T.OpFail T.FailMem
- else if l_cpu >= 0 && l_cpu < new_pcpu then T.OpFail T.FailCPU
- else
- let new_plist = iname:pList t
- new_mp = fromIntegral new_mem / tMem t
- r = t { pList = new_plist, fMem = new_mem, fDsk = new_dsk
- , failN1 = new_failn1, pMem = new_mp, pDsk = new_dp
- , uCpu = new_ucpu, pCpu = new_pcpu, utilLoad = new_load }
- in T.OpGood r
-
--- | Adds a secondary instance.
+ inst_tags = Instance.tags inst
+ old_tags = pTags t
+ strict = not force
+ in case () of
+ _ | new_mem <= 0 -> T.OpFail T.FailMem
+ | new_dsk <= 0 -> T.OpFail T.FailDisk
+ | mDsk t > new_dp && strict -> T.OpFail T.FailDisk
+ | new_failn1 && not (failN1 t) && strict -> T.OpFail T.FailMem
+ | l_cpu >= 0 && l_cpu < new_pcpu && strict -> T.OpFail T.FailCPU
+ | rejectAddTags old_tags inst_tags -> T.OpFail T.FailTags
+ | otherwise ->
+ let new_plist = iname:pList t
+ new_mp = fromIntegral new_mem / tMem t
+ r = t { pList = new_plist, fMem = new_mem, fDsk = new_dsk
+ , failN1 = new_failn1, pMem = new_mp, pDsk = new_dp
+ , uCpu = new_ucpu, pCpu = new_pcpu
+ , utilLoad = new_load
+ , pTags = addTags old_tags inst_tags }
+ in T.OpGood r
+
+-- | Adds a secondary instance (basic version).
addSec :: Node -> Instance.Instance -> T.Ndx -> T.OpResult Node
-addSec t inst pdx =
+addSec = addSecEx False
+
+-- | Adds a secondary instance (extended version).
+addSecEx :: Bool -> Node -> Instance.Instance -> T.Ndx -> T.OpResult Node
+addSecEx force t inst pdx =
let iname = Instance.idx inst
old_peers = peers t
old_mem = fMem t
new_dsk = fDsk t - Instance.dsk inst
- new_peem = PeerMap.find pdx old_peers + Instance.mem inst
- new_peers = PeerMap.add pdx new_peem old_peers
+ new_peem = P.find pdx old_peers + Instance.mem inst
+ new_peers = P.add pdx new_peem old_peers
new_rmem = max (rMem t) new_peem
new_prem = fromIntegral new_rmem / tMem t
new_failn1 = old_mem <= new_rmem
old_load = utilLoad t
new_load = old_load { T.dskWeight = T.dskWeight old_load +
T.dskWeight (Instance.util inst) }
- in if new_dsk <= 0 || mDsk t > new_dp then T.OpFail T.FailDisk
- else if new_failn1 && not (failN1 t) then T.OpFail T.FailMem
- else let new_slist = iname:sList t
- r = t { sList = new_slist, fDsk = new_dsk
- , peers = new_peers, failN1 = new_failn1
- , rMem = new_rmem, pDsk = new_dp
- , pRem = new_prem, utilLoad = new_load }
- in T.OpGood r
+ strict = not force
+ in case () of
+ _ | new_dsk <= 0 -> T.OpFail T.FailDisk
+ | mDsk t > new_dp && strict -> T.OpFail T.FailDisk
+ | Instance.mem inst >= old_mem && strict -> T.OpFail T.FailMem
+ | new_failn1 && not (failN1 t) && strict -> T.OpFail T.FailMem
+ | otherwise ->
+ let new_slist = iname:sList t
+ r = t { sList = new_slist, fDsk = new_dsk
+ , peers = new_peers, failN1 = new_failn1
+ , rMem = new_rmem, pDsk = new_dp
+ , pRem = new_prem, utilLoad = new_load }
+ in T.OpGood r
-- * Stats functions
availDisk t =
let _f = fDsk t
_l = loDsk t
- in
- if _l == noLimitInt
- then _f
- else if _f < _l
- then 0
- else _f - _l
+ in if _f < _l
+ then 0
+ else _f - _l
+
+-- | Computes the amount of available memory on a given node
+availMem :: Node -> Int
+availMem t =
+ let _f = fMem t
+ _l = rMem t
+ in if _f < _l
+ then 0
+ else _f - _l
+
+-- | Computes the amount of available memory on a given node
+availCpu :: Node -> Int
+availCpu t =
+ let _u = uCpu t
+ _l = hiCpu t
+ in if _l >= _u
+ then _l - _u
+ else 0
-- * Display functions
showField :: Node -> String -> String
showField t field =
case field of
- "name" -> name t
+ "idx" -> printf "%4d" $ idx t
+ "name" -> alias t
+ "fqdn" -> name t
"status" -> if offline t then "-"
else if failN1 t then "*" else " "
"tmem" -> printf "%5.0f" $ tMem t
"fmem" -> printf "%5d" $ fMem t
"imem" -> printf "%5d" imem
"rmem" -> printf "%5d" $ rMem t
+ "amem" -> printf "%5d" $ fMem t - rMem t
"tdsk" -> printf "%5.0f" $ tDsk t / 1024
"fdsk" -> printf "%5d" $ fDsk t `div` 1024
"tcpu" -> printf "%4.0f" $ tCpu t
"ucpu" -> printf "%4d" $ uCpu t
- "plist" -> printf "%3d" $ length (pList t)
- "slist" -> printf "%3d" $ length (sList t)
+ "pcnt" -> printf "%3d" $ length (pList t)
+ "scnt" -> printf "%3d" $ length (sList t)
+ "plist" -> show $ pList t
+ "slist" -> show $ sList t
"pfmem" -> printf "%6.4f" $ pMem t
"pfdsk" -> printf "%6.4f" $ pDsk t
"rcpu" -> printf "%5.2f" $ pCpu t
"mload" -> printf "%5.3f" uM
"dload" -> printf "%5.3f" uD
"nload" -> printf "%5.3f" uN
- _ -> printf "<unknown field>"
+ "ptags" -> intercalate "," . map (\(k, v) -> printf "%s=%d" k v) .
+ Map.toList $ pTags t
+ "peermap" -> show $ peers t
+ _ -> T.unknownField
where
T.DynUtil { T.cpuWeight = uC, T.memWeight = uM,
T.dskWeight = uD, T.netWeight = uN } = utilLoad t
imem = truncate (tMem t) - nMem t - xMem t - fMem t
+-- | Returns the header and numeric propery of a field
+showHeader :: String -> (String, Bool)
+showHeader field =
+ case field of
+ "idx" -> ("Index", True)
+ "name" -> ("Name", False)
+ "fqdn" -> ("Name", False)
+ "status" -> ("F", False)
+ "tmem" -> ("t_mem", True)
+ "nmem" -> ("n_mem", True)
+ "xmem" -> ("x_mem", True)
+ "fmem" -> ("f_mem", True)
+ "imem" -> ("i_mem", True)
+ "rmem" -> ("r_mem", True)
+ "amem" -> ("a_mem", True)
+ "tdsk" -> ("t_dsk", True)
+ "fdsk" -> ("f_dsk", True)
+ "tcpu" -> ("pcpu", True)
+ "ucpu" -> ("vcpu", True)
+ "pcnt" -> ("pcnt", True)
+ "scnt" -> ("scnt", True)
+ "plist" -> ("primaries", True)
+ "slist" -> ("secondaries", True)
+ "pfmem" -> ("p_fmem", True)
+ "pfdsk" -> ("p_fdsk", True)
+ "rcpu" -> ("r_cpu", True)
+ "cload" -> ("lCpu", True)
+ "mload" -> ("lMem", True)
+ "dload" -> ("lDsk", True)
+ "nload" -> ("lNet", True)
+ "ptags" -> ("PrimaryTags", False)
+ "peermap" -> ("PeerMap", False)
+ -- TODO: add node fields (group.uuid, group)
+ _ -> (T.unknownField, False)
-- | String converter for the node list functionality.
-list :: Node -> [String]
-list t = map (showField t)
- [ "status", "name", "tmem", "nmem", "imem", "xmem", "fmem"
- , "rmem", "tdsk", "fdsk", "tcpu", "ucpu", "plist", "slist"
- , "pfmem", "pfdsk", "rcpu"
- , "cload", "mload", "dload", "nload" ]
+list :: [String] -> Node -> [String]
+list fields t = map (showField t) fields
+
+
+defaultFields :: [String]
+defaultFields =
+ [ "status", "name", "tmem", "nmem", "imem", "xmem", "fmem"
+ , "rmem", "tdsk", "fdsk", "tcpu", "ucpu", "pcnt", "scnt"
+ , "pfmem", "pfdsk", "rcpu"
+ , "cload", "mload", "dload", "nload" ]
+
+-- | Split a list of nodes into a list of (node group UUID, list of
+-- associated nodes)
+computeGroups :: [Node] -> [(T.Gdx, [Node])]
+computeGroups nodes =
+ let nodes' = sortBy (comparing group) nodes
+ nodes'' = groupBy (\a b -> group a == group b) nodes'
+ in map (\nl -> (group (head nl), nl)) nodes''