1 {-| Module describing a node.
3 All updates are functional (copy-based) and return a new node with
9 Copyright (C) 2009, 2010, 2011, 2012 Google Inc.
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 module Ganeti.HTools.Node
33 -- ** Finalization after data loading
49 -- * Instance (re)location
62 , conflictingPrimaries
75 import Data.List hiding (group)
76 import qualified Data.Map as Map
77 import qualified Data.Foldable as Foldable
78 import Data.Ord (comparing)
79 import Text.Printf (printf)
81 import qualified Ganeti.HTools.Container as Container
82 import qualified Ganeti.HTools.Instance as Instance
83 import qualified Ganeti.HTools.PeerMap as P
85 import Ganeti.BasicTypes
86 import qualified Ganeti.HTools.Types as T
88 -- * Type declarations
90 -- | The tag map type.
91 type TagMap = Map.Map String Int
95 { name :: String -- ^ The node name
96 , alias :: String -- ^ The shortened name (for display purposes)
97 , tMem :: Double -- ^ Total memory (MiB)
98 , nMem :: Int -- ^ Node memory (MiB)
99 , fMem :: Int -- ^ Free memory (MiB)
100 , xMem :: Int -- ^ Unaccounted memory (MiB)
101 , tDsk :: Double -- ^ Total disk space (MiB)
102 , fDsk :: Int -- ^ Free disk space (MiB)
103 , tCpu :: Double -- ^ Total CPU count
104 , uCpu :: Int -- ^ Used VCPU count
105 , spindleCount :: Int -- ^ Node spindles (spindle_count node parameter)
106 , pList :: [T.Idx] -- ^ List of primary instance indices
107 , sList :: [T.Idx] -- ^ List of secondary instance indices
108 , idx :: T.Ndx -- ^ Internal index for book-keeping
109 , peers :: P.PeerMap -- ^ Pnode to instance mapping
110 , failN1 :: Bool -- ^ Whether the node has failed n1
111 , rMem :: Int -- ^ Maximum memory needed for failover by
112 -- primaries of this node
113 , pMem :: Double -- ^ Percent of free memory
114 , pDsk :: Double -- ^ Percent of free disk
115 , pRem :: Double -- ^ Percent of reserved memory
116 , pCpu :: Double -- ^ Ratio of virtual to physical CPUs
117 , mDsk :: Double -- ^ Minimum free disk ratio
118 , loDsk :: Int -- ^ Autocomputed from mDsk low disk
120 , hiCpu :: Int -- ^ Autocomputed from mCpu high cpu
122 , hiSpindles :: Double -- ^ Auto-computed from policy spindle_ratio
123 -- and the node spindle count
124 , instSpindles :: Double -- ^ Spindles used by instances
125 , offline :: Bool -- ^ Whether the node should not be used for
126 -- allocations and skipped from score
128 , utilPool :: T.DynUtil -- ^ Total utilisation capacity
129 , utilLoad :: T.DynUtil -- ^ Sum of instance utilisation
130 , pTags :: TagMap -- ^ Primary instance exclusion tags and their count
131 , group :: T.Gdx -- ^ The node's group (index)
132 , iPolicy :: T.IPolicy -- ^ The instance policy (of the node's group)
133 } deriving (Show, Read, Eq)
135 instance T.Element Node where
140 allNames n = [name n, alias n]
142 -- | A simple name for the int, node association list.
143 type AssocList = [(T.Ndx, Node)]
145 -- | A simple name for a node map.
146 type List = Container.Container Node
148 -- | A simple name for an allocation element (here just for logistic
150 type AllocElement = (List, Instance.Instance, [Node], T.Score)
152 -- | Constant node index for a non-moveable instance.
156 -- * Helper functions
158 -- | Add a tag to a tagmap.
159 addTag :: TagMap -> String -> TagMap
160 addTag t s = Map.insertWith (+) s 1 t
162 -- | Add multiple tags.
163 addTags :: TagMap -> [String] -> TagMap
164 addTags = foldl' addTag
166 -- | Adjust or delete a tag from a tagmap.
167 delTag :: TagMap -> String -> TagMap
168 delTag t s = Map.update (\v -> if v > 1
173 -- | Remove multiple tags.
174 delTags :: TagMap -> [String] -> TagMap
175 delTags = foldl' delTag
177 -- | Check if we can add a list of tags to a tagmap.
178 rejectAddTags :: TagMap -> [String] -> Bool
179 rejectAddTags t = any (`Map.member` t)
181 -- | Check how many primary instances have conflicting tags. The
182 -- algorithm to compute this is to sum the count of all tags, then
183 -- subtract the size of the tag map (since each tag has at least one,
184 -- non-conflicting instance); this is equivalent to summing the
185 -- values in the tag map minus one.
186 conflictingPrimaries :: Node -> Int
187 conflictingPrimaries (Node { pTags = t }) = Foldable.sum t - Map.size t
189 -- | Helper function to increment a base value depending on the passed
191 incIf :: (Num a) => Bool -> a -> a -> a
192 incIf True base delta = base + delta
193 incIf False base _ = base
195 -- | Helper function to decrement a base value depending on the passed
197 decIf :: (Num a) => Bool -> a -> a -> a
198 decIf True base delta = base - delta
199 decIf False base _ = base
201 -- * Initialization functions
203 -- | Create a new node.
205 -- The index and the peers maps are empty, and will be need to be
206 -- update later via the 'setIdx' and 'buildPeers' functions.
207 create :: String -> Double -> Int -> Int -> Double
208 -> Int -> Double -> Bool -> Int -> T.Gdx -> Node
209 create name_init mem_t_init mem_n_init mem_f_init
210 dsk_t_init dsk_f_init cpu_t_init offline_init spindles_init
212 Node { name = name_init
220 , spindleCount = spindles_init
228 , pMem = fromIntegral mem_f_init / mem_t_init
229 , pDsk = computePDsk dsk_f_init dsk_t_init
232 , offline = offline_init
234 , mDsk = T.defReservedDiskRatio
235 , loDsk = mDskToloDsk T.defReservedDiskRatio dsk_t_init
236 , hiCpu = mCpuTohiCpu (T.iPolicyVcpuRatio T.defIPolicy) cpu_t_init
237 , hiSpindles = computeHiSpindles (T.iPolicySpindleRatio T.defIPolicy)
240 , utilPool = T.baseUtil
241 , utilLoad = T.zeroUtil
244 , iPolicy = T.defIPolicy
247 -- | Conversion formula from mDsk\/tDsk to loDsk.
248 mDskToloDsk :: Double -> Double -> Int
249 mDskToloDsk mval = floor . (mval *)
251 -- | Conversion formula from mCpu\/tCpu to hiCpu.
252 mCpuTohiCpu :: Double -> Double -> Int
253 mCpuTohiCpu mval = floor . (mval *)
255 -- | Conversiojn formula from spindles and spindle ratio to hiSpindles.
256 computeHiSpindles :: Double -> Int -> Double
257 computeHiSpindles spindle_ratio = (spindle_ratio *) . fromIntegral
259 -- | Changes the index.
261 -- This is used only during the building of the data structures.
262 setIdx :: Node -> T.Ndx -> Node
263 setIdx t i = t {idx = i}
265 -- | Changes the alias.
267 -- This is used only during the building of the data structures.
268 setAlias :: Node -> String -> Node
269 setAlias t s = t { alias = s }
271 -- | Sets the offline attribute.
272 setOffline :: Node -> Bool -> Node
273 setOffline t val = t { offline = val }
275 -- | Sets the unnaccounted memory.
276 setXmem :: Node -> Int -> Node
277 setXmem t val = t { xMem = val }
279 -- | Sets the max disk usage ratio.
280 setMdsk :: Node -> Double -> Node
281 setMdsk t val = t { mDsk = val, loDsk = mDskToloDsk val (tDsk t) }
283 -- | Sets the max cpu usage ratio. This will update the node's
284 -- ipolicy, losing sharing (but it should be a seldomly done operation).
285 setMcpu :: Node -> Double -> Node
287 let new_ipol = (iPolicy t) { T.iPolicyVcpuRatio = val }
288 in t { hiCpu = mCpuTohiCpu val (tCpu t), iPolicy = new_ipol }
290 -- | Sets the policy.
291 setPolicy :: T.IPolicy -> Node -> Node
294 , hiCpu = mCpuTohiCpu (T.iPolicyVcpuRatio pol) (tCpu node)
295 , hiSpindles = computeHiSpindles (T.iPolicySpindleRatio pol)
299 -- | Computes the maximum reserved memory for peers from a peer map.
300 computeMaxRes :: P.PeerMap -> P.Elem
301 computeMaxRes = P.maxElem
303 -- | Builds the peer map for a given node.
304 buildPeers :: Node -> Instance.List -> Node
307 (\i_idx -> let inst = Container.find i_idx il
308 mem = if Instance.usesSecMem inst
309 then Instance.mem inst
311 in (Instance.pNode inst, mem))
313 pmap = P.accumArray (+) mdata
314 new_rmem = computeMaxRes pmap
315 new_failN1 = fMem t <= new_rmem
316 new_prem = fromIntegral new_rmem / tMem t
317 in t {peers=pmap, failN1 = new_failN1, rMem = new_rmem, pRem = new_prem}
319 -- | Calculate the new spindle usage
320 calcSpindleUse :: Node -> Instance.Instance -> Double
321 calcSpindleUse n i = incIf (Instance.usesLocalStorage i) (instSpindles n)
322 (fromIntegral $ Instance.spindleUse i)
324 -- | Assigns an instance to a node as primary and update the used VCPU
325 -- count, utilisation data and tags map.
326 setPri :: Node -> Instance.Instance -> Node
327 setPri t inst = t { pList = Instance.idx inst:pList t
329 , pCpu = fromIntegral new_count / tCpu t
330 , utilLoad = utilLoad t `T.addUtil` Instance.util inst
331 , pTags = addTags (pTags t) (Instance.exclTags inst)
332 , instSpindles = calcSpindleUse t inst
334 where new_count = Instance.applyIfOnline inst (+ Instance.vcpus inst)
337 -- | Assigns an instance to a node as secondary without other updates.
338 setSec :: Node -> Instance.Instance -> Node
339 setSec t inst = t { sList = Instance.idx inst:sList t
340 , utilLoad = old_load { T.dskWeight = T.dskWeight old_load +
341 T.dskWeight (Instance.util inst) }
342 , instSpindles = calcSpindleUse t inst
344 where old_load = utilLoad t
346 -- | Computes the new 'pDsk' value, handling nodes without local disk
347 -- storage (we consider all their disk used).
348 computePDsk :: Int -> Double -> Double
350 computePDsk used total = fromIntegral used / total
352 -- * Update functions
354 -- | Sets the free memory.
355 setFmem :: Node -> Int -> Node
357 let new_n1 = new_mem <= rMem t
358 new_mp = fromIntegral new_mem / tMem t
359 in t { fMem = new_mem, failN1 = new_n1, pMem = new_mp }
361 -- | Removes a primary instance.
362 removePri :: Node -> Instance.Instance -> Node
364 let iname = Instance.idx inst
365 i_online = Instance.notOffline inst
366 uses_disk = Instance.usesLocalStorage inst
367 new_plist = delete iname (pList t)
368 new_mem = incIf i_online (fMem t) (Instance.mem inst)
369 new_dsk = incIf uses_disk (fDsk t) (Instance.dsk inst)
370 new_spindles = decIf uses_disk (instSpindles t) 1
371 new_mp = fromIntegral new_mem / tMem t
372 new_dp = computePDsk new_dsk (tDsk t)
373 new_failn1 = new_mem <= rMem t
374 new_ucpu = decIf i_online (uCpu t) (Instance.vcpus inst)
375 new_rcpu = fromIntegral new_ucpu / tCpu t
376 new_load = utilLoad t `T.subUtil` Instance.util inst
377 in t { pList = new_plist, fMem = new_mem, fDsk = new_dsk
378 , failN1 = new_failn1, pMem = new_mp, pDsk = new_dp
379 , uCpu = new_ucpu, pCpu = new_rcpu, utilLoad = new_load
380 , pTags = delTags (pTags t) (Instance.exclTags inst)
381 , instSpindles = new_spindles
384 -- | Removes a secondary instance.
385 removeSec :: Node -> Instance.Instance -> Node
387 let iname = Instance.idx inst
388 uses_disk = Instance.usesLocalStorage inst
390 pnode = Instance.pNode inst
391 new_slist = delete iname (sList t)
392 new_dsk = incIf uses_disk cur_dsk (Instance.dsk inst)
393 new_spindles = decIf uses_disk (instSpindles t) 1
395 old_peem = P.find pnode old_peers
396 new_peem = decIf (Instance.usesSecMem inst) old_peem (Instance.mem inst)
397 new_peers = if new_peem > 0
398 then P.add pnode new_peem old_peers
399 else P.remove pnode old_peers
401 new_rmem = if old_peem < old_rmem
403 else computeMaxRes new_peers
404 new_prem = fromIntegral new_rmem / tMem t
405 new_failn1 = fMem t <= new_rmem
406 new_dp = computePDsk new_dsk (tDsk t)
407 old_load = utilLoad t
408 new_load = old_load { T.dskWeight = T.dskWeight old_load -
409 T.dskWeight (Instance.util inst) }
410 in t { sList = new_slist, fDsk = new_dsk, peers = new_peers
411 , failN1 = new_failn1, rMem = new_rmem, pDsk = new_dp
412 , pRem = new_prem, utilLoad = new_load
413 , instSpindles = new_spindles
416 -- | Adds a primary instance (basic version).
417 addPri :: Node -> Instance.Instance -> T.OpResult Node
418 addPri = addPriEx False
420 -- | Adds a primary instance (extended version).
421 addPriEx :: Bool -- ^ Whether to override the N+1 and
422 -- other /soft/ checks, useful if we
423 -- come from a worse status
425 -> Node -- ^ The target node
426 -> Instance.Instance -- ^ The instance to add
427 -> T.OpResult Node -- ^ The result of the operation,
428 -- either the new version of the node
430 addPriEx force t inst =
431 let iname = Instance.idx inst
432 i_online = Instance.notOffline inst
433 uses_disk = Instance.usesLocalStorage inst
435 new_mem = decIf i_online (fMem t) (Instance.mem inst)
436 new_dsk = decIf uses_disk cur_dsk (Instance.dsk inst)
437 new_spindles = incIf uses_disk (instSpindles t) 1
438 new_failn1 = new_mem <= rMem t
439 new_ucpu = incIf i_online (uCpu t) (Instance.vcpus inst)
440 new_pcpu = fromIntegral new_ucpu / tCpu t
441 new_dp = computePDsk new_dsk (tDsk t)
442 l_cpu = T.iPolicyVcpuRatio $ iPolicy t
443 new_load = utilLoad t `T.addUtil` Instance.util inst
444 inst_tags = Instance.exclTags inst
448 _ | new_mem <= 0 -> Bad T.FailMem
449 | uses_disk && new_dsk <= 0 -> Bad T.FailDisk
450 | uses_disk && mDsk t > new_dp && strict -> Bad T.FailDisk
451 | uses_disk && new_spindles > hiSpindles t
452 && strict -> Bad T.FailDisk
453 | new_failn1 && not (failN1 t) && strict -> Bad T.FailMem
454 | l_cpu >= 0 && l_cpu < new_pcpu && strict -> Bad T.FailCPU
455 | rejectAddTags old_tags inst_tags -> Bad T.FailTags
457 let new_plist = iname:pList t
458 new_mp = fromIntegral new_mem / tMem t
459 r = t { pList = new_plist, fMem = new_mem, fDsk = new_dsk
460 , failN1 = new_failn1, pMem = new_mp, pDsk = new_dp
461 , uCpu = new_ucpu, pCpu = new_pcpu
462 , utilLoad = new_load
463 , pTags = addTags old_tags inst_tags
464 , instSpindles = new_spindles
468 -- | Adds a secondary instance (basic version).
469 addSec :: Node -> Instance.Instance -> T.Ndx -> T.OpResult Node
470 addSec = addSecEx False
472 -- | Adds a secondary instance (extended version).
473 addSecEx :: Bool -> Node -> Instance.Instance -> T.Ndx -> T.OpResult Node
474 addSecEx force t inst pdx =
475 let iname = Instance.idx inst
478 new_dsk = fDsk t - Instance.dsk inst
479 new_spindles = instSpindles t + 1
480 secondary_needed_mem = if Instance.usesSecMem inst
481 then Instance.mem inst
483 new_peem = P.find pdx old_peers + secondary_needed_mem
484 new_peers = P.add pdx new_peem old_peers
485 new_rmem = max (rMem t) new_peem
486 new_prem = fromIntegral new_rmem / tMem t
487 new_failn1 = old_mem <= new_rmem
488 new_dp = computePDsk new_dsk (tDsk t)
489 old_load = utilLoad t
490 new_load = old_load { T.dskWeight = T.dskWeight old_load +
491 T.dskWeight (Instance.util inst) }
494 _ | not (Instance.hasSecondary inst) -> Bad T.FailDisk
495 | new_dsk <= 0 -> Bad T.FailDisk
496 | mDsk t > new_dp && strict -> Bad T.FailDisk
497 | new_spindles > hiSpindles t && strict -> Bad T.FailDisk
498 | secondary_needed_mem >= old_mem && strict -> Bad T.FailMem
499 | new_failn1 && not (failN1 t) && strict -> Bad T.FailMem
501 let new_slist = iname:sList t
502 r = t { sList = new_slist, fDsk = new_dsk
503 , peers = new_peers, failN1 = new_failn1
504 , rMem = new_rmem, pDsk = new_dp
505 , pRem = new_prem, utilLoad = new_load
506 , instSpindles = new_spindles
512 -- | Computes the amount of available disk on a given node.
513 availDisk :: Node -> Int
521 -- | Computes the amount of used disk on a given node.
523 iDsk t = truncate (tDsk t) - fDsk t
525 -- | Computes the amount of available memory on a given node.
526 availMem :: Node -> Int
534 -- | Computes the amount of available memory on a given node.
535 availCpu :: Node -> Int
543 -- | The memory used by instances on a given node.
545 iMem t = truncate (tMem t) - nMem t - xMem t - fMem t
547 -- * Display functions
549 -- | Return a field for a given node.
550 showField :: Node -- ^ Node which we're querying
551 -> String -- ^ Field name
552 -> String -- ^ Field value as string
555 "idx" -> printf "%4d" $ idx t
558 "status" -> case () of
562 "tmem" -> printf "%5.0f" $ tMem t
563 "nmem" -> printf "%5d" $ nMem t
564 "xmem" -> printf "%5d" $ xMem t
565 "fmem" -> printf "%5d" $ fMem t
566 "imem" -> printf "%5d" $ iMem t
567 "rmem" -> printf "%5d" $ rMem t
568 "amem" -> printf "%5d" $ fMem t - rMem t
569 "tdsk" -> printf "%5.0f" $ tDsk t / 1024
570 "fdsk" -> printf "%5d" $ fDsk t `div` 1024
571 "tcpu" -> printf "%4.0f" $ tCpu t
572 "ucpu" -> printf "%4d" $ uCpu t
573 "pcnt" -> printf "%3d" $ length (pList t)
574 "scnt" -> printf "%3d" $ length (sList t)
575 "plist" -> show $ pList t
576 "slist" -> show $ sList t
577 "pfmem" -> printf "%6.4f" $ pMem t
578 "pfdsk" -> printf "%6.4f" $ pDsk t
579 "rcpu" -> printf "%5.2f" $ pCpu t
580 "cload" -> printf "%5.3f" uC
581 "mload" -> printf "%5.3f" uM
582 "dload" -> printf "%5.3f" uD
583 "nload" -> printf "%5.3f" uN
584 "ptags" -> intercalate "," . map (uncurry (printf "%s=%d")) .
586 "peermap" -> show $ peers t
587 "spindle_count" -> show $ spindleCount t
588 "hi_spindles" -> show $ hiSpindles t
589 "inst_spindles" -> show $ instSpindles t
592 T.DynUtil { T.cpuWeight = uC, T.memWeight = uM,
593 T.dskWeight = uD, T.netWeight = uN } = utilLoad t
595 -- | Returns the header and numeric propery of a field.
596 showHeader :: String -> (String, Bool)
599 "idx" -> ("Index", True)
600 "name" -> ("Name", False)
601 "fqdn" -> ("Name", False)
602 "status" -> ("F", False)
603 "tmem" -> ("t_mem", True)
604 "nmem" -> ("n_mem", True)
605 "xmem" -> ("x_mem", True)
606 "fmem" -> ("f_mem", True)
607 "imem" -> ("i_mem", True)
608 "rmem" -> ("r_mem", True)
609 "amem" -> ("a_mem", True)
610 "tdsk" -> ("t_dsk", True)
611 "fdsk" -> ("f_dsk", True)
612 "tcpu" -> ("pcpu", True)
613 "ucpu" -> ("vcpu", True)
614 "pcnt" -> ("pcnt", True)
615 "scnt" -> ("scnt", True)
616 "plist" -> ("primaries", True)
617 "slist" -> ("secondaries", True)
618 "pfmem" -> ("p_fmem", True)
619 "pfdsk" -> ("p_fdsk", True)
620 "rcpu" -> ("r_cpu", True)
621 "cload" -> ("lCpu", True)
622 "mload" -> ("lMem", True)
623 "dload" -> ("lDsk", True)
624 "nload" -> ("lNet", True)
625 "ptags" -> ("PrimaryTags", False)
626 "peermap" -> ("PeerMap", False)
627 "spindle_count" -> ("NodeSpindles", True)
628 "hi_spindles" -> ("MaxSpindles", True)
629 "inst_spindles" -> ("InstSpindles", True)
630 -- TODO: add node fields (group.uuid, group)
631 _ -> (T.unknownField, False)
633 -- | String converter for the node list functionality.
634 list :: [String] -> Node -> [String]
635 list fields t = map (showField t) fields
637 -- | Constant holding the fields we're displaying by default.
638 defaultFields :: [String]
640 [ "status", "name", "tmem", "nmem", "imem", "xmem", "fmem"
641 , "rmem", "tdsk", "fdsk", "tcpu", "ucpu", "pcnt", "scnt"
642 , "pfmem", "pfdsk", "rcpu"
643 , "cload", "mload", "dload", "nload" ]
645 -- | Split a list of nodes into a list of (node group UUID, list of
646 -- associated nodes).
647 computeGroups :: [Node] -> [(T.Gdx, [Node])]
648 computeGroups nodes =
649 let nodes' = sortBy (comparing group) nodes
650 nodes'' = groupBy (\a b -> group a == group b) nodes'
651 in map (\nl -> (group (head nl), nl)) nodes''