root / src / Ganeti / HTools / Program / Hroller.hs @ 23247a73
History | View | Annotate | Download (9.9 kB)
1 |
{-| Cluster rolling maintenance helper. |
---|---|
2 |
|
3 |
-} |
4 |
|
5 |
{- |
6 |
|
7 |
Copyright (C) 2012 Google Inc. |
8 |
|
9 |
This program is free software; you can redistribute it and/or modify |
10 |
it under the terms of the GNU General Public License as published by |
11 |
the Free Software Foundation; either version 2 of the License, or |
12 |
(at your option) any later version. |
13 |
|
14 |
This program is distributed in the hope that it will be useful, but |
15 |
WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 |
General Public License for more details. |
18 |
|
19 |
You should have received a copy of the GNU General Public License |
20 |
along with this program; if not, write to the Free Software |
21 |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
22 |
02110-1301, USA. |
23 |
|
24 |
-} |
25 |
|
26 |
module Ganeti.HTools.Program.Hroller |
27 |
( main |
28 |
, options |
29 |
, arguments |
30 |
) where |
31 |
|
32 |
import Control.Applicative |
33 |
import Control.Monad |
34 |
import Data.Function |
35 |
import Data.List |
36 |
import Data.Ord |
37 |
|
38 |
import qualified Data.IntMap as IntMap |
39 |
|
40 |
import qualified Ganeti.HTools.Container as Container |
41 |
import qualified Ganeti.HTools.Node as Node |
42 |
import qualified Ganeti.HTools.Instance as Instance |
43 |
import qualified Ganeti.HTools.Group as Group |
44 |
|
45 |
import Ganeti.BasicTypes |
46 |
import Ganeti.Common |
47 |
import Ganeti.HTools.CLI |
48 |
import Ganeti.HTools.ExtLoader |
49 |
import Ganeti.HTools.Graph |
50 |
import Ganeti.HTools.Loader |
51 |
import Ganeti.HTools.Types |
52 |
import Ganeti.Utils |
53 |
|
54 |
-- | Options list and functions. |
55 |
options :: IO [OptType] |
56 |
options = do |
57 |
luxi <- oLuxiSocket |
58 |
return |
59 |
[ luxi |
60 |
, oRapiMaster |
61 |
, oDataFile |
62 |
, oIAllocSrc |
63 |
, oOfflineNode |
64 |
, oOfflineMaintenance |
65 |
, oVerbose |
66 |
, oQuiet |
67 |
, oNoHeaders |
68 |
, oNodeTags |
69 |
, oSaveCluster |
70 |
, oGroup |
71 |
, oSkipNonRedundant |
72 |
, oIgnoreNonRedundant |
73 |
, oForce |
74 |
, oOneStepOnly |
75 |
] |
76 |
|
77 |
-- | The list of arguments supported by the program. |
78 |
arguments :: [ArgCompletion] |
79 |
arguments = [] |
80 |
|
81 |
-- | Compute the result of moving an instance to a different node. |
82 |
move :: Idx -> Ndx -> (Node.List, Instance.List) |
83 |
-> OpResult (Node.List, Instance.List) |
84 |
move idx new_ndx (nl, il) = do |
85 |
let new_node = Container.find new_ndx nl |
86 |
inst = Container.find idx il |
87 |
old_ndx = Instance.pNode inst |
88 |
old_node = Container.find old_ndx nl |
89 |
new_node' <- Node.addPriEx True new_node inst |
90 |
let old_node' = Node.removePri old_node inst |
91 |
inst' = Instance.setPri inst new_ndx |
92 |
nl' = Container.addTwo old_ndx old_node' new_ndx new_node' nl |
93 |
il' = Container.add idx inst' il |
94 |
return (nl', il') |
95 |
|
96 |
-- | Move an instance to one of the candidate nodes mentioned. |
97 |
locateInstance :: Idx -> [Ndx] -> (Node.List, Instance.List) |
98 |
-> Result (Node.List, Instance.List) |
99 |
locateInstance idx ndxs conf = |
100 |
msum $ map (opToResult . flip (move idx) conf) ndxs |
101 |
|
102 |
-- | Move a list of instances to some of the candidate nodes mentioned. |
103 |
locateInstances :: [Idx] -> [Ndx] -> (Node.List, Instance.List) |
104 |
-> Result (Node.List, Instance.List) |
105 |
locateInstances idxs ndxs conf = |
106 |
foldM (\ cf idx -> locateInstance idx ndxs cf) conf idxs |
107 |
|
108 |
-- | Greedily move the non-redundant instances away from a list of nodes. |
109 |
-- The arguments are the list of nodes to be cleared, a list of nodes the |
110 |
-- instances can be moved to, and an initial configuration. Returned is a |
111 |
-- list of nodes that can be cleared simultaneously and the configuration |
112 |
-- after these nodes are cleared. |
113 |
clearNodes :: [Ndx] -> [Ndx] -> (Node.List, Instance.List) |
114 |
-> Result ([Ndx], (Node.List, Instance.List)) |
115 |
clearNodes [] _ conf = return ([], conf) |
116 |
clearNodes (ndx:ndxs) targets conf = withFirst `mplus` withoutFirst where |
117 |
withFirst = do |
118 |
let othernodes = delete ndx targets |
119 |
conf' <- locateInstances (nonRedundant conf ndx) othernodes conf |
120 |
(ndxs', conf'') <- clearNodes ndxs othernodes conf' |
121 |
return (ndx:ndxs', conf'') |
122 |
withoutFirst = clearNodes ndxs targets conf |
123 |
|
124 |
-- | Parition a list of nodes into chunks according cluster capacity. |
125 |
partitionNonRedundant :: [Ndx] -> [Ndx] -> (Node.List, Instance.List) |
126 |
-> Result [[Ndx]] |
127 |
partitionNonRedundant [] _ _ = return [] |
128 |
partitionNonRedundant ndxs targets conf = do |
129 |
(grp, _) <- clearNodes ndxs targets conf |
130 |
guard . not . null $ grp |
131 |
let remaining = ndxs \\ grp |
132 |
part <- partitionNonRedundant remaining targets conf |
133 |
return $ grp : part |
134 |
|
135 |
-- | Gather statistics for the coloring algorithms. |
136 |
-- Returns a string with a summary on how each algorithm has performed, |
137 |
-- in order of non-decreasing effectiveness, and whether it tied or lost |
138 |
-- with the previous one. |
139 |
getStats :: [(String, ColorVertMap)] -> String |
140 |
getStats colorings = snd . foldr helper (0,"") $ algBySize colorings |
141 |
where algostat (algo, cmap) = algo ++ ": " ++ size cmap ++ grpsizes cmap |
142 |
size cmap = show (IntMap.size cmap) ++ " " |
143 |
grpsizes cmap = |
144 |
"(" ++ commaJoin (map (show.length) (IntMap.elems cmap)) ++ ")" |
145 |
algBySize = sortBy (flip (comparing (IntMap.size.snd))) |
146 |
helper :: (String, ColorVertMap) -> (Int, String) -> (Int, String) |
147 |
helper el (0, _) = ((IntMap.size.snd) el, algostat el) |
148 |
helper el (old, str) |
149 |
| old == elsize = (elsize, str ++ " TIE " ++ algostat el) |
150 |
| otherwise = (elsize, str ++ " LOOSE " ++ algostat el) |
151 |
where elsize = (IntMap.size.snd) el |
152 |
|
153 |
-- | Predicate of belonging to a given group restriction. |
154 |
hasGroup :: Maybe Group.Group -> Node.Node -> Bool |
155 |
hasGroup Nothing _ = True |
156 |
hasGroup (Just grp) node = Node.group node == Group.idx grp |
157 |
|
158 |
-- | Predicate of having at least one tag in a given set. |
159 |
hasTag :: Maybe [String] -> Node.Node -> Bool |
160 |
hasTag Nothing _ = True |
161 |
hasTag (Just tags) node = not . null $ Node.nTags node `intersect` tags |
162 |
|
163 |
-- | From a cluster configuration, get the list of non-redundant instances |
164 |
-- of a node. |
165 |
nonRedundant :: (Node.List, Instance.List) -> Ndx -> [Idx] |
166 |
nonRedundant (nl, il) ndx = |
167 |
filter (not . Instance.hasSecondary . flip Container.find il) $ |
168 |
Node.pList (Container.find ndx nl) |
169 |
|
170 |
-- | Within a cluster configuration, decide if the node hosts non-redundant |
171 |
-- Instances. |
172 |
noNonRedundant :: (Node.List, Instance.List) -> Node.Node -> Bool |
173 |
noNonRedundant conf = null . nonRedundant conf . Node.idx |
174 |
|
175 |
-- | Put the master node last. |
176 |
-- Reorder a list of lists of nodes such that the master node (if present) |
177 |
-- is the last node of the last group. |
178 |
masterLast :: [[Node.Node]] -> [[Node.Node]] |
179 |
masterLast rebootgroups = |
180 |
map (uncurry (++)) . uncurry (++) . partition (null . snd) $ |
181 |
map (partition (not . Node.isMaster)) rebootgroups |
182 |
|
183 |
-- | Main function. |
184 |
main :: Options -> [String] -> IO () |
185 |
main opts args = do |
186 |
unless (null args) $ exitErr "This program doesn't take any arguments." |
187 |
|
188 |
let verbose = optVerbose opts |
189 |
maybeExit = if optForce opts then warn else exitErr |
190 |
|
191 |
-- Load cluster data. The last two arguments, cluster tags and ipolicy, are |
192 |
-- currently not used by this tool. |
193 |
ini_cdata@(ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts |
194 |
|
195 |
let master_names = map Node.name . filter Node.isMaster . IntMap.elems $ |
196 |
fixed_nl |
197 |
case master_names of |
198 |
[] -> maybeExit "No master node found (maybe not supported by backend)." |
199 |
[ _ ] -> return () |
200 |
_ -> exitErr $ "Found more than one master node: " ++ show master_names |
201 |
|
202 |
nlf <- setNodeStatus opts fixed_nl |
203 |
|
204 |
maybeSaveData (optSaveCluster opts) "original" "before hroller run" ini_cdata |
205 |
|
206 |
-- Find the wanted node group, if any. |
207 |
wantedGroup <- case optGroup opts of |
208 |
Nothing -> return Nothing |
209 |
Just name -> case Container.findByName gl name of |
210 |
Nothing -> exitErr "Cannot find target group." |
211 |
Just grp -> return (Just grp) |
212 |
|
213 |
let nodes = IntMap.filter (foldl (liftA2 (&&)) (const True) |
214 |
[ not . Node.offline |
215 |
, if optSkipNonRedundant opts |
216 |
then noNonRedundant (nlf, ilf) |
217 |
else const True |
218 |
, hasTag $ optNodeTags opts |
219 |
, hasGroup wantedGroup ]) |
220 |
nlf |
221 |
mkGraph = if optOfflineMaintenance opts |
222 |
then Node.mkNodeGraph |
223 |
else Node.mkRebootNodeGraph nlf |
224 |
|
225 |
-- TODO: fail if instances are running (with option to warn only) |
226 |
|
227 |
nodeGraph <- case mkGraph nodes ilf of |
228 |
Nothing -> exitErr "Cannot create node graph" |
229 |
Just g -> return g |
230 |
|
231 |
when (verbose > 2) . putStrLn $ "Node Graph: " ++ show nodeGraph |
232 |
|
233 |
let colorAlgorithms = [ ("LF", colorLF) |
234 |
, ("Dsatur", colorDsatur) |
235 |
, ("Dcolor", colorDcolor) |
236 |
] |
237 |
colorings = map (\(v,a) -> (v,(colorVertMap.a) nodeGraph)) colorAlgorithms |
238 |
smallestColoring = IntMap.elems $ |
239 |
(snd . minimumBy (comparing (IntMap.size . snd))) colorings |
240 |
allNdx = map Node.idx $ Container.elems nlf |
241 |
splitted = mapM (\ grp -> partitionNonRedundant grp allNdx (nlf,ilf)) |
242 |
smallestColoring |
243 |
rebootGroups <- if optIgnoreNonRedundant opts |
244 |
then return smallestColoring |
245 |
else case splitted of |
246 |
Ok splitgroups -> return $ concat splitgroups |
247 |
Bad _ -> exitErr "Not enough capacity to move\ |
248 |
\ non-redundant instances" |
249 |
let idToNode = (`Container.find` nodes) |
250 |
nodesRebootGroups = |
251 |
map (map idToNode . filter (`IntMap.member` nodes)) rebootGroups |
252 |
outputRebootGroups = masterLast . |
253 |
sortBy (flip compare `on` length) $ |
254 |
nodesRebootGroups |
255 |
outputRebootNames = map (map Node.name) outputRebootGroups |
256 |
|
257 |
when (verbose > 1) . putStrLn $ getStats colorings |
258 |
|
259 |
if optOneStepOnly opts |
260 |
then do |
261 |
unless (optNoHeaders opts) $ |
262 |
putStrLn "'First Reboot Group'" |
263 |
case outputRebootNames of |
264 |
[] -> return () |
265 |
y : _ -> mapM_ putStrLn y |
266 |
else do |
267 |
unless (optNoHeaders opts) $ |
268 |
putStrLn "'Node Reboot Groups'" |
269 |
mapM_ (putStrLn . commaJoin) outputRebootNames |