Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Hroller.hs @ a947a583

History | View | Annotate | Download (11.9 kB)

1
{-| Cluster rolling maintenance helper.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hroller
27
  ( main
28
  , options
29
  , arguments
30
  ) where
31

    
32
import Control.Applicative
33
import Control.Arrow
34
import Control.Monad
35
import Data.Function
36
import Data.List
37
import Data.Ord
38
import Text.Printf
39

    
40
import qualified Data.IntMap as IntMap
41

    
42
import qualified Ganeti.HTools.Container as Container
43
import qualified Ganeti.HTools.Node as Node
44
import qualified Ganeti.HTools.Instance as Instance
45
import qualified Ganeti.HTools.Group as Group
46

    
47
import Ganeti.BasicTypes
48
import Ganeti.Common
49
import Ganeti.HTools.CLI
50
import Ganeti.HTools.ExtLoader
51
import Ganeti.HTools.Graph
52
import Ganeti.HTools.Loader
53
import Ganeti.HTools.Types
54
import Ganeti.Utils
55

    
56
-- | Options list and functions.
57
options :: IO [OptType]
58
options = do
59
  luxi <- oLuxiSocket
60
  return
61
    [ luxi
62
    , oRapiMaster
63
    , oDataFile
64
    , oIAllocSrc
65
    , oOfflineNode
66
    , oOfflineMaintenance
67
    , oVerbose
68
    , oQuiet
69
    , oNoHeaders
70
    , oNodeTags
71
    , oSaveCluster
72
    , oGroup
73
    , oPrintMoves
74
    , oSkipNonRedundant
75
    , oIgnoreNonRedundant
76
    , oForce
77
    , oOneStepOnly
78
    ]
79

    
80
-- | The list of arguments supported by the program.
81
arguments :: [ArgCompletion]
82
arguments = []
83

    
84
-- | Compute the result of moving an instance to a different node.
85
move :: Idx -> Ndx -> (Node.List, Instance.List)
86
        -> OpResult (Node.List, Instance.List)
87
move idx new_ndx (nl, il) = do
88
  let new_node = Container.find new_ndx nl
89
      inst = Container.find idx il
90
      old_ndx = Instance.pNode inst
91
      old_node = Container.find old_ndx nl
92
  new_node' <- Node.addPriEx True new_node inst
93
  let old_node' = Node.removePri old_node inst
94
      inst' = Instance.setPri inst new_ndx
95
      nl' = Container.addTwo old_ndx old_node' new_ndx new_node' nl
96
      il' = Container.add idx inst' il
97
  return (nl', il')
98

    
99
-- | Move a non-redundant instance to one of the candidate nodes mentioned.
100
locateInstance :: Idx -> [Ndx] -> (Node.List, Instance.List)
101
                  -> Result (Node.List, Instance.List)
102
locateInstance idx ndxs conf =
103
  msum $ map (opToResult . flip (move idx) conf) ndxs
104

    
105
-- | Move a list of non-redundant instances to some of the nodes mentioned.
106
locateInstances :: [Idx] -> [Ndx] -> (Node.List, Instance.List)
107
                   -> Result (Node.List, Instance.List)
108
locateInstances idxs ndxs conf =
109
  foldM (\ cf idx -> locateInstance idx ndxs cf) conf idxs
110

    
111

    
112
-- | Greedily clear a node of a kind of instances by a given relocation method.
113
-- The arguments are a function providing the list of instances to be cleared,
114
-- the relocation function, the list of nodes to be cleared, a list of nodes
115
-- that can be relocated to, and the initial configuration. Returned is a list
116
-- of nodes that can be cleared simultaneously and the configuration after
117
-- clearing these nodes.
118
greedyClearNodes :: ((Node.List, Instance.List) -> Ndx -> [Idx])
119
                    -> ([Idx] -> [Ndx] -> (Node.List, Instance.List)
120
                        -> Result (Node.List, Instance.List))
121
                    -> [Ndx] -> [Ndx] -> (Node.List, Instance.List)
122
                    -> Result ([Ndx], (Node.List, Instance.List))
123
greedyClearNodes  _ _ [] _ conf = return ([], conf)
124
greedyClearNodes getInstances relocate (ndx:ndxs) targets conf@(nl, _) =
125
  withFirst `mplus` withoutFirst where
126
  withFirst = do
127
     let othernodes = delete ndx targets
128
         grp = Node.group $ Container.find ndx nl
129
         othernodesSameGroup =
130
           filter ((==) grp . Node.group . flip Container.find nl) othernodes
131
     conf' <- relocate (getInstances conf ndx) othernodesSameGroup conf
132
     (ndxs', conf'') <- greedyClearNodes getInstances relocate
133
                        ndxs othernodes conf'
134
     return (ndx:ndxs', conf'')
135
  withoutFirst = greedyClearNodes getInstances relocate ndxs targets conf
136
                    
137
-- | Greedily move the non-redundant instances away from a list of nodes.
138
-- Returns a list of ndoes that can be cleared simultaneously and the
139
-- configuration after clearing these nodes.
140
clearNodes :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
141
              -> Result ([Ndx], (Node.List, Instance.List))
142
clearNodes = greedyClearNodes nonRedundant locateInstances
143

    
144
-- | Parition a list of nodes into chunks according cluster capacity.
145
partitionNonRedundant :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
146
                         -> Result [([Ndx], (Node.List, Instance.List))]
147
partitionNonRedundant [] _ _ = return []
148
partitionNonRedundant ndxs targets conf = do
149
  (grp, conf') <- clearNodes ndxs targets conf
150
  guard . not . null $ grp
151
  let remaining = ndxs \\ grp
152
  part <- partitionNonRedundant remaining targets conf
153
  return $ (grp, conf') : part
154

    
155
-- | Gather statistics for the coloring algorithms.
156
-- Returns a string with a summary on how each algorithm has performed,
157
-- in order of non-decreasing effectiveness, and whether it tied or lost
158
-- with the previous one.
159
getStats :: [(String, ColorVertMap)] -> String
160
getStats colorings = snd . foldr helper (0,"") $ algBySize colorings
161
    where algostat (algo, cmap) = algo ++ ": " ++ size cmap ++ grpsizes cmap
162
          size cmap = show (IntMap.size cmap) ++ " "
163
          grpsizes cmap =
164
            "(" ++ commaJoin (map (show.length) (IntMap.elems cmap)) ++ ")"
165
          algBySize = sortBy (flip (comparing (IntMap.size.snd)))
166
          helper :: (String, ColorVertMap) -> (Int, String) -> (Int, String)
167
          helper el (0, _) = ((IntMap.size.snd) el, algostat el)
168
          helper el (old, str)
169
            | old == elsize = (elsize, str ++ " TIE " ++ algostat el)
170
            | otherwise = (elsize, str ++ " LOOSE " ++ algostat el)
171
              where elsize = (IntMap.size.snd) el
172

    
173
-- | Predicate of belonging to a given group restriction.
174
hasGroup :: Maybe Group.Group -> Node.Node -> Bool
175
hasGroup Nothing _ = True
176
hasGroup (Just grp) node = Node.group node == Group.idx grp
177

    
178
-- | Predicate of having at least one tag in a given set.
179
hasTag :: Maybe [String] -> Node.Node -> Bool
180
hasTag Nothing _ = True
181
hasTag (Just tags) node = not . null $ Node.nTags node `intersect` tags
182

    
183
-- | From a cluster configuration, get the list of non-redundant instances
184
-- of a node.
185
nonRedundant :: (Node.List, Instance.List) -> Ndx -> [Idx]
186
nonRedundant (nl, il) ndx =
187
  filter (not . Instance.hasSecondary . flip Container.find  il) $
188
  Node.pList (Container.find ndx nl)
189

    
190
-- | Within a cluster configuration, decide if the node hosts non-redundant
191
-- Instances.
192
noNonRedundant :: (Node.List, Instance.List) -> Node.Node -> Bool
193
noNonRedundant conf = null . nonRedundant conf . Node.idx
194

    
195
-- | Put the master node last.
196
-- Reorder a list groups of nodes (with additional information) such that the
197
-- master node (if present) is the last node of the last group.
198
masterLast :: [([Node.Node], a)] -> [([Node.Node], a)]
199
masterLast rebootgroups =
200
  map (first $ uncurry (++)) . uncurry (++) . partition (null . snd . fst) $
201
  map (first $ partition (not . Node.isMaster)) rebootgroups
202

    
203
-- | From two configurations compute the list of moved instances.
204
getMoves :: (Node.List, Instance.List) -> (Node.List, Instance.List)
205
            -> [(Instance.Instance, Node.Node)]
206
getMoves (_, il) (nl', il') = do
207
  ix <- Container.keys il
208
  let inst = Container.find ix il
209
      inst' = Container.find ix il'
210
  guard $ Instance.pNode inst /= Instance.pNode inst'
211
  return (inst', Container.find (Instance.pNode inst') nl')
212

    
213
-- | Main function.
214
main :: Options -> [String] -> IO ()
215
main opts args = do
216
  unless (null args) $ exitErr "This program doesn't take any arguments."
217

    
218
  let verbose = optVerbose opts
219
      maybeExit = if optForce opts then warn else exitErr
220

    
221
  -- Load cluster data. The last two arguments, cluster tags and ipolicy, are
222
  -- currently not used by this tool.
223
  ini_cdata@(ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
224

    
225
  let master_names = map Node.name . filter Node.isMaster . IntMap.elems $
226
                     fixed_nl
227
  case master_names of
228
    [] -> maybeExit "No master node found (maybe not supported by backend)."
229
    [ _ ] -> return ()
230
    _ -> exitErr $ "Found more than one master node: " ++  show master_names
231

    
232
  nlf <- setNodeStatus opts fixed_nl
233

    
234
  maybeSaveData (optSaveCluster opts) "original" "before hroller run" ini_cdata
235

    
236
  -- Find the wanted node group, if any.
237
  wantedGroup <- case optGroup opts of
238
    Nothing -> return Nothing
239
    Just name -> case Container.findByName gl name of
240
      Nothing -> exitErr "Cannot find target group."
241
      Just grp -> return (Just grp)
242

    
243
  let nodes = IntMap.filter (foldl (liftA2 (&&)) (const True)
244
                             [ not . Node.offline
245
                             , if optSkipNonRedundant opts
246
                                  then noNonRedundant (nlf, ilf)
247
                                  else const True
248
                             , hasTag $ optNodeTags opts
249
                             , hasGroup wantedGroup ])
250
              nlf
251
      mkGraph = if optOfflineMaintenance opts
252
                   then Node.mkNodeGraph
253
                   else Node.mkRebootNodeGraph nlf
254

    
255
  nodeGraph <- case mkGraph nodes ilf of
256
                     Nothing -> exitErr "Cannot create node graph"
257
                     Just g -> return g
258

    
259
  when (verbose > 2) . putStrLn $ "Node Graph: " ++ show nodeGraph
260

    
261
  let colorAlgorithms = [ ("LF", colorLF)
262
                        , ("Dsatur", colorDsatur)
263
                        , ("Dcolor", colorDcolor)
264
                        ]
265
      colorings = map (\(v,a) -> (v,(colorVertMap.a) nodeGraph)) colorAlgorithms
266
      smallestColoring = IntMap.elems $
267
        (snd . minimumBy (comparing (IntMap.size . snd))) colorings
268
      allNdx = map Node.idx . filter (not . Node.offline) . Container.elems
269
               $ nlf
270
      splitted = mapM (\ grp -> partitionNonRedundant grp allNdx (nlf,ilf))
271
                 smallestColoring
272
  rebootGroups <- if optIgnoreNonRedundant opts
273
                     then return $ zip smallestColoring (repeat (nlf, ilf))
274
                     else case splitted of
275
                            Ok splitgroups -> return $ concat splitgroups
276
                            Bad _ -> exitErr "Not enough capacity to move\ 
277
                                             \ non-redundant instances"
278
  let idToNode = (`Container.find` nodes)
279
      nodesRebootGroups =
280
        map (first $ map idToNode . filter (`IntMap.member` nodes)) rebootGroups
281
      outputRebootGroups = masterLast .
282
                           sortBy (flip compare `on` length . fst) $
283
                           nodesRebootGroups
284
      confToMoveNames = map (Instance.name *** Node.name) . getMoves (nlf, ilf)
285
      namesAndMoves = map (map Node.name *** confToMoveNames) outputRebootGroups
286

    
287
  when (verbose > 1) . putStrLn $ getStats colorings
288

    
289
  let showGroup = if optOneStepOnly opts
290
                    then mapM_ putStrLn
291
                    else putStrLn . commaJoin
292
      showMoves :: [(String, String)] -> IO ()
293
      showMoves = if optPrintMoves opts
294
                    then mapM_ $ putStrLn . uncurry (printf "  %s %s")
295
                    else const $ return ()
296
      showBoth = liftM2 (>>) (showGroup . fst) (showMoves . snd)
297

    
298

    
299
  if optOneStepOnly opts
300
     then do
301
       unless (optNoHeaders opts) $
302
              putStrLn "'First Reboot Group'"
303
       case namesAndMoves of
304
         [] -> return ()
305
         y : _ -> showBoth y
306
     else do
307
       unless (optNoHeaders opts) $
308
              putStrLn "'Node Reboot Groups'"
309
       mapM_ showBoth namesAndMoves