Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Hroller.hs @ fdbdeaa3

History | View | Annotate | Download (12.5 kB)

1
{-| Cluster rolling maintenance helper.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hroller
27
  ( main
28
  , options
29
  , arguments
30
  ) where
31

    
32
import Control.Applicative
33
import Control.Arrow
34
import Control.Monad
35
import Data.Function
36
import Data.List
37
import Data.Ord
38
import Text.Printf
39

    
40
import qualified Data.IntMap as IntMap
41

    
42
import qualified Ganeti.HTools.Container as Container
43
import qualified Ganeti.HTools.Node as Node
44
import qualified Ganeti.HTools.Instance as Instance
45
import qualified Ganeti.HTools.Group as Group
46

    
47
import Ganeti.BasicTypes
48
import Ganeti.Common
49
import Ganeti.HTools.CLI
50
import Ganeti.HTools.ExtLoader
51
import Ganeti.HTools.Graph
52
import Ganeti.HTools.Loader
53
import Ganeti.HTools.Types
54
import Ganeti.Utils
55

    
56
-- | Options list and functions.
57
options :: IO [OptType]
58
options = do
59
  luxi <- oLuxiSocket
60
  return
61
    [ luxi
62
    , oRapiMaster
63
    , oDataFile
64
    , oIAllocSrc
65
    , oOfflineNode
66
    , oOfflineMaintenance
67
    , oVerbose
68
    , oQuiet
69
    , oNoHeaders
70
    , oNodeTags
71
    , oSaveCluster
72
    , oGroup
73
    , oPrintMoves
74
    , oSkipNonRedundant
75
    , oIgnoreNonRedundant
76
    , oForce
77
    , oOneStepOnly
78
    ]
79

    
80
-- | The list of arguments supported by the program.
81
arguments :: [ArgCompletion]
82
arguments = []
83

    
84
-- | Compute the result of moving an instance to a different node.
85
move :: Idx -> Ndx -> (Node.List, Instance.List)
86
        -> OpResult (Node.List, Instance.List)
87
move idx new_ndx (nl, il) = do
88
  let new_node = Container.find new_ndx nl
89
      inst = Container.find idx il
90
      old_ndx = Instance.pNode inst
91
      old_node = Container.find old_ndx nl
92
  new_node' <- Node.addPriEx True new_node inst
93
  let old_node' = Node.removePri old_node inst
94
      inst' = Instance.setPri inst new_ndx
95
      nl' = Container.addTwo old_ndx old_node' new_ndx new_node' nl
96
      il' = Container.add idx inst' il
97
  return (nl', il')
98

    
99
-- | Move a non-redundant instance to one of the candidate nodes mentioned.
100
locateInstance :: Idx -> [Ndx] -> (Node.List, Instance.List)
101
                  -> Result (Node.List, Instance.List)
102
locateInstance idx ndxs conf =
103
  msum $ map (opToResult . flip (move idx) conf) ndxs
104

    
105
-- | Move a list of non-redundant instances to some of the nodes mentioned.
106
locateInstances :: [Idx] -> [Ndx] -> (Node.List, Instance.List)
107
                   -> Result (Node.List, Instance.List)
108
locateInstances idxs ndxs conf =
109
  foldM (\ cf idx -> locateInstance idx ndxs cf) conf idxs
110

    
111

    
112
-- | Greedily clear a node of a kind of instances by a given relocation method.
113
-- The arguments are a function providing the list of instances to be cleared,
114
-- the relocation function, the list of nodes to be cleared, a list of nodes
115
-- that can be relocated to, and the initial configuration. Returned is a list
116
-- of nodes that can be cleared simultaneously and the configuration after
117
-- clearing these nodes.
118
greedyClearNodes :: ((Node.List, Instance.List) -> Ndx -> [Idx])
119
                    -> ([Idx] -> [Ndx] -> (Node.List, Instance.List)
120
                        -> Result (Node.List, Instance.List))
121
                    -> [Ndx] -> [Ndx] -> (Node.List, Instance.List)
122
                    -> Result ([Ndx], (Node.List, Instance.List))
123
greedyClearNodes  _ _ [] _ conf = return ([], conf)
124
greedyClearNodes getInstances relocate (ndx:ndxs) targets conf@(nl, _) =
125
  withFirst `mplus` withoutFirst where
126
  withFirst = do
127
     let othernodes = delete ndx targets
128
         grp = Node.group $ Container.find ndx nl
129
         othernodesSameGroup =
130
           filter ((==) grp . Node.group . flip Container.find nl) othernodes
131
     conf' <- relocate (getInstances conf ndx) othernodesSameGroup conf
132
     (ndxs', conf'') <- greedyClearNodes getInstances relocate
133
                        ndxs othernodes conf'
134
     return (ndx:ndxs', conf'')
135
  withoutFirst = greedyClearNodes getInstances relocate ndxs targets conf
136
                    
137
-- | Greedily move the non-redundant instances away from a list of nodes.
138
-- Returns a list of ndoes that can be cleared simultaneously and the
139
-- configuration after clearing these nodes.
140
clearNodes :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
141
              -> Result ([Ndx], (Node.List, Instance.List))
142
clearNodes = greedyClearNodes nonRedundant locateInstances
143

    
144
-- | Partition nodes according to some clearing strategy.
145
-- Arguments are the clearing strategy, the list of nodes to be cleared,
146
-- the list of nodes that instances can be moved to, and the initial
147
-- configuration. Returned is a partion of the nodes to be cleared with the
148
-- configuration in that clearing situation.
149
partitionNodes :: ([Ndx] -> [Ndx] -> (Node.List, Instance.List)
150
                   -> Result ([Ndx], (Node.List, Instance.List)))
151
                  -> [Ndx] -> [Ndx] -> (Node.List, Instance.List)
152
                  -> Result [([Ndx], (Node.List, Instance.List))]
153
partitionNodes _ [] _  _ = return []
154
partitionNodes clear ndxs targets conf = do
155
  (grp, conf') <- clear ndxs targets conf
156
  guard . not . null $ grp
157
  let remaining = ndxs \\ grp
158
  part <- partitionNodes clear remaining targets conf
159
  return $ (grp, conf') : part
160

    
161
-- | Parition a list of nodes into chunks according cluster capacity.
162
partitionNonRedundant :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
163
                         -> Result [([Ndx], (Node.List, Instance.List))]
164
partitionNonRedundant = partitionNodes clearNodes
165

    
166
-- | Gather statistics for the coloring algorithms.
167
-- Returns a string with a summary on how each algorithm has performed,
168
-- in order of non-decreasing effectiveness, and whether it tied or lost
169
-- with the previous one.
170
getStats :: [(String, ColorVertMap)] -> String
171
getStats colorings = snd . foldr helper (0,"") $ algBySize colorings
172
    where algostat (algo, cmap) = algo ++ ": " ++ size cmap ++ grpsizes cmap
173
          size cmap = show (IntMap.size cmap) ++ " "
174
          grpsizes cmap =
175
            "(" ++ commaJoin (map (show.length) (IntMap.elems cmap)) ++ ")"
176
          algBySize = sortBy (flip (comparing (IntMap.size.snd)))
177
          helper :: (String, ColorVertMap) -> (Int, String) -> (Int, String)
178
          helper el (0, _) = ((IntMap.size.snd) el, algostat el)
179
          helper el (old, str)
180
            | old == elsize = (elsize, str ++ " TIE " ++ algostat el)
181
            | otherwise = (elsize, str ++ " LOOSE " ++ algostat el)
182
              where elsize = (IntMap.size.snd) el
183

    
184
-- | Predicate of belonging to a given group restriction.
185
hasGroup :: Maybe Group.Group -> Node.Node -> Bool
186
hasGroup Nothing _ = True
187
hasGroup (Just grp) node = Node.group node == Group.idx grp
188

    
189
-- | Predicate of having at least one tag in a given set.
190
hasTag :: Maybe [String] -> Node.Node -> Bool
191
hasTag Nothing _ = True
192
hasTag (Just tags) node = not . null $ Node.nTags node `intersect` tags
193

    
194
-- | From a cluster configuration, get the list of non-redundant instances
195
-- of a node.
196
nonRedundant :: (Node.List, Instance.List) -> Ndx -> [Idx]
197
nonRedundant (nl, il) ndx =
198
  filter (not . Instance.hasSecondary . flip Container.find  il) $
199
  Node.pList (Container.find ndx nl)
200

    
201
-- | Within a cluster configuration, decide if the node hosts non-redundant
202
-- Instances.
203
noNonRedundant :: (Node.List, Instance.List) -> Node.Node -> Bool
204
noNonRedundant conf = null . nonRedundant conf . Node.idx
205

    
206
-- | Put the master node last.
207
-- Reorder a list groups of nodes (with additional information) such that the
208
-- master node (if present) is the last node of the last group.
209
masterLast :: [([Node.Node], a)] -> [([Node.Node], a)]
210
masterLast rebootgroups =
211
  map (first $ uncurry (++)) . uncurry (++) . partition (null . snd . fst) $
212
  map (first $ partition (not . Node.isMaster)) rebootgroups
213

    
214
-- | From two configurations compute the list of moved instances.
215
getMoves :: (Node.List, Instance.List) -> (Node.List, Instance.List)
216
            -> [(Instance.Instance, Node.Node)]
217
getMoves (_, il) (nl', il') = do
218
  ix <- Container.keys il
219
  let inst = Container.find ix il
220
      inst' = Container.find ix il'
221
  guard $ Instance.pNode inst /= Instance.pNode inst'
222
  return (inst', Container.find (Instance.pNode inst') nl')
223

    
224
-- | Main function.
225
main :: Options -> [String] -> IO ()
226
main opts args = do
227
  unless (null args) $ exitErr "This program doesn't take any arguments."
228

    
229
  let verbose = optVerbose opts
230
      maybeExit = if optForce opts then warn else exitErr
231

    
232
  -- Load cluster data. The last two arguments, cluster tags and ipolicy, are
233
  -- currently not used by this tool.
234
  ini_cdata@(ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
235

    
236
  let master_names = map Node.name . filter Node.isMaster . IntMap.elems $
237
                     fixed_nl
238
  case master_names of
239
    [] -> maybeExit "No master node found (maybe not supported by backend)."
240
    [ _ ] -> return ()
241
    _ -> exitErr $ "Found more than one master node: " ++  show master_names
242

    
243
  nlf <- setNodeStatus opts fixed_nl
244

    
245
  maybeSaveData (optSaveCluster opts) "original" "before hroller run" ini_cdata
246

    
247
  -- Find the wanted node group, if any.
248
  wantedGroup <- case optGroup opts of
249
    Nothing -> return Nothing
250
    Just name -> case Container.findByName gl name of
251
      Nothing -> exitErr "Cannot find target group."
252
      Just grp -> return (Just grp)
253

    
254
  let nodes = IntMap.filter (foldl (liftA2 (&&)) (const True)
255
                             [ not . Node.offline
256
                             , if optSkipNonRedundant opts
257
                                  then noNonRedundant (nlf, ilf)
258
                                  else const True
259
                             , hasTag $ optNodeTags opts
260
                             , hasGroup wantedGroup ])
261
              nlf
262
      mkGraph = if optOfflineMaintenance opts
263
                   then Node.mkNodeGraph
264
                   else Node.mkRebootNodeGraph nlf
265

    
266
  nodeGraph <- case mkGraph nodes ilf of
267
                     Nothing -> exitErr "Cannot create node graph"
268
                     Just g -> return g
269

    
270
  when (verbose > 2) . putStrLn $ "Node Graph: " ++ show nodeGraph
271

    
272
  let colorAlgorithms = [ ("LF", colorLF)
273
                        , ("Dsatur", colorDsatur)
274
                        , ("Dcolor", colorDcolor)
275
                        ]
276
      colorings = map (\(v,a) -> (v,(colorVertMap.a) nodeGraph)) colorAlgorithms
277
      smallestColoring = IntMap.elems $
278
        (snd . minimumBy (comparing (IntMap.size . snd))) colorings
279
      allNdx = map Node.idx . filter (not . Node.offline) . Container.elems
280
               $ nlf
281
      splitted = mapM (\ grp -> partitionNonRedundant grp allNdx (nlf,ilf))
282
                 smallestColoring
283
  rebootGroups <- if optIgnoreNonRedundant opts
284
                     then return $ zip smallestColoring (repeat (nlf, ilf))
285
                     else case splitted of
286
                            Ok splitgroups -> return $ concat splitgroups
287
                            Bad _ -> exitErr "Not enough capacity to move\ 
288
                                             \ non-redundant instances"
289
  let idToNode = (`Container.find` nodes)
290
      nodesRebootGroups =
291
        map (first $ map idToNode . filter (`IntMap.member` nodes)) rebootGroups
292
      outputRebootGroups = masterLast .
293
                           sortBy (flip compare `on` length . fst) $
294
                           nodesRebootGroups
295
      confToMoveNames = map (Instance.name *** Node.name) . getMoves (nlf, ilf)
296
      namesAndMoves = map (map Node.name *** confToMoveNames) outputRebootGroups
297

    
298
  when (verbose > 1) . putStrLn $ getStats colorings
299

    
300
  let showGroup = if optOneStepOnly opts
301
                    then mapM_ putStrLn
302
                    else putStrLn . commaJoin
303
      showMoves :: [(String, String)] -> IO ()
304
      showMoves = if optPrintMoves opts
305
                    then mapM_ $ putStrLn . uncurry (printf "  %s %s")
306
                    else const $ return ()
307
      showBoth = liftM2 (>>) (showGroup . fst) (showMoves . snd)
308

    
309

    
310
  if optOneStepOnly opts
311
     then do
312
       unless (optNoHeaders opts) $
313
              putStrLn "'First Reboot Group'"
314
       case namesAndMoves of
315
         [] -> return ()
316
         y : _ -> showBoth y
317
     else do
318
       unless (optNoHeaders opts) $
319
              putStrLn "'Node Reboot Groups'"
320
       mapM_ showBoth namesAndMoves