Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Hroller.hs @ a8cbe1d7

History | View | Annotate | Download (11.1 kB)

1
{-| Cluster rolling maintenance helper.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hroller
27
  ( main
28
  , options
29
  , arguments
30
  ) where
31

    
32
import Control.Applicative
33
import Control.Arrow
34
import Control.Monad
35
import Data.Function
36
import Data.List
37
import Data.Ord
38
import Text.Printf
39

    
40
import qualified Data.IntMap as IntMap
41

    
42
import qualified Ganeti.HTools.Container as Container
43
import qualified Ganeti.HTools.Node as Node
44
import qualified Ganeti.HTools.Instance as Instance
45
import qualified Ganeti.HTools.Group as Group
46

    
47
import Ganeti.BasicTypes
48
import Ganeti.Common
49
import Ganeti.HTools.CLI
50
import Ganeti.HTools.ExtLoader
51
import Ganeti.HTools.Graph
52
import Ganeti.HTools.Loader
53
import Ganeti.HTools.Types
54
import Ganeti.Utils
55

    
56
-- | Options list and functions.
57
options :: IO [OptType]
58
options = do
59
  luxi <- oLuxiSocket
60
  return
61
    [ luxi
62
    , oRapiMaster
63
    , oDataFile
64
    , oIAllocSrc
65
    , oOfflineNode
66
    , oOfflineMaintenance
67
    , oVerbose
68
    , oQuiet
69
    , oNoHeaders
70
    , oNodeTags
71
    , oSaveCluster
72
    , oGroup
73
    , oPrintMoves
74
    , oSkipNonRedundant
75
    , oIgnoreNonRedundant
76
    , oForce
77
    , oOneStepOnly
78
    ]
79

    
80
-- | The list of arguments supported by the program.
81
arguments :: [ArgCompletion]
82
arguments = []
83

    
84
-- | Compute the result of moving an instance to a different node.
85
move :: Idx -> Ndx -> (Node.List, Instance.List)
86
        -> OpResult (Node.List, Instance.List)
87
move idx new_ndx (nl, il) = do
88
  let new_node = Container.find new_ndx nl
89
      inst = Container.find idx il
90
      old_ndx = Instance.pNode inst
91
      old_node = Container.find old_ndx nl
92
  new_node' <- Node.addPriEx True new_node inst
93
  let old_node' = Node.removePri old_node inst
94
      inst' = Instance.setPri inst new_ndx
95
      nl' = Container.addTwo old_ndx old_node' new_ndx new_node' nl
96
      il' = Container.add idx inst' il
97
  return (nl', il')
98

    
99
-- | Move a non-redundant instance to one of the candidate nodes mentioned.
100
locateInstance :: Idx -> [Ndx] -> (Node.List, Instance.List)
101
                  -> Result (Node.List, Instance.List)
102
locateInstance idx ndxs conf =
103
  msum $ map (opToResult . flip (move idx) conf) ndxs
104

    
105
-- | Move a list of non-redundant instances to some of the nodes mentioned.
106
locateInstances :: [Idx] -> [Ndx] -> (Node.List, Instance.List)
107
                   -> Result (Node.List, Instance.List)
108
locateInstances idxs ndxs conf =
109
  foldM (\ cf idx -> locateInstance idx ndxs cf) conf idxs
110

    
111
-- | Greedily move the non-redundant instances away from a list of nodes.
112
-- The arguments are the list of nodes to be cleared, a list of nodes the
113
-- instances can be moved to, and an initial configuration. Returned is a
114
-- list of nodes that can be cleared simultaneously and the configuration
115
-- after these nodes are cleared.
116
clearNodes :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
117
              -> Result ([Ndx], (Node.List, Instance.List))
118
clearNodes [] _ conf = return ([], conf)
119
clearNodes (ndx:ndxs) targets conf@(nl, _) =
120
  withFirst `mplus` withoutFirst where
121
  withFirst = do
122
     let othernodes = delete ndx targets
123
         grp = Node.group $ Container.find ndx nl
124
         othernodesSameGroup =
125
           filter ((==) grp . Node.group . flip Container.find nl) othernodes
126
     conf' <- locateInstances (nonRedundant conf ndx) othernodesSameGroup conf
127
     (ndxs', conf'') <- clearNodes ndxs othernodes conf'
128
     return (ndx:ndxs', conf'')
129
  withoutFirst = clearNodes ndxs targets conf
130

    
131
-- | Parition a list of nodes into chunks according cluster capacity.
132
partitionNonRedundant :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
133
                         -> Result [([Ndx], (Node.List, Instance.List))]
134
partitionNonRedundant [] _ _ = return []
135
partitionNonRedundant ndxs targets conf = do
136
  (grp, conf') <- clearNodes ndxs targets conf
137
  guard . not . null $ grp
138
  let remaining = ndxs \\ grp
139
  part <- partitionNonRedundant remaining targets conf
140
  return $ (grp, conf') : part
141

    
142
-- | Gather statistics for the coloring algorithms.
143
-- Returns a string with a summary on how each algorithm has performed,
144
-- in order of non-decreasing effectiveness, and whether it tied or lost
145
-- with the previous one.
146
getStats :: [(String, ColorVertMap)] -> String
147
getStats colorings = snd . foldr helper (0,"") $ algBySize colorings
148
    where algostat (algo, cmap) = algo ++ ": " ++ size cmap ++ grpsizes cmap
149
          size cmap = show (IntMap.size cmap) ++ " "
150
          grpsizes cmap =
151
            "(" ++ commaJoin (map (show.length) (IntMap.elems cmap)) ++ ")"
152
          algBySize = sortBy (flip (comparing (IntMap.size.snd)))
153
          helper :: (String, ColorVertMap) -> (Int, String) -> (Int, String)
154
          helper el (0, _) = ((IntMap.size.snd) el, algostat el)
155
          helper el (old, str)
156
            | old == elsize = (elsize, str ++ " TIE " ++ algostat el)
157
            | otherwise = (elsize, str ++ " LOOSE " ++ algostat el)
158
              where elsize = (IntMap.size.snd) el
159

    
160
-- | Predicate of belonging to a given group restriction.
161
hasGroup :: Maybe Group.Group -> Node.Node -> Bool
162
hasGroup Nothing _ = True
163
hasGroup (Just grp) node = Node.group node == Group.idx grp
164

    
165
-- | Predicate of having at least one tag in a given set.
166
hasTag :: Maybe [String] -> Node.Node -> Bool
167
hasTag Nothing _ = True
168
hasTag (Just tags) node = not . null $ Node.nTags node `intersect` tags
169

    
170
-- | From a cluster configuration, get the list of non-redundant instances
171
-- of a node.
172
nonRedundant :: (Node.List, Instance.List) -> Ndx -> [Idx]
173
nonRedundant (nl, il) ndx =
174
  filter (not . Instance.hasSecondary . flip Container.find  il) $
175
  Node.pList (Container.find ndx nl)
176

    
177
-- | Within a cluster configuration, decide if the node hosts non-redundant
178
-- Instances.
179
noNonRedundant :: (Node.List, Instance.List) -> Node.Node -> Bool
180
noNonRedundant conf = null . nonRedundant conf . Node.idx
181

    
182
-- | Put the master node last.
183
-- Reorder a list groups of nodes (with additional information) such that the
184
-- master node (if present) is the last node of the last group.
185
masterLast :: [([Node.Node], a)] -> [([Node.Node], a)]
186
masterLast rebootgroups =
187
  map (first $ uncurry (++)) . uncurry (++) . partition (null . snd . fst) $
188
  map (first $ partition (not . Node.isMaster)) rebootgroups
189

    
190
-- | From two configurations compute the list of moved instances.
191
getMoves :: (Node.List, Instance.List) -> (Node.List, Instance.List)
192
            -> [(Instance.Instance, Node.Node)]
193
getMoves (_, il) (nl', il') = do
194
  ix <- Container.keys il
195
  let inst = Container.find ix il
196
      inst' = Container.find ix il'
197
  guard $ Instance.pNode inst /= Instance.pNode inst'
198
  return (inst', Container.find (Instance.pNode inst') nl')
199

    
200
-- | Main function.
201
main :: Options -> [String] -> IO ()
202
main opts args = do
203
  unless (null args) $ exitErr "This program doesn't take any arguments."
204

    
205
  let verbose = optVerbose opts
206
      maybeExit = if optForce opts then warn else exitErr
207

    
208
  -- Load cluster data. The last two arguments, cluster tags and ipolicy, are
209
  -- currently not used by this tool.
210
  ini_cdata@(ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
211

    
212
  let master_names = map Node.name . filter Node.isMaster . IntMap.elems $
213
                     fixed_nl
214
  case master_names of
215
    [] -> maybeExit "No master node found (maybe not supported by backend)."
216
    [ _ ] -> return ()
217
    _ -> exitErr $ "Found more than one master node: " ++  show master_names
218

    
219
  nlf <- setNodeStatus opts fixed_nl
220

    
221
  maybeSaveData (optSaveCluster opts) "original" "before hroller run" ini_cdata
222

    
223
  -- Find the wanted node group, if any.
224
  wantedGroup <- case optGroup opts of
225
    Nothing -> return Nothing
226
    Just name -> case Container.findByName gl name of
227
      Nothing -> exitErr "Cannot find target group."
228
      Just grp -> return (Just grp)
229

    
230
  let nodes = IntMap.filter (foldl (liftA2 (&&)) (const True)
231
                             [ not . Node.offline
232
                             , if optSkipNonRedundant opts
233
                                  then noNonRedundant (nlf, ilf)
234
                                  else const True
235
                             , hasTag $ optNodeTags opts
236
                             , hasGroup wantedGroup ])
237
              nlf
238
      mkGraph = if optOfflineMaintenance opts
239
                   then Node.mkNodeGraph
240
                   else Node.mkRebootNodeGraph nlf
241

    
242
  nodeGraph <- case mkGraph nodes ilf of
243
                     Nothing -> exitErr "Cannot create node graph"
244
                     Just g -> return g
245

    
246
  when (verbose > 2) . putStrLn $ "Node Graph: " ++ show nodeGraph
247

    
248
  let colorAlgorithms = [ ("LF", colorLF)
249
                        , ("Dsatur", colorDsatur)
250
                        , ("Dcolor", colorDcolor)
251
                        ]
252
      colorings = map (\(v,a) -> (v,(colorVertMap.a) nodeGraph)) colorAlgorithms
253
      smallestColoring = IntMap.elems $
254
        (snd . minimumBy (comparing (IntMap.size . snd))) colorings
255
      allNdx = map Node.idx $ Container.elems nlf
256
      splitted = mapM (\ grp -> partitionNonRedundant grp allNdx (nlf,ilf))
257
                 smallestColoring
258
  rebootGroups <- if optIgnoreNonRedundant opts
259
                     then return $ zip smallestColoring (repeat (nlf, ilf))
260
                     else case splitted of
261
                            Ok splitgroups -> return $ concat splitgroups
262
                            Bad _ -> exitErr "Not enough capacity to move\ 
263
                                             \ non-redundant instances"
264
  let idToNode = (`Container.find` nodes)
265
      nodesRebootGroups =
266
        map (first $ map idToNode . filter (`IntMap.member` nodes)) rebootGroups
267
      outputRebootGroups = masterLast .
268
                           sortBy (flip compare `on` length . fst) $
269
                           nodesRebootGroups
270
      confToMoveNames = map (Instance.name *** Node.name) . getMoves (nlf, ilf)
271
      namesAndMoves = map (map Node.name *** confToMoveNames) outputRebootGroups
272

    
273
  when (verbose > 1) . putStrLn $ getStats colorings
274

    
275
  let showGroup = if optOneStepOnly opts
276
                    then mapM_ putStrLn
277
                    else putStrLn . commaJoin
278
      showMoves :: [(String, String)] -> IO ()
279
      showMoves = if optPrintMoves opts
280
                    then mapM_ $ putStrLn . uncurry (printf "  %s %s")
281
                    else const $ return ()
282
      showBoth = liftM2 (>>) (showGroup . fst) (showMoves . snd)
283

    
284

    
285
  if optOneStepOnly opts
286
     then do
287
       unless (optNoHeaders opts) $
288
              putStrLn "'First Reboot Group'"
289
       case namesAndMoves of
290
         [] -> return ()
291
         y : _ -> showBoth y
292
     else do
293
       unless (optNoHeaders opts) $
294
              putStrLn "'Node Reboot Groups'"
295
       mapM_ showBoth namesAndMoves