Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Hroller.hs @ 23247a73

History | View | Annotate | Download (9.9 kB)

1
{-| Cluster rolling maintenance helper.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hroller
27
  ( main
28
  , options
29
  , arguments
30
  ) where
31

    
32
import Control.Applicative
33
import Control.Monad
34
import Data.Function
35
import Data.List
36
import Data.Ord
37

    
38
import qualified Data.IntMap as IntMap
39

    
40
import qualified Ganeti.HTools.Container as Container
41
import qualified Ganeti.HTools.Node as Node
42
import qualified Ganeti.HTools.Instance as Instance
43
import qualified Ganeti.HTools.Group as Group
44

    
45
import Ganeti.BasicTypes
46
import Ganeti.Common
47
import Ganeti.HTools.CLI
48
import Ganeti.HTools.ExtLoader
49
import Ganeti.HTools.Graph
50
import Ganeti.HTools.Loader
51
import Ganeti.HTools.Types
52
import Ganeti.Utils
53

    
54
-- | Options list and functions.
55
options :: IO [OptType]
56
options = do
57
  luxi <- oLuxiSocket
58
  return
59
    [ luxi
60
    , oRapiMaster
61
    , oDataFile
62
    , oIAllocSrc
63
    , oOfflineNode
64
    , oOfflineMaintenance
65
    , oVerbose
66
    , oQuiet
67
    , oNoHeaders
68
    , oNodeTags
69
    , oSaveCluster
70
    , oGroup
71
    , oSkipNonRedundant
72
    , oIgnoreNonRedundant
73
    , oForce
74
    , oOneStepOnly
75
    ]
76

    
77
-- | The list of arguments supported by the program.
78
arguments :: [ArgCompletion]
79
arguments = []
80

    
81
-- | Compute the result of moving an instance to a different node.
82
move :: Idx -> Ndx -> (Node.List, Instance.List)
83
        -> OpResult (Node.List, Instance.List)
84
move idx new_ndx (nl, il) = do
85
  let new_node = Container.find new_ndx nl
86
      inst = Container.find idx il
87
      old_ndx = Instance.pNode inst
88
      old_node = Container.find old_ndx nl
89
  new_node' <- Node.addPriEx True new_node inst
90
  let old_node' = Node.removePri old_node inst
91
      inst' = Instance.setPri inst new_ndx
92
      nl' = Container.addTwo old_ndx old_node' new_ndx new_node' nl
93
      il' = Container.add idx inst' il
94
  return (nl', il')
95

    
96
-- | Move an instance to one of the candidate nodes mentioned.
97
locateInstance :: Idx -> [Ndx] -> (Node.List, Instance.List)
98
                  -> Result (Node.List, Instance.List)
99
locateInstance idx ndxs conf =
100
  msum $ map (opToResult . flip (move idx) conf) ndxs
101

    
102
-- | Move a list of instances to some of the candidate nodes mentioned.
103
locateInstances :: [Idx] -> [Ndx] -> (Node.List, Instance.List)
104
                   -> Result (Node.List, Instance.List)
105
locateInstances idxs ndxs conf =
106
  foldM (\ cf idx -> locateInstance idx ndxs cf) conf idxs
107

    
108
-- | Greedily move the non-redundant instances away from a list of nodes.
109
-- The arguments are the list of nodes to be cleared, a list of nodes the
110
-- instances can be moved to, and an initial configuration. Returned is a
111
-- list of nodes that can be cleared simultaneously and the configuration
112
-- after these nodes are cleared.
113
clearNodes :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
114
              -> Result ([Ndx], (Node.List, Instance.List))
115
clearNodes [] _ conf = return ([], conf)
116
clearNodes (ndx:ndxs) targets conf = withFirst `mplus` withoutFirst where
117
  withFirst = do
118
     let othernodes = delete ndx targets
119
     conf' <- locateInstances (nonRedundant conf ndx) othernodes conf
120
     (ndxs', conf'') <- clearNodes ndxs othernodes conf'
121
     return (ndx:ndxs', conf'')
122
  withoutFirst = clearNodes ndxs targets conf
123

    
124
-- | Parition a list of nodes into chunks according cluster capacity.
125
partitionNonRedundant :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
126
                         -> Result [[Ndx]]
127
partitionNonRedundant [] _ _ = return []
128
partitionNonRedundant ndxs targets conf = do
129
  (grp, _) <- clearNodes ndxs targets conf
130
  guard . not . null $ grp
131
  let remaining = ndxs \\ grp
132
  part <- partitionNonRedundant remaining targets conf
133
  return $ grp : part
134

    
135
-- | Gather statistics for the coloring algorithms.
136
-- Returns a string with a summary on how each algorithm has performed,
137
-- in order of non-decreasing effectiveness, and whether it tied or lost
138
-- with the previous one.
139
getStats :: [(String, ColorVertMap)] -> String
140
getStats colorings = snd . foldr helper (0,"") $ algBySize colorings
141
    where algostat (algo, cmap) = algo ++ ": " ++ size cmap ++ grpsizes cmap
142
          size cmap = show (IntMap.size cmap) ++ " "
143
          grpsizes cmap =
144
            "(" ++ commaJoin (map (show.length) (IntMap.elems cmap)) ++ ")"
145
          algBySize = sortBy (flip (comparing (IntMap.size.snd)))
146
          helper :: (String, ColorVertMap) -> (Int, String) -> (Int, String)
147
          helper el (0, _) = ((IntMap.size.snd) el, algostat el)
148
          helper el (old, str)
149
            | old == elsize = (elsize, str ++ " TIE " ++ algostat el)
150
            | otherwise = (elsize, str ++ " LOOSE " ++ algostat el)
151
              where elsize = (IntMap.size.snd) el
152

    
153
-- | Predicate of belonging to a given group restriction.
154
hasGroup :: Maybe Group.Group -> Node.Node -> Bool
155
hasGroup Nothing _ = True
156
hasGroup (Just grp) node = Node.group node == Group.idx grp
157

    
158
-- | Predicate of having at least one tag in a given set.
159
hasTag :: Maybe [String] -> Node.Node -> Bool
160
hasTag Nothing _ = True
161
hasTag (Just tags) node = not . null $ Node.nTags node `intersect` tags
162

    
163
-- | From a cluster configuration, get the list of non-redundant instances
164
-- of a node.
165
nonRedundant :: (Node.List, Instance.List) -> Ndx -> [Idx]
166
nonRedundant (nl, il) ndx =
167
  filter (not . Instance.hasSecondary . flip Container.find  il) $
168
  Node.pList (Container.find ndx nl)
169

    
170
-- | Within a cluster configuration, decide if the node hosts non-redundant
171
-- Instances.
172
noNonRedundant :: (Node.List, Instance.List) -> Node.Node -> Bool
173
noNonRedundant conf = null . nonRedundant conf . Node.idx
174

    
175
-- | Put the master node last.
176
-- Reorder a list of lists of nodes such that the master node (if present)
177
-- is the last node of the last group.
178
masterLast :: [[Node.Node]] -> [[Node.Node]]
179
masterLast rebootgroups =
180
  map (uncurry (++)) . uncurry (++) . partition (null . snd) $
181
  map (partition (not . Node.isMaster)) rebootgroups
182

    
183
-- | Main function.
184
main :: Options -> [String] -> IO ()
185
main opts args = do
186
  unless (null args) $ exitErr "This program doesn't take any arguments."
187

    
188
  let verbose = optVerbose opts
189
      maybeExit = if optForce opts then warn else exitErr
190

    
191
  -- Load cluster data. The last two arguments, cluster tags and ipolicy, are
192
  -- currently not used by this tool.
193
  ini_cdata@(ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
194

    
195
  let master_names = map Node.name . filter Node.isMaster . IntMap.elems $
196
                     fixed_nl
197
  case master_names of
198
    [] -> maybeExit "No master node found (maybe not supported by backend)."
199
    [ _ ] -> return ()
200
    _ -> exitErr $ "Found more than one master node: " ++  show master_names
201

    
202
  nlf <- setNodeStatus opts fixed_nl
203

    
204
  maybeSaveData (optSaveCluster opts) "original" "before hroller run" ini_cdata
205

    
206
  -- Find the wanted node group, if any.
207
  wantedGroup <- case optGroup opts of
208
    Nothing -> return Nothing
209
    Just name -> case Container.findByName gl name of
210
      Nothing -> exitErr "Cannot find target group."
211
      Just grp -> return (Just grp)
212

    
213
  let nodes = IntMap.filter (foldl (liftA2 (&&)) (const True)
214
                             [ not . Node.offline
215
                             , if optSkipNonRedundant opts
216
                                  then noNonRedundant (nlf, ilf)
217
                                  else const True
218
                             , hasTag $ optNodeTags opts
219
                             , hasGroup wantedGroup ])
220
              nlf
221
      mkGraph = if optOfflineMaintenance opts
222
                   then Node.mkNodeGraph
223
                   else Node.mkRebootNodeGraph nlf
224

    
225
  -- TODO: fail if instances are running (with option to warn only)
226

    
227
  nodeGraph <- case mkGraph nodes ilf of
228
                     Nothing -> exitErr "Cannot create node graph"
229
                     Just g -> return g
230

    
231
  when (verbose > 2) . putStrLn $ "Node Graph: " ++ show nodeGraph
232

    
233
  let colorAlgorithms = [ ("LF", colorLF)
234
                        , ("Dsatur", colorDsatur)
235
                        , ("Dcolor", colorDcolor)
236
                        ]
237
      colorings = map (\(v,a) -> (v,(colorVertMap.a) nodeGraph)) colorAlgorithms
238
      smallestColoring = IntMap.elems $
239
        (snd . minimumBy (comparing (IntMap.size . snd))) colorings
240
      allNdx = map Node.idx $ Container.elems nlf
241
      splitted = mapM (\ grp -> partitionNonRedundant grp allNdx (nlf,ilf))
242
                 smallestColoring
243
  rebootGroups <- if optIgnoreNonRedundant opts
244
                     then return smallestColoring
245
                     else case splitted of
246
                            Ok splitgroups -> return $ concat splitgroups
247
                            Bad _ -> exitErr "Not enough capacity to move\ 
248
                                             \ non-redundant instances"
249
  let idToNode = (`Container.find` nodes)
250
      nodesRebootGroups =
251
        map (map idToNode . filter (`IntMap.member` nodes)) rebootGroups
252
      outputRebootGroups = masterLast .
253
                           sortBy (flip compare `on` length) $
254
                           nodesRebootGroups
255
      outputRebootNames = map (map Node.name) outputRebootGroups
256

    
257
  when (verbose > 1) . putStrLn $ getStats colorings
258

    
259
  if optOneStepOnly opts
260
     then do
261
       unless (optNoHeaders opts) $
262
              putStrLn "'First Reboot Group'"
263
       case outputRebootNames of
264
         [] -> return ()
265
         y : _ -> mapM_ putStrLn y
266
     else do
267
       unless (optNoHeaders opts) $
268
              putStrLn "'Node Reboot Groups'"
269
       mapM_ (putStrLn . commaJoin) outputRebootNames