Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Hroller.hs @ 86c346db

History | View | Annotate | Download (9.8 kB)

1
{-| Cluster rolling maintenance helper.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hroller
27
  ( main
28
  , options
29
  , arguments
30
  ) where
31

    
32
import Control.Applicative
33
import Control.Monad
34
import Data.Function
35
import Data.List
36
import Data.Ord
37

    
38
import qualified Data.IntMap as IntMap
39

    
40
import qualified Ganeti.HTools.Container as Container
41
import qualified Ganeti.HTools.Node as Node
42
import qualified Ganeti.HTools.Instance as Instance
43
import qualified Ganeti.HTools.Group as Group
44

    
45
import Ganeti.BasicTypes
46
import Ganeti.Common
47
import Ganeti.HTools.CLI
48
import Ganeti.HTools.ExtLoader
49
import Ganeti.HTools.Graph
50
import Ganeti.HTools.Loader
51
import Ganeti.HTools.Types
52
import Ganeti.Utils
53

    
54
-- | Options list and functions.
55
options :: IO [OptType]
56
options = do
57
  luxi <- oLuxiSocket
58
  return
59
    [ luxi
60
    , oRapiMaster
61
    , oDataFile
62
    , oIAllocSrc
63
    , oOfflineNode
64
    , oOfflineMaintenance
65
    , oVerbose
66
    , oQuiet
67
    , oNoHeaders
68
    , oNodeTags
69
    , oSaveCluster
70
    , oGroup
71
    , oSkipNonRedundant
72
    , oForce
73
    , oOneStepOnly
74
    ]
75

    
76
-- | The list of arguments supported by the program.
77
arguments :: [ArgCompletion]
78
arguments = []
79

    
80
-- | Compute the result of moving an instance to a different node.
81
move :: Idx -> Ndx -> (Node.List, Instance.List)
82
        -> OpResult (Node.List, Instance.List)
83
move idx new_ndx (nl, il) = do
84
  let new_node = Container.find new_ndx nl
85
      inst = Container.find idx il
86
      old_ndx = Instance.pNode inst
87
      old_node = Container.find old_ndx nl
88
  new_node' <- Node.addPriEx True new_node inst
89
  let old_node' = Node.removePri old_node inst
90
      inst' = Instance.setPri inst new_ndx
91
      nl' = Container.addTwo old_ndx old_node' new_ndx new_node' nl
92
      il' = Container.add idx inst' il
93
  return (nl', il')
94

    
95
-- | Move an instance to one of the candidate nodes mentioned.
96
locateInstance :: Idx -> [Ndx] -> (Node.List, Instance.List)
97
                  -> Result (Node.List, Instance.List)
98
locateInstance idx ndxs conf =
99
  msum $ map (opToResult . flip (move idx) conf) ndxs
100

    
101
-- | Move a list of instances to some of the candidate nodes mentioned.
102
locateInstances :: [Idx] -> [Ndx] -> (Node.List, Instance.List)
103
                   -> Result (Node.List, Instance.List)
104
locateInstances idxs ndxs conf =
105
  foldM (\ cf idx -> locateInstance idx ndxs cf) conf idxs
106

    
107
-- | Greedily move the non-redundant instances away from a list of nodes.
108
-- The arguments are the list of nodes to be cleared, a list of nodes the
109
-- instances can be moved to, and an initial configuration. Returned is a
110
-- list of nodes that can be cleared simultaneously and the configuration
111
-- after these nodes are cleared.
112
clearNodes :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
113
              -> Result ([Ndx], (Node.List, Instance.List))
114
clearNodes [] _ conf = return ([], conf)
115
clearNodes (ndx:ndxs) targets conf = withFirst `mplus` withoutFirst where
116
  withFirst = do
117
     let othernodes = delete ndx targets
118
     conf' <- locateInstances (nonRedundant conf ndx) othernodes conf
119
     (ndxs', conf'') <- clearNodes ndxs othernodes conf'
120
     return (ndx:ndxs', conf'')
121
  withoutFirst = clearNodes ndxs targets conf
122

    
123
-- | Parition a list of nodes into chunks according cluster capacity.
124
partitionNonRedundant :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
125
                         -> Result [[Ndx]]
126
partitionNonRedundant [] _ _ = return []
127
partitionNonRedundant ndxs targets conf = do
128
  (grp, _) <- clearNodes ndxs targets conf
129
  guard . not . null $ grp
130
  let remaining = ndxs \\ grp
131
  part <- partitionNonRedundant remaining targets conf
132
  return $ grp : part
133

    
134
-- | Gather statistics for the coloring algorithms.
135
-- Returns a string with a summary on how each algorithm has performed,
136
-- in order of non-decreasing effectiveness, and whether it tied or lost
137
-- with the previous one.
138
getStats :: [(String, ColorVertMap)] -> String
139
getStats colorings = snd . foldr helper (0,"") $ algBySize colorings
140
    where algostat (algo, cmap) = algo ++ ": " ++ size cmap ++ grpsizes cmap
141
          size cmap = show (IntMap.size cmap) ++ " "
142
          grpsizes cmap =
143
            "(" ++ commaJoin (map (show.length) (IntMap.elems cmap)) ++ ")"
144
          algBySize = sortBy (flip (comparing (IntMap.size.snd)))
145
          helper :: (String, ColorVertMap) -> (Int, String) -> (Int, String)
146
          helper el (0, _) = ((IntMap.size.snd) el, algostat el)
147
          helper el (old, str)
148
            | old == elsize = (elsize, str ++ " TIE " ++ algostat el)
149
            | otherwise = (elsize, str ++ " LOOSE " ++ algostat el)
150
              where elsize = (IntMap.size.snd) el
151

    
152
-- | Predicate of belonging to a given group restriction.
153
hasGroup :: Maybe Group.Group -> Node.Node -> Bool
154
hasGroup Nothing _ = True
155
hasGroup (Just grp) node = Node.group node == Group.idx grp
156

    
157
-- | Predicate of having at least one tag in a given set.
158
hasTag :: Maybe [String] -> Node.Node -> Bool
159
hasTag Nothing _ = True
160
hasTag (Just tags) node = not . null $ Node.nTags node `intersect` tags
161

    
162
-- | From a cluster configuration, get the list of non-redundant instances
163
-- of a node.
164
nonRedundant :: (Node.List, Instance.List) -> Ndx -> [Idx]
165
nonRedundant (nl, il) ndx =
166
  filter (not . Instance.hasSecondary . flip Container.find  il) $
167
  Node.pList (Container.find ndx nl)
168

    
169
-- | Within a cluster configuration, decide if the node hosts non-redundant
170
-- Instances.
171
noNonRedundant :: (Node.List, Instance.List) -> Node.Node -> Bool
172
noNonRedundant conf = null . nonRedundant conf . Node.idx
173

    
174
-- | Put the master node last.
175
-- Reorder a list of lists of nodes such that the master node (if present)
176
-- is the last node of the last group.
177
masterLast :: [[Node.Node]] -> [[Node.Node]]
178
masterLast rebootgroups =
179
  map (uncurry (++)) . uncurry (++) . partition (null . snd) $
180
  map (partition (not . Node.isMaster)) rebootgroups
181

    
182
-- | Main function.
183
main :: Options -> [String] -> IO ()
184
main opts args = do
185
  unless (null args) $ exitErr "This program doesn't take any arguments."
186

    
187
  let verbose = optVerbose opts
188
      maybeExit = if optForce opts then warn else exitErr
189

    
190
  -- Load cluster data. The last two arguments, cluster tags and ipolicy, are
191
  -- currently not used by this tool.
192
  ini_cdata@(ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
193

    
194
  let master_names = map Node.name . filter Node.isMaster . IntMap.elems $
195
                     fixed_nl
196
  case master_names of
197
    [] -> maybeExit "No master node found (maybe not supported by backend)."
198
    [ _ ] -> return ()
199
    _ -> exitErr $ "Found more than one master node: " ++  show master_names
200

    
201
  nlf <- setNodeStatus opts fixed_nl
202

    
203
  maybeSaveData (optSaveCluster opts) "original" "before hroller run" ini_cdata
204

    
205
  -- Find the wanted node group, if any.
206
  wantedGroup <- case optGroup opts of
207
    Nothing -> return Nothing
208
    Just name -> case Container.findByName gl name of
209
      Nothing -> exitErr "Cannot find target group."
210
      Just grp -> return (Just grp)
211

    
212
  let nodes = IntMap.filter (foldl (liftA2 (&&)) (const True)
213
                             [ not . Node.offline
214
                             , if optSkipNonRedundant opts
215
                                  then noNonRedundant (nlf, ilf)
216
                                  else const True
217
                             , hasTag $ optNodeTags opts
218
                             , hasGroup wantedGroup ])
219
              nlf
220
      mkGraph = if optOfflineMaintenance opts
221
                   then Node.mkNodeGraph
222
                   else Node.mkRebootNodeGraph nlf
223

    
224
  -- TODO: fail if instances are running (with option to warn only)
225

    
226
  nodeGraph <- case mkGraph nodes ilf of
227
                     Nothing -> exitErr "Cannot create node graph"
228
                     Just g -> return g
229

    
230
  when (verbose > 2) . putStrLn $ "Node Graph: " ++ show nodeGraph
231

    
232
  let colorAlgorithms = [ ("LF", colorLF)
233
                        , ("Dsatur", colorDsatur)
234
                        , ("Dcolor", colorDcolor)
235
                        ]
236
      colorings = map (\(v,a) -> (v,(colorVertMap.a) nodeGraph)) colorAlgorithms
237
      smallestColoring =
238
        (snd . minimumBy (comparing (IntMap.size . snd))) colorings
239
      allNdx = map Node.idx $ Container.elems nlf
240
      splitted = mapM (\ grp -> partitionNonRedundant grp allNdx (nlf,ilf)) $
241
                 IntMap.elems smallestColoring
242
  rebootGroups <- case splitted of
243
                    Ok splitgroups -> return $ concat splitgroups
244
                    Bad _ -> exitErr "Not enough capacity to move non-redundant\ 
245
                                     \ instances"
246
  let idToNode = (`Container.find` nodes)
247
      nodesRebootGroups =
248
        map (map idToNode . filter (`IntMap.member` nodes)) rebootGroups
249
      outputRebootGroups = masterLast .
250
                           sortBy (flip compare `on` length) $
251
                           nodesRebootGroups
252
      outputRebootNames = map (map Node.name) outputRebootGroups
253

    
254
  when (verbose > 1) . putStrLn $ getStats colorings
255

    
256
  if optOneStepOnly opts
257
     then do
258
       unless (optNoHeaders opts) $
259
              putStrLn "'First Reboot Group'"
260
       case outputRebootNames of
261
         [] -> return ()
262
         y : _ -> mapM_ putStrLn y
263
     else do
264
       unless (optNoHeaders opts) $
265
              putStrLn "'Node Reboot Groups'"
266
       mapM_ (putStrLn . commaJoin) outputRebootNames