Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Hroller.hs @ 9d049fb4

History | View | Annotate | Download (17.7 kB)

1
{-| Cluster rolling maintenance helper.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2012 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Hroller
27
  ( main
28
  , options
29
  , arguments
30
  ) where
31

    
32
import Control.Applicative
33
import Control.Arrow
34
import Control.Monad
35
import Data.Function
36
import Data.List
37
import Data.Ord
38
import Text.Printf
39

    
40
import qualified Data.IntMap as IntMap
41

    
42
import qualified Ganeti.HTools.Container as Container
43
import qualified Ganeti.HTools.Node as Node
44
import qualified Ganeti.HTools.Instance as Instance
45
import qualified Ganeti.HTools.Group as Group
46

    
47
import Ganeti.BasicTypes
48
import Ganeti.Common
49
import Ganeti.HTools.CLI
50
import Ganeti.HTools.ExtLoader
51
import Ganeti.HTools.Graph
52
import Ganeti.HTools.Loader
53
import Ganeti.HTools.Types
54
import Ganeti.Utils
55

    
56
-- | Options list and functions.
57
options :: IO [OptType]
58
options = do
59
  luxi <- oLuxiSocket
60
  return
61
    [ luxi
62
    , oRapiMaster
63
    , oDataFile
64
    , oIAllocSrc
65
    , oOfflineNode
66
    , oOfflineMaintenance
67
    , oVerbose
68
    , oQuiet
69
    , oNoHeaders
70
    , oNodeTags
71
    , oSaveCluster
72
    , oGroup
73
    , oPrintMoves
74
    , oFullEvacuation
75
    , oSkipNonRedundant
76
    , oIgnoreNonRedundant
77
    , oForce
78
    , oOneStepOnly
79
    ]
80

    
81
-- | The list of arguments supported by the program.
82
arguments :: [ArgCompletion]
83
arguments = []
84

    
85
-- | Compute the result of moving an instance to a different node.
86
move :: Idx -> Ndx -> (Node.List, Instance.List)
87
        -> OpResult (Node.List, Instance.List)
88
move idx new_ndx (nl, il) = do
89
  let new_node = Container.find new_ndx nl
90
      inst = Container.find idx il
91
      old_ndx = Instance.pNode inst
92
      old_node = Container.find old_ndx nl
93
  new_node' <- Node.addPriEx True new_node inst
94
  let old_node' = Node.removePri old_node inst
95
      inst' = Instance.setPri inst new_ndx
96
      nl' = Container.addTwo old_ndx old_node' new_ndx new_node' nl
97
      il' = Container.add idx inst' il
98
  return (nl', il')
99

    
100
-- | Move a non-redundant instance to one of the candidate nodes mentioned.
101
locateInstance :: Idx -> [Ndx] -> (Node.List, Instance.List)
102
                  -> Result (Node.List, Instance.List)
103
locateInstance idx ndxs conf =
104
  msum $ map (opToResult . flip (move idx) conf) ndxs
105

    
106
-- | Move a list of non-redundant instances to some of the nodes mentioned.
107
locateInstances :: [Idx] -> [Ndx] -> (Node.List, Instance.List)
108
                   -> Result (Node.List, Instance.List)
109
locateInstances idxs ndxs conf =
110
  foldM (\ cf idx -> locateInstance idx ndxs cf) conf idxs
111

    
112

    
113
-- | Greedily clear a node of a kind of instances by a given relocation method.
114
-- The arguments are a function providing the list of instances to be cleared,
115
-- the relocation function, the list of nodes to be cleared, a list of nodes
116
-- that can be relocated to, and the initial configuration. Returned is a list
117
-- of nodes that can be cleared simultaneously and the configuration after
118
-- clearing these nodes.
119
greedyClearNodes :: ((Node.List, Instance.List) -> Ndx -> [Idx])
120
                    -> ([Idx] -> [Ndx] -> (Node.List, Instance.List)
121
                        -> Result (Node.List, Instance.List))
122
                    -> [Ndx] -> [Ndx] -> (Node.List, Instance.List)
123
                    -> Result ([Ndx], (Node.List, Instance.List))
124
greedyClearNodes  _ _ [] _ conf = return ([], conf)
125
greedyClearNodes getInstances relocate (ndx:ndxs) targets conf@(nl, _) =
126
  withFirst `mplus` withoutFirst where
127
  withFirst = do
128
     let othernodes = delete ndx targets
129
         grp = Node.group $ Container.find ndx nl
130
         othernodesSameGroup =
131
           filter ((==) grp . Node.group . flip Container.find nl) othernodes
132
     conf' <- relocate (getInstances conf ndx) othernodesSameGroup conf
133
     (ndxs', conf'') <- greedyClearNodes getInstances relocate
134
                        ndxs othernodes conf'
135
     return (ndx:ndxs', conf'')
136
  withoutFirst = greedyClearNodes getInstances relocate ndxs targets conf
137
                    
138
-- | Greedily move the non-redundant instances away from a list of nodes.
139
-- Returns a list of ndoes that can be cleared simultaneously and the
140
-- configuration after clearing these nodes.
141
clearNodes :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
142
              -> Result ([Ndx], (Node.List, Instance.List))
143
clearNodes = greedyClearNodes nonRedundant locateInstances
144

    
145
-- | Partition nodes according to some clearing strategy.
146
-- Arguments are the clearing strategy, the list of nodes to be cleared,
147
-- the list of nodes that instances can be moved to, and the initial
148
-- configuration. Returned is a partion of the nodes to be cleared with the
149
-- configuration in that clearing situation.
150
partitionNodes :: ([Ndx] -> [Ndx] -> (Node.List, Instance.List)
151
                   -> Result ([Ndx], (Node.List, Instance.List)))
152
                  -> [Ndx] -> [Ndx] -> (Node.List, Instance.List)
153
                  -> Result [([Ndx], (Node.List, Instance.List))]
154
partitionNodes _ [] _  _ = return []
155
partitionNodes clear ndxs targets conf = do
156
  (grp, conf') <- clear ndxs targets conf
157
  guard . not . null $ grp
158
  let remaining = ndxs \\ grp
159
  part <- partitionNodes clear remaining targets conf
160
  return $ (grp, conf') : part
161

    
162
-- | Parition a list of nodes into chunks according cluster capacity.
163
partitionNonRedundant :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
164
                         -> Result [([Ndx], (Node.List, Instance.List))]
165
partitionNonRedundant = partitionNodes clearNodes
166

    
167
-- | Compute the result of migrating an instance.
168
migrate :: Idx -> (Node.List, Instance.List)
169
           -> OpResult (Node.List, Instance.List)
170
migrate idx (nl, il) = do
171
  let inst = Container.find idx il
172
      pdx = Instance.pNode inst
173
      sdx = Instance.sNode inst
174
      pNode = Container.find pdx nl
175
      sNode = Container.find sdx nl
176
      pNode' = Node.removePri pNode inst
177
      sNode' = Node.removeSec sNode inst
178
  sNode'' <- Node.addPriEx True sNode' inst
179
  pNode'' <- Node.addSecEx True pNode' inst sdx
180
  let inst' = Instance.setBoth inst sdx pdx
181
      nl' = Container.addTwo pdx pNode'' sdx sNode'' nl
182
      il' = Container.add idx inst' il
183
  return (nl', il')
184

    
185
-- | Obtain the list of primaries for a given node.
186
-- This restricts to those instances that have a secondary node.
187
primaries :: (Node.List, Instance.List) -> Ndx -> [Idx]
188
primaries (nl, il) = 
189
  filter (Instance.hasSecondary . flip Container.find  il) 
190
  . Node.pList . flip Container.find nl
191

    
192
-- | Migrate all instances of a given list of nodes.
193
-- The list of nodes is repeated as first argument in the result.
194
migrateOffNodes :: ([Ndx], (Node.List, Instance.List))
195
                   -> OpResult ([Ndx], (Node.List, Instance.List))
196
migrateOffNodes (ndxs, conf) = do
197
  let instances = ndxs >>= primaries conf
198
  conf' <- foldM (flip migrate) conf instances
199
  return (ndxs, conf')
200

    
201
-- | Compute the result of replacing the secondary node of an instance.
202
replaceSecondary :: Idx -> Ndx -> (Node.List, Instance.List)
203
        -> OpResult (Node.List, Instance.List)
204
replaceSecondary idx new_ndx (nl, il) = do
205
  let  new_secondary = Container.find new_ndx nl
206
       inst = Container.find idx il
207
       old_ndx = Instance.sNode inst
208
       pdx = Instance.pNode inst
209
       old_secondary = Container.find pdx nl
210
  if pdx == new_ndx then Bad FailInternal else Ok ()
211
  new_secondary' <- Node.addSecEx True new_secondary inst pdx
212
  let old_secondary' = Node.removeSec old_secondary inst
213
      inst' = Instance.setSec inst new_ndx
214
      nl' = Container.addTwo old_ndx old_secondary' new_ndx new_secondary' nl
215
      il' = Container.add idx inst' il
216
  return (nl', il')
217

    
218
-- | Find a suitable secondary node for the given instance from a list of nodes.
219
findSecondary :: Idx -> [Ndx] -> (Node.List, Instance.List)
220
                 -> Result (Node.List, Instance.List)
221
findSecondary idx ndxs conf =
222
  msum $ map (opToResult . flip (replaceSecondary idx) conf) ndxs
223

    
224
-- | Find suitable secondary nodes from the given nodes for a list of instances.
225
findSecondaries :: [Idx] -> [Ndx] -> (Node.List, Instance.List) 
226
                   -> Result (Node.List, Instance.List)
227
findSecondaries idxs ndxs conf =
228
  foldM (\ cf idx -> findSecondary idx ndxs cf) conf idxs
229

    
230
-- | Obtain the list of secondaries for a given node.
231
secondaries :: (Node.List, Instance.List) -> Ndx -> [Idx]
232
secondaries (nl, _) = Node.sList . flip Container.find nl
233

    
234
-- | Greedily move secondaries away from a list of nodes.
235
-- Returns a list of nodes that can be cleared simultaneously, 
236
-- and the configuration after these nodes are cleared.
237
clearSecondaries :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
238
                    -> Result ([Ndx], (Node.List, Instance.List))
239
clearSecondaries = greedyClearNodes secondaries findSecondaries
240

    
241
-- | Partition a list of nodes into chunks according to the ability to find
242
-- suitable replacement secondary nodes.
243
partitionSecondaries :: [Ndx] -> [Ndx] -> (Node.List, Instance.List)
244
                        -> Result [([Ndx], (Node.List, Instance.List))]
245
partitionSecondaries = partitionNodes clearSecondaries
246

    
247
-- | Gather statistics for the coloring algorithms.
248
-- Returns a string with a summary on how each algorithm has performed,
249
-- in order of non-decreasing effectiveness, and whether it tied or lost
250
-- with the previous one.
251
getStats :: [(String, ColorVertMap)] -> String
252
getStats colorings = snd . foldr helper (0,"") $ algBySize colorings
253
    where algostat (algo, cmap) = algo ++ ": " ++ size cmap ++ grpsizes cmap
254
          size cmap = show (IntMap.size cmap) ++ " "
255
          grpsizes cmap =
256
            "(" ++ commaJoin (map (show.length) (IntMap.elems cmap)) ++ ")"
257
          algBySize = sortBy (flip (comparing (IntMap.size.snd)))
258
          helper :: (String, ColorVertMap) -> (Int, String) -> (Int, String)
259
          helper el (0, _) = ((IntMap.size.snd) el, algostat el)
260
          helper el (old, str)
261
            | old == elsize = (elsize, str ++ " TIE " ++ algostat el)
262
            | otherwise = (elsize, str ++ " LOOSE " ++ algostat el)
263
              where elsize = (IntMap.size.snd) el
264

    
265
-- | Predicate of belonging to a given group restriction.
266
hasGroup :: Maybe Group.Group -> Node.Node -> Bool
267
hasGroup Nothing _ = True
268
hasGroup (Just grp) node = Node.group node == Group.idx grp
269

    
270
-- | Predicate of having at least one tag in a given set.
271
hasTag :: Maybe [String] -> Node.Node -> Bool
272
hasTag Nothing _ = True
273
hasTag (Just tags) node = not . null $ Node.nTags node `intersect` tags
274

    
275
-- | From a cluster configuration, get the list of non-redundant instances
276
-- of a node.
277
nonRedundant :: (Node.List, Instance.List) -> Ndx -> [Idx]
278
nonRedundant (nl, il) ndx =
279
  filter (not . Instance.hasSecondary . flip Container.find  il) $
280
  Node.pList (Container.find ndx nl)
281

    
282
-- | Within a cluster configuration, decide if the node hosts non-redundant
283
-- Instances.
284
noNonRedundant :: (Node.List, Instance.List) -> Node.Node -> Bool
285
noNonRedundant conf = null . nonRedundant conf . Node.idx
286

    
287
-- | Put the master node last.
288
-- Reorder a list groups of nodes (with additional information) such that the
289
-- master node (if present) is the last node of the last group.
290
masterLast :: [([Node.Node], a)] -> [([Node.Node], a)]
291
masterLast rebootgroups =
292
  map (first $ uncurry (++)) . uncurry (++) . partition (null . snd . fst) $
293
  map (first $ partition (not . Node.isMaster)) rebootgroups
294

    
295
-- | From two configurations compute the list of moved instances.
296
-- Do not show instances where only primary and secondary switched their
297
-- role, as here the instance is not moved in a proper sense.
298
getMoves :: (Node.List, Instance.List) -> (Node.List, Instance.List)
299
            -> [(Instance.Instance, (Node.Node, Maybe Node.Node))]
300
getMoves (_, il) (nl', il') = do
301
  ix <- Container.keys il
302
  let inst = Container.find ix il
303
      inst' = Container.find ix il'
304
      hasSec = Instance.hasSecondary inst
305
  guard $ Instance.pNode inst /= Instance.pNode inst'
306
          || (hasSec && Instance.sNode inst /= Instance.sNode inst')
307
  guard . not $ Instance.pNode inst' == Instance.sNode inst
308
                && Instance.sNode inst' == Instance.pNode inst
309
  return (inst', (Container.find (Instance.pNode inst') nl', 
310
                  if hasSec
311
                     then Just $ Container.find (Instance.sNode inst') nl'
312
                     else Nothing))
313

    
314
-- | Main function.
315
main :: Options -> [String] -> IO ()
316
main opts args = do
317
  unless (null args) $ exitErr "This program doesn't take any arguments."
318

    
319
  let verbose = optVerbose opts
320
      maybeExit = if optForce opts then warn else exitErr
321

    
322
  -- Load cluster data. The last two arguments, cluster tags and ipolicy, are
323
  -- currently not used by this tool.
324
  ini_cdata@(ClusterData gl fixed_nl ilf _ _) <- loadExternalData opts
325

    
326
  let master_names = map Node.name . filter Node.isMaster . IntMap.elems $
327
                     fixed_nl
328
  case master_names of
329
    [] -> maybeExit "No master node found (maybe not supported by backend)."
330
    [ _ ] -> return ()
331
    _ -> exitErr $ "Found more than one master node: " ++  show master_names
332

    
333
  nlf <- setNodeStatus opts fixed_nl
334

    
335
  maybeSaveData (optSaveCluster opts) "original" "before hroller run" ini_cdata
336

    
337
  -- Find the wanted node group, if any.
338
  wantedGroup <- case optGroup opts of
339
    Nothing -> return Nothing
340
    Just name -> case Container.findByName gl name of
341
      Nothing -> exitErr "Cannot find target group."
342
      Just grp -> return (Just grp)
343

    
344
  let nodes = IntMap.filter (foldl (liftA2 (&&)) (const True)
345
                             [ not . Node.offline
346
                             , if optSkipNonRedundant opts
347
                                  then noNonRedundant (nlf, ilf)
348
                                  else const True
349
                             , hasTag $ optNodeTags opts
350
                             , hasGroup wantedGroup ])
351
              nlf
352
      mkGraph = if optOfflineMaintenance opts
353
                   then Node.mkNodeGraph
354
                   else Node.mkRebootNodeGraph nlf
355

    
356
  nodeGraph <- case mkGraph nodes ilf of
357
                     Nothing -> exitErr "Cannot create node graph"
358
                     Just g -> return g
359

    
360
  when (verbose > 2) . putStrLn $ "Node Graph: " ++ show nodeGraph
361

    
362
  let colorAlgorithms = [ ("LF", colorLF)
363
                        , ("Dsatur", colorDsatur)
364
                        , ("Dcolor", colorDcolor)
365
                        ]
366
      colorings = map (\(v,a) -> (v,(colorVertMap.a) nodeGraph)) colorAlgorithms
367
      smallestColoring = IntMap.elems $
368
        (snd . minimumBy (comparing (IntMap.size . snd))) colorings
369
      allNdx = map Node.idx . filter (not . Node.offline) . Container.elems
370
               $ nlf
371
      splitted = mapM (\ grp -> partitionNonRedundant grp allNdx (nlf,ilf))
372
                 smallestColoring
373
  rebootGroups <- if optIgnoreNonRedundant opts
374
                     then return $ zip smallestColoring (repeat (nlf, ilf))
375
                     else case splitted of
376
                            Ok splitgroups -> return $ concat splitgroups
377
                            Bad _ -> exitErr "Not enough capacity to move\ 
378
                                             \ non-redundant instances"
379
  
380
  let migrated = mapM migrateOffNodes rebootGroups
381
  rebootGroups' <- if not . optFullEvacuation $ opts
382
                      then return rebootGroups
383
                      else case migrated of
384
                             Ok migratedGroup -> return migratedGroup
385
                             Bad _ -> exitErr "Failed to migrate instances\ 
386
                                              \ off nodes"
387
  let splitted' = mapM (\(grp, conf) -> partitionSecondaries grp allNdx conf)
388
                  rebootGroups'
389
  rebootGroups'' <- if optFullEvacuation opts
390
                      then case splitted' of
391
                             Ok splitgroups -> return $ concat splitgroups
392
                             Bad _ -> exitErr "Not enough capacity to move\
393
                                              \ secondaries"
394
                      else return rebootGroups'
395
  let idToNode = (`Container.find` nodes)
396
      nodesRebootGroups = map (first $ map idToNode
397
                                       . filter (`IntMap.member` nodes))
398
                          rebootGroups''
399
      outputRebootGroups = masterLast .
400
                           sortBy (flip compare `on` length . fst) $
401
                           nodesRebootGroups
402
      confToMoveNames =
403
        map (Instance.name *** (Node.name *** flip (>>=) (return . Node.name)))
404
        . getMoves (nlf, ilf)
405
      namesAndMoves = map (map Node.name *** confToMoveNames) outputRebootGroups
406

    
407
  when (verbose > 1) . putStrLn $ getStats colorings
408

    
409
  let showGroup = if optOneStepOnly opts
410
                    then mapM_ putStrLn
411
                    else putStrLn . commaJoin
412
      showMoves :: [(String, (String, Maybe String))] -> IO ()
413
      showMoves = if optPrintMoves opts
414
                    then mapM_ $ putStrLn . \(a,(b,c)) ->
415
                                                maybe (printf "  %s %s" a b)
416
                                                      (printf "  %s %s %s" a b)
417
                                                      c
418
                    else const $ return ()
419
      showBoth = liftM2 (>>) (showGroup . fst) (showMoves . snd)
420

    
421

    
422
  if optOneStepOnly opts
423
     then do
424
       unless (optNoHeaders opts) $
425
              putStrLn "'First Reboot Group'"
426
       case namesAndMoves of
427
         [] -> return ()
428
         y : _ -> showBoth y
429
     else do
430
       unless (optNoHeaders opts) $
431
              putStrLn "'Node Reboot Groups'"
432
       mapM_ showBoth namesAndMoves