Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Harep.hs @ 3416e3e7

History | View | Annotate | Download (6.6 kB)

1
{-| Auto-repair tool for Ganeti.
2

    
3
-}
4

    
5
{-
6

    
7
Copyright (C) 2013 Google Inc.
8

    
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13

    
14
This program is distributed in the hope that it will be useful, but
15
WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
General Public License for more details.
18

    
19
You should have received a copy of the GNU General Public License
20
along with this program; if not, write to the Free Software
21
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22
02110-1301, USA.
23

    
24
-}
25

    
26
module Ganeti.HTools.Program.Harep
27
  ( main
28
  , arguments
29
  , options) where
30

    
31
import Control.Monad
32
import Data.Function
33
import Data.List
34
import Data.Maybe
35
import Data.Ord
36
import System.Time
37

    
38
import Ganeti.BasicTypes
39
import Ganeti.Common
40
import Ganeti.Types
41
import Ganeti.Utils
42
import qualified Ganeti.Constants as C
43
import qualified Ganeti.Path as Path
44

    
45
import Ganeti.HTools.CLI
46
import Ganeti.HTools.Loader
47
import Ganeti.HTools.ExtLoader
48
import Ganeti.HTools.Types
49
import qualified Ganeti.HTools.Container as Container
50
import qualified Ganeti.HTools.Instance as Instance
51

    
52
-- | Options list and functions.
53
options :: IO [OptType]
54
options = do
55
  luxi <- oLuxiSocket
56
  return
57
    [ luxi
58
    ]
59

    
60
arguments :: [ArgCompletion]
61
arguments = []
62

    
63
data InstanceData = InstanceData { arInstance :: Instance.Instance
64
                                 , arState :: AutoRepairStatus
65
                                 , tagsToRemove :: [String]
66
                                 }
67
                    deriving (Eq, Show)
68

    
69
-- | Parse a tag into an 'AutoRepairData' record.
70
--
71
-- @Nothing@ is returned if the tag is not an auto-repair tag, or if it's
72
-- malformed.
73
parseInitTag :: String -> Maybe AutoRepairData
74
parseInitTag tag =
75
  let parsePending = do
76
        subtag <- chompPrefix C.autoRepairTagPending tag
77
        case sepSplit ':' subtag of
78
          [rtype, uuid, ts, jobs] -> makeArData rtype uuid ts jobs
79
          _                       -> fail ("Invalid tag: " ++ show tag)
80

    
81
      parseResult = do
82
        subtag <- chompPrefix C.autoRepairTagResult tag
83
        case sepSplit ':' subtag of
84
          [rtype, uuid, ts, result, jobs] -> do
85
            arData <- makeArData rtype uuid ts jobs
86
            result' <- autoRepairResultFromRaw result
87
            return arData { arResult = Just result' }
88
          _                               -> fail ("Invalid tag: " ++ show tag)
89

    
90
      makeArData rtype uuid ts jobs = do
91
        rtype' <- autoRepairTypeFromRaw rtype
92
        ts' <- tryRead "auto-repair time" ts
93
        jobs' <- mapM makeJobIdS $ sepSplit '+' jobs
94
        return AutoRepairData { arType = rtype'
95
                              , arUuid = uuid
96
                              , arTime = TOD ts' 0
97
                              , arJobs = jobs'
98
                              , arResult = Nothing
99
                              , arTag = tag
100
                              }
101
  in
102
   parsePending `mplus` parseResult
103

    
104
-- | Return the 'AutoRepairData' element of an 'AutoRepairStatus' type.
105
getArData :: AutoRepairStatus -> Maybe AutoRepairData
106
getArData status =
107
  case status of
108
    ArHealthy (Just d) -> Just d
109
    ArFailedRepair  d  -> Just d
110
    ArPendingRepair d  -> Just d
111
    ArNeedsRepair   d  -> Just d
112
    _                  -> Nothing
113

    
114
-- | Return a new list of tags to remove that includes @arTag@ if present.
115
delCurTag :: InstanceData -> [String]
116
delCurTag instData =
117
  let arData = getArData $ arState instData
118
      rmTags = tagsToRemove instData
119
  in
120
   case arData of
121
     Just d  -> arTag d : rmTags
122
     Nothing -> rmTags
123

    
124
-- | Set the initial auto-repair state of an instance from its auto-repair tags.
125
--
126
-- The rules when there are multiple tags is:
127
--
128
--   * the earliest failure result always wins
129
--
130
--   * two or more pending repairs results in a fatal error
131
--
132
--   * a pending result from id X and a success result from id Y result in error
133
--     if Y is newer than X
134
--
135
--   * if there are no pending repairs, the newest success result wins,
136
--     otherwise the pending result is used.
137
setInitialState :: Instance.Instance -> Result InstanceData
138
setInitialState inst =
139
  let arData = mapMaybe parseInitTag $ Instance.allTags inst
140
      -- Group all the AutoRepairData records by id (i.e. by repair task), and
141
      -- present them from oldest to newest.
142
      arData' = sortBy (comparing arUuid) arData
143
      arGroups = groupBy ((==) `on` arUuid) arData'
144
      arGroups' = sortBy (comparing $ minimum . map arTime) arGroups
145
  in
146
   foldM arStatusCmp (InstanceData inst (ArHealthy Nothing) []) arGroups'
147

    
148
-- | Update the initial status of an instance with new repair task tags.
149
--
150
-- This function gets called once per repair group in an instance's tag, and it
151
-- determines whether to set the status of the instance according to this new
152
-- group, or to keep the existing state. See the documentation for
153
-- 'setInitialState' for the rules to be followed when determining this.
154
arStatusCmp :: InstanceData -> [AutoRepairData] -> Result InstanceData
155
arStatusCmp instData arData =
156
  let curSt = arState instData
157
      arData' = sortBy (comparing keyfn) arData
158
      keyfn d = (arResult d, arTime d)
159
      newData = last arData'
160
      newSt = case arResult newData of
161
                Just ArSuccess -> ArHealthy $ Just newData
162
                Just ArEnoperm -> ArHealthy $ Just newData
163
                Just ArFailure -> ArFailedRepair newData
164
                Nothing        -> ArPendingRepair newData
165
  in
166
   case curSt of
167
     ArFailedRepair _ -> Ok instData  -- Always keep the earliest failure.
168
     ArHealthy _      -> Ok instData { arState = newSt
169
                                     , tagsToRemove = delCurTag instData
170
                                     }
171
     ArPendingRepair d -> Bad (
172
       "An unfinished repair was found in instance " ++
173
       Instance.name (arInstance instData) ++ ": found tag " ++
174
       show (arTag newData) ++ ", but older pending tag " ++
175
       show (arTag d) ++ "exists.")
176

    
177
     ArNeedsRepair _ -> Bad
178
       "programming error: ArNeedsRepair found as an initial state"
179

    
180
-- | Main function.
181
main :: Options -> [String] -> IO ()
182
main opts args = do
183
  unless (null args) $
184
    exitErr "this program doesn't take any arguments."
185

    
186
  luxiDef <- Path.defaultLuxiSocket
187
  let master = fromMaybe luxiDef $ optLuxi opts
188
      opts' = opts { optLuxi = Just master }
189

    
190
  (ClusterData _ _ il _ _) <- loadExternalData opts'
191

    
192
  let iniDataRes = mapM setInitialState $ Container.elems il
193
  _unused_iniData <- exitIfBad "when parsing auto-repair tags" iniDataRes
194

    
195
  return ()