root / src / Ganeti / HTools / Program / Harep.hs @ 8af72964
History | View | Annotate | Download (18.4 kB)
1 |
{-| Auto-repair tool for Ganeti. |
---|---|
2 |
|
3 |
-} |
4 |
|
5 |
{- |
6 |
|
7 |
Copyright (C) 2013 Google Inc. |
8 |
|
9 |
This program is free software; you can redistribute it and/or modify |
10 |
it under the terms of the GNU General Public License as published by |
11 |
the Free Software Foundation; either version 2 of the License, or |
12 |
(at your option) any later version. |
13 |
|
14 |
This program is distributed in the hope that it will be useful, but |
15 |
WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 |
General Public License for more details. |
18 |
|
19 |
You should have received a copy of the GNU General Public License |
20 |
along with this program; if not, write to the Free Software |
21 |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
22 |
02110-1301, USA. |
23 |
|
24 |
-} |
25 |
|
26 |
module Ganeti.HTools.Program.Harep |
27 |
( main |
28 |
, arguments |
29 |
, options) where |
30 |
|
31 |
import Control.Exception (bracket) |
32 |
import Control.Monad |
33 |
import Data.Function |
34 |
import Data.List |
35 |
import Data.Maybe |
36 |
import Data.Ord |
37 |
import System.Time |
38 |
|
39 |
import Ganeti.BasicTypes |
40 |
import Ganeti.Common |
41 |
import Ganeti.Errors |
42 |
import Ganeti.Jobs |
43 |
import Ganeti.OpCodes |
44 |
import Ganeti.OpParams |
45 |
import Ganeti.Types |
46 |
import Ganeti.Utils |
47 |
import qualified Ganeti.Constants as C |
48 |
import qualified Ganeti.Luxi as L |
49 |
import qualified Ganeti.Path as Path |
50 |
|
51 |
import Ganeti.HTools.CLI |
52 |
import Ganeti.HTools.Loader |
53 |
import Ganeti.HTools.ExtLoader |
54 |
import Ganeti.HTools.Types |
55 |
import qualified Ganeti.HTools.Container as Container |
56 |
import qualified Ganeti.HTools.Instance as Instance |
57 |
import qualified Ganeti.HTools.Node as Node |
58 |
|
59 |
-- | Options list and functions. |
60 |
options :: IO [OptType] |
61 |
options = do |
62 |
luxi <- oLuxiSocket |
63 |
return |
64 |
[ luxi |
65 |
, oJobDelay |
66 |
] |
67 |
|
68 |
arguments :: [ArgCompletion] |
69 |
arguments = [] |
70 |
|
71 |
data InstanceData = InstanceData { arInstance :: Instance.Instance |
72 |
, arState :: AutoRepairStatus |
73 |
, tagsToRemove :: [String] |
74 |
} |
75 |
deriving (Eq, Show) |
76 |
|
77 |
-- | Parse a tag into an 'AutoRepairData' record. |
78 |
-- |
79 |
-- @Nothing@ is returned if the tag is not an auto-repair tag, or if it's |
80 |
-- malformed. |
81 |
parseInitTag :: String -> Maybe AutoRepairData |
82 |
parseInitTag tag = |
83 |
let parsePending = do |
84 |
subtag <- chompPrefix C.autoRepairTagPending tag |
85 |
case sepSplit ':' subtag of |
86 |
[rtype, uuid, ts, jobs] -> makeArData rtype uuid ts jobs |
87 |
_ -> fail ("Invalid tag: " ++ show tag) |
88 |
|
89 |
parseResult = do |
90 |
subtag <- chompPrefix C.autoRepairTagResult tag |
91 |
case sepSplit ':' subtag of |
92 |
[rtype, uuid, ts, result, jobs] -> do |
93 |
arData <- makeArData rtype uuid ts jobs |
94 |
result' <- autoRepairResultFromRaw result |
95 |
return arData { arResult = Just result' } |
96 |
_ -> fail ("Invalid tag: " ++ show tag) |
97 |
|
98 |
makeArData rtype uuid ts jobs = do |
99 |
rtype' <- autoRepairTypeFromRaw rtype |
100 |
ts' <- tryRead "auto-repair time" ts |
101 |
jobs' <- mapM makeJobIdS $ sepSplit '+' jobs |
102 |
return AutoRepairData { arType = rtype' |
103 |
, arUuid = uuid |
104 |
, arTime = TOD ts' 0 |
105 |
, arJobs = jobs' |
106 |
, arResult = Nothing |
107 |
, arTag = tag |
108 |
} |
109 |
in |
110 |
parsePending `mplus` parseResult |
111 |
|
112 |
-- | Return the 'AutoRepairData' element of an 'AutoRepairStatus' type. |
113 |
getArData :: AutoRepairStatus -> Maybe AutoRepairData |
114 |
getArData status = |
115 |
case status of |
116 |
ArHealthy (Just d) -> Just d |
117 |
ArFailedRepair d -> Just d |
118 |
ArPendingRepair d -> Just d |
119 |
ArNeedsRepair d -> Just d |
120 |
_ -> Nothing |
121 |
|
122 |
-- | Return a short name for each auto-repair status. |
123 |
-- |
124 |
-- This is a more concise representation of the status, because the default |
125 |
-- "Show" formatting includes all the accompanying auto-repair data. |
126 |
arStateName :: AutoRepairStatus -> String |
127 |
arStateName status = |
128 |
case status of |
129 |
ArHealthy _ -> "Healthy" |
130 |
ArFailedRepair _ -> "Failure" |
131 |
ArPendingRepair _ -> "Pending repair" |
132 |
ArNeedsRepair _ -> "Needs repair" |
133 |
|
134 |
-- | Return a new list of tags to remove that includes @arTag@ if present. |
135 |
delCurTag :: InstanceData -> [String] |
136 |
delCurTag instData = |
137 |
let arData = getArData $ arState instData |
138 |
rmTags = tagsToRemove instData |
139 |
in |
140 |
case arData of |
141 |
Just d -> arTag d : rmTags |
142 |
Nothing -> rmTags |
143 |
|
144 |
-- | Set the initial auto-repair state of an instance from its auto-repair tags. |
145 |
-- |
146 |
-- The rules when there are multiple tags is: |
147 |
-- |
148 |
-- * the earliest failure result always wins |
149 |
-- |
150 |
-- * two or more pending repairs results in a fatal error |
151 |
-- |
152 |
-- * a pending result from id X and a success result from id Y result in error |
153 |
-- if Y is newer than X |
154 |
-- |
155 |
-- * if there are no pending repairs, the newest success result wins, |
156 |
-- otherwise the pending result is used. |
157 |
setInitialState :: Instance.Instance -> Result InstanceData |
158 |
setInitialState inst = |
159 |
let arData = mapMaybe parseInitTag $ Instance.allTags inst |
160 |
-- Group all the AutoRepairData records by id (i.e. by repair task), and |
161 |
-- present them from oldest to newest. |
162 |
arData' = sortBy (comparing arUuid) arData |
163 |
arGroups = groupBy ((==) `on` arUuid) arData' |
164 |
arGroups' = sortBy (comparing $ minimum . map arTime) arGroups |
165 |
in |
166 |
foldM arStatusCmp (InstanceData inst (ArHealthy Nothing) []) arGroups' |
167 |
|
168 |
-- | Update the initial status of an instance with new repair task tags. |
169 |
-- |
170 |
-- This function gets called once per repair group in an instance's tag, and it |
171 |
-- determines whether to set the status of the instance according to this new |
172 |
-- group, or to keep the existing state. See the documentation for |
173 |
-- 'setInitialState' for the rules to be followed when determining this. |
174 |
arStatusCmp :: InstanceData -> [AutoRepairData] -> Result InstanceData |
175 |
arStatusCmp instData arData = |
176 |
let curSt = arState instData |
177 |
arData' = sortBy (comparing keyfn) arData |
178 |
keyfn d = (arResult d, arTime d) |
179 |
newData = last arData' |
180 |
newSt = case arResult newData of |
181 |
Just ArSuccess -> ArHealthy $ Just newData |
182 |
Just ArEnoperm -> ArHealthy $ Just newData |
183 |
Just ArFailure -> ArFailedRepair newData |
184 |
Nothing -> ArPendingRepair newData |
185 |
in |
186 |
case curSt of |
187 |
ArFailedRepair _ -> Ok instData -- Always keep the earliest failure. |
188 |
ArHealthy _ -> Ok instData { arState = newSt |
189 |
, tagsToRemove = delCurTag instData |
190 |
} |
191 |
ArPendingRepair d -> Bad ( |
192 |
"An unfinished repair was found in instance " ++ |
193 |
Instance.name (arInstance instData) ++ ": found tag " ++ |
194 |
show (arTag newData) ++ ", but older pending tag " ++ |
195 |
show (arTag d) ++ "exists.") |
196 |
|
197 |
ArNeedsRepair _ -> Bad |
198 |
"programming error: ArNeedsRepair found as an initial state" |
199 |
|
200 |
-- | Query jobs of a pending repair, returning the new instance data. |
201 |
processPending :: L.Client -> InstanceData -> IO InstanceData |
202 |
processPending client instData = |
203 |
case arState instData of |
204 |
(ArPendingRepair arData) -> do |
205 |
sts <- L.queryJobsStatus client $ arJobs arData |
206 |
time <- getClockTime |
207 |
case sts of |
208 |
Bad e -> exitErr $ "could not check job status: " ++ formatError e |
209 |
Ok sts' -> |
210 |
if any (<= JOB_STATUS_RUNNING) sts' then |
211 |
return instData -- (no change) |
212 |
else do |
213 |
let iname = Instance.name $ arInstance instData |
214 |
srcSt = arStateName $ arState instData |
215 |
destSt = arStateName arState' |
216 |
putStrLn ("Moving " ++ iname ++ " from " ++ show srcSt ++ " to " ++ |
217 |
show destSt) |
218 |
commitChange client instData' |
219 |
where |
220 |
instData' = |
221 |
instData { arState = arState' |
222 |
, tagsToRemove = delCurTag instData |
223 |
} |
224 |
arState' = |
225 |
if all (== JOB_STATUS_SUCCESS) sts' then |
226 |
ArHealthy $ Just (updateTag $ arData { arResult = Just ArSuccess |
227 |
, arTime = time }) |
228 |
else |
229 |
ArFailedRepair (updateTag $ arData { arResult = Just ArFailure |
230 |
, arTime = time }) |
231 |
|
232 |
_ -> return instData |
233 |
|
234 |
-- | Update the tag of an 'AutoRepairData' record to match all the other fields. |
235 |
updateTag :: AutoRepairData -> AutoRepairData |
236 |
updateTag arData = |
237 |
let ini = [autoRepairTypeToRaw $ arType arData, |
238 |
arUuid arData, |
239 |
clockTimeToString $ arTime arData] |
240 |
end = [intercalate "+" . map (show . fromJobId) $ arJobs arData] |
241 |
(pfx, middle) = |
242 |
case arResult arData of |
243 |
Nothing -> (C.autoRepairTagPending, []) |
244 |
Just rs -> (C.autoRepairTagResult, [autoRepairResultToRaw rs]) |
245 |
in |
246 |
arData { arTag = pfx ++ intercalate ":" (ini ++ middle ++ end) } |
247 |
|
248 |
-- | Apply and remove tags from an instance as indicated by 'InstanceData'. |
249 |
-- |
250 |
-- If the /arState/ of the /InstanceData/ record has an associated |
251 |
-- 'AutoRepairData', add its tag to the instance object. Additionally, if |
252 |
-- /tagsToRemove/ is not empty, remove those tags from the instance object. The |
253 |
-- returned /InstanceData/ object always has an empty /tagsToRemove/. |
254 |
commitChange :: L.Client -> InstanceData -> IO InstanceData |
255 |
commitChange client instData = do |
256 |
let iname = Instance.name $ arInstance instData |
257 |
arData = getArData $ arState instData |
258 |
rmTags = tagsToRemove instData |
259 |
execJobsWaitOk' opcodes = do |
260 |
res <- execJobsWaitOk [map wrapOpCode opcodes] client |
261 |
case res of |
262 |
Ok _ -> return () |
263 |
Bad e -> exitErr e |
264 |
|
265 |
when (isJust arData) $ do |
266 |
let tag = arTag $ fromJust arData |
267 |
putStrLn (">>> Adding the following tag to " ++ iname ++ ":\n" ++ show tag) |
268 |
execJobsWaitOk' [OpTagsSet (TagInstance iname) [tag]] |
269 |
|
270 |
unless (null rmTags) $ do |
271 |
putStr (">>> Removing the following tags from " ++ iname ++ ":\n" ++ |
272 |
unlines (map show rmTags)) |
273 |
execJobsWaitOk' [OpTagsDel (TagInstance iname) rmTags] |
274 |
|
275 |
return instData { tagsToRemove = [] } |
276 |
|
277 |
-- | Detect brokeness with an instance and suggest repair type and jobs to run. |
278 |
detectBroken :: Node.List -> Instance.Instance |
279 |
-> Maybe (AutoRepairType, [OpCode]) |
280 |
detectBroken nl inst = |
281 |
let disk = Instance.diskTemplate inst |
282 |
iname = Instance.name inst |
283 |
offPri = Node.offline $ Container.find (Instance.pNode inst) nl |
284 |
offSec = Node.offline $ Container.find (Instance.sNode inst) nl |
285 |
in |
286 |
case disk of |
287 |
DTDrbd8 |
288 |
| offPri && offSec -> |
289 |
Just ( |
290 |
ArReinstall, |
291 |
[ OpInstanceRecreateDisks { opInstanceName = iname |
292 |
, opRecreateDisksInfo = RecreateDisksAll |
293 |
, opNodes = [] |
294 |
-- FIXME: there should be a better way to |
295 |
-- specify opcode paramteres than abusing |
296 |
-- mkNonEmpty in this way (using the fact |
297 |
-- that Maybe is used both for optional |
298 |
-- fields, and to express failure). |
299 |
, opIallocator = mkNonEmpty "hail" |
300 |
} |
301 |
, OpInstanceReinstall { opInstanceName = iname |
302 |
, opOsType = Nothing |
303 |
, opTempOsParams = Nothing |
304 |
, opForceVariant = False |
305 |
} |
306 |
]) |
307 |
| offPri -> |
308 |
Just ( |
309 |
ArFailover, |
310 |
[ OpInstanceFailover { opInstanceName = iname |
311 |
-- FIXME: ditto, see above. |
312 |
, opShutdownTimeout = fromJust $ mkNonNegative |
313 |
C.defaultShutdownTimeout |
314 |
, opIgnoreConsistency = False |
315 |
, opTargetNode = Nothing |
316 |
, opIgnoreIpolicy = False |
317 |
, opIallocator = Nothing |
318 |
} |
319 |
]) |
320 |
| offSec -> |
321 |
Just ( |
322 |
ArFixStorage, |
323 |
[ OpInstanceReplaceDisks { opInstanceName = iname |
324 |
, opReplaceDisksMode = ReplaceNewSecondary |
325 |
, opReplaceDisksList = [] |
326 |
, opRemoteNode = Nothing |
327 |
-- FIXME: ditto, see above. |
328 |
, opIallocator = mkNonEmpty "hail" |
329 |
, opEarlyRelease = False |
330 |
, opIgnoreIpolicy = False |
331 |
} |
332 |
]) |
333 |
| otherwise -> Nothing |
334 |
|
335 |
DTPlain |
336 |
| offPri -> |
337 |
Just ( |
338 |
ArReinstall, |
339 |
[ OpInstanceRecreateDisks { opInstanceName = iname |
340 |
, opRecreateDisksInfo = RecreateDisksAll |
341 |
, opNodes = [] |
342 |
-- FIXME: ditto, see above. |
343 |
, opIallocator = mkNonEmpty "hail" |
344 |
} |
345 |
, OpInstanceReinstall { opInstanceName = iname |
346 |
, opOsType = Nothing |
347 |
, opTempOsParams = Nothing |
348 |
, opForceVariant = False |
349 |
} |
350 |
]) |
351 |
| otherwise -> Nothing |
352 |
|
353 |
_ -> Nothing -- Other cases are unimplemented for now: DTDiskless, |
354 |
-- DTFile, DTSharedFile, DTBlock, DTRbd, DTExt. |
355 |
|
356 |
-- | Perform the suggested repair on an instance if its policy allows it. |
357 |
doRepair :: L.Client -- ^ The Luxi client |
358 |
-> Double -- ^ Delay to insert before the first repair opcode |
359 |
-> InstanceData -- ^ The instance data |
360 |
-> (AutoRepairType, [OpCode]) -- ^ The repair job to perform |
361 |
-> IO InstanceData -- ^ The updated instance data |
362 |
doRepair client delay instData (rtype, opcodes) = |
363 |
let inst = arInstance instData |
364 |
ipol = Instance.arPolicy inst |
365 |
iname = Instance.name inst |
366 |
in |
367 |
case ipol of |
368 |
ArEnabled maxtype -> |
369 |
if rtype > maxtype then do |
370 |
uuid <- newUUID |
371 |
time <- getClockTime |
372 |
|
373 |
let arState' = ArNeedsRepair ( |
374 |
updateTag $ AutoRepairData rtype uuid time [] (Just ArEnoperm) "") |
375 |
instData' = instData { arState = arState' |
376 |
, tagsToRemove = delCurTag instData |
377 |
} |
378 |
|
379 |
putStrLn ("Not performing a repair of type " ++ show rtype ++ " on " ++ |
380 |
iname ++ " because only repairs up to " ++ show maxtype ++ |
381 |
" are allowed") |
382 |
commitChange client instData' -- Adds "enoperm" result label. |
383 |
else do |
384 |
putStrLn ("Executing " ++ show rtype ++ " repair on " ++ iname) |
385 |
|
386 |
-- After submitting the job, we must write an autorepair:pending tag, |
387 |
-- that includes the repair job IDs so that they can be checked later. |
388 |
-- One problem we run into is that the repair job immediately grabs |
389 |
-- locks for the affected instance, and the subsequent TAGS_SET job is |
390 |
-- blocked, introducing an unnecesary delay for the end-user. One |
391 |
-- alternative would be not to wait for the completion of the TAGS_SET |
392 |
-- job, contrary to what commitChange normally does; but we insist on |
393 |
-- waiting for the tag to be set so as to abort in case of failure, |
394 |
-- because the cluster is left in an invalid state in that case. |
395 |
-- |
396 |
-- The proper solution (in 2.9+) would be not to use tags for storing |
397 |
-- autorepair data, or make the TAGS_SET opcode not grab an instance's |
398 |
-- locks (if that's deemed safe). In the meantime, we introduce an |
399 |
-- artificial delay in the repair job (via a TestDelay opcode) so that |
400 |
-- once we have the job ID, the TAGS_SET job can complete before the |
401 |
-- repair job actually grabs the locks. (Please note that this is not |
402 |
-- about synchronization, but merely about speeding up the execution of |
403 |
-- the harep tool. If this TestDelay opcode is removed, the program is |
404 |
-- still correct.) |
405 |
let opcodes' = |
406 |
if delay > 0 then |
407 |
OpTestDelay { opDelayDuration = delay |
408 |
, opDelayOnMaster = True |
409 |
, opDelayOnNodes = [] |
410 |
, opDelayRepeat = fromJust $ mkNonNegative 0 |
411 |
} : opcodes |
412 |
else |
413 |
opcodes |
414 |
|
415 |
uuid <- newUUID |
416 |
time <- getClockTime |
417 |
jids <- submitJobs [map wrapOpCode opcodes'] client |
418 |
|
419 |
case jids of |
420 |
Bad e -> exitErr e |
421 |
Ok jids' -> |
422 |
let arState' = ArPendingRepair ( |
423 |
updateTag $ AutoRepairData rtype uuid time jids' Nothing "") |
424 |
instData' = instData { arState = arState' |
425 |
, tagsToRemove = delCurTag instData |
426 |
} |
427 |
in |
428 |
commitChange client instData' -- Adds "pending" label. |
429 |
|
430 |
otherSt -> do |
431 |
putStrLn ("Not repairing " ++ iname ++ " because it's in state " ++ |
432 |
show otherSt) |
433 |
return instData |
434 |
|
435 |
-- | Main function. |
436 |
main :: Options -> [String] -> IO () |
437 |
main opts args = do |
438 |
unless (null args) $ |
439 |
exitErr "this program doesn't take any arguments." |
440 |
|
441 |
luxiDef <- Path.defaultLuxiSocket |
442 |
let master = fromMaybe luxiDef $ optLuxi opts |
443 |
opts' = opts { optLuxi = Just master } |
444 |
|
445 |
(ClusterData _ nl il _ _) <- loadExternalData opts' |
446 |
|
447 |
let iniDataRes = mapM setInitialState $ Container.elems il |
448 |
iniData <- exitIfBad "when parsing auto-repair tags" iniDataRes |
449 |
|
450 |
-- First step: check all pending repairs, see if they are completed. |
451 |
iniData' <- bracket (L.getClient master) L.closeClient $ |
452 |
forM iniData . processPending |
453 |
|
454 |
-- Second step: detect any problems. |
455 |
let repairs = map (detectBroken nl . arInstance) iniData' |
456 |
|
457 |
-- Third step: create repair jobs for broken instances that are in ArHealthy. |
458 |
let maybeRepair c (i, r) = maybe (return i) (repairHealthy c i) r |
459 |
jobDelay = optJobDelay opts |
460 |
repairHealthy c i = case arState i of |
461 |
ArHealthy _ -> doRepair c jobDelay i |
462 |
_ -> const (return i) |
463 |
|
464 |
_unused_repairDone <- bracket (L.getClient master) L.closeClient $ |
465 |
forM (zip iniData' repairs) . maybeRepair |
466 |
|
467 |
return () |