root / src / Ganeti / HTools / Program / Harep.hs @ 2a58a7b1
History | View | Annotate | Download (19.5 kB)
1 |
{-# LANGUAGE TupleSections #-} |
---|---|
2 |
|
3 |
{-| Auto-repair tool for Ganeti. |
4 |
|
5 |
-} |
6 |
|
7 |
{- |
8 |
|
9 |
Copyright (C) 2013 Google Inc. |
10 |
|
11 |
This program is free software; you can redistribute it and/or modify |
12 |
it under the terms of the GNU General Public License as published by |
13 |
the Free Software Foundation; either version 2 of the License, or |
14 |
(at your option) any later version. |
15 |
|
16 |
This program is distributed in the hope that it will be useful, but |
17 |
WITHOUT ANY WARRANTY; without even the implied warranty of |
18 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
19 |
General Public License for more details. |
20 |
|
21 |
You should have received a copy of the GNU General Public License |
22 |
along with this program; if not, write to the Free Software |
23 |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
24 |
02110-1301, USA. |
25 |
|
26 |
-} |
27 |
|
28 |
module Ganeti.HTools.Program.Harep |
29 |
( main |
30 |
, arguments |
31 |
, options) where |
32 |
|
33 |
import Control.Exception (bracket) |
34 |
import Control.Monad |
35 |
import Data.Function |
36 |
import Data.List |
37 |
import Data.Maybe |
38 |
import Data.Ord |
39 |
import System.Time |
40 |
import qualified Data.Map as Map |
41 |
|
42 |
import Ganeti.BasicTypes |
43 |
import Ganeti.Common |
44 |
import Ganeti.Errors |
45 |
import Ganeti.Jobs |
46 |
import Ganeti.OpCodes |
47 |
import Ganeti.OpParams |
48 |
import Ganeti.Types |
49 |
import Ganeti.Utils |
50 |
import qualified Ganeti.Constants as C |
51 |
import qualified Ganeti.Luxi as L |
52 |
import qualified Ganeti.Path as Path |
53 |
|
54 |
import Ganeti.HTools.CLI |
55 |
import Ganeti.HTools.Loader |
56 |
import Ganeti.HTools.ExtLoader |
57 |
import Ganeti.HTools.Types |
58 |
import qualified Ganeti.HTools.Container as Container |
59 |
import qualified Ganeti.HTools.Instance as Instance |
60 |
import qualified Ganeti.HTools.Node as Node |
61 |
|
62 |
-- | Options list and functions. |
63 |
options :: IO [OptType] |
64 |
options = do |
65 |
luxi <- oLuxiSocket |
66 |
return |
67 |
[ luxi |
68 |
, oJobDelay |
69 |
] |
70 |
|
71 |
arguments :: [ArgCompletion] |
72 |
arguments = [] |
73 |
|
74 |
data InstanceData = InstanceData { arInstance :: Instance.Instance |
75 |
, arState :: AutoRepairStatus |
76 |
, tagsToRemove :: [String] |
77 |
} |
78 |
deriving (Eq, Show) |
79 |
|
80 |
-- | Parse a tag into an 'AutoRepairData' record. |
81 |
-- |
82 |
-- @Nothing@ is returned if the tag is not an auto-repair tag, or if it's |
83 |
-- malformed. |
84 |
parseInitTag :: String -> Maybe AutoRepairData |
85 |
parseInitTag tag = |
86 |
let parsePending = do |
87 |
subtag <- chompPrefix C.autoRepairTagPending tag |
88 |
case sepSplit ':' subtag of |
89 |
[rtype, uuid, ts, jobs] -> makeArData rtype uuid ts jobs |
90 |
_ -> fail ("Invalid tag: " ++ show tag) |
91 |
|
92 |
parseResult = do |
93 |
subtag <- chompPrefix C.autoRepairTagResult tag |
94 |
case sepSplit ':' subtag of |
95 |
[rtype, uuid, ts, result, jobs] -> do |
96 |
arData <- makeArData rtype uuid ts jobs |
97 |
result' <- autoRepairResultFromRaw result |
98 |
return arData { arResult = Just result' } |
99 |
_ -> fail ("Invalid tag: " ++ show tag) |
100 |
|
101 |
makeArData rtype uuid ts jobs = do |
102 |
rtype' <- autoRepairTypeFromRaw rtype |
103 |
ts' <- tryRead "auto-repair time" ts |
104 |
jobs' <- mapM makeJobIdS $ sepSplit '+' jobs |
105 |
return AutoRepairData { arType = rtype' |
106 |
, arUuid = uuid |
107 |
, arTime = TOD ts' 0 |
108 |
, arJobs = jobs' |
109 |
, arResult = Nothing |
110 |
, arTag = tag |
111 |
} |
112 |
in |
113 |
parsePending `mplus` parseResult |
114 |
|
115 |
-- | Return the 'AutoRepairData' element of an 'AutoRepairStatus' type. |
116 |
getArData :: AutoRepairStatus -> Maybe AutoRepairData |
117 |
getArData status = |
118 |
case status of |
119 |
ArHealthy (Just d) -> Just d |
120 |
ArFailedRepair d -> Just d |
121 |
ArPendingRepair d -> Just d |
122 |
ArNeedsRepair d -> Just d |
123 |
_ -> Nothing |
124 |
|
125 |
-- | Return a short name for each auto-repair status. |
126 |
-- |
127 |
-- This is a more concise representation of the status, because the default |
128 |
-- "Show" formatting includes all the accompanying auto-repair data. |
129 |
arStateName :: AutoRepairStatus -> String |
130 |
arStateName status = |
131 |
case status of |
132 |
ArHealthy _ -> "Healthy" |
133 |
ArFailedRepair _ -> "Failure" |
134 |
ArPendingRepair _ -> "Pending repair" |
135 |
ArNeedsRepair _ -> "Needs repair" |
136 |
|
137 |
-- | Return a new list of tags to remove that includes @arTag@ if present. |
138 |
delCurTag :: InstanceData -> [String] |
139 |
delCurTag instData = |
140 |
let arData = getArData $ arState instData |
141 |
rmTags = tagsToRemove instData |
142 |
in |
143 |
case arData of |
144 |
Just d -> arTag d : rmTags |
145 |
Nothing -> rmTags |
146 |
|
147 |
-- | Set the initial auto-repair state of an instance from its auto-repair tags. |
148 |
-- |
149 |
-- The rules when there are multiple tags is: |
150 |
-- |
151 |
-- * the earliest failure result always wins |
152 |
-- |
153 |
-- * two or more pending repairs results in a fatal error |
154 |
-- |
155 |
-- * a pending result from id X and a success result from id Y result in error |
156 |
-- if Y is newer than X |
157 |
-- |
158 |
-- * if there are no pending repairs, the newest success result wins, |
159 |
-- otherwise the pending result is used. |
160 |
setInitialState :: Instance.Instance -> Result InstanceData |
161 |
setInitialState inst = |
162 |
let arData = mapMaybe parseInitTag $ Instance.allTags inst |
163 |
-- Group all the AutoRepairData records by id (i.e. by repair task), and |
164 |
-- present them from oldest to newest. |
165 |
arData' = sortBy (comparing arUuid) arData |
166 |
arGroups = groupBy ((==) `on` arUuid) arData' |
167 |
arGroups' = sortBy (comparing $ minimum . map arTime) arGroups |
168 |
in |
169 |
foldM arStatusCmp (InstanceData inst (ArHealthy Nothing) []) arGroups' |
170 |
|
171 |
-- | Update the initial status of an instance with new repair task tags. |
172 |
-- |
173 |
-- This function gets called once per repair group in an instance's tag, and it |
174 |
-- determines whether to set the status of the instance according to this new |
175 |
-- group, or to keep the existing state. See the documentation for |
176 |
-- 'setInitialState' for the rules to be followed when determining this. |
177 |
arStatusCmp :: InstanceData -> [AutoRepairData] -> Result InstanceData |
178 |
arStatusCmp instData arData = |
179 |
let curSt = arState instData |
180 |
arData' = sortBy (comparing keyfn) arData |
181 |
keyfn d = (arResult d, arTime d) |
182 |
newData = last arData' |
183 |
newSt = case arResult newData of |
184 |
Just ArSuccess -> ArHealthy $ Just newData |
185 |
Just ArEnoperm -> ArHealthy $ Just newData |
186 |
Just ArFailure -> ArFailedRepair newData |
187 |
Nothing -> ArPendingRepair newData |
188 |
in |
189 |
case curSt of |
190 |
ArFailedRepair _ -> Ok instData -- Always keep the earliest failure. |
191 |
ArHealthy _ -> Ok instData { arState = newSt |
192 |
, tagsToRemove = delCurTag instData |
193 |
} |
194 |
ArPendingRepair d -> Bad ( |
195 |
"An unfinished repair was found in instance " ++ |
196 |
Instance.name (arInstance instData) ++ ": found tag " ++ |
197 |
show (arTag newData) ++ ", but older pending tag " ++ |
198 |
show (arTag d) ++ "exists.") |
199 |
|
200 |
ArNeedsRepair _ -> Bad |
201 |
"programming error: ArNeedsRepair found as an initial state" |
202 |
|
203 |
-- | Query jobs of a pending repair, returning the new instance data. |
204 |
processPending :: L.Client -> InstanceData -> IO InstanceData |
205 |
processPending client instData = |
206 |
case arState instData of |
207 |
(ArPendingRepair arData) -> do |
208 |
sts <- L.queryJobsStatus client $ arJobs arData |
209 |
time <- getClockTime |
210 |
case sts of |
211 |
Bad e -> exitErr $ "could not check job status: " ++ formatError e |
212 |
Ok sts' -> |
213 |
if any (<= JOB_STATUS_RUNNING) sts' then |
214 |
return instData -- (no change) |
215 |
else do |
216 |
let iname = Instance.name $ arInstance instData |
217 |
srcSt = arStateName $ arState instData |
218 |
destSt = arStateName arState' |
219 |
putStrLn ("Moving " ++ iname ++ " from " ++ show srcSt ++ " to " ++ |
220 |
show destSt) |
221 |
commitChange client instData' |
222 |
where |
223 |
instData' = |
224 |
instData { arState = arState' |
225 |
, tagsToRemove = delCurTag instData |
226 |
} |
227 |
arState' = |
228 |
if all (== JOB_STATUS_SUCCESS) sts' then |
229 |
ArHealthy $ Just (updateTag $ arData { arResult = Just ArSuccess |
230 |
, arTime = time }) |
231 |
else |
232 |
ArFailedRepair (updateTag $ arData { arResult = Just ArFailure |
233 |
, arTime = time }) |
234 |
|
235 |
_ -> return instData |
236 |
|
237 |
-- | Update the tag of an 'AutoRepairData' record to match all the other fields. |
238 |
updateTag :: AutoRepairData -> AutoRepairData |
239 |
updateTag arData = |
240 |
let ini = [autoRepairTypeToRaw $ arType arData, |
241 |
arUuid arData, |
242 |
clockTimeToString $ arTime arData] |
243 |
end = [intercalate "+" . map (show . fromJobId) $ arJobs arData] |
244 |
(pfx, middle) = |
245 |
case arResult arData of |
246 |
Nothing -> (C.autoRepairTagPending, []) |
247 |
Just rs -> (C.autoRepairTagResult, [autoRepairResultToRaw rs]) |
248 |
in |
249 |
arData { arTag = pfx ++ intercalate ":" (ini ++ middle ++ end) } |
250 |
|
251 |
-- | Apply and remove tags from an instance as indicated by 'InstanceData'. |
252 |
-- |
253 |
-- If the /arState/ of the /InstanceData/ record has an associated |
254 |
-- 'AutoRepairData', add its tag to the instance object. Additionally, if |
255 |
-- /tagsToRemove/ is not empty, remove those tags from the instance object. The |
256 |
-- returned /InstanceData/ object always has an empty /tagsToRemove/. |
257 |
commitChange :: L.Client -> InstanceData -> IO InstanceData |
258 |
commitChange client instData = do |
259 |
let iname = Instance.name $ arInstance instData |
260 |
arData = getArData $ arState instData |
261 |
rmTags = tagsToRemove instData |
262 |
execJobsWaitOk' opcodes = do |
263 |
res <- execJobsWaitOk [map wrapOpCode opcodes] client |
264 |
case res of |
265 |
Ok _ -> return () |
266 |
Bad e -> exitErr e |
267 |
|
268 |
when (isJust arData) $ do |
269 |
let tag = arTag $ fromJust arData |
270 |
putStrLn (">>> Adding the following tag to " ++ iname ++ ":\n" ++ show tag) |
271 |
execJobsWaitOk' [OpTagsSet TagKindInstance [tag] (Just iname)] |
272 |
|
273 |
unless (null rmTags) $ do |
274 |
putStr (">>> Removing the following tags from " ++ iname ++ ":\n" ++ |
275 |
unlines (map show rmTags)) |
276 |
execJobsWaitOk' [OpTagsDel TagKindInstance rmTags (Just iname)] |
277 |
|
278 |
return instData { tagsToRemove = [] } |
279 |
|
280 |
-- | Detect brokenness with an instance and suggest repair type and jobs to run. |
281 |
detectBroken :: Node.List -> Instance.Instance |
282 |
-> Maybe (AutoRepairType, [OpCode]) |
283 |
detectBroken nl inst = |
284 |
let disk = Instance.diskTemplate inst |
285 |
iname = Instance.name inst |
286 |
offPri = Node.offline $ Container.find (Instance.pNode inst) nl |
287 |
offSec = Node.offline $ Container.find (Instance.sNode inst) nl |
288 |
in |
289 |
case disk of |
290 |
DTDrbd8 |
291 |
| offPri && offSec -> |
292 |
Just ( |
293 |
ArReinstall, |
294 |
[ OpInstanceRecreateDisks { opInstanceName = iname |
295 |
, opInstanceUuid = Nothing |
296 |
, opRecreateDisksInfo = RecreateDisksAll |
297 |
, opNodes = [] |
298 |
-- FIXME: there should be a better way to |
299 |
-- specify opcode parameters than abusing |
300 |
-- mkNonEmpty in this way (using the fact |
301 |
-- that Maybe is used both for optional |
302 |
-- fields, and to express failure). |
303 |
, opNodeUuids = Nothing |
304 |
, opIallocator = mkNonEmpty "hail" |
305 |
} |
306 |
, OpInstanceReinstall { opInstanceName = iname |
307 |
, opInstanceUuid = Nothing |
308 |
, opOsType = Nothing |
309 |
, opTempOsParams = Nothing |
310 |
, opForceVariant = False |
311 |
} |
312 |
]) |
313 |
| offPri -> |
314 |
Just ( |
315 |
ArFailover, |
316 |
[ OpInstanceFailover { opInstanceName = iname |
317 |
, opInstanceUuid = Nothing |
318 |
-- FIXME: ditto, see above. |
319 |
, opShutdownTimeout = fromJust $ mkNonNegative |
320 |
C.defaultShutdownTimeout |
321 |
, opIgnoreConsistency = False |
322 |
, opTargetNode = Nothing |
323 |
, opTargetNodeUuid = Nothing |
324 |
, opIgnoreIpolicy = False |
325 |
, opIallocator = Nothing |
326 |
, opMigrationCleanup = False |
327 |
} |
328 |
]) |
329 |
| offSec -> |
330 |
Just ( |
331 |
ArFixStorage, |
332 |
[ OpInstanceReplaceDisks { opInstanceName = iname |
333 |
, opInstanceUuid = Nothing |
334 |
, opReplaceDisksMode = ReplaceNewSecondary |
335 |
, opReplaceDisksList = [] |
336 |
, opRemoteNode = Nothing |
337 |
-- FIXME: ditto, see above. |
338 |
, opRemoteNodeUuid = Nothing |
339 |
, opIallocator = mkNonEmpty "hail" |
340 |
, opEarlyRelease = False |
341 |
, opIgnoreIpolicy = False |
342 |
} |
343 |
]) |
344 |
| otherwise -> Nothing |
345 |
|
346 |
DTPlain |
347 |
| offPri -> |
348 |
Just ( |
349 |
ArReinstall, |
350 |
[ OpInstanceRecreateDisks { opInstanceName = iname |
351 |
, opInstanceUuid = Nothing |
352 |
, opRecreateDisksInfo = RecreateDisksAll |
353 |
, opNodes = [] |
354 |
-- FIXME: ditto, see above. |
355 |
, opNodeUuids = Nothing |
356 |
, opIallocator = mkNonEmpty "hail" |
357 |
} |
358 |
, OpInstanceReinstall { opInstanceName = iname |
359 |
, opInstanceUuid = Nothing |
360 |
, opOsType = Nothing |
361 |
, opTempOsParams = Nothing |
362 |
, opForceVariant = False |
363 |
} |
364 |
]) |
365 |
| otherwise -> Nothing |
366 |
|
367 |
_ -> Nothing -- Other cases are unimplemented for now: DTDiskless, |
368 |
-- DTFile, DTSharedFile, DTBlock, DTRbd, DTExt. |
369 |
|
370 |
-- | Perform the suggested repair on an instance if its policy allows it. |
371 |
doRepair :: L.Client -- ^ The Luxi client |
372 |
-> Double -- ^ Delay to insert before the first repair opcode |
373 |
-> InstanceData -- ^ The instance data |
374 |
-> (AutoRepairType, [OpCode]) -- ^ The repair job to perform |
375 |
-> IO InstanceData -- ^ The updated instance data |
376 |
doRepair client delay instData (rtype, opcodes) = |
377 |
let inst = arInstance instData |
378 |
ipol = Instance.arPolicy inst |
379 |
iname = Instance.name inst |
380 |
in |
381 |
case ipol of |
382 |
ArEnabled maxtype -> |
383 |
if rtype > maxtype then do |
384 |
uuid <- newUUID |
385 |
time <- getClockTime |
386 |
|
387 |
let arState' = ArNeedsRepair ( |
388 |
updateTag $ AutoRepairData rtype uuid time [] (Just ArEnoperm) "") |
389 |
instData' = instData { arState = arState' |
390 |
, tagsToRemove = delCurTag instData |
391 |
} |
392 |
|
393 |
putStrLn ("Not performing a repair of type " ++ show rtype ++ " on " ++ |
394 |
iname ++ " because only repairs up to " ++ show maxtype ++ |
395 |
" are allowed") |
396 |
commitChange client instData' -- Adds "enoperm" result label. |
397 |
else do |
398 |
putStrLn ("Executing " ++ show rtype ++ " repair on " ++ iname) |
399 |
|
400 |
-- After submitting the job, we must write an autorepair:pending tag, |
401 |
-- that includes the repair job IDs so that they can be checked later. |
402 |
-- One problem we run into is that the repair job immediately grabs |
403 |
-- locks for the affected instance, and the subsequent TAGS_SET job is |
404 |
-- blocked, introducing an unnecessary delay for the end-user. One |
405 |
-- alternative would be not to wait for the completion of the TAGS_SET |
406 |
-- job, contrary to what commitChange normally does; but we insist on |
407 |
-- waiting for the tag to be set so as to abort in case of failure, |
408 |
-- because the cluster is left in an invalid state in that case. |
409 |
-- |
410 |
-- The proper solution (in 2.9+) would be not to use tags for storing |
411 |
-- autorepair data, or make the TAGS_SET opcode not grab an instance's |
412 |
-- locks (if that's deemed safe). In the meantime, we introduce an |
413 |
-- artificial delay in the repair job (via a TestDelay opcode) so that |
414 |
-- once we have the job ID, the TAGS_SET job can complete before the |
415 |
-- repair job actually grabs the locks. (Please note that this is not |
416 |
-- about synchronization, but merely about speeding up the execution of |
417 |
-- the harep tool. If this TestDelay opcode is removed, the program is |
418 |
-- still correct.) |
419 |
let opcodes' = |
420 |
if delay > 0 then |
421 |
OpTestDelay { opDelayDuration = delay |
422 |
, opDelayOnMaster = True |
423 |
, opDelayOnNodes = [] |
424 |
, opDelayOnNodeUuids = Nothing |
425 |
, opDelayRepeat = fromJust $ mkNonNegative 0 |
426 |
} : opcodes |
427 |
else |
428 |
opcodes |
429 |
|
430 |
uuid <- newUUID |
431 |
time <- getClockTime |
432 |
jids <- submitJobs [map wrapOpCode opcodes'] client |
433 |
|
434 |
case jids of |
435 |
Bad e -> exitErr e |
436 |
Ok jids' -> |
437 |
let arState' = ArPendingRepair ( |
438 |
updateTag $ AutoRepairData rtype uuid time jids' Nothing "") |
439 |
instData' = instData { arState = arState' |
440 |
, tagsToRemove = delCurTag instData |
441 |
} |
442 |
in |
443 |
commitChange client instData' -- Adds "pending" label. |
444 |
|
445 |
otherSt -> do |
446 |
putStrLn ("Not repairing " ++ iname ++ " because it's in state " ++ |
447 |
show otherSt) |
448 |
return instData |
449 |
|
450 |
-- | Main function. |
451 |
main :: Options -> [String] -> IO () |
452 |
main opts args = do |
453 |
unless (null args) $ |
454 |
exitErr "this program doesn't take any arguments." |
455 |
|
456 |
luxiDef <- Path.defaultMasterSocket |
457 |
let master = fromMaybe luxiDef $ optLuxi opts |
458 |
opts' = opts { optLuxi = Just master } |
459 |
|
460 |
(ClusterData _ nl il _ _) <- loadExternalData opts' |
461 |
|
462 |
let iniDataRes = mapM setInitialState $ Container.elems il |
463 |
iniData <- exitIfBad "when parsing auto-repair tags" iniDataRes |
464 |
|
465 |
-- First step: check all pending repairs, see if they are completed. |
466 |
iniData' <- bracket (L.getLuxiClient master) L.closeClient $ |
467 |
forM iniData . processPending |
468 |
|
469 |
-- Second step: detect any problems. |
470 |
let repairs = map (detectBroken nl . arInstance) iniData' |
471 |
|
472 |
-- Third step: create repair jobs for broken instances that are in ArHealthy. |
473 |
let maybeRepair c (i, r) = maybe (return i) (repairHealthy c i) r |
474 |
jobDelay = optJobDelay opts |
475 |
repairHealthy c i = case arState i of |
476 |
ArHealthy _ -> doRepair c jobDelay i |
477 |
_ -> const (return i) |
478 |
|
479 |
repairDone <- bracket (L.getLuxiClient master) L.closeClient $ |
480 |
forM (zip iniData' repairs) . maybeRepair |
481 |
|
482 |
-- Print some stats and exit. |
483 |
let states = map ((, 1 :: Int) . arStateName . arState) repairDone |
484 |
counts = Map.fromListWith (+) states |
485 |
|
486 |
putStrLn "---------------------" |
487 |
putStrLn "Instance status count" |
488 |
putStrLn "---------------------" |
489 |
putStr . unlines . Map.elems $ |
490 |
Map.mapWithKey (\k v -> k ++ ": " ++ show v) counts |