Statistics
| Branch: | Tag: | Revision:

root / src / Ganeti / HTools / Program / Harep.hs @ 2868f3f7

History | View | Annotate | Download (19.8 kB)

1
{-# LANGUAGE TupleSections #-}
2

    
3
{-| Auto-repair tool for Ganeti.
4

    
5
-}
6

    
7
{-
8

    
9
Copyright (C) 2013 Google Inc.
10

    
11
This program is free software; you can redistribute it and/or modify
12
it under the terms of the GNU General Public License as published by
13
the Free Software Foundation; either version 2 of the License, or
14
(at your option) any later version.
15

    
16
This program is distributed in the hope that it will be useful, but
17
WITHOUT ANY WARRANTY; without even the implied warranty of
18
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
General Public License for more details.
20

    
21
You should have received a copy of the GNU General Public License
22
along with this program; if not, write to the Free Software
23
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
24
02110-1301, USA.
25

    
26
-}
27

    
28
module Ganeti.HTools.Program.Harep
29
  ( main
30
  , arguments
31
  , options) where
32

    
33
import Control.Exception (bracket)
34
import Control.Monad
35
import Data.Function
36
import Data.List
37
import Data.Maybe
38
import Data.Ord
39
import System.Time
40
import qualified Data.Map as Map
41

    
42
import Ganeti.BasicTypes
43
import Ganeti.Common
44
import Ganeti.Errors
45
import Ganeti.Jobs
46
import Ganeti.OpCodes
47
import Ganeti.OpParams
48
import Ganeti.Types
49
import Ganeti.Utils
50
import qualified Ganeti.Constants as C
51
import qualified Ganeti.Luxi as L
52
import qualified Ganeti.Path as Path
53

    
54
import Ganeti.HTools.CLI
55
import Ganeti.HTools.Loader
56
import Ganeti.HTools.ExtLoader
57
import Ganeti.HTools.Types
58
import qualified Ganeti.HTools.Container as Container
59
import qualified Ganeti.HTools.Instance as Instance
60
import qualified Ganeti.HTools.Node as Node
61

    
62
-- | Options list and functions.
63
options :: IO [OptType]
64
options = do
65
  luxi <- oLuxiSocket
66
  return
67
    [ luxi
68
    , oJobDelay
69
    ]
70

    
71
arguments :: [ArgCompletion]
72
arguments = []
73

    
74
data InstanceData = InstanceData { arInstance :: Instance.Instance
75
                                 , arState :: AutoRepairStatus
76
                                 , tagsToRemove :: [String]
77
                                 }
78
                    deriving (Eq, Show)
79

    
80
-- | Parse a tag into an 'AutoRepairData' record.
81
--
82
-- @Nothing@ is returned if the tag is not an auto-repair tag, or if it's
83
-- malformed.
84
parseInitTag :: String -> Maybe AutoRepairData
85
parseInitTag tag =
86
  let parsePending = do
87
        subtag <- chompPrefix C.autoRepairTagPending tag
88
        case sepSplit ':' subtag of
89
          [rtype, uuid, ts, jobs] -> makeArData rtype uuid ts jobs
90
          _                       -> fail ("Invalid tag: " ++ show tag)
91

    
92
      parseResult = do
93
        subtag <- chompPrefix C.autoRepairTagResult tag
94
        case sepSplit ':' subtag of
95
          [rtype, uuid, ts, result, jobs] -> do
96
            arData <- makeArData rtype uuid ts jobs
97
            result' <- autoRepairResultFromRaw result
98
            return arData { arResult = Just result' }
99
          _                               -> fail ("Invalid tag: " ++ show tag)
100

    
101
      makeArData rtype uuid ts jobs = do
102
        rtype' <- autoRepairTypeFromRaw rtype
103
        ts' <- tryRead "auto-repair time" ts
104
        jobs' <- mapM makeJobIdS $ sepSplit '+' jobs
105
        return AutoRepairData { arType = rtype'
106
                              , arUuid = uuid
107
                              , arTime = TOD ts' 0
108
                              , arJobs = jobs'
109
                              , arResult = Nothing
110
                              , arTag = tag
111
                              }
112
  in
113
   parsePending `mplus` parseResult
114

    
115
-- | Return the 'AutoRepairData' element of an 'AutoRepairStatus' type.
116
getArData :: AutoRepairStatus -> Maybe AutoRepairData
117
getArData status =
118
  case status of
119
    ArHealthy (Just d) -> Just d
120
    ArFailedRepair  d  -> Just d
121
    ArPendingRepair d  -> Just d
122
    ArNeedsRepair   d  -> Just d
123
    _                  -> Nothing
124

    
125
-- | Return a short name for each auto-repair status.
126
--
127
-- This is a more concise representation of the status, because the default
128
-- "Show" formatting includes all the accompanying auto-repair data.
129
arStateName :: AutoRepairStatus -> String
130
arStateName status =
131
  case status of
132
    ArHealthy _       -> "Healthy"
133
    ArFailedRepair _  -> "Failure"
134
    ArPendingRepair _ -> "Pending repair"
135
    ArNeedsRepair _   -> "Needs repair"
136

    
137
-- | Return a new list of tags to remove that includes @arTag@ if present.
138
delCurTag :: InstanceData -> [String]
139
delCurTag instData =
140
  let arData = getArData $ arState instData
141
      rmTags = tagsToRemove instData
142
  in
143
   case arData of
144
     Just d  -> arTag d : rmTags
145
     Nothing -> rmTags
146

    
147
-- | Set the initial auto-repair state of an instance from its auto-repair tags.
148
--
149
-- The rules when there are multiple tags is:
150
--
151
--   * the earliest failure result always wins
152
--
153
--   * two or more pending repairs results in a fatal error
154
--
155
--   * a pending result from id X and a success result from id Y result in error
156
--     if Y is newer than X
157
--
158
--   * if there are no pending repairs, the newest success result wins,
159
--     otherwise the pending result is used.
160
setInitialState :: Instance.Instance -> Result InstanceData
161
setInitialState inst =
162
  let arData = mapMaybe parseInitTag $ Instance.allTags inst
163
      -- Group all the AutoRepairData records by id (i.e. by repair task), and
164
      -- present them from oldest to newest.
165
      arData' = sortBy (comparing arUuid) arData
166
      arGroups = groupBy ((==) `on` arUuid) arData'
167
      arGroups' = sortBy (comparing $ minimum . map arTime) arGroups
168
  in
169
   foldM arStatusCmp (InstanceData inst (ArHealthy Nothing) []) arGroups'
170

    
171
-- | Update the initial status of an instance with new repair task tags.
172
--
173
-- This function gets called once per repair group in an instance's tag, and it
174
-- determines whether to set the status of the instance according to this new
175
-- group, or to keep the existing state. See the documentation for
176
-- 'setInitialState' for the rules to be followed when determining this.
177
arStatusCmp :: InstanceData -> [AutoRepairData] -> Result InstanceData
178
arStatusCmp instData arData =
179
  let curSt = arState instData
180
      arData' = sortBy (comparing keyfn) arData
181
      keyfn d = (arResult d, arTime d)
182
      newData = last arData'
183
      newSt = case arResult newData of
184
                Just ArSuccess -> ArHealthy $ Just newData
185
                Just ArEnoperm -> ArHealthy $ Just newData
186
                Just ArFailure -> ArFailedRepair newData
187
                Nothing        -> ArPendingRepair newData
188
  in
189
   case curSt of
190
     ArFailedRepair _ -> Ok instData  -- Always keep the earliest failure.
191
     ArHealthy _      -> Ok instData { arState = newSt
192
                                     , tagsToRemove = delCurTag instData
193
                                     }
194
     ArPendingRepair d -> Bad (
195
       "An unfinished repair was found in instance " ++
196
       Instance.name (arInstance instData) ++ ": found tag " ++
197
       show (arTag newData) ++ ", but older pending tag " ++
198
       show (arTag d) ++ "exists.")
199

    
200
     ArNeedsRepair _ -> Bad
201
       "programming error: ArNeedsRepair found as an initial state"
202

    
203
-- | Query jobs of a pending repair, returning the new instance data.
204
processPending :: L.Client -> InstanceData -> IO InstanceData
205
processPending client instData =
206
  case arState instData of
207
    (ArPendingRepair arData) -> do
208
      sts <- L.queryJobsStatus client $ arJobs arData
209
      time <- getClockTime
210
      case sts of
211
        Bad e -> exitErr $ "could not check job status: " ++ formatError e
212
        Ok sts' ->
213
          if any (<= JOB_STATUS_RUNNING) sts' then
214
            return instData -- (no change)
215
          else do
216
            let iname = Instance.name $ arInstance instData
217
                srcSt = arStateName $ arState instData
218
                destSt = arStateName arState'
219
            putStrLn ("Moving " ++ iname ++ " from " ++ show srcSt ++ " to " ++
220
                      show destSt)
221
            commitChange client instData'
222
          where
223
            instData' =
224
              instData { arState = arState'
225
                       , tagsToRemove = delCurTag instData
226
                       }
227
            arState' =
228
              if all (== JOB_STATUS_SUCCESS) sts' then
229
                ArHealthy $ Just (updateTag $ arData { arResult = Just ArSuccess
230
                                                     , arTime = time })
231
              else
232
                ArFailedRepair (updateTag $ arData { arResult = Just ArFailure
233
                                                   , arTime = time })
234

    
235
    _ -> return instData
236

    
237
-- | Update the tag of an 'AutoRepairData' record to match all the other fields.
238
updateTag :: AutoRepairData -> AutoRepairData
239
updateTag arData =
240
  let ini = [autoRepairTypeToRaw $ arType arData,
241
             arUuid arData,
242
             clockTimeToString $ arTime arData]
243
      end = [intercalate "+" . map (show . fromJobId) $ arJobs arData]
244
      (pfx, middle) =
245
         case arResult arData of
246
          Nothing -> (C.autoRepairTagPending, [])
247
          Just rs -> (C.autoRepairTagResult, [autoRepairResultToRaw rs])
248
  in
249
   arData { arTag = pfx ++ intercalate ":" (ini ++ middle ++ end) }
250

    
251
-- | Apply and remove tags from an instance as indicated by 'InstanceData'.
252
--
253
-- If the /arState/ of the /InstanceData/ record has an associated
254
-- 'AutoRepairData', add its tag to the instance object. Additionally, if
255
-- /tagsToRemove/ is not empty, remove those tags from the instance object. The
256
-- returned /InstanceData/ object always has an empty /tagsToRemove/.
257
commitChange :: L.Client -> InstanceData -> IO InstanceData
258
commitChange client instData = do
259
  let iname = Instance.name $ arInstance instData
260
      arData = getArData $ arState instData
261
      rmTags = tagsToRemove instData
262
      execJobsWaitOk' opcodes = do
263
        res <- execJobsWaitOk [map wrapOpCode opcodes] client
264
        case res of
265
          Ok _ -> return ()
266
          Bad e -> exitErr e
267

    
268
  when (isJust arData) $ do
269
    let tag = arTag $ fromJust arData
270
    putStrLn (">>> Adding the following tag to " ++ iname ++ ":\n" ++ show tag)
271
    execJobsWaitOk' [OpTagsSet TagKindInstance [tag] (Just iname)]
272

    
273
  unless (null rmTags) $ do
274
    putStr (">>> Removing the following tags from " ++ iname ++ ":\n" ++
275
            unlines (map show rmTags))
276
    execJobsWaitOk' [OpTagsDel TagKindInstance rmTags (Just iname)]
277

    
278
  return instData { tagsToRemove = [] }
279

    
280
-- | Detect brokenness with an instance and suggest repair type and jobs to run.
281
detectBroken :: Node.List -> Instance.Instance
282
             -> Maybe (AutoRepairType, [OpCode])
283
detectBroken nl inst =
284
  let disk = Instance.diskTemplate inst
285
      iname = Instance.name inst
286
      offPri = Node.offline $ Container.find (Instance.pNode inst) nl
287
      offSec = Node.offline $ Container.find (Instance.sNode inst) nl
288
  in
289
   case disk of
290
     DTDrbd8
291
       | offPri && offSec ->
292
         Just (
293
           ArReinstall,
294
           [ OpInstanceRecreateDisks { opInstanceName = iname
295
                                     , opInstanceUuid = Nothing
296
                                     , opRecreateDisksInfo = RecreateDisksAll
297
                                     , opNodes = []
298
                                       -- FIXME: there should be a better way to
299
                                       -- specify opcode parameters than abusing
300
                                       -- mkNonEmpty in this way (using the fact
301
                                       -- that Maybe is used both for optional
302
                                       -- fields, and to express failure).
303
                                     , opNodeUuids = Nothing
304
                                     , opIallocator = mkNonEmpty "hail"
305
                                     }
306
           , OpInstanceReinstall { opInstanceName = iname
307
                                 , opInstanceUuid = Nothing
308
                                 , opOsType = Nothing
309
                                 , opTempOsParams = Nothing
310
                                 , opOsparamsPrivate = Nothing
311
                                 , opOsparamsSecret = Nothing
312
                                 , opForceVariant = False
313
                                 }
314
           ])
315
       | offPri ->
316
         Just (
317
           ArFailover,
318
           [ OpInstanceFailover { opInstanceName = iname
319
                                , opInstanceUuid = Nothing
320
                                  -- FIXME: ditto, see above.
321
                                , opShutdownTimeout = fromJust $ mkNonNegative
322
                                                      C.defaultShutdownTimeout
323
                                , opIgnoreConsistency = False
324
                                , opTargetNode = Nothing
325
                                , opTargetNodeUuid = Nothing
326
                                , opIgnoreIpolicy = False
327
                                , opIallocator = Nothing
328
                                , opMigrationCleanup = False
329
                                }
330
           ])
331
       | offSec ->
332
         Just (
333
           ArFixStorage,
334
           [ OpInstanceReplaceDisks { opInstanceName = iname
335
                                    , opInstanceUuid = Nothing
336
                                    , opReplaceDisksMode = ReplaceNewSecondary
337
                                    , opReplaceDisksList = []
338
                                    , opRemoteNode = Nothing
339
                                      -- FIXME: ditto, see above.
340
                                    , opRemoteNodeUuid = Nothing
341
                                    , opIallocator = mkNonEmpty "hail"
342
                                    , opEarlyRelease = False
343
                                    , opIgnoreIpolicy = False
344
                                    }
345
            ])
346
       | otherwise -> Nothing
347

    
348
     DTPlain
349
       | offPri ->
350
         Just (
351
           ArReinstall,
352
           [ OpInstanceRecreateDisks { opInstanceName = iname
353
                                     , opInstanceUuid = Nothing
354
                                     , opRecreateDisksInfo = RecreateDisksAll
355
                                     , opNodes = []
356
                                       -- FIXME: ditto, see above.
357
                                     , opNodeUuids = Nothing
358
                                     , opIallocator = mkNonEmpty "hail"
359
                                     }
360
           , OpInstanceReinstall { opInstanceName = iname
361
                                 , opInstanceUuid = Nothing
362
                                 , opOsType = Nothing
363
                                 , opTempOsParams = Nothing
364
                                 , opOsparamsPrivate = Nothing
365
                                 , opOsparamsSecret = Nothing
366
                                 , opForceVariant = False
367
                                 }
368
           ])
369
       | otherwise -> Nothing
370

    
371
     _ -> Nothing  -- Other cases are unimplemented for now: DTDiskless,
372
                   -- DTFile, DTSharedFile, DTBlock, DTRbd, DTExt.
373

    
374
-- | Perform the suggested repair on an instance if its policy allows it.
375
doRepair :: L.Client     -- ^ The Luxi client
376
         -> Double       -- ^ Delay to insert before the first repair opcode
377
         -> InstanceData -- ^ The instance data
378
         -> (AutoRepairType, [OpCode]) -- ^ The repair job to perform
379
         -> IO InstanceData -- ^ The updated instance data
380
doRepair client delay instData (rtype, opcodes) =
381
  let inst = arInstance instData
382
      ipol = Instance.arPolicy inst
383
      iname = Instance.name inst
384
  in
385
  case ipol of
386
    ArEnabled maxtype ->
387
      if rtype > maxtype then do
388
        uuid <- newUUID
389
        time <- getClockTime
390

    
391
        let arState' = ArNeedsRepair (
392
              updateTag $ AutoRepairData rtype uuid time [] (Just ArEnoperm) "")
393
            instData' = instData { arState = arState'
394
                                 , tagsToRemove = delCurTag instData
395
                                 }
396

    
397
        putStrLn ("Not performing a repair of type " ++ show rtype ++ " on " ++
398
          iname ++ " because only repairs up to " ++ show maxtype ++
399
          " are allowed")
400
        commitChange client instData'  -- Adds "enoperm" result label.
401
      else do
402
        putStrLn ("Executing " ++ show rtype ++ " repair on " ++ iname)
403

    
404
        -- After submitting the job, we must write an autorepair:pending tag,
405
        -- that includes the repair job IDs so that they can be checked later.
406
        -- One problem we run into is that the repair job immediately grabs
407
        -- locks for the affected instance, and the subsequent TAGS_SET job is
408
        -- blocked, introducing an unnecessary delay for the end-user. One
409
        -- alternative would be not to wait for the completion of the TAGS_SET
410
        -- job, contrary to what commitChange normally does; but we insist on
411
        -- waiting for the tag to be set so as to abort in case of failure,
412
        -- because the cluster is left in an invalid state in that case.
413
        --
414
        -- The proper solution (in 2.9+) would be not to use tags for storing
415
        -- autorepair data, or make the TAGS_SET opcode not grab an instance's
416
        -- locks (if that's deemed safe). In the meantime, we introduce an
417
        -- artificial delay in the repair job (via a TestDelay opcode) so that
418
        -- once we have the job ID, the TAGS_SET job can complete before the
419
        -- repair job actually grabs the locks. (Please note that this is not
420
        -- about synchronization, but merely about speeding up the execution of
421
        -- the harep tool. If this TestDelay opcode is removed, the program is
422
        -- still correct.)
423
        let opcodes' =
424
              if delay > 0 then
425
                OpTestDelay { opDelayDuration = delay
426
                            , opDelayOnMaster = True
427
                            , opDelayOnNodes = []
428
                            , opDelayOnNodeUuids = Nothing
429
                            , opDelayRepeat = fromJust $ mkNonNegative 0
430
                            , opDelayInterruptible = False
431
                            } : opcodes
432
              else
433
                opcodes
434

    
435
        uuid <- newUUID
436
        time <- getClockTime
437
        jids <- submitJobs [map wrapOpCode opcodes'] client
438

    
439
        case jids of
440
          Bad e    -> exitErr e
441
          Ok jids' ->
442
            let arState' = ArPendingRepair (
443
                  updateTag $ AutoRepairData rtype uuid time jids' Nothing "")
444
                instData' = instData { arState = arState'
445
                                     , tagsToRemove = delCurTag instData
446
                                     }
447
            in
448
             commitChange client instData'  -- Adds "pending" label.
449

    
450
    otherSt -> do
451
      putStrLn ("Not repairing " ++ iname ++ " because it's in state " ++
452
                show otherSt)
453
      return instData
454

    
455
-- | Main function.
456
main :: Options -> [String] -> IO ()
457
main opts args = do
458
  unless (null args) $
459
    exitErr "this program doesn't take any arguments."
460

    
461
  luxiDef <- Path.defaultMasterSocket
462
  let master = fromMaybe luxiDef $ optLuxi opts
463
      opts' = opts { optLuxi = Just master }
464

    
465
  (ClusterData _ nl il _ _) <- loadExternalData opts'
466

    
467
  let iniDataRes = mapM setInitialState $ Container.elems il
468
  iniData <- exitIfBad "when parsing auto-repair tags" iniDataRes
469

    
470
  -- First step: check all pending repairs, see if they are completed.
471
  iniData' <- bracket (L.getLuxiClient master) L.closeClient $
472
              forM iniData . processPending
473

    
474
  -- Second step: detect any problems.
475
  let repairs = map (detectBroken nl . arInstance) iniData'
476

    
477
  -- Third step: create repair jobs for broken instances that are in ArHealthy.
478
  let maybeRepair c (i, r) = maybe (return i) (repairHealthy c i) r
479
      jobDelay = optJobDelay opts
480
      repairHealthy c i = case arState i of
481
                            ArHealthy _ -> doRepair c jobDelay i
482
                            _           -> const (return i)
483

    
484
  repairDone <- bracket (L.getLuxiClient master) L.closeClient $
485
                forM (zip iniData' repairs) . maybeRepair
486

    
487
  -- Print some stats and exit.
488
  let states = map ((, 1 :: Int) . arStateName . arState) repairDone
489
      counts = Map.fromListWith (+) states
490

    
491
  putStrLn "---------------------"
492
  putStrLn "Instance status count"
493
  putStrLn "---------------------"
494
  putStr . unlines . Map.elems $
495
    Map.mapWithKey (\k v -> k ++ ": " ++ show v) counts