1 {-# LANGUAGE BangPatterns #-}
3 {-| Implementation of the Ganeti confd server functionality.
9 Copyright (C) 2011, 2012 Google Inc.
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 module Ganeti.Confd.Server
34 import Control.Concurrent
35 import Control.Exception
36 import Control.Monad (forever, liftM, when)
39 import qualified Data.Map as M
40 import Data.Maybe (fromMaybe)
41 import qualified Network.Socket as S
44 import System.Posix.Files
45 import System.Posix.Types
47 import qualified Text.JSON as J
50 import Ganeti.BasicTypes
55 import Ganeti.Confd.Types
56 import Ganeti.Confd.Utils
60 import qualified Ganeti.Constants as C
61 import qualified Ganeti.Path as Path
62 import Ganeti.Query.Server (prepQueryD, runQueryD)
64 -- * Types and constants definitions
66 -- | What we store as configuration.
67 type CRef = IORef (Result (ConfigData, LinkIpMap))
69 -- | File stat identifier.
70 type FStat = (EpochTime, FileID, FileOffset)
72 -- | Null 'FStat' value.
74 nullFStat = (-1, -1, -1)
76 -- | A small type alias for readability.
77 type StatusAnswer = (ConfdReplyStatus, J.JSValue)
79 -- | Reload model data type.
80 data ReloadModel = ReloadNotify -- ^ We are using notifications
81 | ReloadPoll Int -- ^ We are using polling
84 -- | Server state data type.
85 data ServerState = ServerState
86 { reloadModel :: ReloadModel
87 , reloadTime :: Integer
88 , reloadFStat :: FStat
91 -- | Maximum no-reload poll rounds before reverting to inotify.
92 maxIdlePollRounds :: Int
95 -- | Reload timeout in microseconds.
96 configReloadTimeout :: Int
97 configReloadTimeout = C.confdConfigReloadTimeout * 1000000
99 -- | Ratelimit timeout in microseconds.
100 configReloadRatelimit :: Int
101 configReloadRatelimit = C.confdConfigReloadRatelimit * 1000000
103 -- | Ratelimit timeout in seconds, as an 'Integer'.
104 reloadRatelimitSec :: Integer
105 reloadRatelimitSec = fromIntegral C.confdConfigReloadRatelimit
107 -- | Initial poll round.
108 initialPoll :: ReloadModel
109 initialPoll = ReloadPoll 0
111 -- | Initial server state.
112 initialState :: ServerState
113 initialState = ServerState initialPoll 0 nullFStat
115 -- | Reload status data type.
116 data ConfigReload = ConfigToDate -- ^ No need to reload
117 | ConfigReloaded -- ^ Configuration reloaded
118 | ConfigIOError -- ^ Error during configuration reload
120 -- | Unknown entry standard response.
121 queryUnknownEntry :: StatusAnswer
122 queryUnknownEntry = (ReplyStatusError, J.showJSON ConfdErrorUnknownEntry)
125 -- | Internal error standard response.
126 queryInternalError :: StatusAnswer
127 queryInternalError = (ReplyStatusError, J.showJSON ConfdErrorInternal)
130 -- | Argument error standard response.
131 queryArgumentError :: StatusAnswer
132 queryArgumentError = (ReplyStatusError, J.showJSON ConfdErrorArgument)
134 -- | Returns the current time.
135 getCurrentTime :: IO Integer
137 TOD ctime _ <- getClockTime
140 -- | Converter from specific error to a string format.
141 gntErrorToResult :: ErrorResult a -> Result a
142 gntErrorToResult (Bad err) = Bad (show err)
143 gntErrorToResult (Ok x) = Ok x
145 -- * Confd base functionality
147 -- | Computes the node role.
148 nodeRole :: ConfigData -> String -> Result ConfdNodeRole
150 let cmaster = clusterMasterNode . configCluster $ cfg
151 mnode = M.lookup name . fromContainer . configNodes $ cfg
153 Nothing -> Bad "Node not found"
154 Just node | cmaster == name -> Ok NodeRoleMaster
155 | nodeDrained node -> Ok NodeRoleDrained
156 | nodeOffline node -> Ok NodeRoleOffline
157 | nodeMasterCandidate node -> Ok NodeRoleCandidate
158 _ -> Ok NodeRoleRegular
160 -- | Does an instance ip -> instance -> primary node -> primary ip
162 getNodePipByInstanceIp :: ConfigData
167 getNodePipByInstanceIp cfg linkipmap link instip =
168 case M.lookup instip (M.findWithDefault M.empty link linkipmap) of
169 Nothing -> queryUnknownEntry
171 case getInstPrimaryNode cfg instname of
172 Bad _ -> queryUnknownEntry -- either instance or node not found
173 Ok node -> (ReplyStatusOk, J.showJSON (nodePrimaryIp node))
175 -- | Builds the response to a given query.
176 buildResponse :: (ConfigData, LinkIpMap) -> ConfdRequest -> Result StatusAnswer
177 buildResponse (cfg, _) (ConfdRequest { confdRqType = ReqPing }) =
178 return (ReplyStatusOk, J.showJSON (configVersion cfg))
180 buildResponse cdata req@(ConfdRequest { confdRqType = ReqClusterMaster }) =
181 case confdRqQuery req of
182 EmptyQuery -> return (ReplyStatusOk, J.showJSON master_name)
183 PlainQuery _ -> return queryArgumentError
185 mnode <- gntErrorToResult $ getNode cfg master_name
186 let fvals = map (\field -> case field of
187 ReqFieldName -> master_name
188 ReqFieldIp -> clusterMasterIp cluster
189 ReqFieldMNodePip -> nodePrimaryIp mnode
190 ) (confdReqQFields reqq)
191 return (ReplyStatusOk, J.showJSON fvals)
192 where master_name = clusterMasterNode cluster
193 cluster = configCluster cfg
196 buildResponse cdata req@(ConfdRequest { confdRqType = ReqNodeRoleByName }) = do
197 node_name <- case confdRqQuery req of
198 PlainQuery str -> return str
199 _ -> fail $ "Invalid query type " ++ show (confdRqQuery req)
200 role <- nodeRole (fst cdata) node_name
201 return (ReplyStatusOk, J.showJSON role)
203 buildResponse cdata (ConfdRequest { confdRqType = ReqNodePipList }) =
204 -- note: we use foldlWithKey because that's present accross more
205 -- versions of the library
206 return (ReplyStatusOk, J.showJSON $
207 M.foldlWithKey (\accu _ n -> nodePrimaryIp n:accu) []
208 (fromContainer . configNodes . fst $ cdata))
210 buildResponse cdata (ConfdRequest { confdRqType = ReqMcPipList }) =
211 -- note: we use foldlWithKey because that's present accross more
212 -- versions of the library
213 return (ReplyStatusOk, J.showJSON $
214 M.foldlWithKey (\accu _ n -> if nodeMasterCandidate n
215 then nodePrimaryIp n:accu
217 (fromContainer . configNodes . fst $ cdata))
219 buildResponse (cfg, linkipmap)
220 req@(ConfdRequest { confdRqType = ReqInstIpsList }) = do
221 link <- case confdRqQuery req of
222 PlainQuery str -> return str
223 EmptyQuery -> return (getDefaultNicLink cfg)
224 _ -> fail "Invalid query type"
225 return (ReplyStatusOk, J.showJSON $ getInstancesIpByLink linkipmap link)
227 buildResponse cdata (ConfdRequest { confdRqType = ReqNodePipByInstPip
228 , confdRqQuery = DictQuery query}) =
229 let (cfg, linkipmap) = cdata
230 link = fromMaybe (getDefaultNicLink cfg) (confdReqQLink query)
231 in case confdReqQIp query of
232 Just ip -> return $ getNodePipByInstanceIp cfg linkipmap link ip
233 Nothing -> return (ReplyStatusOk,
235 map (getNodePipByInstanceIp cfg linkipmap link)
236 (confdReqQIpList query))
238 buildResponse _ (ConfdRequest { confdRqType = ReqNodePipByInstPip }) =
239 return queryArgumentError
241 buildResponse cdata req@(ConfdRequest { confdRqType = ReqNodeDrbd }) = do
243 node_name <- case confdRqQuery req of
244 PlainQuery str -> return str
245 _ -> fail $ "Invalid query type " ++ show (confdRqQuery req)
246 node <- gntErrorToResult $ getNode cfg node_name
247 let minors = concatMap (getInstMinorsForNode (nodeName node)) .
248 M.elems . fromContainer . configInstances $ cfg
249 encoded = [J.JSArray [J.showJSON a, J.showJSON b, J.showJSON c,
250 J.showJSON d, J.showJSON e, J.showJSON f] |
251 (a, b, c, d, e, f) <- minors]
252 return (ReplyStatusOk, J.showJSON encoded)
254 -- | Creates a ConfdReply from a given answer.
255 serializeResponse :: Result StatusAnswer -> ConfdReply
256 serializeResponse r =
257 let (status, result) = case r of
258 Bad err -> (ReplyStatusError, J.showJSON err)
259 Ok (code, val) -> (code, val)
260 in ConfdReply { confdReplyProtocol = 1
261 , confdReplyStatus = status
262 , confdReplyAnswer = result
263 , confdReplySerial = 0 }
265 -- * Configuration handling
267 -- ** Helper functions
269 -- | Helper function for logging transition into polling mode.
270 moveToPolling :: String -> INotify -> FilePath -> CRef -> MVar ServerState
272 moveToPolling msg inotify path cref mstate = do
273 logInfo $ "Moving to polling mode: " ++ msg
274 let inotiaction = addNotifier inotify path cref mstate
275 _ <- forkIO $ onReloadTimer inotiaction path cref mstate
278 -- | Helper function for logging transition into inotify mode.
279 moveToNotify :: IO ReloadModel
281 logInfo "Moving to inotify mode"
284 -- ** Configuration loading
286 -- | (Re)loads the configuration.
287 updateConfig :: FilePath -> CRef -> IO ()
288 updateConfig path r = do
289 newcfg <- loadConfig path
290 let !newdata = case newcfg of
291 Ok !cfg -> Ok (cfg, buildLinkIpInstnameMap cfg)
292 Bad _ -> Bad "Cannot load configuration"
295 Ok cfg -> logInfo ("Loaded new config, serial " ++
296 show (configSerial cfg))
297 Bad msg -> logError $ "Failed to load config: " ++ msg
300 -- | Wrapper over 'updateConfig' that handles IO errors.
301 safeUpdateConfig :: FilePath -> FStat -> CRef -> IO (FStat, ConfigReload)
302 safeUpdateConfig path oldfstat cref =
303 Control.Exception.catch
305 nt <- needsReload oldfstat path
307 Nothing -> return (oldfstat, ConfigToDate)
309 updateConfig path cref
310 return (nt', ConfigReloaded)
312 let msg = "Failure during configuration update: " ++
314 writeIORef cref (Bad msg)
315 return (nullFStat, ConfigIOError)
318 -- | Computes the file cache data from a FileStatus structure.
319 buildFileStatus :: FileStatus -> FStat
320 buildFileStatus ofs =
321 let modt = modificationTime ofs
324 in (modt, inum, fsize)
326 -- | Wrapper over 'buildFileStatus'. This reads the data from the
327 -- filesystem and then builds our cache structure.
328 getFStat :: FilePath -> IO FStat
329 getFStat p = liftM buildFileStatus (getFileStatus p)
331 -- | Check if the file needs reloading
332 needsReload :: FStat -> FilePath -> IO (Maybe FStat)
333 needsReload oldstat path = do
334 newstat <- getFStat path
335 return $ if newstat /= oldstat
339 -- ** Watcher threads
342 -- We have three threads/functions that can mutate the server state:
344 -- 1. the long-interval watcher ('onTimeoutTimer')
346 -- 2. the polling watcher ('onReloadTimer')
348 -- 3. the inotify event handler ('onInotify')
350 -- All of these will mutate the server state under 'modifyMVar' or
351 -- 'modifyMVar_', so that server transitions are more or less
352 -- atomic. The inotify handler remains active during polling mode, but
353 -- checks for polling mode and doesn't do anything in this case (this
354 -- check is needed even if we would unregister the event handler due
355 -- to how events are serialised).
357 -- | Long-interval reload watcher.
359 -- This is on top of the inotify-based triggered reload.
360 onTimeoutTimer :: IO Bool -> FilePath -> CRef -> MVar ServerState -> IO ()
361 onTimeoutTimer inotiaction path cref state = do
362 threadDelay configReloadTimeout
363 modifyMVar_ state (onTimeoutInner path cref)
365 onTimeoutTimer inotiaction path cref state
367 -- | Inner onTimeout handler.
369 -- This mutates the server state under a modifyMVar_ call. It never
370 -- changes the reload model, just does a safety reload and tried to
371 -- re-establish the inotify watcher.
372 onTimeoutInner :: FilePath -> CRef -> ServerState -> IO ServerState
373 onTimeoutInner path cref state = do
374 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
375 return state { reloadFStat = newfstat }
377 -- | Short-interval (polling) reload watcher.
379 -- This is only active when we're in polling mode; it will
380 -- automatically exit when it detects that the state has changed to
382 onReloadTimer :: IO Bool -> FilePath -> CRef -> MVar ServerState -> IO ()
383 onReloadTimer inotiaction path cref state = do
384 continue <- modifyMVar state (onReloadInner inotiaction path cref)
386 do threadDelay configReloadRatelimit
387 onReloadTimer inotiaction path cref state
388 -- the inotify watch has been re-established, we can exit
390 -- | Inner onReload handler.
392 -- This again mutates the state under a modifyMVar call, and also
393 -- returns whether the thread should continue or not.
394 onReloadInner :: IO Bool -> FilePath -> CRef -> ServerState
395 -> IO (ServerState, Bool)
396 onReloadInner _ _ _ state@(ServerState { reloadModel = ReloadNotify } ) =
397 return (state, False)
398 onReloadInner inotiaction path cref
399 state@(ServerState { reloadModel = ReloadPoll pround } ) = do
400 (newfstat, reload) <- safeUpdateConfig path (reloadFStat state) cref
401 let state' = state { reloadFStat = newfstat }
402 -- compute new poll model based on reload data; however, failure to
403 -- re-establish the inotifier means we stay on polling
404 newmode <- case reload of
406 if pround >= maxIdlePollRounds
407 then do -- try to switch to notify
408 result <- inotiaction
411 else return initialPoll
412 else return (ReloadPoll (pround + 1))
413 _ -> return initialPoll
414 let continue = case newmode of
415 ReloadNotify -> False
417 return (state' { reloadModel = newmode }, continue)
419 -- the following hint is because hlint doesn't understand our const
420 -- (return False) is so that we can give a signature to 'e'
421 {-# ANN addNotifier "HLint: ignore Evaluate" #-}
422 -- | Setup inotify watcher.
424 -- This tries to setup the watch descriptor; in case of any IO errors,
425 -- it will return False.
426 addNotifier :: INotify -> FilePath -> CRef -> MVar ServerState -> IO Bool
427 addNotifier inotify path cref mstate =
428 Control.Exception.catch
429 (addWatch inotify [CloseWrite] path
430 (onInotify inotify path cref mstate) >> return True)
431 (\e -> const (return False) (e::IOError))
433 -- | Inotify event handler.
434 onInotify :: INotify -> String -> CRef -> MVar ServerState -> Event -> IO ()
435 onInotify inotify path cref mstate Ignored = do
436 logInfo "File lost, trying to re-establish notifier"
437 modifyMVar_ mstate $ \state -> do
438 result <- addNotifier inotify path cref mstate
439 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
440 let state' = state { reloadFStat = newfstat }
442 then return state' -- keep notify
444 mode <- moveToPolling "cannot re-establish inotify watch" inotify
446 return state' { reloadModel = mode }
448 onInotify inotify path cref mstate _ =
449 modifyMVar_ mstate $ \state ->
450 if reloadModel state == ReloadNotify
452 ctime <- getCurrentTime
453 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
454 let state' = state { reloadFStat = newfstat, reloadTime = ctime }
455 if abs (reloadTime state - ctime) < reloadRatelimitSec
457 mode <- moveToPolling "too many reloads" inotify path cref mstate
458 return state' { reloadModel = mode }
462 -- ** Client input/output handlers
464 -- | Main loop for a given client.
465 responder :: CRef -> S.Socket -> HashKey -> String -> S.SockAddr -> IO ()
466 responder cfgref socket hmac msg peer = do
467 ctime <- getCurrentTime
468 case parseMessage hmac msg ctime of
469 Ok (origmsg, rq) -> do
470 logDebug $ "Processing request: " ++ origmsg
471 mcfg <- readIORef cfgref
472 let response = respondInner mcfg hmac rq
473 _ <- S.sendTo socket response peer
475 Bad err -> logInfo $ "Failed to parse incoming message: " ++ err
478 -- | Inner helper function for a given client. This generates the
479 -- final encoded message (as a string), ready to be sent out to the
481 respondInner :: Result (ConfigData, LinkIpMap) -> HashKey
482 -> ConfdRequest -> String
483 respondInner cfg hmac rq =
484 let rsalt = confdRqRsalt rq
485 innermsg = serializeResponse (cfg >>= flip buildResponse rq)
486 innerserialised = J.encodeStrict innermsg
487 outermsg = signMessage hmac rsalt innerserialised
488 outerserialised = confdMagicFourcc ++ J.encodeStrict outermsg
491 -- | Main listener loop.
492 listener :: S.Socket -> HashKey
493 -> (S.Socket -> HashKey -> String -> S.SockAddr -> IO ())
495 listener s hmac resp = do
496 (msg, _, peer) <- S.recvFrom s 4096
497 if confdMagicFourcc `isPrefixOf` msg
498 then forkIO (resp s hmac (drop 4 msg) peer) >> return ()
499 else logDebug "Invalid magic code!" >> return ()
502 -- | Extract the configuration from our IORef.
503 configReader :: CRef -> IO (Result ConfigData)
504 configReader cref = do
505 cdata <- readIORef cref
506 return $ liftM fst cdata
508 -- | Type alias for prepMain results
509 type PrepResult = (S.Socket, (FilePath, S.Socket),
510 IORef (Result (ConfigData, LinkIpMap)))
512 -- | Check function for confd.
513 checkMain :: CheckFn (S.Family, S.SockAddr)
515 parseresult <- parseAddress opts C.defaultConfdPort
518 hPutStrLn stderr $ "parsing bind address: " ++ msg
519 return . Left $ ExitFailure 1
520 Ok v -> return $ Right v
522 -- | Prepare function for confd.
523 prepMain :: PrepFn (S.Family, S.SockAddr) PrepResult
524 prepMain _ (af_family, bindaddr) = do
525 s <- S.socket af_family S.Datagram S.defaultProtocol
526 S.bindSocket s bindaddr
527 -- prepare the queryd listener
528 query_data <- prepQueryD Nothing
529 cref <- newIORef (Bad "Configuration not yet loaded")
530 return (s, query_data, cref)
533 main :: MainFn (S.Family, S.SockAddr) PrepResult
534 main _ _ (s, query_data, cref) = do
535 statemvar <- newMVar initialState
536 hmac <- getClusterHmac
538 inotify <- initINotify
539 conf_file <- Path.clusterConfFile
540 let inotiaction = addNotifier inotify conf_file cref statemvar
541 -- fork the timeout timer
542 _ <- forkIO $ onTimeoutTimer inotiaction conf_file cref statemvar
543 -- fork the polling timer
544 _ <- forkIO $ onReloadTimer inotiaction conf_file cref statemvar
545 -- launch the queryd listener
546 _ <- forkIO $ runQueryD query_data (configReader cref)
547 -- and finally enter the responder loop
548 forever $ listener s hmac (responder cref)