1 {-# LANGUAGE BangPatterns #-}
3 {-| Implementation of the Ganeti confd server functionality.
9 Copyright (C) 2011, 2012 Google Inc.
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 module Ganeti.Confd.Server
34 import Control.Concurrent
35 import Control.Exception
36 import Control.Monad (forever, liftM, when)
39 import qualified Data.Map as M
40 import Data.Maybe (fromMaybe)
41 import qualified Network.Socket as S
44 import System.Posix.Files
45 import System.Posix.Types
47 import qualified Text.JSON as J
50 import Ganeti.BasicTypes
55 import Ganeti.Confd.Types
56 import Ganeti.Confd.Utils
60 import qualified Ganeti.Constants as C
61 import qualified Ganeti.Path as Path
62 import Ganeti.Query.Server (prepQueryD, runQueryD)
64 -- * Types and constants definitions
66 -- | What we store as configuration.
67 type CRef = IORef (Result (ConfigData, LinkIpMap))
69 -- | File stat identifier.
70 type FStat = (EpochTime, FileID, FileOffset)
72 -- | Null 'FStat' value.
74 nullFStat = (-1, -1, -1)
76 -- | A small type alias for readability.
77 type StatusAnswer = (ConfdReplyStatus, J.JSValue)
79 -- | Reload model data type.
80 data ReloadModel = ReloadNotify -- ^ We are using notifications
81 | ReloadPoll Int -- ^ We are using polling
84 -- | Server state data type.
85 data ServerState = ServerState
86 { reloadModel :: ReloadModel
87 , reloadTime :: Integer
88 , reloadFStat :: FStat
91 -- | Maximum no-reload poll rounds before reverting to inotify.
92 maxIdlePollRounds :: Int
95 -- | Reload timeout in microseconds.
96 configReloadTimeout :: Int
97 configReloadTimeout = C.confdConfigReloadTimeout * 1000000
99 -- | Ratelimit timeout in microseconds.
100 configReloadRatelimit :: Int
101 configReloadRatelimit = C.confdConfigReloadRatelimit * 1000000
103 -- | Initial poll round.
104 initialPoll :: ReloadModel
105 initialPoll = ReloadPoll 0
107 -- | Initial server state.
108 initialState :: ServerState
109 initialState = ServerState initialPoll 0 nullFStat
111 -- | Reload status data type.
112 data ConfigReload = ConfigToDate -- ^ No need to reload
113 | ConfigReloaded -- ^ Configuration reloaded
114 | ConfigIOError -- ^ Error during configuration reload
116 -- | Unknown entry standard response.
117 queryUnknownEntry :: StatusAnswer
118 queryUnknownEntry = (ReplyStatusError, J.showJSON ConfdErrorUnknownEntry)
121 -- | Internal error standard response.
122 queryInternalError :: StatusAnswer
123 queryInternalError = (ReplyStatusError, J.showJSON ConfdErrorInternal)
126 -- | Argument error standard response.
127 queryArgumentError :: StatusAnswer
128 queryArgumentError = (ReplyStatusError, J.showJSON ConfdErrorArgument)
130 -- | Returns the current time.
131 getCurrentTime :: IO Integer
133 TOD ctime _ <- getClockTime
136 -- | Converter from specific error to a string format.
137 gntErrorToResult :: ErrorResult a -> Result a
138 gntErrorToResult (Bad err) = Bad (show err)
139 gntErrorToResult (Ok x) = Ok x
141 -- * Confd base functionality
143 -- | Computes the node role.
144 nodeRole :: ConfigData -> String -> Result ConfdNodeRole
146 let cmaster = clusterMasterNode . configCluster $ cfg
147 mnode = M.lookup name . fromContainer . configNodes $ cfg
149 Nothing -> Bad "Node not found"
150 Just node | cmaster == name -> Ok NodeRoleMaster
151 | nodeDrained node -> Ok NodeRoleDrained
152 | nodeOffline node -> Ok NodeRoleOffline
153 | nodeMasterCandidate node -> Ok NodeRoleCandidate
154 _ -> Ok NodeRoleRegular
156 -- | Does an instance ip -> instance -> primary node -> primary ip
158 getNodePipByInstanceIp :: ConfigData
163 getNodePipByInstanceIp cfg linkipmap link instip =
164 case M.lookup instip (M.findWithDefault M.empty link linkipmap) of
165 Nothing -> queryUnknownEntry
167 case getInstPrimaryNode cfg instname of
168 Bad _ -> queryUnknownEntry -- either instance or node not found
169 Ok node -> (ReplyStatusOk, J.showJSON (nodePrimaryIp node))
171 -- | Builds the response to a given query.
172 buildResponse :: (ConfigData, LinkIpMap) -> ConfdRequest -> Result StatusAnswer
173 buildResponse (cfg, _) (ConfdRequest { confdRqType = ReqPing }) =
174 return (ReplyStatusOk, J.showJSON (configVersion cfg))
176 buildResponse cdata req@(ConfdRequest { confdRqType = ReqClusterMaster }) =
177 case confdRqQuery req of
178 EmptyQuery -> return (ReplyStatusOk, J.showJSON master_name)
179 PlainQuery _ -> return queryArgumentError
181 mnode <- gntErrorToResult $ getNode cfg master_name
182 let fvals = map (\field -> case field of
183 ReqFieldName -> master_name
184 ReqFieldIp -> clusterMasterIp cluster
185 ReqFieldMNodePip -> nodePrimaryIp mnode
186 ) (confdReqQFields reqq)
187 return (ReplyStatusOk, J.showJSON fvals)
188 where master_name = clusterMasterNode cluster
189 cluster = configCluster cfg
192 buildResponse cdata req@(ConfdRequest { confdRqType = ReqNodeRoleByName }) = do
193 node_name <- case confdRqQuery req of
194 PlainQuery str -> return str
195 _ -> fail $ "Invalid query type " ++ show (confdRqQuery req)
196 role <- nodeRole (fst cdata) node_name
197 return (ReplyStatusOk, J.showJSON role)
199 buildResponse cdata (ConfdRequest { confdRqType = ReqNodePipList }) =
200 -- note: we use foldlWithKey because that's present accross more
201 -- versions of the library
202 return (ReplyStatusOk, J.showJSON $
203 M.foldlWithKey (\accu _ n -> nodePrimaryIp n:accu) []
204 (fromContainer . configNodes . fst $ cdata))
206 buildResponse cdata (ConfdRequest { confdRqType = ReqMcPipList }) =
207 -- note: we use foldlWithKey because that's present accross more
208 -- versions of the library
209 return (ReplyStatusOk, J.showJSON $
210 M.foldlWithKey (\accu _ n -> if nodeMasterCandidate n
211 then nodePrimaryIp n:accu
213 (fromContainer . configNodes . fst $ cdata))
215 buildResponse (cfg, linkipmap)
216 req@(ConfdRequest { confdRqType = ReqInstIpsList }) = do
217 link <- case confdRqQuery req of
218 PlainQuery str -> return str
219 EmptyQuery -> return (getDefaultNicLink cfg)
220 _ -> fail "Invalid query type"
221 return (ReplyStatusOk, J.showJSON $ getInstancesIpByLink linkipmap link)
223 buildResponse cdata (ConfdRequest { confdRqType = ReqNodePipByInstPip
224 , confdRqQuery = DictQuery query}) =
225 let (cfg, linkipmap) = cdata
226 link = fromMaybe (getDefaultNicLink cfg) (confdReqQLink query)
227 in case confdReqQIp query of
228 Just ip -> return $ getNodePipByInstanceIp cfg linkipmap link ip
229 Nothing -> return (ReplyStatusOk,
231 map (getNodePipByInstanceIp cfg linkipmap link)
232 (confdReqQIpList query))
234 buildResponse _ (ConfdRequest { confdRqType = ReqNodePipByInstPip }) =
235 return queryArgumentError
237 buildResponse cdata req@(ConfdRequest { confdRqType = ReqNodeDrbd }) = do
239 node_name <- case confdRqQuery req of
240 PlainQuery str -> return str
241 _ -> fail $ "Invalid query type " ++ show (confdRqQuery req)
242 node <- gntErrorToResult $ getNode cfg node_name
243 let minors = concatMap (getInstMinorsForNode (nodeName node)) .
244 M.elems . fromContainer . configInstances $ cfg
245 encoded = [J.JSArray [J.showJSON a, J.showJSON b, J.showJSON c,
246 J.showJSON d, J.showJSON e, J.showJSON f] |
247 (a, b, c, d, e, f) <- minors]
248 return (ReplyStatusOk, J.showJSON encoded)
250 -- | Creates a ConfdReply from a given answer.
251 serializeResponse :: Result StatusAnswer -> ConfdReply
252 serializeResponse r =
253 let (status, result) = case r of
254 Bad err -> (ReplyStatusError, J.showJSON err)
255 Ok (code, val) -> (code, val)
256 in ConfdReply { confdReplyProtocol = 1
257 , confdReplyStatus = status
258 , confdReplyAnswer = result
259 , confdReplySerial = 0 }
261 -- * Configuration handling
263 -- ** Helper functions
265 -- | Helper function for logging transition into polling mode.
266 moveToPolling :: String -> INotify -> FilePath -> CRef -> MVar ServerState
268 moveToPolling msg inotify path cref mstate = do
269 logInfo $ "Moving to polling mode: " ++ msg
270 let inotiaction = addNotifier inotify path cref mstate
271 _ <- forkIO $ onReloadTimer inotiaction path cref mstate
274 -- | Helper function for logging transition into inotify mode.
275 moveToNotify :: IO ReloadModel
277 logInfo "Moving to inotify mode"
280 -- ** Configuration loading
282 -- | (Re)loads the configuration.
283 updateConfig :: FilePath -> CRef -> IO ()
284 updateConfig path r = do
285 newcfg <- loadConfig path
286 let !newdata = case newcfg of
287 Ok !cfg -> Ok (cfg, buildLinkIpInstnameMap cfg)
288 Bad _ -> Bad "Cannot load configuration"
291 Ok cfg -> logInfo ("Loaded new config, serial " ++
292 show (configSerial cfg))
293 Bad msg -> logError $ "Failed to load config: " ++ msg
296 -- | Wrapper over 'updateConfig' that handles IO errors.
297 safeUpdateConfig :: FilePath -> FStat -> CRef -> IO (FStat, ConfigReload)
298 safeUpdateConfig path oldfstat cref =
299 Control.Exception.catch
301 nt <- needsReload oldfstat path
303 Nothing -> return (oldfstat, ConfigToDate)
305 updateConfig path cref
306 return (nt', ConfigReloaded)
308 let msg = "Failure during configuration update: " ++
310 writeIORef cref (Bad msg)
311 return (nullFStat, ConfigIOError)
314 -- | Computes the file cache data from a FileStatus structure.
315 buildFileStatus :: FileStatus -> FStat
316 buildFileStatus ofs =
317 let modt = modificationTime ofs
320 in (modt, inum, fsize)
322 -- | Wrapper over 'buildFileStatus'. This reads the data from the
323 -- filesystem and then builds our cache structure.
324 getFStat :: FilePath -> IO FStat
325 getFStat p = liftM buildFileStatus (getFileStatus p)
327 -- | Check if the file needs reloading
328 needsReload :: FStat -> FilePath -> IO (Maybe FStat)
329 needsReload oldstat path = do
330 newstat <- getFStat path
331 return $ if newstat /= oldstat
335 -- ** Watcher threads
338 -- We have three threads/functions that can mutate the server state:
340 -- 1. the long-interval watcher ('onTimeoutTimer')
342 -- 2. the polling watcher ('onReloadTimer')
344 -- 3. the inotify event handler ('onInotify')
346 -- All of these will mutate the server state under 'modifyMVar' or
347 -- 'modifyMVar_', so that server transitions are more or less
348 -- atomic. The inotify handler remains active during polling mode, but
349 -- checks for polling mode and doesn't do anything in this case (this
350 -- check is needed even if we would unregister the event handler due
351 -- to how events are serialised).
353 -- | Long-interval reload watcher.
355 -- This is on top of the inotify-based triggered reload.
356 onTimeoutTimer :: IO Bool -> FilePath -> CRef -> MVar ServerState -> IO ()
357 onTimeoutTimer inotiaction path cref state = do
358 threadDelay configReloadTimeout
359 modifyMVar_ state (onTimeoutInner path cref)
361 onTimeoutTimer inotiaction path cref state
363 -- | Inner onTimeout handler.
365 -- This mutates the server state under a modifyMVar_ call. It never
366 -- changes the reload model, just does a safety reload and tried to
367 -- re-establish the inotify watcher.
368 onTimeoutInner :: FilePath -> CRef -> ServerState -> IO ServerState
369 onTimeoutInner path cref state = do
370 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
371 return state { reloadFStat = newfstat }
373 -- | Short-interval (polling) reload watcher.
375 -- This is only active when we're in polling mode; it will
376 -- automatically exit when it detects that the state has changed to
378 onReloadTimer :: IO Bool -> FilePath -> CRef -> MVar ServerState -> IO ()
379 onReloadTimer inotiaction path cref state = do
380 continue <- modifyMVar state (onReloadInner inotiaction path cref)
382 do threadDelay configReloadRatelimit
383 onReloadTimer inotiaction path cref state
384 -- the inotify watch has been re-established, we can exit
386 -- | Inner onReload handler.
388 -- This again mutates the state under a modifyMVar call, and also
389 -- returns whether the thread should continue or not.
390 onReloadInner :: IO Bool -> FilePath -> CRef -> ServerState
391 -> IO (ServerState, Bool)
392 onReloadInner _ _ _ state@(ServerState { reloadModel = ReloadNotify } ) =
393 return (state, False)
394 onReloadInner inotiaction path cref
395 state@(ServerState { reloadModel = ReloadPoll pround } ) = do
396 (newfstat, reload) <- safeUpdateConfig path (reloadFStat state) cref
397 let state' = state { reloadFStat = newfstat }
398 -- compute new poll model based on reload data; however, failure to
399 -- re-establish the inotifier means we stay on polling
400 newmode <- case reload of
402 if pround >= maxIdlePollRounds
403 then do -- try to switch to notify
404 result <- inotiaction
407 else return initialPoll
408 else return (ReloadPoll (pround + 1))
409 _ -> return initialPoll
410 let continue = case newmode of
411 ReloadNotify -> False
413 return (state' { reloadModel = newmode }, continue)
415 -- the following hint is because hlint doesn't understand our const
416 -- (return False) is so that we can give a signature to 'e'
417 {-# ANN addNotifier "HLint: ignore Evaluate" #-}
418 -- | Setup inotify watcher.
420 -- This tries to setup the watch descriptor; in case of any IO errors,
421 -- it will return False.
422 addNotifier :: INotify -> FilePath -> CRef -> MVar ServerState -> IO Bool
423 addNotifier inotify path cref mstate =
424 Control.Exception.catch
425 (addWatch inotify [CloseWrite] path
426 (onInotify inotify path cref mstate) >> return True)
427 (\e -> const (return False) (e::IOError))
429 -- | Inotify event handler.
430 onInotify :: INotify -> String -> CRef -> MVar ServerState -> Event -> IO ()
431 onInotify inotify path cref mstate Ignored = do
432 logInfo "File lost, trying to re-establish notifier"
433 modifyMVar_ mstate $ \state -> do
434 result <- addNotifier inotify path cref mstate
435 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
436 let state' = state { reloadFStat = newfstat }
438 then return state' -- keep notify
440 mode <- moveToPolling "cannot re-establish inotify watch" inotify
442 return state' { reloadModel = mode }
444 onInotify inotify path cref mstate _ =
445 modifyMVar_ mstate $ \state ->
446 if reloadModel state == ReloadNotify
448 ctime <- getCurrentTime
449 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
450 let state' = state { reloadFStat = newfstat, reloadTime = ctime }
451 if abs (reloadTime state - ctime) <
452 fromIntegral C.confdConfigReloadRatelimit
454 mode <- moveToPolling "too many reloads" inotify path cref mstate
455 return state' { reloadModel = mode }
459 -- ** Client input/output handlers
461 -- | Main loop for a given client.
462 responder :: CRef -> S.Socket -> HashKey -> String -> S.SockAddr -> IO ()
463 responder cfgref socket hmac msg peer = do
464 ctime <- getCurrentTime
465 case parseMessage hmac msg ctime of
466 Ok (origmsg, rq) -> do
467 logDebug $ "Processing request: " ++ origmsg
468 mcfg <- readIORef cfgref
469 let response = respondInner mcfg hmac rq
470 _ <- S.sendTo socket response peer
472 Bad err -> logInfo $ "Failed to parse incoming message: " ++ err
475 -- | Inner helper function for a given client. This generates the
476 -- final encoded message (as a string), ready to be sent out to the
478 respondInner :: Result (ConfigData, LinkIpMap) -> HashKey
479 -> ConfdRequest -> String
480 respondInner cfg hmac rq =
481 let rsalt = confdRqRsalt rq
482 innermsg = serializeResponse (cfg >>= flip buildResponse rq)
483 innerserialised = J.encodeStrict innermsg
484 outermsg = signMessage hmac rsalt innerserialised
485 outerserialised = confdMagicFourcc ++ J.encodeStrict outermsg
488 -- | Main listener loop.
489 listener :: S.Socket -> HashKey
490 -> (S.Socket -> HashKey -> String -> S.SockAddr -> IO ())
492 listener s hmac resp = do
493 (msg, _, peer) <- S.recvFrom s 4096
494 if confdMagicFourcc `isPrefixOf` msg
495 then forkIO (resp s hmac (drop 4 msg) peer) >> return ()
496 else logDebug "Invalid magic code!" >> return ()
499 -- | Extract the configuration from our IORef.
500 configReader :: CRef -> IO (Result ConfigData)
501 configReader cref = do
502 cdata <- readIORef cref
503 return $ liftM fst cdata
505 -- | Type alias for prepMain results
506 type PrepResult = (S.Socket, (FilePath, S.Socket),
507 IORef (Result (ConfigData, LinkIpMap)))
509 -- | Check function for confd.
510 checkMain :: CheckFn (S.Family, S.SockAddr)
512 parseresult <- parseAddress opts C.defaultConfdPort
515 hPutStrLn stderr $ "parsing bind address: " ++ msg
516 return . Left $ ExitFailure 1
517 Ok v -> return $ Right v
519 -- | Prepare function for confd.
520 prepMain :: PrepFn (S.Family, S.SockAddr) PrepResult
521 prepMain _ (af_family, bindaddr) = do
522 s <- S.socket af_family S.Datagram S.defaultProtocol
523 S.bindSocket s bindaddr
524 -- prepare the queryd listener
525 query_data <- prepQueryD Nothing
526 cref <- newIORef (Bad "Configuration not yet loaded")
527 return (s, query_data, cref)
530 main :: MainFn (S.Family, S.SockAddr) PrepResult
531 main _ _ (s, query_data, cref) = do
532 statemvar <- newMVar initialState
533 hmac <- getClusterHmac
535 inotify <- initINotify
536 conf_file <- Path.clusterConfFile
537 let inotiaction = addNotifier inotify conf_file cref statemvar
538 -- fork the timeout timer
539 _ <- forkIO $ onTimeoutTimer inotiaction conf_file cref statemvar
540 -- fork the polling timer
541 _ <- forkIO $ onReloadTimer inotiaction conf_file cref statemvar
542 -- launch the queryd listener
543 _ <- forkIO $ runQueryD query_data (configReader cref)
544 -- and finally enter the responder loop
545 forever $ listener s hmac (responder cref)