1 {-# LANGUAGE BangPatterns #-}
3 {-| Implementation of the Ganeti confd server functionality.
9 Copyright (C) 2011, 2012, 2013 Google Inc.
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 module Ganeti.Confd.Server
34 import Control.Concurrent
35 import Control.Exception
36 import Control.Monad (forever, liftM, unless)
39 import qualified Data.Map as M
40 import Data.Maybe (fromMaybe)
41 import qualified Network.Socket as S
44 import System.Posix.Files
45 import System.Posix.Types
46 import qualified Text.JSON as J
49 import Ganeti.BasicTypes
54 import Ganeti.Confd.Types
55 import Ganeti.Confd.Utils
59 import qualified Ganeti.Constants as C
60 import qualified Ganeti.Path as Path
61 import Ganeti.Query.Server (prepQueryD, runQueryD)
64 -- * Types and constants definitions
66 -- | What we store as configuration.
67 type CRef = IORef (Result (ConfigData, LinkIpMap))
69 -- | File stat identifier.
70 type FStat = (EpochTime, FileID, FileOffset)
72 -- | Null 'FStat' value.
74 nullFStat = (-1, -1, -1)
76 -- | A small type alias for readability.
77 type StatusAnswer = (ConfdReplyStatus, J.JSValue)
79 -- | Reload model data type.
80 data ReloadModel = ReloadNotify -- ^ We are using notifications
81 | ReloadPoll Int -- ^ We are using polling
84 -- | Server state data type.
85 data ServerState = ServerState
86 { reloadModel :: ReloadModel
87 , reloadTime :: Integer -- ^ Reload time (epoch) in microseconds
88 , reloadFStat :: FStat
91 -- | Maximum no-reload poll rounds before reverting to inotify.
92 maxIdlePollRounds :: Int
95 -- | Reload timeout in microseconds.
97 watchInterval = C.confdConfigReloadTimeout * 1000000
99 -- | Ratelimit timeout in microseconds.
101 pollInterval = C.confdConfigReloadRatelimit
103 -- | Ratelimit timeout in microseconds, as an 'Integer'.
104 reloadRatelimit :: Integer
105 reloadRatelimit = fromIntegral C.confdConfigReloadRatelimit
107 -- | Initial poll round.
108 initialPoll :: ReloadModel
109 initialPoll = ReloadPoll 0
111 -- | Reload status data type.
112 data ConfigReload = ConfigToDate -- ^ No need to reload
113 | ConfigReloaded -- ^ Configuration reloaded
114 | ConfigIOError -- ^ Error during configuration reload
117 -- | Unknown entry standard response.
118 queryUnknownEntry :: StatusAnswer
119 queryUnknownEntry = (ReplyStatusError, J.showJSON ConfdErrorUnknownEntry)
122 -- | Internal error standard response.
123 queryInternalError :: StatusAnswer
124 queryInternalError = (ReplyStatusError, J.showJSON ConfdErrorInternal)
127 -- | Argument error standard response.
128 queryArgumentError :: StatusAnswer
129 queryArgumentError = (ReplyStatusError, J.showJSON ConfdErrorArgument)
131 -- | Converter from specific error to a string format.
132 gntErrorToResult :: ErrorResult a -> Result a
133 gntErrorToResult (Bad err) = Bad (show err)
134 gntErrorToResult (Ok x) = Ok x
136 -- * Confd base functionality
138 -- | Computes the node role.
139 nodeRole :: ConfigData -> String -> Result ConfdNodeRole
141 let cmaster = clusterMasterNode . configCluster $ cfg
142 mnode = M.lookup name . fromContainer . configNodes $ cfg
144 Nothing -> Bad "Node not found"
145 Just node | cmaster == name -> Ok NodeRoleMaster
146 | nodeDrained node -> Ok NodeRoleDrained
147 | nodeOffline node -> Ok NodeRoleOffline
148 | nodeMasterCandidate node -> Ok NodeRoleCandidate
149 _ -> Ok NodeRoleRegular
151 -- | Does an instance ip -> instance -> primary node -> primary ip
153 getNodePipByInstanceIp :: ConfigData
158 getNodePipByInstanceIp cfg linkipmap link instip =
159 case M.lookup instip (M.findWithDefault M.empty link linkipmap) of
160 Nothing -> queryUnknownEntry
162 case getInstPrimaryNode cfg instname of
163 Bad _ -> queryUnknownEntry -- either instance or node not found
164 Ok node -> (ReplyStatusOk, J.showJSON (nodePrimaryIp node))
166 -- | Builds the response to a given query.
167 buildResponse :: (ConfigData, LinkIpMap) -> ConfdRequest -> Result StatusAnswer
168 buildResponse (cfg, _) (ConfdRequest { confdRqType = ReqPing }) =
169 return (ReplyStatusOk, J.showJSON (configVersion cfg))
171 buildResponse cdata req@(ConfdRequest { confdRqType = ReqClusterMaster }) =
172 case confdRqQuery req of
173 EmptyQuery -> return (ReplyStatusOk, J.showJSON master_name)
174 PlainQuery _ -> return queryArgumentError
176 mnode <- gntErrorToResult $ getNode cfg master_name
177 let fvals = map (\field -> case field of
178 ReqFieldName -> master_name
179 ReqFieldIp -> clusterMasterIp cluster
180 ReqFieldMNodePip -> nodePrimaryIp mnode
181 ) (confdReqQFields reqq)
182 return (ReplyStatusOk, J.showJSON fvals)
183 where master_name = clusterMasterNode cluster
184 cluster = configCluster cfg
187 buildResponse cdata req@(ConfdRequest { confdRqType = ReqNodeRoleByName }) = do
188 node_name <- case confdRqQuery req of
189 PlainQuery str -> return str
190 _ -> fail $ "Invalid query type " ++ show (confdRqQuery req)
191 role <- nodeRole (fst cdata) node_name
192 return (ReplyStatusOk, J.showJSON role)
194 buildResponse cdata (ConfdRequest { confdRqType = ReqNodePipList }) =
195 -- note: we use foldlWithKey because that's present accross more
196 -- versions of the library
197 return (ReplyStatusOk, J.showJSON $
198 M.foldlWithKey (\accu _ n -> nodePrimaryIp n:accu) []
199 (fromContainer . configNodes . fst $ cdata))
201 buildResponse cdata (ConfdRequest { confdRqType = ReqMcPipList }) =
202 -- note: we use foldlWithKey because that's present accross more
203 -- versions of the library
204 return (ReplyStatusOk, J.showJSON $
205 M.foldlWithKey (\accu _ n -> if nodeMasterCandidate n
206 then nodePrimaryIp n:accu
208 (fromContainer . configNodes . fst $ cdata))
210 buildResponse (cfg, linkipmap)
211 req@(ConfdRequest { confdRqType = ReqInstIpsList }) = do
212 link <- case confdRqQuery req of
213 PlainQuery str -> return str
214 EmptyQuery -> return (getDefaultNicLink cfg)
215 _ -> fail "Invalid query type"
216 return (ReplyStatusOk, J.showJSON $ getInstancesIpByLink linkipmap link)
218 buildResponse cdata (ConfdRequest { confdRqType = ReqNodePipByInstPip
219 , confdRqQuery = DictQuery query}) =
220 let (cfg, linkipmap) = cdata
221 link = fromMaybe (getDefaultNicLink cfg) (confdReqQLink query)
222 in case confdReqQIp query of
223 Just ip -> return $ getNodePipByInstanceIp cfg linkipmap link ip
224 Nothing -> return (ReplyStatusOk,
226 map (getNodePipByInstanceIp cfg linkipmap link)
227 (confdReqQIpList query))
229 buildResponse _ (ConfdRequest { confdRqType = ReqNodePipByInstPip }) =
230 return queryArgumentError
232 buildResponse cdata req@(ConfdRequest { confdRqType = ReqNodeDrbd }) = do
234 node_name <- case confdRqQuery req of
235 PlainQuery str -> return str
236 _ -> fail $ "Invalid query type " ++ show (confdRqQuery req)
237 node <- gntErrorToResult $ getNode cfg node_name
238 let minors = concatMap (getInstMinorsForNode (nodeName node)) .
239 M.elems . fromContainer . configInstances $ cfg
240 encoded = [J.JSArray [J.showJSON a, J.showJSON b, J.showJSON c,
241 J.showJSON d, J.showJSON e, J.showJSON f] |
242 (a, b, c, d, e, f) <- minors]
243 return (ReplyStatusOk, J.showJSON encoded)
245 -- | Creates a ConfdReply from a given answer.
246 serializeResponse :: Result StatusAnswer -> ConfdReply
247 serializeResponse r =
248 let (status, result) = case r of
249 Bad err -> (ReplyStatusError, J.showJSON err)
250 Ok (code, val) -> (code, val)
251 in ConfdReply { confdReplyProtocol = 1
252 , confdReplyStatus = status
253 , confdReplyAnswer = result
254 , confdReplySerial = 0 }
256 -- * Configuration handling
258 -- ** Helper functions
260 -- | Helper function for logging transition into polling mode.
261 moveToPolling :: String -> INotify -> FilePath -> CRef -> MVar ServerState
263 moveToPolling msg inotify path cref mstate = do
264 logInfo $ "Moving to polling mode: " ++ msg
265 let inotiaction = addNotifier inotify path cref mstate
266 _ <- forkIO $ onPollTimer inotiaction path cref mstate
269 -- | Helper function for logging transition into inotify mode.
270 moveToNotify :: IO ReloadModel
272 logInfo "Moving to inotify mode"
275 -- ** Configuration loading
277 -- | (Re)loads the configuration.
278 updateConfig :: FilePath -> CRef -> IO ()
279 updateConfig path r = do
280 newcfg <- loadConfig path
281 let !newdata = case newcfg of
282 Ok !cfg -> Ok (cfg, buildLinkIpInstnameMap cfg)
283 Bad _ -> Bad "Cannot load configuration"
286 Ok cfg -> logInfo ("Loaded new config, serial " ++
287 show (configSerial cfg))
288 Bad msg -> logError $ "Failed to load config: " ++ msg
291 -- | Wrapper over 'updateConfig' that handles IO errors.
292 safeUpdateConfig :: FilePath -> FStat -> CRef -> IO (FStat, ConfigReload)
293 safeUpdateConfig path oldfstat cref =
294 Control.Exception.catch
296 nt <- needsReload oldfstat path
298 Nothing -> return (oldfstat, ConfigToDate)
300 updateConfig path cref
301 return (nt', ConfigReloaded)
303 let msg = "Failure during configuration update: " ++
305 writeIORef cref (Bad msg)
306 return (nullFStat, ConfigIOError)
309 -- | Computes the file cache data from a FileStatus structure.
310 buildFileStatus :: FileStatus -> FStat
311 buildFileStatus ofs =
312 let modt = modificationTime ofs
315 in (modt, inum, fsize)
317 -- | Wrapper over 'buildFileStatus'. This reads the data from the
318 -- filesystem and then builds our cache structure.
319 getFStat :: FilePath -> IO FStat
320 getFStat p = liftM buildFileStatus (getFileStatus p)
322 -- | Check if the file needs reloading
323 needsReload :: FStat -> FilePath -> IO (Maybe FStat)
324 needsReload oldstat path = do
325 newstat <- getFStat path
326 return $ if newstat /= oldstat
330 -- ** Watcher threads
333 -- We have three threads/functions that can mutate the server state:
335 -- 1. the long-interval watcher ('onWatcherTimer')
337 -- 2. the polling watcher ('onPollTimer')
339 -- 3. the inotify event handler ('onInotify')
341 -- All of these will mutate the server state under 'modifyMVar' or
342 -- 'modifyMVar_', so that server transitions are more or less
343 -- atomic. The inotify handler remains active during polling mode, but
344 -- checks for polling mode and doesn't do anything in this case (this
345 -- check is needed even if we would unregister the event handler due
346 -- to how events are serialised).
348 -- | Long-interval reload watcher.
350 -- This is on top of the inotify-based triggered reload.
351 onWatcherTimer :: IO Bool -> FilePath -> CRef -> MVar ServerState -> IO ()
352 onWatcherTimer inotiaction path cref state = do
353 threadDelay watchInterval
354 logDebug "Watcher timer fired"
355 modifyMVar_ state (onWatcherInner path cref)
357 onWatcherTimer inotiaction path cref state
359 -- | Inner onWatcher handler.
361 -- This mutates the server state under a modifyMVar_ call. It never
362 -- changes the reload model, just does a safety reload and tried to
363 -- re-establish the inotify watcher.
364 onWatcherInner :: FilePath -> CRef -> ServerState -> IO ServerState
365 onWatcherInner path cref state = do
366 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
367 return state { reloadFStat = newfstat }
369 -- | Short-interval (polling) reload watcher.
371 -- This is only active when we're in polling mode; it will
372 -- automatically exit when it detects that the state has changed to
374 onPollTimer :: IO Bool -> FilePath -> CRef -> MVar ServerState -> IO ()
375 onPollTimer inotiaction path cref state = do
376 threadDelay pollInterval
377 logDebug "Poll timer fired"
378 continue <- modifyMVar state (onPollInner inotiaction path cref)
380 then onPollTimer inotiaction path cref state
381 else logDebug "Inotify watch active, polling thread exiting"
383 -- | Inner onPoll handler.
385 -- This again mutates the state under a modifyMVar call, and also
386 -- returns whether the thread should continue or not.
387 onPollInner :: IO Bool -> FilePath -> CRef -> ServerState
388 -> IO (ServerState, Bool)
389 onPollInner _ _ _ state@(ServerState { reloadModel = ReloadNotify } ) =
390 return (state, False)
391 onPollInner inotiaction path cref
392 state@(ServerState { reloadModel = ReloadPoll pround } ) = do
393 (newfstat, reload) <- safeUpdateConfig path (reloadFStat state) cref
394 let state' = state { reloadFStat = newfstat }
395 -- compute new poll model based on reload data; however, failure to
396 -- re-establish the inotifier means we stay on polling
397 newmode <- case reload of
399 if pround >= maxIdlePollRounds
400 then do -- try to switch to notify
401 result <- inotiaction
404 else return initialPoll
405 else return (ReloadPoll (pround + 1))
406 _ -> return initialPoll
407 let continue = case newmode of
408 ReloadNotify -> False
410 return (state' { reloadModel = newmode }, continue)
412 -- the following hint is because hlint doesn't understand our const
413 -- (return False) is so that we can give a signature to 'e'
414 {-# ANN addNotifier "HLint: ignore Evaluate" #-}
415 -- | Setup inotify watcher.
417 -- This tries to setup the watch descriptor; in case of any IO errors,
418 -- it will return False.
419 addNotifier :: INotify -> FilePath -> CRef -> MVar ServerState -> IO Bool
420 addNotifier inotify path cref mstate =
421 Control.Exception.catch
422 (addWatch inotify [CloseWrite] path
423 (onInotify inotify path cref mstate) >> return True)
424 (\e -> const (return False) (e::IOError))
426 -- | Inotify event handler.
427 onInotify :: INotify -> String -> CRef -> MVar ServerState -> Event -> IO ()
428 onInotify inotify path cref mstate Ignored = do
429 logDebug "File lost, trying to re-establish notifier"
430 modifyMVar_ mstate $ \state -> do
431 result <- addNotifier inotify path cref mstate
432 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
433 let state' = state { reloadFStat = newfstat }
435 then return state' -- keep notify
437 mode <- moveToPolling "cannot re-establish inotify watch" inotify
439 return state' { reloadModel = mode }
441 onInotify inotify path cref mstate _ =
442 modifyMVar_ mstate $ \state ->
443 if reloadModel state == ReloadNotify
445 ctime <- getCurrentTimeUSec
446 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
447 let state' = state { reloadFStat = newfstat, reloadTime = ctime }
448 if abs (reloadTime state - ctime) < reloadRatelimit
450 mode <- moveToPolling "too many reloads" inotify path cref mstate
451 return state' { reloadModel = mode }
455 -- ** Client input/output handlers
457 -- | Main loop for a given client.
458 responder :: CRef -> S.Socket -> HashKey -> String -> S.SockAddr -> IO ()
459 responder cfgref socket hmac msg peer = do
460 ctime <- getCurrentTime
461 case parseRequest hmac msg ctime of
462 Ok (origmsg, rq) -> do
463 logDebug $ "Processing request: " ++ rStripSpace origmsg
464 mcfg <- readIORef cfgref
465 let response = respondInner mcfg hmac rq
466 _ <- S.sendTo socket response peer
468 Bad err -> logInfo $ "Failed to parse incoming message: " ++ err
471 -- | Inner helper function for a given client. This generates the
472 -- final encoded message (as a string), ready to be sent out to the
474 respondInner :: Result (ConfigData, LinkIpMap) -> HashKey
475 -> ConfdRequest -> String
476 respondInner cfg hmac rq =
477 let rsalt = confdRqRsalt rq
478 innermsg = serializeResponse (cfg >>= flip buildResponse rq)
479 innerserialised = J.encodeStrict innermsg
480 outermsg = signMessage hmac rsalt innerserialised
481 outerserialised = confdMagicFourcc ++ J.encodeStrict outermsg
484 -- | Main listener loop.
485 listener :: S.Socket -> HashKey
486 -> (S.Socket -> HashKey -> String -> S.SockAddr -> IO ())
488 listener s hmac resp = do
489 (msg, _, peer) <- S.recvFrom s 4096
490 if confdMagicFourcc `isPrefixOf` msg
491 then forkIO (resp s hmac (drop 4 msg) peer) >> return ()
492 else logDebug "Invalid magic code!" >> return ()
495 -- | Extract the configuration from our IORef.
496 configReader :: CRef -> IO (Result ConfigData)
497 configReader cref = do
498 cdata <- readIORef cref
499 return $ liftM fst cdata
501 -- | Type alias for prepMain results
502 type PrepResult = (S.Socket, (FilePath, S.Socket),
503 IORef (Result (ConfigData, LinkIpMap)))
505 -- | Check function for confd.
506 checkMain :: CheckFn (S.Family, S.SockAddr)
508 parseresult <- parseAddress opts C.defaultConfdPort
511 hPutStrLn stderr $ "parsing bind address: " ++ msg
512 return . Left $ ExitFailure 1
513 Ok v -> return $ Right v
515 -- | Prepare function for confd.
516 prepMain :: PrepFn (S.Family, S.SockAddr) PrepResult
517 prepMain _ (af_family, bindaddr) = do
518 s <- S.socket af_family S.Datagram S.defaultProtocol
519 S.bindSocket s bindaddr
520 -- prepare the queryd listener
521 query_data <- prepQueryD Nothing
522 cref <- newIORef (Bad "Configuration not yet loaded")
523 return (s, query_data, cref)
526 main :: MainFn (S.Family, S.SockAddr) PrepResult
527 main _ _ (s, query_data, cref) = do
529 inotify <- initINotify
530 -- try to load the configuration, if possible
531 conf_file <- Path.clusterConfFile
532 (fstat, reloaded) <- safeUpdateConfig conf_file nullFStat cref
533 ctime <- getCurrentTime
534 statemvar <- newMVar $ ServerState ReloadNotify ctime fstat
535 let inotiaction = addNotifier inotify conf_file cref statemvar
536 has_inotify <- if reloaded == ConfigReloaded
540 then logInfo "Starting up in inotify mode"
542 -- inotify was not enabled, we need to update the reload model
543 logInfo "Starting up in polling mode"
544 modifyMVar_ statemvar
545 (\state -> return state { reloadModel = initialPoll })
546 hmac <- getClusterHmac
547 -- fork the timeout timer
548 _ <- forkIO $ onWatcherTimer inotiaction conf_file cref statemvar
549 -- fork the polling timer
550 unless has_inotify $ do
551 _ <- forkIO $ onPollTimer inotiaction conf_file cref statemvar
553 -- launch the queryd listener
554 _ <- forkIO $ runQueryD query_data (configReader cref)
555 -- and finally enter the responder loop
556 forever $ listener s hmac (responder cref)