1 {-# LANGUAGE BangPatterns #-}
3 {-| Implementation of the Ganeti confd server functionality.
9 Copyright (C) 2011, 2012, 2013 Google Inc.
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 module Ganeti.Confd.Server
34 import Control.Concurrent
35 import Control.Exception
36 import Control.Monad (forever, liftM, unless)
39 import qualified Data.Map as M
40 import Data.Maybe (fromMaybe)
41 import qualified Network.Socket as S
44 import System.Posix.Files
45 import System.Posix.Types
46 import qualified Text.JSON as J
49 import Ganeti.BasicTypes
54 import Ganeti.Confd.Types
55 import Ganeti.Confd.Utils
59 import qualified Ganeti.Constants as C
60 import qualified Ganeti.Path as Path
61 import Ganeti.Query.Server (prepQueryD, runQueryD)
64 -- * Types and constants definitions
66 -- | What we store as configuration.
67 type CRef = IORef (Result (ConfigData, LinkIpMap))
69 -- | File stat identifier.
70 type FStat = (EpochTime, FileID, FileOffset)
72 -- | Null 'FStat' value.
74 nullFStat = (-1, -1, -1)
76 -- | A small type alias for readability.
77 type StatusAnswer = (ConfdReplyStatus, J.JSValue)
79 -- | Reload model data type.
80 data ReloadModel = ReloadNotify -- ^ We are using notifications
81 | ReloadPoll Int -- ^ We are using polling
84 -- | Server state data type.
85 data ServerState = ServerState
86 { reloadModel :: ReloadModel
87 , reloadTime :: Integer -- ^ Reload time (epoch) in microseconds
88 , reloadFStat :: FStat
91 -- | Maximum no-reload poll rounds before reverting to inotify.
92 maxIdlePollRounds :: Int
95 -- | Reload timeout in microseconds.
97 watchInterval = C.confdConfigReloadTimeout * 1000000
99 -- | Ratelimit timeout in microseconds.
101 pollInterval = C.confdConfigReloadRatelimit
103 -- | Ratelimit timeout in microseconds, as an 'Integer'.
104 reloadRatelimit :: Integer
105 reloadRatelimit = fromIntegral C.confdConfigReloadRatelimit
107 -- | Initial poll round.
108 initialPoll :: ReloadModel
109 initialPoll = ReloadPoll 0
111 -- | Reload status data type.
112 data ConfigReload = ConfigToDate -- ^ No need to reload
113 | ConfigReloaded -- ^ Configuration reloaded
114 | ConfigIOError -- ^ Error during configuration reload
117 -- | Unknown entry standard response.
118 queryUnknownEntry :: StatusAnswer
119 queryUnknownEntry = (ReplyStatusError, J.showJSON ConfdErrorUnknownEntry)
122 -- | Internal error standard response.
123 queryInternalError :: StatusAnswer
124 queryInternalError = (ReplyStatusError, J.showJSON ConfdErrorInternal)
127 -- | Argument error standard response.
128 queryArgumentError :: StatusAnswer
129 queryArgumentError = (ReplyStatusError, J.showJSON ConfdErrorArgument)
131 -- | Converter from specific error to a string format.
132 gntErrorToResult :: ErrorResult a -> Result a
133 gntErrorToResult (Bad err) = Bad (show err)
134 gntErrorToResult (Ok x) = Ok x
136 -- * Confd base functionality
138 -- | Computes the node role.
139 nodeRole :: ConfigData -> String -> Result ConfdNodeRole
141 let cmaster = clusterMasterNode . configCluster $ cfg
142 mnode = M.lookup name . fromContainer . configNodes $ cfg
144 Nothing -> Bad "Node not found"
145 Just node | cmaster == name -> Ok NodeRoleMaster
146 | nodeDrained node -> Ok NodeRoleDrained
147 | nodeOffline node -> Ok NodeRoleOffline
148 | nodeMasterCandidate node -> Ok NodeRoleCandidate
149 _ -> Ok NodeRoleRegular
151 -- | Does an instance ip -> instance -> primary node -> primary ip
153 getNodePipByInstanceIp :: ConfigData
158 getNodePipByInstanceIp cfg linkipmap link instip =
159 case M.lookup instip (M.findWithDefault M.empty link linkipmap) of
160 Nothing -> queryUnknownEntry
162 case getInstPrimaryNode cfg instname of
163 Bad _ -> queryUnknownEntry -- either instance or node not found
164 Ok node -> (ReplyStatusOk, J.showJSON (nodePrimaryIp node))
166 -- | Builds the response to a given query.
167 buildResponse :: (ConfigData, LinkIpMap) -> ConfdRequest -> Result StatusAnswer
168 buildResponse (cfg, _) (ConfdRequest { confdRqType = ReqPing }) =
169 return (ReplyStatusOk, J.showJSON (configVersion cfg))
171 buildResponse cdata req@(ConfdRequest { confdRqType = ReqClusterMaster }) =
172 case confdRqQuery req of
173 EmptyQuery -> return (ReplyStatusOk, J.showJSON master_name)
174 PlainQuery _ -> return queryArgumentError
176 mnode <- gntErrorToResult $ getNode cfg master_name
177 let fvals = map (\field -> case field of
178 ReqFieldName -> master_name
179 ReqFieldIp -> clusterMasterIp cluster
180 ReqFieldMNodePip -> nodePrimaryIp mnode
181 ) (confdReqQFields reqq)
182 return (ReplyStatusOk, J.showJSON fvals)
183 where master_name = clusterMasterNode cluster
184 cluster = configCluster cfg
187 buildResponse cdata req@(ConfdRequest { confdRqType = ReqNodeRoleByName }) = do
188 node_name <- case confdRqQuery req of
189 PlainQuery str -> return str
190 _ -> fail $ "Invalid query type " ++ show (confdRqQuery req)
191 role <- nodeRole (fst cdata) node_name
192 return (ReplyStatusOk, J.showJSON role)
194 buildResponse cdata (ConfdRequest { confdRqType = ReqNodePipList }) =
195 -- note: we use foldlWithKey because that's present accross more
196 -- versions of the library
197 return (ReplyStatusOk, J.showJSON $
198 M.foldlWithKey (\accu _ n -> nodePrimaryIp n:accu) []
199 (fromContainer . configNodes . fst $ cdata))
201 buildResponse cdata (ConfdRequest { confdRqType = ReqMcPipList }) =
202 -- note: we use foldlWithKey because that's present accross more
203 -- versions of the library
204 return (ReplyStatusOk, J.showJSON $
205 M.foldlWithKey (\accu _ n -> if nodeMasterCandidate n
206 then nodePrimaryIp n:accu
208 (fromContainer . configNodes . fst $ cdata))
210 buildResponse (cfg, linkipmap)
211 req@(ConfdRequest { confdRqType = ReqInstIpsList }) = do
212 link <- case confdRqQuery req of
213 PlainQuery str -> return str
214 EmptyQuery -> return (getDefaultNicLink cfg)
215 _ -> fail "Invalid query type"
216 return (ReplyStatusOk, J.showJSON $ getInstancesIpByLink linkipmap link)
218 buildResponse cdata (ConfdRequest { confdRqType = ReqNodePipByInstPip
219 , confdRqQuery = DictQuery query}) =
220 let (cfg, linkipmap) = cdata
221 link = fromMaybe (getDefaultNicLink cfg) (confdReqQLink query)
222 in case confdReqQIp query of
223 Just ip -> return $ getNodePipByInstanceIp cfg linkipmap link ip
224 Nothing -> return (ReplyStatusOk,
226 map (getNodePipByInstanceIp cfg linkipmap link)
227 (confdReqQIpList query))
229 buildResponse _ (ConfdRequest { confdRqType = ReqNodePipByInstPip }) =
230 return queryArgumentError
232 buildResponse cdata req@(ConfdRequest { confdRqType = ReqNodeDrbd }) = do
234 node_name <- case confdRqQuery req of
235 PlainQuery str -> return str
236 _ -> fail $ "Invalid query type " ++ show (confdRqQuery req)
237 node <- gntErrorToResult $ getNode cfg node_name
238 let minors = concatMap (getInstMinorsForNode (nodeName node)) .
239 M.elems . fromContainer . configInstances $ cfg
240 encoded = [J.JSArray [J.showJSON a, J.showJSON b, J.showJSON c,
241 J.showJSON d, J.showJSON e, J.showJSON f] |
242 (a, b, c, d, e, f) <- minors]
243 return (ReplyStatusOk, J.showJSON encoded)
245 buildResponse cdata req@(ConfdRequest { confdRqType = ReqNodeInstances }) = do
247 node_name <- case confdRqQuery req of
248 PlainQuery str -> return str
249 _ -> fail $ "Invalid query type " ++ show (confdRqQuery req)
250 let instances = getNodeInstances cfg node_name
251 return (ReplyStatusOk, J.showJSON instances)
253 -- | Creates a ConfdReply from a given answer.
254 serializeResponse :: Result StatusAnswer -> ConfdReply
255 serializeResponse r =
256 let (status, result) = case r of
257 Bad err -> (ReplyStatusError, J.showJSON err)
258 Ok (code, val) -> (code, val)
259 in ConfdReply { confdReplyProtocol = 1
260 , confdReplyStatus = status
261 , confdReplyAnswer = result
262 , confdReplySerial = 0 }
264 -- * Configuration handling
266 -- ** Helper functions
268 -- | Helper function for logging transition into polling mode.
269 moveToPolling :: String -> INotify -> FilePath -> CRef -> MVar ServerState
271 moveToPolling msg inotify path cref mstate = do
272 logInfo $ "Moving to polling mode: " ++ msg
273 let inotiaction = addNotifier inotify path cref mstate
274 _ <- forkIO $ onPollTimer inotiaction path cref mstate
277 -- | Helper function for logging transition into inotify mode.
278 moveToNotify :: IO ReloadModel
280 logInfo "Moving to inotify mode"
283 -- ** Configuration loading
285 -- | (Re)loads the configuration.
286 updateConfig :: FilePath -> CRef -> IO ()
287 updateConfig path r = do
288 newcfg <- loadConfig path
289 let !newdata = case newcfg of
290 Ok !cfg -> Ok (cfg, buildLinkIpInstnameMap cfg)
291 Bad _ -> Bad "Cannot load configuration"
294 Ok cfg -> logInfo ("Loaded new config, serial " ++
295 show (configSerial cfg))
296 Bad msg -> logError $ "Failed to load config: " ++ msg
299 -- | Wrapper over 'updateConfig' that handles IO errors.
300 safeUpdateConfig :: FilePath -> FStat -> CRef -> IO (FStat, ConfigReload)
301 safeUpdateConfig path oldfstat cref =
302 Control.Exception.catch
304 nt <- needsReload oldfstat path
306 Nothing -> return (oldfstat, ConfigToDate)
308 updateConfig path cref
309 return (nt', ConfigReloaded)
311 let msg = "Failure during configuration update: " ++
313 writeIORef cref (Bad msg)
314 return (nullFStat, ConfigIOError)
317 -- | Computes the file cache data from a FileStatus structure.
318 buildFileStatus :: FileStatus -> FStat
319 buildFileStatus ofs =
320 let modt = modificationTime ofs
323 in (modt, inum, fsize)
325 -- | Wrapper over 'buildFileStatus'. This reads the data from the
326 -- filesystem and then builds our cache structure.
327 getFStat :: FilePath -> IO FStat
328 getFStat p = liftM buildFileStatus (getFileStatus p)
330 -- | Check if the file needs reloading
331 needsReload :: FStat -> FilePath -> IO (Maybe FStat)
332 needsReload oldstat path = do
333 newstat <- getFStat path
334 return $ if newstat /= oldstat
338 -- ** Watcher threads
341 -- We have three threads/functions that can mutate the server state:
343 -- 1. the long-interval watcher ('onWatcherTimer')
345 -- 2. the polling watcher ('onPollTimer')
347 -- 3. the inotify event handler ('onInotify')
349 -- All of these will mutate the server state under 'modifyMVar' or
350 -- 'modifyMVar_', so that server transitions are more or less
351 -- atomic. The inotify handler remains active during polling mode, but
352 -- checks for polling mode and doesn't do anything in this case (this
353 -- check is needed even if we would unregister the event handler due
354 -- to how events are serialised).
356 -- | Long-interval reload watcher.
358 -- This is on top of the inotify-based triggered reload.
359 onWatcherTimer :: IO Bool -> FilePath -> CRef -> MVar ServerState -> IO ()
360 onWatcherTimer inotiaction path cref state = do
361 threadDelay watchInterval
362 logDebug "Watcher timer fired"
363 modifyMVar_ state (onWatcherInner path cref)
365 onWatcherTimer inotiaction path cref state
367 -- | Inner onWatcher handler.
369 -- This mutates the server state under a modifyMVar_ call. It never
370 -- changes the reload model, just does a safety reload and tried to
371 -- re-establish the inotify watcher.
372 onWatcherInner :: FilePath -> CRef -> ServerState -> IO ServerState
373 onWatcherInner path cref state = do
374 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
375 return state { reloadFStat = newfstat }
377 -- | Short-interval (polling) reload watcher.
379 -- This is only active when we're in polling mode; it will
380 -- automatically exit when it detects that the state has changed to
382 onPollTimer :: IO Bool -> FilePath -> CRef -> MVar ServerState -> IO ()
383 onPollTimer inotiaction path cref state = do
384 threadDelay pollInterval
385 logDebug "Poll timer fired"
386 continue <- modifyMVar state (onPollInner inotiaction path cref)
388 then onPollTimer inotiaction path cref state
389 else logDebug "Inotify watch active, polling thread exiting"
391 -- | Inner onPoll handler.
393 -- This again mutates the state under a modifyMVar call, and also
394 -- returns whether the thread should continue or not.
395 onPollInner :: IO Bool -> FilePath -> CRef -> ServerState
396 -> IO (ServerState, Bool)
397 onPollInner _ _ _ state@(ServerState { reloadModel = ReloadNotify } ) =
398 return (state, False)
399 onPollInner inotiaction path cref
400 state@(ServerState { reloadModel = ReloadPoll pround } ) = do
401 (newfstat, reload) <- safeUpdateConfig path (reloadFStat state) cref
402 let state' = state { reloadFStat = newfstat }
403 -- compute new poll model based on reload data; however, failure to
404 -- re-establish the inotifier means we stay on polling
405 newmode <- case reload of
407 if pround >= maxIdlePollRounds
408 then do -- try to switch to notify
409 result <- inotiaction
412 else return initialPoll
413 else return (ReloadPoll (pround + 1))
414 _ -> return initialPoll
415 let continue = case newmode of
416 ReloadNotify -> False
418 return (state' { reloadModel = newmode }, continue)
420 -- the following hint is because hlint doesn't understand our const
421 -- (return False) is so that we can give a signature to 'e'
422 {-# ANN addNotifier "HLint: ignore Evaluate" #-}
423 -- | Setup inotify watcher.
425 -- This tries to setup the watch descriptor; in case of any IO errors,
426 -- it will return False.
427 addNotifier :: INotify -> FilePath -> CRef -> MVar ServerState -> IO Bool
428 addNotifier inotify path cref mstate =
429 Control.Exception.catch
430 (addWatch inotify [CloseWrite] path
431 (onInotify inotify path cref mstate) >> return True)
432 (\e -> const (return False) (e::IOError))
434 -- | Inotify event handler.
435 onInotify :: INotify -> String -> CRef -> MVar ServerState -> Event -> IO ()
436 onInotify inotify path cref mstate Ignored = do
437 logDebug "File lost, trying to re-establish notifier"
438 modifyMVar_ mstate $ \state -> do
439 result <- addNotifier inotify path cref mstate
440 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
441 let state' = state { reloadFStat = newfstat }
443 then return state' -- keep notify
445 mode <- moveToPolling "cannot re-establish inotify watch" inotify
447 return state' { reloadModel = mode }
449 onInotify inotify path cref mstate _ =
450 modifyMVar_ mstate $ \state ->
451 if reloadModel state == ReloadNotify
453 ctime <- getCurrentTimeUSec
454 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
455 let state' = state { reloadFStat = newfstat, reloadTime = ctime }
456 if abs (reloadTime state - ctime) < reloadRatelimit
458 mode <- moveToPolling "too many reloads" inotify path cref mstate
459 return state' { reloadModel = mode }
463 -- ** Client input/output handlers
465 -- | Main loop for a given client.
466 responder :: CRef -> S.Socket -> HashKey -> String -> S.SockAddr -> IO ()
467 responder cfgref socket hmac msg peer = do
468 ctime <- getCurrentTime
469 case parseRequest hmac msg ctime of
470 Ok (origmsg, rq) -> do
471 logDebug $ "Processing request: " ++ rStripSpace origmsg
472 mcfg <- readIORef cfgref
473 let response = respondInner mcfg hmac rq
474 _ <- S.sendTo socket response peer
476 Bad err -> logInfo $ "Failed to parse incoming message: " ++ err
479 -- | Inner helper function for a given client. This generates the
480 -- final encoded message (as a string), ready to be sent out to the
482 respondInner :: Result (ConfigData, LinkIpMap) -> HashKey
483 -> ConfdRequest -> String
484 respondInner cfg hmac rq =
485 let rsalt = confdRqRsalt rq
486 innermsg = serializeResponse (cfg >>= flip buildResponse rq)
487 innerserialised = J.encodeStrict innermsg
488 outermsg = signMessage hmac rsalt innerserialised
489 outerserialised = confdMagicFourcc ++ J.encodeStrict outermsg
492 -- | Main listener loop.
493 listener :: S.Socket -> HashKey
494 -> (S.Socket -> HashKey -> String -> S.SockAddr -> IO ())
496 listener s hmac resp = do
497 (msg, _, peer) <- S.recvFrom s 4096
498 if confdMagicFourcc `isPrefixOf` msg
499 then forkIO (resp s hmac (drop 4 msg) peer) >> return ()
500 else logDebug "Invalid magic code!" >> return ()
503 -- | Extract the configuration from our IORef.
504 configReader :: CRef -> IO (Result ConfigData)
505 configReader cref = do
506 cdata <- readIORef cref
507 return $ liftM fst cdata
509 -- | Type alias for prepMain results
510 type PrepResult = (S.Socket, (FilePath, S.Socket),
511 IORef (Result (ConfigData, LinkIpMap)))
513 -- | Check function for confd.
514 checkMain :: CheckFn (S.Family, S.SockAddr)
516 parseresult <- parseAddress opts C.defaultConfdPort
519 hPutStrLn stderr $ "parsing bind address: " ++ msg
520 return . Left $ ExitFailure 1
521 Ok v -> return $ Right v
523 -- | Prepare function for confd.
524 prepMain :: PrepFn (S.Family, S.SockAddr) PrepResult
525 prepMain _ (af_family, bindaddr) = do
526 s <- S.socket af_family S.Datagram S.defaultProtocol
527 S.bindSocket s bindaddr
528 -- prepare the queryd listener
529 query_data <- prepQueryD Nothing
530 cref <- newIORef (Bad "Configuration not yet loaded")
531 return (s, query_data, cref)
534 main :: MainFn (S.Family, S.SockAddr) PrepResult
535 main _ _ (s, query_data, cref) = do
537 inotify <- initINotify
538 -- try to load the configuration, if possible
539 conf_file <- Path.clusterConfFile
540 (fstat, reloaded) <- safeUpdateConfig conf_file nullFStat cref
541 ctime <- getCurrentTime
542 statemvar <- newMVar $ ServerState ReloadNotify ctime fstat
543 let inotiaction = addNotifier inotify conf_file cref statemvar
544 has_inotify <- if reloaded == ConfigReloaded
548 then logInfo "Starting up in inotify mode"
550 -- inotify was not enabled, we need to update the reload model
551 logInfo "Starting up in polling mode"
552 modifyMVar_ statemvar
553 (\state -> return state { reloadModel = initialPoll })
554 hmac <- getClusterHmac
555 -- fork the timeout timer
556 _ <- forkIO $ onWatcherTimer inotiaction conf_file cref statemvar
557 -- fork the polling timer
558 unless has_inotify $ do
559 _ <- forkIO $ onPollTimer inotiaction conf_file cref statemvar
561 -- launch the queryd listener
562 _ <- forkIO $ runQueryD query_data (configReader cref)
563 -- and finally enter the responder loop
564 forever $ listener s hmac (responder cref)