1 {-# LANGUAGE BangPatterns #-}
3 {-| Implementation of the Ganeti confd server functionality.
9 Copyright (C) 2011, 2012 Google Inc.
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
16 This program is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
28 module Ganeti.Confd.Server
32 import Control.Concurrent
33 import Control.Exception
34 import Control.Monad (forever)
35 import qualified Data.ByteString as B
38 import qualified Data.Map as M
39 import qualified Network.Socket as S
40 import Prelude hiding (catch)
41 import System.Posix.Files
42 import System.Posix.Types
44 import qualified Text.JSON as J
48 import Ganeti.HTools.JSON
49 import Ganeti.HTools.Types
50 import Ganeti.HTools.Utils
56 import qualified Ganeti.Constants as C
58 -- * Types and constants definitions
60 -- | What we store as configuration.
61 type CRef = IORef (Result (ConfigData, LinkIpMap))
63 -- | File stat identifier.
64 type FStat = (EpochTime, FileID, FileOffset)
66 -- | Null 'FStat' value.
68 nullFStat = (-1, -1, -1)
70 -- | A small type alias for readability.
71 type StatusAnswer = (ConfdReplyStatus, J.JSValue)
73 -- | Reload model data type.
74 data ReloadModel = ReloadNotify -- ^ We are using notifications
75 | ReloadPoll Int -- ^ We are using polling
78 -- | Server state data type.
79 data ServerState = ServerState
80 { reloadModel :: ReloadModel
81 , reloadTime :: Integer
82 , reloadFStat :: FStat
85 -- | Maximum no-reload poll rounds before reverting to inotify.
86 maxIdlePollRounds :: Int
89 -- | Reload timeout in microseconds.
90 configReloadTimeout :: Int
91 configReloadTimeout = C.confdConfigReloadTimeout * 1000000
93 -- | Ratelimit timeout in microseconds.
94 configReloadRatelimit :: Int
95 configReloadRatelimit = C.confdConfigReloadRatelimit * 1000000
97 -- | Initial poll round.
98 initialPoll :: ReloadModel
99 initialPoll = ReloadPoll 0
101 -- | Initial server state.
102 initialState :: ServerState
103 initialState = ServerState initialPoll 0 nullFStat
105 -- | Reload status data type.
106 data ConfigReload = ConfigToDate -- ^ No need to reload
107 | ConfigReloaded -- ^ Configuration reloaded
108 | ConfigIOError -- ^ Error during configuration reload
110 -- | Unknown entry standard response.
111 queryUnknownEntry :: StatusAnswer
112 queryUnknownEntry = (ReplyStatusError, J.showJSON ConfdErrorUnknownEntry)
115 -- | Internal error standard response.
116 queryInternalError :: StatusAnswer
117 queryInternalError = (ReplyStatusError, J.showJSON ConfdErrorInternal)
120 -- | Argument error standard response.
121 queryArgumentError :: StatusAnswer
122 queryArgumentError = (ReplyStatusError, J.showJSON ConfdErrorArgument)
124 -- | Returns the current time.
125 getCurrentTime :: IO Integer
127 TOD ctime _ <- getClockTime
130 -- * Confd base functionality
132 -- | Returns the HMAC key.
133 getClusterHmac :: IO HashKey
134 getClusterHmac = fmap B.unpack $ B.readFile C.confdHmacKey
136 -- | Computes the node role.
137 nodeRole :: ConfigData -> String -> Result ConfdNodeRole
139 let cmaster = clusterMasterNode . configCluster $ cfg
140 mnode = M.lookup name . configNodes $ cfg
142 Nothing -> Bad "Node not found"
143 Just node | cmaster == name -> Ok NodeRoleMaster
144 | nodeDrained node -> Ok NodeRoleDrained
145 | nodeOffline node -> Ok NodeRoleOffline
146 | nodeMasterCandidate node -> Ok NodeRoleCandidate
147 _ -> Ok NodeRoleRegular
149 -- | Does an instance ip -> instance -> primary node -> primary ip
151 getNodePipByInstanceIp :: ConfigData
156 getNodePipByInstanceIp cfg linkipmap link instip =
157 case M.lookup instip (M.findWithDefault M.empty link linkipmap) of
158 Nothing -> queryUnknownEntry
160 case getInstPrimaryNode cfg instname of
161 Bad _ -> queryUnknownEntry -- either instance or node not found
162 Ok node -> (ReplyStatusOk, J.showJSON (nodePrimaryIp node))
164 -- | Builds the response to a given query.
165 buildResponse :: (ConfigData, LinkIpMap) -> ConfdRequest -> Result StatusAnswer
166 buildResponse (cfg, _) (ConfdRequest { confdRqType = ReqPing }) =
167 return (ReplyStatusOk, J.showJSON (configVersion cfg))
169 buildResponse cdata req@(ConfdRequest { confdRqType = ReqClusterMaster }) =
170 case confdRqQuery req of
171 EmptyQuery -> return (ReplyStatusOk, J.showJSON master_name)
172 PlainQuery _ -> return queryArgumentError
174 mnode <- getNode cfg master_name
175 let fvals =map (\field -> case field of
176 ReqFieldName -> master_name
177 ReqFieldIp -> clusterMasterIp cluster
178 ReqFieldMNodePip -> nodePrimaryIp mnode
179 ) (confdReqQFields reqq)
180 return (ReplyStatusOk, J.showJSON fvals)
181 where master_name = clusterMasterNode cluster
182 cluster = configCluster cfg
185 buildResponse cdata req@(ConfdRequest { confdRqType = ReqNodeRoleByName }) = do
186 node_name <- case confdRqQuery req of
187 PlainQuery str -> return str
188 _ -> fail $ "Invalid query type " ++ show (confdRqQuery req)
189 role <- nodeRole (fst cdata) node_name
190 return (ReplyStatusOk, J.showJSON role)
192 buildResponse cdata (ConfdRequest { confdRqType = ReqNodePipList }) =
193 -- note: we use foldlWithKey because that's present accross more
194 -- versions of the library
195 return (ReplyStatusOk, J.showJSON $
196 M.foldlWithKey (\accu _ n -> nodePrimaryIp n:accu) []
197 (configNodes (fst cdata)))
199 buildResponse cdata (ConfdRequest { confdRqType = ReqMcPipList }) =
200 -- note: we use foldlWithKey because that's present accross more
201 -- versions of the library
202 return (ReplyStatusOk, J.showJSON $
203 M.foldlWithKey (\accu _ n -> if nodeMasterCandidate n
204 then nodePrimaryIp n:accu
206 (configNodes (fst cdata)))
208 buildResponse (cfg, linkipmap)
209 req@(ConfdRequest { confdRqType = ReqInstIpsList }) = do
210 link <- case confdRqQuery req of
211 PlainQuery str -> return str
212 EmptyQuery -> return (getDefaultNicLink cfg)
213 _ -> fail "Invalid query type"
214 return (ReplyStatusOk, J.showJSON $ getInstancesIpByLink linkipmap link)
216 buildResponse cdata (ConfdRequest { confdRqType = ReqNodePipByInstPip
217 , confdRqQuery = DictQuery query}) =
218 let (cfg, linkipmap) = cdata
219 link = maybe (getDefaultNicLink cfg) id (confdReqQLink query)
220 in case confdReqQIp query of
221 Just ip -> return $ getNodePipByInstanceIp cfg linkipmap link ip
222 Nothing -> return (ReplyStatusOk,
224 map (getNodePipByInstanceIp cfg linkipmap link)
225 (confdReqQIpList query))
227 buildResponse _ (ConfdRequest { confdRqType = ReqNodePipByInstPip }) =
228 return queryArgumentError
230 -- | Parses a signed request.
231 parseRequest :: HashKey -> String -> Result (String, String, ConfdRequest)
232 parseRequest key str = do
233 (SignedMessage hmac msg salt) <- fromJResult "parsing request" $ J.decode str
234 req <- if verifyMac key (Just salt) msg hmac
235 then fromJResult "parsing message" $ J.decode msg
236 else Bad "HMAC verification failed"
237 return (salt, msg, req)
239 -- | Creates a ConfdReply from a given answer.
240 serializeResponse :: Result StatusAnswer -> ConfdReply
241 serializeResponse r =
242 let (status, result) = case r of
243 Bad err -> (ReplyStatusError, J.showJSON err)
244 Ok (code, val) -> (code, val)
245 in ConfdReply { confdReplyProtocol = 1
246 , confdReplyStatus = status
247 , confdReplyAnswer = result
248 , confdReplySerial = 0 }
250 -- | Signs a message with a given key and salt.
251 signMessage :: HashKey -> String -> String -> SignedMessage
252 signMessage key salt msg =
253 SignedMessage { signedMsgMsg = msg
254 , signedMsgSalt = salt
255 , signedMsgHmac = hmac
257 where hmac = computeMac key (Just salt) msg
259 -- * Configuration handling
261 -- ** Helper functions
263 -- | Helper function for logging transition into polling mode.
264 moveToPolling :: String -> INotify -> FilePath -> CRef -> MVar ServerState
266 moveToPolling msg inotify path cref mstate = do
267 logInfo $ "Moving to polling mode: " ++ msg
268 let inotiaction = addNotifier inotify path cref mstate
269 _ <- forkIO $ onReloadTimer inotiaction path cref mstate
272 -- | Helper function for logging transition into inotify mode.
273 moveToNotify :: IO ReloadModel
275 logInfo "Moving to inotify mode"
278 -- ** Configuration loading
280 -- | (Re)loads the configuration.
281 updateConfig :: FilePath -> CRef -> IO ()
282 updateConfig path r = do
283 newcfg <- loadConfig path
284 let !newdata = case newcfg of
285 Ok !cfg -> Ok (cfg, buildLinkIpInstnameMap cfg)
286 Bad _ -> Bad "Cannot load configuration"
289 Ok cfg -> logInfo ("Loaded new config, serial " ++
290 show (configSerial cfg))
291 Bad msg -> logError $ "Failed to load config: " ++ msg
294 -- | Wrapper over 'updateConfig' that handles IO errors.
295 safeUpdateConfig :: FilePath -> FStat -> CRef -> IO (FStat, ConfigReload)
296 safeUpdateConfig path oldfstat cref = do
298 nt <- needsReload oldfstat path
300 Nothing -> return (oldfstat, ConfigToDate)
302 updateConfig path cref
303 return (nt', ConfigReloaded)
305 let msg = "Failure during configuration update: " ++
307 writeIORef cref (Bad msg)
308 return (nullFStat, ConfigIOError)
311 -- | Computes the file cache data from a FileStatus structure.
312 buildFileStatus :: FileStatus -> FStat
313 buildFileStatus ofs =
314 let modt = modificationTime ofs
317 in (modt, inum, fsize)
319 -- | Wrapper over 'buildFileStatus'. This reads the data from the
320 -- filesystem and then builds our cache structure.
321 getFStat :: FilePath -> IO FStat
322 getFStat p = getFileStatus p >>= (return . buildFileStatus)
324 -- | Check if the file needs reloading
325 needsReload :: FStat -> FilePath -> IO (Maybe FStat)
326 needsReload oldstat path = do
327 newstat <- getFStat path
328 return $ if newstat /= oldstat
332 -- ** Watcher threads
335 -- We have three threads/functions that can mutate the server state:
337 -- 1. the long-interval watcher ('onTimeoutTimer')
339 -- 2. the polling watcher ('onReloadTimer')
341 -- 3. the inotify event handler ('onInotify')
343 -- All of these will mutate the server state under 'modifyMVar' or
344 -- 'modifyMVar_', so that server transitions are more or less
345 -- atomic. The inotify handler remains active during polling mode, but
346 -- checks for polling mode and doesn't do anything in this case (this
347 -- check is needed even if we would unregister the event handler due
348 -- to how events are serialised).
350 -- | Long-interval reload watcher.
352 -- This is on top of the inotify-based triggered reload.
353 onTimeoutTimer :: IO Bool -> FilePath -> CRef -> MVar ServerState -> IO ()
354 onTimeoutTimer inotiaction path cref state = do
355 threadDelay configReloadTimeout
356 modifyMVar_ state (onTimeoutInner path cref)
358 onTimeoutTimer inotiaction path cref state
360 -- | Inner onTimeout handler.
362 -- This mutates the server state under a modifyMVar_ call. It never
363 -- changes the reload model, just does a safety reload and tried to
364 -- re-establish the inotify watcher.
365 onTimeoutInner :: FilePath -> CRef -> ServerState -> IO ServerState
366 onTimeoutInner path cref state = do
367 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
368 return state { reloadFStat = newfstat }
370 -- | Short-interval (polling) reload watcher.
372 -- This is only active when we're in polling mode; it will
373 -- automatically exit when it detects that the state has changed to
375 onReloadTimer :: IO Bool -> FilePath -> CRef -> MVar ServerState -> IO ()
376 onReloadTimer inotiaction path cref state = do
377 continue <- modifyMVar state (onReloadInner inotiaction path cref)
380 threadDelay configReloadRatelimit
381 onReloadTimer inotiaction path cref state
382 else -- the inotify watch has been re-established, we can exit
385 -- | Inner onReload handler.
387 -- This again mutates the state under a modifyMVar call, and also
388 -- returns whether the thread should continue or not.
389 onReloadInner :: IO Bool -> FilePath -> CRef -> ServerState
390 -> IO (ServerState, Bool)
391 onReloadInner _ _ _ state@(ServerState { reloadModel = ReloadNotify } ) =
392 return (state, False)
393 onReloadInner inotiaction path cref
394 state@(ServerState { reloadModel = ReloadPoll pround } ) = do
395 (newfstat, reload) <- safeUpdateConfig path (reloadFStat state) cref
396 let state' = state { reloadFStat = newfstat }
397 -- compute new poll model based on reload data; however, failure to
398 -- re-establish the inotifier means we stay on polling
399 newmode <- case reload of
401 if pround >= maxIdlePollRounds
402 then do -- try to switch to notify
403 result <- inotiaction
406 else return initialPoll
407 else return (ReloadPoll (pround + 1))
408 _ -> return initialPoll
409 let continue = case newmode of
410 ReloadNotify -> False
412 return (state' { reloadModel = newmode }, continue)
414 -- | Setup inotify watcher.
416 -- This tries to setup the watch descriptor; in case of any IO errors,
417 -- it will return False.
418 addNotifier :: INotify -> FilePath -> CRef -> MVar ServerState -> IO Bool
419 addNotifier inotify path cref mstate = do
420 catch (addWatch inotify [CloseWrite] path
421 (onInotify inotify path cref mstate) >> return True)
422 (\e -> const (return False) (e::IOError))
424 -- | Inotify event handler.
425 onInotify :: INotify -> String -> CRef -> MVar ServerState -> Event -> IO ()
426 onInotify inotify path cref mstate Ignored = do
427 logInfo "File lost, trying to re-establish notifier"
428 modifyMVar_ mstate $ \state -> do
429 result <- addNotifier inotify path cref mstate
430 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
431 let state' = state { reloadFStat = newfstat }
433 then return state' -- keep notify
435 mode <- moveToPolling "cannot re-establish inotify watch" inotify
437 return state' { reloadModel = mode }
439 onInotify inotify path cref mstate _ = do
440 modifyMVar_ mstate $ \state ->
441 if (reloadModel state == ReloadNotify)
443 ctime <- getCurrentTime
444 (newfstat, _) <- safeUpdateConfig path (reloadFStat state) cref
445 let state' = state { reloadFStat = newfstat, reloadTime = ctime }
446 if abs (reloadTime state - ctime) <
447 fromIntegral C.confdConfigReloadRatelimit
449 mode <- moveToPolling "too many reloads" inotify path cref mstate
450 return state' { reloadModel = mode }
454 -- ** Client input/output handlers
456 -- | Main loop for a given client.
457 responder :: CRef -> S.Socket -> HashKey -> String -> S.SockAddr -> IO ()
458 responder cfgref socket hmac msg peer = do
459 ctime <- getCurrentTime
460 case parseMessage hmac msg ctime of
461 Ok (origmsg, rq) -> do
462 logDebug $ "Processing request: " ++ origmsg
463 mcfg <- readIORef cfgref
464 let response = respondInner mcfg hmac rq
465 _ <- S.sendTo socket response peer
467 Bad err -> logInfo $ "Failed to parse incoming message: " ++ err
470 -- | Mesage parsing. This can either result in a good, valid message,
471 -- or fail in the Result monad.
472 parseMessage :: HashKey -> String -> Integer
473 -> Result (String, ConfdRequest)
474 parseMessage hmac msg curtime = do
475 (salt, origmsg, request) <- parseRequest hmac msg
476 ts <- tryRead "Parsing timestamp" salt::Result Integer
477 if (abs (ts - curtime) > (fromIntegral C.confdMaxClockSkew))
478 then fail "Too old/too new timestamp or clock skew"
479 else return (origmsg, request)
481 -- | Inner helper function for a given client. This generates the
482 -- final encoded message (as a string), ready to be sent out to the
484 respondInner :: Result (ConfigData, LinkIpMap) -> HashKey
485 -> ConfdRequest -> String
486 respondInner cfg hmac rq =
487 let rsalt = confdRqRsalt rq
488 innermsg = serializeResponse (cfg >>= flip buildResponse rq)
489 innerserialised = J.encodeStrict innermsg
490 outermsg = signMessage hmac rsalt innerserialised
491 outerserialised = confdMagicFourcc ++ J.encodeStrict outermsg
494 -- | Main listener loop.
495 listener :: S.Socket -> HashKey
496 -> (S.Socket -> HashKey -> String -> S.SockAddr -> IO ())
498 listener s hmac resp = do
499 (msg, _, peer) <- S.recvFrom s 4096
500 if confdMagicFourcc `isPrefixOf` msg
501 then (forkIO $ resp s hmac (drop 4 msg) peer) >> return ()
502 else logDebug "Invalid magic code!" >> return ()
506 main :: DaemonOptions -> IO ()
508 parseresult <- parseAddress opts C.defaultConfdPort
509 (af_family, bindaddr) <- exitIfBad "parsing bind address" parseresult
510 s <- S.socket af_family S.Datagram S.defaultProtocol
511 S.bindSocket s bindaddr
512 cref <- newIORef (Bad "Configuration not yet loaded")
513 statemvar <- newMVar initialState
514 hmac <- getClusterHmac
516 inotify <- initINotify
517 let inotiaction = addNotifier inotify C.clusterConfFile cref statemvar
518 -- fork the timeout timer
519 _ <- forkIO $ onTimeoutTimer inotiaction C.clusterConfFile cref statemvar
520 -- fork the polling timer
521 _ <- forkIO $ onReloadTimer inotiaction C.clusterConfFile cref statemvar
522 -- and finally enter the responder loop
523 forever $ listener s hmac (responder cref)