root / src / Ganeti / Query / Server.hs @ 521136df
History | View | Annotate | Download (22.4 kB)
1 |
{-| Implementation of the Ganeti Query2 server. |
---|---|
2 |
|
3 |
-} |
4 |
|
5 |
{- |
6 |
|
7 |
Copyright (C) 2012, 2013, 2014 Google Inc. |
8 |
|
9 |
This program is free software; you can redistribute it and/or modify |
10 |
it under the terms of the GNU General Public License as published by |
11 |
the Free Software Foundation; either version 2 of the License, or |
12 |
(at your option) any later version. |
13 |
|
14 |
This program is distributed in the hope that it will be useful, but |
15 |
WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
17 |
General Public License for more details. |
18 |
|
19 |
You should have received a copy of the GNU General Public License |
20 |
along with this program; if not, write to the Free Software |
21 |
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
22 |
02110-1301, USA. |
23 |
|
24 |
-} |
25 |
|
26 |
module Ganeti.Query.Server |
27 |
( main |
28 |
, checkMain |
29 |
, prepMain |
30 |
) where |
31 |
|
32 |
import Control.Applicative |
33 |
import Control.Concurrent |
34 |
import Control.Exception |
35 |
import Control.Monad (forever, when, mzero, guard, zipWithM, liftM, void, |
36 |
unless) |
37 |
import Control.Monad.IO.Class |
38 |
import Control.Monad.Trans (lift) |
39 |
import Control.Monad.Trans.Maybe |
40 |
import Data.Bits (bitSize) |
41 |
import Data.Either (rights) |
42 |
import qualified Data.Foldable as F |
43 |
import qualified Data.Set as Set (toList) |
44 |
import Data.IORef |
45 |
import Data.List (partition) |
46 |
import Data.Maybe (fromMaybe) |
47 |
import qualified Text.JSON as J |
48 |
import Text.JSON (encode, showJSON, JSValue(..)) |
49 |
import System.Info (arch) |
50 |
import System.Directory |
51 |
import System.Exit (ExitCode(..)) |
52 |
import System.Posix.Signals as P |
53 |
|
54 |
import qualified Ganeti.Constants as C |
55 |
import qualified Ganeti.ConstantUtils as ConstantUtils (unFrozenSet) |
56 |
import Ganeti.Errors |
57 |
import qualified Ganeti.Path as Path |
58 |
import Ganeti.Daemon |
59 |
import Ganeti.Objects |
60 |
import qualified Ganeti.Config as Config |
61 |
import Ganeti.ConfigReader |
62 |
import Ganeti.BasicTypes |
63 |
import Ganeti.JQueue |
64 |
import Ganeti.JQScheduler |
65 |
import Ganeti.JSON (TimeAsDoubleJSON(..)) |
66 |
import Ganeti.Logging |
67 |
import Ganeti.Luxi |
68 |
import qualified Ganeti.Query.Language as Qlang |
69 |
import qualified Ganeti.Query.Cluster as QCluster |
70 |
import Ganeti.Path ( queueDir, jobQueueLockFile, jobQueueDrainFile ) |
71 |
import Ganeti.Rpc |
72 |
import qualified Ganeti.Query.Exec as Exec |
73 |
import Ganeti.Query.Query |
74 |
import Ganeti.Query.Filter (makeSimpleFilter) |
75 |
import Ganeti.Types |
76 |
import qualified Ganeti.UDSServer as U (Handler(..), listener) |
77 |
import Ganeti.Utils ( lockFile, exitIfBad, exitUnless, watchFile |
78 |
, safeRenameFile ) |
79 |
import Ganeti.Utils.MVarLock |
80 |
import qualified Ganeti.Version as Version |
81 |
|
82 |
-- | Helper for classic queries. |
83 |
handleClassicQuery :: ConfigData -- ^ Cluster config |
84 |
-> Qlang.ItemType -- ^ Query type |
85 |
-> [Either String Integer] -- ^ Requested names |
86 |
-- (empty means all) |
87 |
-> [String] -- ^ Requested fields |
88 |
-> Bool -- ^ Whether to do sync queries or not |
89 |
-> IO (GenericResult GanetiException JSValue) |
90 |
handleClassicQuery _ _ _ _ True = |
91 |
return . Bad $ OpPrereqError "Sync queries are not allowed" ECodeInval |
92 |
handleClassicQuery cfg qkind names fields _ = do |
93 |
let simpleNameFilter field = makeSimpleFilter (field qkind) names |
94 |
flt = Qlang.OrFilter $ map simpleNameFilter [nameField, uuidField] |
95 |
qr <- query cfg True (Qlang.Query qkind fields flt) |
96 |
return $ showJSON <$> (qr >>= queryCompat) |
97 |
|
98 |
-- | Minimal wrapper to handle the missing config case. |
99 |
handleCallWrapper :: Lock -> JQStatus -> Result ConfigData |
100 |
-> LuxiOp -> IO (ErrorResult JSValue) |
101 |
handleCallWrapper _ _ (Bad msg) _ = |
102 |
return . Bad . ConfigurationError $ |
103 |
"I do not have access to a valid configuration, cannot\ |
104 |
\ process queries: " ++ msg |
105 |
handleCallWrapper qlock qstat (Ok config) op = handleCall qlock qstat config op |
106 |
|
107 |
-- | Actual luxi operation handler. |
108 |
handleCall :: Lock -> JQStatus |
109 |
-> ConfigData -> LuxiOp -> IO (ErrorResult JSValue) |
110 |
handleCall _ _ cdata QueryClusterInfo = |
111 |
let cluster = configCluster cdata |
112 |
master = QCluster.clusterMasterNodeName cdata |
113 |
hypervisors = clusterEnabledHypervisors cluster |
114 |
diskTemplates = clusterEnabledDiskTemplates cluster |
115 |
def_hv = case hypervisors of |
116 |
x:_ -> showJSON x |
117 |
[] -> JSNull |
118 |
bits = show (bitSize (0::Int)) ++ "bits" |
119 |
arch_tuple = [bits, arch] |
120 |
obj = [ ("software_version", showJSON C.releaseVersion) |
121 |
, ("protocol_version", showJSON C.protocolVersion) |
122 |
, ("config_version", showJSON C.configVersion) |
123 |
, ("os_api_version", showJSON . maximum . |
124 |
Set.toList . ConstantUtils.unFrozenSet $ |
125 |
C.osApiVersions) |
126 |
, ("export_version", showJSON C.exportVersion) |
127 |
, ("vcs_version", showJSON Version.version) |
128 |
, ("architecture", showJSON arch_tuple) |
129 |
, ("name", showJSON $ clusterClusterName cluster) |
130 |
, ("master", showJSON (case master of |
131 |
Ok name -> name |
132 |
_ -> undefined)) |
133 |
, ("default_hypervisor", def_hv) |
134 |
, ("enabled_hypervisors", showJSON hypervisors) |
135 |
, ("hvparams", showJSON $ clusterHvparams cluster) |
136 |
, ("os_hvp", showJSON $ clusterOsHvp cluster) |
137 |
, ("beparams", showJSON $ clusterBeparams cluster) |
138 |
, ("osparams", showJSON $ clusterOsparams cluster) |
139 |
, ("ipolicy", showJSON $ clusterIpolicy cluster) |
140 |
, ("nicparams", showJSON $ clusterNicparams cluster) |
141 |
, ("ndparams", showJSON $ clusterNdparams cluster) |
142 |
, ("diskparams", showJSON $ clusterDiskparams cluster) |
143 |
, ("candidate_pool_size", |
144 |
showJSON $ clusterCandidatePoolSize cluster) |
145 |
, ("max_running_jobs", |
146 |
showJSON $ clusterMaxRunningJobs cluster) |
147 |
, ("mac_prefix", showJSON $ clusterMacPrefix cluster) |
148 |
, ("master_netdev", showJSON $ clusterMasterNetdev cluster) |
149 |
, ("master_netmask", showJSON $ clusterMasterNetmask cluster) |
150 |
, ("use_external_mip_script", |
151 |
showJSON $ clusterUseExternalMipScript cluster) |
152 |
, ("volume_group_name", |
153 |
maybe JSNull showJSON (clusterVolumeGroupName cluster)) |
154 |
, ("drbd_usermode_helper", |
155 |
maybe JSNull showJSON (clusterDrbdUsermodeHelper cluster)) |
156 |
, ("file_storage_dir", showJSON $ clusterFileStorageDir cluster) |
157 |
, ("shared_file_storage_dir", |
158 |
showJSON $ clusterSharedFileStorageDir cluster) |
159 |
, ("gluster_storage_dir", |
160 |
showJSON $ clusterGlusterStorageDir cluster) |
161 |
, ("maintain_node_health", |
162 |
showJSON $ clusterMaintainNodeHealth cluster) |
163 |
, ("ctime", showJSON . TimeAsDoubleJSON $ clusterCtime cluster) |
164 |
, ("mtime", showJSON . TimeAsDoubleJSON $ clusterMtime cluster) |
165 |
, ("uuid", showJSON $ clusterUuid cluster) |
166 |
, ("tags", showJSON $ clusterTags cluster) |
167 |
, ("uid_pool", showJSON $ clusterUidPool cluster) |
168 |
, ("default_iallocator", |
169 |
showJSON $ clusterDefaultIallocator cluster) |
170 |
, ("default_iallocator_params", |
171 |
showJSON $ clusterDefaultIallocatorParams cluster) |
172 |
, ("reserved_lvs", showJSON $ clusterReservedLvs cluster) |
173 |
, ("primary_ip_version", |
174 |
showJSON . ipFamilyToVersion $ clusterPrimaryIpFamily cluster) |
175 |
, ("prealloc_wipe_disks", |
176 |
showJSON $ clusterPreallocWipeDisks cluster) |
177 |
, ("hidden_os", showJSON $ clusterHiddenOs cluster) |
178 |
, ("blacklisted_os", showJSON $ clusterBlacklistedOs cluster) |
179 |
, ("enabled_disk_templates", showJSON diskTemplates) |
180 |
, ("install_image", showJSON $ clusterInstallImage cluster) |
181 |
, ("instance_communication_network", |
182 |
showJSON (clusterInstanceCommunicationNetwork cluster)) |
183 |
, ("zeroing_image", showJSON $ clusterZeroingImage cluster) |
184 |
, ("compression_tools", |
185 |
showJSON $ clusterCompressionTools cluster) |
186 |
] |
187 |
|
188 |
in case master of |
189 |
Ok _ -> return . Ok . J.makeObj $ obj |
190 |
Bad ex -> return $ Bad ex |
191 |
|
192 |
handleCall _ _ cfg (QueryTags kind name) = do |
193 |
let tags = case kind of |
194 |
TagKindCluster -> Ok . clusterTags $ configCluster cfg |
195 |
TagKindGroup -> groupTags <$> Config.getGroup cfg name |
196 |
TagKindNode -> nodeTags <$> Config.getNode cfg name |
197 |
TagKindInstance -> instTags <$> Config.getInstance cfg name |
198 |
TagKindNetwork -> networkTags <$> Config.getNetwork cfg name |
199 |
return (J.showJSON <$> tags) |
200 |
|
201 |
handleCall _ _ cfg (Query qkind qfields qfilter) = do |
202 |
result <- query cfg True (Qlang.Query qkind qfields qfilter) |
203 |
return $ J.showJSON <$> result |
204 |
|
205 |
handleCall _ _ _ (QueryFields qkind qfields) = do |
206 |
let result = queryFields (Qlang.QueryFields qkind qfields) |
207 |
return $ J.showJSON <$> result |
208 |
|
209 |
handleCall _ _ cfg (QueryNodes names fields lock) = |
210 |
handleClassicQuery cfg (Qlang.ItemTypeOpCode Qlang.QRNode) |
211 |
(map Left names) fields lock |
212 |
|
213 |
handleCall _ _ cfg (QueryInstances names fields lock) = |
214 |
handleClassicQuery cfg (Qlang.ItemTypeOpCode Qlang.QRInstance) |
215 |
(map Left names) fields lock |
216 |
|
217 |
handleCall _ _ cfg (QueryGroups names fields lock) = |
218 |
handleClassicQuery cfg (Qlang.ItemTypeOpCode Qlang.QRGroup) |
219 |
(map Left names) fields lock |
220 |
|
221 |
handleCall _ _ cfg (QueryJobs names fields) = |
222 |
handleClassicQuery cfg (Qlang.ItemTypeLuxi Qlang.QRJob) |
223 |
(map (Right . fromIntegral . fromJobId) names) fields False |
224 |
|
225 |
handleCall _ _ cfg (QueryNetworks names fields lock) = |
226 |
handleClassicQuery cfg (Qlang.ItemTypeOpCode Qlang.QRNetwork) |
227 |
(map Left names) fields lock |
228 |
|
229 |
handleCall _ _ cfg (QueryConfigValues fields) = do |
230 |
let params = [ ("cluster_name", return . showJSON . clusterClusterName |
231 |
. configCluster $ cfg) |
232 |
, ("watcher_pause", liftM (maybe JSNull showJSON) |
233 |
QCluster.isWatcherPaused) |
234 |
, ("master_node", return . genericResult (const JSNull) showJSON |
235 |
$ QCluster.clusterMasterNodeName cfg) |
236 |
, ("drain_flag", liftM (showJSON . not) isQueueOpen) |
237 |
] :: [(String, IO JSValue)] |
238 |
let answer = map (fromMaybe (return JSNull) . flip lookup params) fields |
239 |
answerEval <- sequence answer |
240 |
return . Ok . showJSON $ answerEval |
241 |
|
242 |
handleCall _ _ cfg (QueryExports nodes lock) = |
243 |
handleClassicQuery cfg (Qlang.ItemTypeOpCode Qlang.QRExport) |
244 |
(map Left nodes) ["node", "export"] lock |
245 |
|
246 |
handleCall qlock qstat cfg (SubmitJobToDrainedQueue ops) = runResultT $ do |
247 |
jid <- mkResultT $ allocateJobId (Config.getMasterCandidates cfg) qlock |
248 |
ts <- liftIO currentTimestamp |
249 |
job <- liftM (extendJobReasonTrail . setReceivedTimestamp ts) |
250 |
$ queuedJobFromOpCodes jid ops |
251 |
qDir <- liftIO queueDir |
252 |
_ <- writeAndReplicateJob cfg qDir job |
253 |
_ <- liftIO . forkIO $ enqueueNewJobs qstat [job] |
254 |
return . showJSON . fromJobId $ jid |
255 |
|
256 |
handleCall qlock qstat cfg (SubmitJob ops) = |
257 |
do |
258 |
open <- isQueueOpen |
259 |
if not open |
260 |
then return . Bad . GenericError $ "Queue drained" |
261 |
else handleCall qlock qstat cfg (SubmitJobToDrainedQueue ops) |
262 |
|
263 |
handleCall qlock qstat cfg (SubmitManyJobs lops) = |
264 |
do |
265 |
open <- isQueueOpen |
266 |
if not open |
267 |
then return . Bad . GenericError $ "Queue drained" |
268 |
else do |
269 |
let mcs = Config.getMasterCandidates cfg |
270 |
result_jobids <- allocateJobIds mcs qlock (length lops) |
271 |
case result_jobids of |
272 |
Bad s -> return . Bad . GenericError $ s |
273 |
Ok jids -> do |
274 |
ts <- currentTimestamp |
275 |
jobs <- liftM (map $ extendJobReasonTrail . setReceivedTimestamp ts) |
276 |
$ zipWithM queuedJobFromOpCodes jids lops |
277 |
qDir <- queueDir |
278 |
write_results <- mapM (writeJobToDisk qDir) jobs |
279 |
let annotated_results = zip write_results jobs |
280 |
succeeded = map snd $ filter (isOk . fst) annotated_results |
281 |
when (any isBad write_results) . logWarning |
282 |
$ "Writing some jobs failed " ++ show annotated_results |
283 |
replicateManyJobs qDir mcs succeeded |
284 |
_ <- forkIO $ enqueueNewJobs qstat succeeded |
285 |
return . Ok . JSArray |
286 |
. map (\(res, job) -> |
287 |
if isOk res |
288 |
then showJSON (True, fromJobId $ qjId job) |
289 |
else showJSON (False, genericResult id (const "") res)) |
290 |
$ annotated_results |
291 |
|
292 |
handleCall _ _ cfg (WaitForJobChange jid fields prev_job prev_log tmout) = do |
293 |
let compute_fn = computeJobUpdate cfg jid fields prev_log |
294 |
qDir <- queueDir |
295 |
-- verify if the job is finalized, and return immediately in this case |
296 |
jobresult <- loadJobFromDisk qDir False jid |
297 |
case jobresult of |
298 |
Bad s -> return . Bad $ JobLost s |
299 |
Ok (job, _) | not (jobFinalized job) -> do |
300 |
let jobfile = liveJobFile qDir jid |
301 |
answer <- watchFile jobfile (min tmout C.luxiWfjcTimeout) |
302 |
(prev_job, JSArray []) compute_fn |
303 |
return . Ok $ showJSON answer |
304 |
_ -> liftM (Ok . showJSON) compute_fn |
305 |
|
306 |
handleCall _ _ cfg (SetWatcherPause time) = do |
307 |
let mcs = Config.getMasterOrCandidates cfg |
308 |
_ <- executeRpcCall mcs $ RpcCallSetWatcherPause time |
309 |
return . Ok . maybe JSNull showJSON $ fmap TimeAsDoubleJSON time |
310 |
|
311 |
handleCall _ _ cfg (SetDrainFlag value) = do |
312 |
let mcs = Config.getMasterCandidates cfg |
313 |
fpath <- jobQueueDrainFile |
314 |
if value |
315 |
then writeFile fpath "" |
316 |
else removeFile fpath |
317 |
_ <- executeRpcCall mcs $ RpcCallSetDrainFlag value |
318 |
return . Ok . showJSON $ True |
319 |
|
320 |
handleCall _ qstat cfg (ChangeJobPriority jid prio) = do |
321 |
maybeJob <- setJobPriority qstat jid prio |
322 |
case maybeJob of |
323 |
Bad s -> return . Ok $ showJSON (False, s) |
324 |
Ok (Just job) -> runResultT $ do |
325 |
let mcs = Config.getMasterCandidates cfg |
326 |
qDir <- liftIO queueDir |
327 |
liftIO $ replicateManyJobs qDir mcs [job] |
328 |
return $ showJSON (True, "Priorities of pending opcodes for job " |
329 |
++ show (fromJobId jid) ++ " have been changed" |
330 |
++ " to " ++ show prio) |
331 |
Ok Nothing -> |
332 |
-- Job has already started; so we have to forward the request |
333 |
-- to the job. |
334 |
runResultT . return $ showJSON (False, "Job already forked off") |
335 |
|
336 |
handleCall _ qstat cfg (CancelJob jid) = do |
337 |
let jName = (++) "job " . show $ fromJobId jid |
338 |
dequeueResult <- dequeueJob qstat jid |
339 |
case dequeueResult of |
340 |
Ok True -> |
341 |
let jobFileFailed = (,) False |
342 |
. (++) ("Dequeued " ++ jName |
343 |
++ ", but failed to mark as cancelled: ") |
344 |
jobFileSucceeded _ = (True, "Dequeued " ++ jName) |
345 |
in liftM (Ok . showJSON . genericResult jobFileFailed jobFileSucceeded) |
346 |
. runResultT $ do |
347 |
logDebug $ jName ++ " dequeued, marking as canceled" |
348 |
qDir <- liftIO queueDir |
349 |
(job, _) <- ResultT $ loadJobFromDisk qDir True jid |
350 |
now <- liftIO currentTimestamp |
351 |
let job' = cancelQueuedJob now job |
352 |
writeAndReplicateJob cfg qDir job' |
353 |
Ok False -> do |
354 |
logDebug $ jName ++ " not queued; trying to cancel directly" |
355 |
fmap showJSON <$> cancelJob (jqLivelock qstat) jid |
356 |
Bad s -> return . Ok . showJSON $ (False, s) |
357 |
|
358 |
handleCall qlock _ cfg (ArchiveJob jid) = |
359 |
-- By adding a layer of MaybeT, we can prematurely end a computation |
360 |
-- using 'mzero' or other 'MonadPlus' primitive and return 'Ok False'. |
361 |
runResultT . liftM (showJSON . fromMaybe False) . runMaybeT $ do |
362 |
qDir <- liftIO queueDir |
363 |
let mcs = Config.getMasterCandidates cfg |
364 |
live = liveJobFile qDir jid |
365 |
archive = archivedJobFile qDir jid |
366 |
withLock qlock $ do |
367 |
(job, _) <- (lift . mkResultT $ loadJobFromDisk qDir False jid) |
368 |
`orElse` mzero |
369 |
guard $ jobFinalized job |
370 |
lift . withErrorT JobQueueError |
371 |
. annotateError "Archiving failed in an unexpected way" |
372 |
. mkResultT $ safeRenameFile queueDirPermissions live archive |
373 |
_ <- liftIO . executeRpcCall mcs |
374 |
$ RpcCallJobqueueRename [(live, archive)] |
375 |
return True |
376 |
|
377 |
handleCall qlock _ cfg (AutoArchiveJobs age timeout) = do |
378 |
qDir <- queueDir |
379 |
resultJids <- getJobIDs [qDir] |
380 |
case resultJids of |
381 |
Bad s -> return . Bad . JobQueueError $ show s |
382 |
Ok jids -> do |
383 |
result <- withLock qlock |
384 |
. archiveJobs cfg age timeout |
385 |
$ sortJobIDs jids |
386 |
return . Ok $ showJSON result |
387 |
|
388 |
handleCall _ _ _ (PickupJob _) = |
389 |
return . Bad |
390 |
$ GenericError "Luxi call 'PickupJob' is for internal use only" |
391 |
|
392 |
{-# ANN handleCall "HLint: ignore Too strict if" #-} |
393 |
|
394 |
-- | Query the status of a job and return the requested fields |
395 |
-- and the logs newer than the given log number. |
396 |
computeJobUpdate :: ConfigData -> JobId -> [String] -> JSValue |
397 |
-> IO (JSValue, JSValue) |
398 |
computeJobUpdate cfg jid fields prev_log = do |
399 |
let sjid = show $ fromJobId jid |
400 |
logDebug $ "Inspecting fields " ++ show fields ++ " of job " ++ sjid |
401 |
let fromJSArray (JSArray xs) = xs |
402 |
fromJSArray _ = [] |
403 |
let logFilter JSNull (JSArray _) = True |
404 |
logFilter (JSRational _ n) (JSArray (JSRational _ m:_)) = n < m |
405 |
logFilter _ _ = False |
406 |
let filterLogs n logs = JSArray (filter (logFilter n) (logs >>= fromJSArray)) |
407 |
jobQuery <- handleClassicQuery cfg (Qlang.ItemTypeLuxi Qlang.QRJob) |
408 |
[Right . fromIntegral $ fromJobId jid] ("oplog" : fields) False |
409 |
let (rfields, rlogs) = case jobQuery of |
410 |
Ok (JSArray [JSArray (JSArray logs : answer)]) -> |
411 |
(answer, filterLogs prev_log logs) |
412 |
_ -> (map (const JSNull) fields, JSArray []) |
413 |
logDebug $ "Updates for job " ++ sjid ++ " are " ++ encode (rfields, rlogs) |
414 |
return (JSArray rfields, rlogs) |
415 |
|
416 |
|
417 |
type LuxiConfig = (Lock, JQStatus, ConfigReader) |
418 |
|
419 |
luxiExec |
420 |
:: LuxiConfig |
421 |
-> LuxiOp |
422 |
-> IO (Bool, GenericResult GanetiException JSValue) |
423 |
luxiExec (qlock, qstat, creader) args = do |
424 |
cfg <- creader |
425 |
result <- handleCallWrapper qlock qstat cfg args |
426 |
return (True, result) |
427 |
|
428 |
luxiHandler :: LuxiConfig -> U.Handler LuxiOp IO JSValue |
429 |
luxiHandler cfg = U.Handler { U.hParse = decodeLuxiCall |
430 |
, U.hInputLogShort = strOfOp |
431 |
, U.hInputLogLong = show |
432 |
, U.hExec = luxiExec cfg |
433 |
} |
434 |
|
435 |
-- | Type alias for prepMain results |
436 |
type PrepResult = (Server, IORef (Result ConfigData), JQStatus) |
437 |
|
438 |
-- | Activate the master IP address. |
439 |
activateMasterIP :: IO (Result ()) |
440 |
activateMasterIP = runResultT $ do |
441 |
liftIO $ logDebug "Activating master IP address" |
442 |
conf_file <- liftIO Path.clusterConfFile |
443 |
config <- mkResultT $ Config.loadConfig conf_file |
444 |
let mnp = Config.getMasterNetworkParameters config |
445 |
masters = Config.getMasterNodes config |
446 |
ems = clusterUseExternalMipScript $ configCluster config |
447 |
liftIO . logDebug $ "Master IP params: " ++ show mnp |
448 |
res <- liftIO . executeRpcCall masters $ RpcCallNodeActivateMasterIp mnp ems |
449 |
_ <- liftIO $ logRpcErrors res |
450 |
liftIO $ logDebug "finished activating master IP address" |
451 |
return () |
452 |
|
453 |
-- | Gather votes from all nodes and verify that we we are |
454 |
-- the master. Return True if the voting is won, False if |
455 |
-- not enough |
456 |
verifyMasterVotes :: IO (Result Bool) |
457 |
verifyMasterVotes = runResultT $ do |
458 |
liftIO $ logDebug "Gathering votes for the master node" |
459 |
myName <- liftIO getFQDN |
460 |
liftIO . logDebug $ "My hostname is " ++ myName |
461 |
conf_file <- liftIO Path.clusterConfFile |
462 |
config <- mkResultT $ Config.loadConfig conf_file |
463 |
let nodes = F.toList $ configNodes config |
464 |
votes <- liftIO . executeRpcCall nodes $ RpcCallMasterNodeName |
465 |
let (missing, valid) = partition (isLeft . snd) votes |
466 |
noDataNodes = map (nodeName . fst) missing |
467 |
validVotes = map rpcResultMasterNodeNameMaster . rights $ map snd valid |
468 |
inFavor = length $ filter (== myName) validVotes |
469 |
voters = length nodes |
470 |
unknown = length missing |
471 |
liftIO . unless (null noDataNodes) . logWarning |
472 |
. (++) "No voting RPC result from " $ show noDataNodes |
473 |
liftIO . logDebug . (++) "Valid votes: " $ show validVotes |
474 |
if 2 * inFavor > voters |
475 |
then return True |
476 |
else if 2 * (inFavor + unknown) > voters |
477 |
then return False |
478 |
else fail $ "Voting cannot be won by " ++ myName |
479 |
++ ", valid votes of " ++ show voters |
480 |
++ " are " ++ show validVotes |
481 |
|
482 |
-- | Verify, by voting, that this node is the master. Bad if we're not. |
483 |
-- Allow the given number of retries to wait for not available nodes. |
484 |
verifyMaster :: Int -> IO (Result ()) |
485 |
verifyMaster retries = runResultT $ do |
486 |
won <- mkResultT verifyMasterVotes |
487 |
unless won $ |
488 |
if retries <= 0 |
489 |
then fail "Couldn't gather voting results of enough nodes" |
490 |
else do |
491 |
liftIO $ logDebug "Voting not final due to missing votes." |
492 |
liftIO . threadDelay $ C.masterVotingRetryIntervall * 1000000 |
493 |
mkResultT $ verifyMaster (retries - 1) |
494 |
|
495 |
-- | Check function for luxid. |
496 |
checkMain :: CheckFn () |
497 |
checkMain opts = |
498 |
if optNoVoting opts |
499 |
then if optYesDoIt opts |
500 |
then return $ Right () |
501 |
else do |
502 |
logError "The no-voting option is dangerous and cannot be\ |
503 |
\ given without providing yes-do-it as well." |
504 |
return . Left $ ExitFailure C.exitFailure |
505 |
else do |
506 |
masterStatus <- verifyMaster C.masterVotingRetries |
507 |
case masterStatus of |
508 |
Bad s -> do |
509 |
logError $ "Failed to verify master status: " ++ s |
510 |
return . Left $ ExitFailure C.exitFailure |
511 |
Ok _ -> return $ Right () |
512 |
|
513 |
-- | Prepare function for luxid. |
514 |
prepMain :: PrepFn () PrepResult |
515 |
prepMain _ _ = do |
516 |
Exec.isForkSupported |
517 |
>>= flip exitUnless "The daemon must be compiled without -threaded" |
518 |
|
519 |
socket_path <- Path.defaultQuerySocket |
520 |
cleanupSocket socket_path |
521 |
s <- describeError "binding to the Luxi socket" |
522 |
Nothing (Just socket_path) $ getLuxiServer True socket_path |
523 |
cref <- newIORef (Bad "Configuration not yet loaded") |
524 |
jq <- emptyJQStatus cref |
525 |
return (s, cref, jq) |
526 |
|
527 |
-- | Main function. |
528 |
main :: MainFn () PrepResult |
529 |
main _ _ (server, cref, jq) = do |
530 |
initConfigReader id cref |
531 |
let creader = readIORef cref |
532 |
|
533 |
qlockFile <- jobQueueLockFile |
534 |
_ <- lockFile qlockFile >>= exitIfBad "Failed to obtain the job-queue lock" |
535 |
qlock <- newLock |
536 |
|
537 |
_ <- P.installHandler P.sigCHLD P.Ignore Nothing |
538 |
|
539 |
_ <- forkIO . void $ activateMasterIP |
540 |
|
541 |
initJQScheduler jq |
542 |
|
543 |
finally |
544 |
(forever $ U.listener (luxiHandler (qlock, jq, creader)) server) |
545 |
(closeServer server) |