Revision 36205981
b/daemons/ganeti-masterd | ||
---|---|---|
349 | 349 |
return options, args |
350 | 350 |
|
351 | 351 |
|
352 |
def CheckAgreement():
  """Check the agreement on who is the master.

  The function uses a very simple algorithm: we must get more positive
  than negative answers. Since in most of the cases we are the master,
  we'll use our own config file for getting the node list. In the
  future we could collect the current node list from our (possibly
  obsolete) known nodes.

  Every other node in the configuration is asked (through
  rpc.call_master_info) who it believes the master is. Answers naming
  this node count as positive votes, answers naming any other node
  count as negative votes, and malformed or missing answers are logged
  and ignored.

  Returns:
    True if there are no other nodes to ask or if strictly more nodes
    voted for us than against us; False if the rpc call failed, if no
    node could be contacted, or if the vote was tied or lost.

  """
  myself = utils.HostInfo().name
  # temp instantiation of a config writer, used only to get the node list
  cfg = config.ConfigWriter()
  node_list = cfg.GetNodeList()
  del cfg
  try:
    node_list.remove(myself)
  except ValueError:
    # list.remove() signals a missing item with ValueError (not KeyError);
    # we are simply not in the node list, so there is nothing to strip
    pass
  if not node_list:
    # either single node cluster, or a misconfiguration, but I won't
    # break any other node, so I can proceed
    return True
  results = rpc.call_master_info(node_list)
  if not isinstance(results, dict):
    # this should not happen (unless internal error in rpc)
    logging.critical("Can't complete rpc call, aborting master startup")
    return False
  positive = negative = 0
  # maps each competing master name to the number of nodes voting for it
  other_masters = {}
  for node in results:
    answer = results[node]
    if not isinstance(answer, (tuple, list)) or len(answer) < 3:
      logging.warning("Can't contact node %s", node)
      continue
    # the third field of the answer is the master as seen by that node
    master_node = answer[2]
    if master_node == myself:
      positive += 1
    else:
      negative += 1
      other_masters[master_node] = other_masters.get(master_node, 0) + 1
  if positive <= negative:
    # bad! (a tie, including the "nobody answered" 0/0 case, also fails)
    logging.critical("It seems we are not the master (%d votes for,"
                     " %d votes against)", positive, negative)
    if len(other_masters) > 1:
      logging.critical("The other nodes do not agree on a single master")
    elif other_masters:
      # TODO: resync my files from the master
      # list(...) instead of .keys()[0]: works whether keys() returns a
      # list or a view object
      logging.critical("It seems the real master is %s",
                       list(other_masters)[0])
    else:
      logging.critical("Can't contact any node for data, aborting startup")
    return False
  return True
|
408 |
|
|
409 |
|
|
352 | 410 |
def main(): |
353 | 411 |
"""Main function""" |
354 | 412 |
|
... | ... | |
358 | 416 |
|
359 | 417 |
ssconf.CheckMaster(options.debug) |
360 | 418 |
|
419 |
# we believe we are the master, let's ask the other nodes... |
|
420 |
if not CheckAgreement(): |
|
421 |
return |
|
422 |
|
|
361 | 423 |
master = IOServer(constants.MASTER_SOCKET, ClientRqHandler) |
362 | 424 |
|
363 | 425 |
# become a daemon |
Also available in: Unified diff