Revision 36205981 daemons/ganeti-masterd

b/daemons/ganeti-masterd
349 349
  return options, args
350 350

  
351 351

  
352
def CheckAgreement():
  """Check the agreement on who is the master.

  The function uses a very simple algorithm: we must get more positive
  than negative answers. Since in most of the cases we are the master,
  we'll use our own config file for getting the node list. In the
  future we could collect the current node list from our (possibly
  obsolete) known nodes.

  Every other node in the configuration is asked (via
  rpc.call_master_info) who it thinks the master is. An answer that is
  not a tuple/list with at least three items is logged as an
  unreachable node and counts neither for nor against us.

  Returns True when there is no other node to ask, or when strictly
  more nodes name us as the master than name some other node. Returns
  False otherwise, and also when the rpc call does not return a dict.

  """
  myself = utils.HostInfo().name
  # temp instantiation of a config writer, used only to get the node list
  cfg = config.ConfigWriter()
  node_list = cfg.GetNodeList()
  del cfg
  try:
    node_list.remove(myself)
  except (KeyError, ValueError):
    # list.remove() reports a missing item with ValueError, not KeyError;
    # KeyError is still accepted in case the container is set-like.
    # Either way we are simply not in our own node list, which is fine.
    pass
  if not node_list:
    # either single node cluster, or a misconfiguration, but I won't
    # break any other node, so I can proceed
    return True
  results = rpc.call_master_info(node_list)
  if not isinstance(results, dict):
    # this should not happen (unless internal error in rpc)
    logging.critical("Can't complete rpc call, aborting master startup")
    return False
  positive = negative = 0
  # maps each competing master name to the number of nodes voting for it
  other_masters = {}
  for node in results:
    answer = results[node]
    if not isinstance(answer, (tuple, list)) or len(answer) < 3:
      logging.warning("Can't contact node %s", node)
      continue
    # the third item of the answer is the master name as seen by that node
    master_node = answer[2]
    if master_node == myself:
      positive += 1
    else:
      negative += 1
      other_masters[master_node] = other_masters.get(master_node, 0) + 1
  # a tie is not good enough: we need a strict majority of the answers
  if positive <= negative:
    # bad!
    logging.critical("It seems we are not the master (%d votes for,"
                     " %d votes against)", positive, negative)
    if len(other_masters) > 1:
      logging.critical("The other nodes do not agree on a single master")
    elif other_masters:
      # TODO: resync my files from the master
      # list(...) instead of .keys()[0]: dict views are not indexable
      logging.critical("It seems the real master is %s",
                       list(other_masters)[0])
    else:
      logging.critical("Can't contact any node for data, aborting startup")
    return False
  return True
408

  
409

  
352 410
def main():
353 411
  """Main function"""
354 412

  
......
358 416

  
359 417
  ssconf.CheckMaster(options.debug)
360 418

  
419
  # we believe we are the master, let's ask the other nodes...
420
  if not CheckAgreement():
421
    return
422

  
361 423
  master = IOServer(constants.MASTER_SOCKET, ClientRqHandler)
362 424

  
363 425
  # become a daemon

Also available in: Unified diff