root / lib / bootstrap.py @ 3953242f

#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Functions to bootstrap a new cluster.

"""

import os
import os.path
import re
import logging
import tempfile
import time

from ganeti import rpc
from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import config
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import serializer
from ganeti import hypervisor


def _InitSSHSetup():
  """Set up the SSH configuration for the cluster.

  This generates a DSA keypair for root, adds the public key to the
  list of permitted hosts and adds the host key to its own known hosts.

  """
  priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)

  for name in priv_key, pub_key:
    if os.path.exists(name):
      utils.CreateBackup(name)
    utils.RemoveFile(name)

  result = utils.RunCmd(["ssh-keygen", "-t", "dsa",
                         "-f", priv_key,
                         "-q", "-N", ""])
  if result.failed:
    raise errors.OpExecError("Could not generate ssh keypair, error %s" %
                             result.output)

  utils.AddAuthorizedKey(auth_keys, utils.ReadFile(pub_key))


def GenerateSelfSignedSslCert(file_name, validity=(365 * 5)):
  """Generates a self-signed SSL certificate.

  @type file_name: str
  @param file_name: Path to output file
  @type validity: int
  @param validity: Validity for certificate in days

  """
  (fd, tmp_file_name) = tempfile.mkstemp(dir=os.path.dirname(file_name))
  try:
    try:
      # Set permissions before writing key
      os.chmod(tmp_file_name, 0600)

      result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
                             "-days", str(validity), "-nodes", "-x509",
                             "-keyout", tmp_file_name, "-out", tmp_file_name,
                             "-batch"])
      if result.failed:
        raise errors.OpExecError("Could not generate SSL certificate, command"
                                 " %s had exitcode %s and error message %s" %
                                 (result.cmd, result.exit_code, result.output))

      # Make read-only
      os.chmod(tmp_file_name, 0400)

      os.rename(tmp_file_name, file_name)
    finally:
      utils.RemoveFile(tmp_file_name)
  finally:
    os.close(fd)
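
# Example usage (a sketch, not part of the original module): since "-keyout"
# and "-out" above point at the same file, the RSA key and the certificate
# end up together in one PEM file. A throw-away certificate for testing could
# be created with a hypothetical path like:
#
#   GenerateSelfSignedSslCert("/tmp/test-cert.pem", validity=30)
#
# and inspected afterwards with "openssl x509 -in /tmp/test-cert.pem -text".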


def GenerateHmacKey(file_name):
  """Writes a new HMAC key.

  @type file_name: str
  @param file_name: Path to output file

  """
  utils.WriteFile(file_name, data="%s\n" % utils.GenerateSecret(), mode=0400,
                  backup=True)
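
# Example usage (a sketch; the path below is hypothetical): write a fresh
# secret, backing up any existing key file and leaving the new one
# readable only by its owner (mode 0400):
#
#   GenerateHmacKey("/var/lib/ganeti/hmac.key")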


def GenerateClusterCrypto(new_cluster_cert, new_rapi_cert, new_confd_hmac_key,
                          rapi_cert_pem=None):
  """Updates the cluster certificates, keys and secrets.

  @type new_cluster_cert: bool
  @param new_cluster_cert: Whether to generate a new cluster certificate
  @type new_rapi_cert: bool
  @param new_rapi_cert: Whether to generate a new RAPI certificate
  @type new_confd_hmac_key: bool
  @param new_confd_hmac_key: Whether to generate a new HMAC key
  @type rapi_cert_pem: string
  @param rapi_cert_pem: New RAPI certificate in PEM format

  """
  # noded SSL certificate
  cluster_cert_exists = os.path.exists(constants.NODED_CERT_FILE)
  if new_cluster_cert or not cluster_cert_exists:
    if cluster_cert_exists:
      utils.CreateBackup(constants.NODED_CERT_FILE)

    logging.debug("Generating new cluster certificate at %s",
                  constants.NODED_CERT_FILE)
    GenerateSelfSignedSslCert(constants.NODED_CERT_FILE)

  # confd HMAC key
  if new_confd_hmac_key or not os.path.exists(constants.CONFD_HMAC_KEY):
    logging.debug("Writing new confd HMAC key to %s", constants.CONFD_HMAC_KEY)
    GenerateHmacKey(constants.CONFD_HMAC_KEY)

  # RAPI
  rapi_cert_exists = os.path.exists(constants.RAPI_CERT_FILE)

  if rapi_cert_pem:
    # Assume rapi_cert_pem contains a valid PEM-formatted certificate and key
    logging.debug("Writing RAPI certificate at %s",
                  constants.RAPI_CERT_FILE)
    utils.WriteFile(constants.RAPI_CERT_FILE, data=rapi_cert_pem, backup=True)

  elif new_rapi_cert or not rapi_cert_exists:
    if rapi_cert_exists:
      utils.CreateBackup(constants.RAPI_CERT_FILE)

    logging.debug("Generating new RAPI certificate at %s",
                  constants.RAPI_CERT_FILE)
    GenerateSelfSignedSslCert(constants.RAPI_CERT_FILE)
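
# Example usage (a sketch, not part of the original module): regenerate only
# the cluster (noded) certificate, keeping the existing RAPI certificate and
# confd HMAC key, which is exactly what _InitGanetiServerSetup below does:
#
#   GenerateClusterCrypto(True, False, False)
#
# Passing rapi_cert_pem instead installs a caller-provided RAPI certificate
# rather than generating one.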


def _InitGanetiServerSetup(master_name):
  """Set up the necessary configuration for the initial node daemon.

  This creates the nodepass file containing the shared password for
  the cluster and also generates the SSL certificate.

  """
  # Generate cluster secrets
  GenerateClusterCrypto(True, False, False)

  result = utils.RunCmd([constants.DAEMON_UTIL, "start", constants.NODED])
  if result.failed:
    raise errors.OpExecError("Could not start the node daemon, command %s"
                             " had exitcode %s and error %s" %
                             (result.cmd, result.exit_code, result.output))

  _WaitForNodeDaemon(master_name)


def _WaitForNodeDaemon(node_name):
  """Wait for node daemon to become responsive.

  """
  def _CheckNodeDaemon():
    result = rpc.RpcRunner.call_version([node_name])[node_name]
    if result.fail_msg:
      raise utils.RetryAgain()

  try:
    utils.Retry(_CheckNodeDaemon, 1.0, 10.0)
  except utils.RetryTimeout:
    raise errors.OpExecError("Node daemon on %s didn't answer queries within"
                             " 10 seconds" % node_name)


def InitCluster(cluster_name, mac_prefix,
                master_netdev, file_storage_dir, candidate_pool_size,
                secondary_ip=None, vg_name=None, beparams=None,
                nicparams=None, hvparams=None, enabled_hypervisors=None,
                modify_etc_hosts=True, modify_ssh_setup=True,
                maintain_node_health=False):
  """Initialise the cluster.

  @type candidate_pool_size: int
  @param candidate_pool_size: master candidate pool size

  """
  # TODO: complete the docstring
  if config.ConfigWriter.IsCluster():
    raise errors.OpPrereqError("Cluster is already initialised",
                               errors.ECODE_STATE)

  if not enabled_hypervisors:
    raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                               " least one member", errors.ECODE_INVAL)
  invalid_hvs = set(enabled_hypervisors) - constants.HYPER_TYPES
  if invalid_hvs:
    raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                               " entries: %s" % invalid_hvs,
                               errors.ECODE_INVAL)

  hostname = utils.GetHostInfo()

  if hostname.ip.startswith("127."):
    raise errors.OpPrereqError("This host's IP resolves to the loopback"
                               " range (%s). Please fix DNS or %s." %
                               (hostname.ip, constants.ETC_HOSTS),
                               errors.ECODE_ENVIRON)

  if not utils.OwnIpAddress(hostname.ip):
    raise errors.OpPrereqError("Inconsistency: this host's name resolves"
                               " to %s,\nbut this IP address does not"
                               " belong to this host. Aborting." %
                               hostname.ip, errors.ECODE_ENVIRON)

  clustername = utils.GetHostInfo(utils.HostInfo.NormalizeName(cluster_name))

  if utils.TcpPing(clustername.ip, constants.DEFAULT_NODED_PORT,
                   timeout=5):
    raise errors.OpPrereqError("Cluster IP already active. Aborting.",
                               errors.ECODE_NOTUNIQUE)

  if secondary_ip:
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    if (secondary_ip != hostname.ip and
        not utils.OwnIpAddress(secondary_ip)):
      raise errors.OpPrereqError("You gave %s as secondary IP,"
                                 " but it does not belong to this host." %
                                 secondary_ip, errors.ECODE_ENVIRON)
  else:
    secondary_ip = hostname.ip

  if vg_name is not None:
    # Check if volume group is valid
    vgstatus = utils.CheckVolumeGroupSize(utils.ListVolumeGroups(), vg_name,
                                          constants.MIN_VG_SIZE)
    if vgstatus:
      raise errors.OpPrereqError("Error: %s\nspecify --no-lvm-storage if"
                                 " you are not using lvm" % vgstatus,
                                 errors.ECODE_INVAL)

  file_storage_dir = os.path.normpath(file_storage_dir)

  if not os.path.isabs(file_storage_dir):
    raise errors.OpPrereqError("The file storage directory you passed is"
                               " not an absolute path.", errors.ECODE_INVAL)

  if not os.path.exists(file_storage_dir):
    try:
      os.makedirs(file_storage_dir, 0750)
    except OSError, err:
      raise errors.OpPrereqError("Cannot create file storage directory"
                                 " '%s': %s" % (file_storage_dir, err),
                                 errors.ECODE_ENVIRON)

  if not os.path.isdir(file_storage_dir):
    raise errors.OpPrereqError("The file storage directory '%s' is not"
                               " a directory." % file_storage_dir,
                               errors.ECODE_ENVIRON)

  if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$", mac_prefix):
    raise errors.OpPrereqError("Invalid MAC prefix given '%s'" % mac_prefix,
                               errors.ECODE_INVAL)

  result = utils.RunCmd(["ip", "link", "show", "dev", master_netdev])
  if result.failed:
    raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" %
                               (master_netdev,
                                result.output.strip()), errors.ECODE_INVAL)

  dirs = [(constants.RUN_GANETI_DIR, constants.RUN_DIRS_MODE)]
  utils.EnsureDirs(dirs)

  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)
  objects.NIC.CheckParameterSyntax(nicparams)

  # hvparams is a mapping of hypervisor->hvparams dict
  for hv_name, hv_params in hvparams.iteritems():
    utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
    hv_class = hypervisor.GetHypervisor(hv_name)
    hv_class.CheckParameterSyntax(hv_params)

  # set up the inter-node password and certificate
  _InitGanetiServerSetup(hostname.name)

  # set up ssh config and /etc/hosts
  sshline = utils.ReadFile(constants.SSH_HOST_RSA_PUB)
  sshkey = sshline.split(" ")[1]

  if modify_etc_hosts:
    utils.AddHostToEtcHosts(hostname.name)

  if modify_ssh_setup:
    _InitSSHSetup()

  now = time.time()

  # init of cluster config file
  cluster_config = objects.Cluster(
    serial_no=1,
    rsahostkeypub=sshkey,
    highest_used_port=(constants.FIRST_DRBD_PORT - 1),
    mac_prefix=mac_prefix,
    volume_group_name=vg_name,
    tcpudp_port_pool=set(),
    master_node=hostname.name,
    master_ip=clustername.ip,
    master_netdev=master_netdev,
    cluster_name=clustername.name,
    file_storage_dir=file_storage_dir,
    enabled_hypervisors=enabled_hypervisors,
    beparams={constants.PP_DEFAULT: beparams},
    nicparams={constants.PP_DEFAULT: nicparams},
    hvparams=hvparams,
    candidate_pool_size=candidate_pool_size,
    modify_etc_hosts=modify_etc_hosts,
    modify_ssh_setup=modify_ssh_setup,
    ctime=now,
    mtime=now,
    uuid=utils.NewUUID(),
    maintain_node_health=maintain_node_health,
    )
  master_node_config = objects.Node(name=hostname.name,
                                    primary_ip=hostname.ip,
                                    secondary_ip=secondary_ip,
                                    serial_no=1,
                                    master_candidate=True,
                                    offline=False, drained=False,
                                    )
  InitConfig(constants.CONFIG_VERSION, cluster_config, master_node_config)
  cfg = config.ConfigWriter()
  ssh.WriteKnownHostsFile(cfg, constants.SSH_KNOWN_HOSTS_FILE)
  cfg.Update(cfg.GetClusterInfo(), logging.error)

  # start the master ip
  # TODO: Review rpc call from bootstrap
  # TODO: Warn on failed start master
  rpc.RpcRunner.call_node_start_master(hostname.name, True, False)
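
# Example invocation (a sketch with entirely hypothetical values; real
# callers fill the parameter dicts from command-line options and the
# cluster-wide defaults in constants):
#
#   InitCluster("cluster.example.com", "aa:00:00",
#               "eth0", "/srv/ganeti/file-storage", 10,
#               vg_name="xenvg",
#               beparams={}, nicparams={"mode": "bridged", "link": "xen-br0"},
#               hvparams={"xen-pvm": {}},
#               enabled_hypervisors=["xen-pvm"])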


def InitConfig(version, cluster_config, master_node_config,
               cfg_file=constants.CLUSTER_CONF_FILE):
  """Create the initial cluster configuration.

  It will contain the current node, which will also be the master
  node, and no instances.

  @type version: int
  @param version: configuration version
  @type cluster_config: L{objects.Cluster}
  @param cluster_config: cluster configuration
  @type master_node_config: L{objects.Node}
  @param master_node_config: master node configuration
  @type cfg_file: string
  @param cfg_file: configuration file path

  """
  nodes = {
    master_node_config.name: master_node_config,
    }

  now = time.time()
  config_data = objects.ConfigData(version=version,
                                   cluster=cluster_config,
                                   nodes=nodes,
                                   instances={},
                                   serial_no=1,
                                   ctime=now, mtime=now)
  utils.WriteFile(cfg_file,
                  data=serializer.Dump(config_data.ToDict()),
                  mode=0600)
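
# The resulting file (a sketch of the on-disk layout; the fields follow the
# ConfigData constructor above) is a serialized dictionary along these lines:
#
#   {
#     "version": version,
#     "cluster": {...},          # cluster_config.ToDict()
#     "nodes": {master_node_config.name: {...}},
#     "instances": {},
#     "serial_no": 1,
#     "ctime": now, "mtime": now,
#   }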


def FinalizeClusterDestroy(master):
  """Execute the last steps of cluster destroy.

  This function shuts down all the daemons, completing the destroy
  begun in cmdlib.LUDestroyOpcode.

  """
  cfg = config.ConfigWriter()
  modify_ssh_setup = cfg.GetClusterInfo().modify_ssh_setup
  result = rpc.RpcRunner.call_node_stop_master(master, True)
  msg = result.fail_msg
  if msg:
    logging.warning("Could not disable the master role: %s", msg)
  result = rpc.RpcRunner.call_node_leave_cluster(master, modify_ssh_setup)
  msg = result.fail_msg
  if msg:
    logging.warning("Could not shut down the node daemon and clean up"
                    " the node: %s", msg)


def SetupNodeDaemon(cluster_name, node, ssh_key_check):
  """Add a node to the cluster.

  This function must be called before the actual opcode, and will ssh
  to the remote node, copy the needed files, and start ganeti-noded,
  allowing the master to do the rest via normal rpc calls.

  @param cluster_name: the cluster name
  @param node: the name of the new node
  @param ssh_key_check: whether to do a strict key check

  """
  sshrunner = ssh.SshRunner(cluster_name)

  noded_cert = utils.ReadFile(constants.NODED_CERT_FILE)
  rapi_cert = utils.ReadFile(constants.RAPI_CERT_FILE)
  confd_hmac_key = utils.ReadFile(constants.CONFD_HMAC_KEY)

  # in the base64 PEM encoding, neither '!' nor '.' are valid chars,
  # so we use this to detect an invalid certificate; as long as the
  # cert doesn't contain this, the here-document will be correctly
  # parsed by the shell sequence below. HMAC keys are hexadecimal strings,
  # so the same restrictions apply.
  for content in (noded_cert, rapi_cert, confd_hmac_key):
    if re.search(r"^!EOF\.", content, re.MULTILINE):
      raise errors.OpExecError("invalid SSL certificate or HMAC key")

  if not noded_cert.endswith("\n"):
    noded_cert += "\n"
  if not rapi_cert.endswith("\n"):
    rapi_cert += "\n"
  if not confd_hmac_key.endswith("\n"):
    confd_hmac_key += "\n"

  # set up the inter-node certificates and keys and restart the node daemon:
  # connect with ssh, write out the files and start ganeti-noded. Note that
  # all the variables below are sanitized at this point, either by being
  # constants or by the checks above.
  mycommand = ("umask 077 && "
               "cat > '%s' << '!EOF.' && \n"
               "%s!EOF.\n"
               "cat > '%s' << '!EOF.' && \n"
               "%s!EOF.\n"
               "cat > '%s' << '!EOF.' && \n"
               "%s!EOF.\n"
               "chmod 0400 %s %s %s && "
               "%s start %s" %
               (constants.NODED_CERT_FILE, noded_cert,
                constants.RAPI_CERT_FILE, rapi_cert,
                constants.CONFD_HMAC_KEY, confd_hmac_key,
                constants.NODED_CERT_FILE, constants.RAPI_CERT_FILE,
                constants.CONFD_HMAC_KEY,
                constants.DAEMON_UTIL, constants.NODED))

  result = sshrunner.Run(node, 'root', mycommand, batch=False,
                         ask_key=ssh_key_check,
                         use_cluster_key=False,
                         strict_host_check=ssh_key_check)
  if result.failed:
    raise errors.OpExecError("Remote command on node %s, error: %s,"
                             " output: %s" %
                             (node, result.fail_reason, result.output))

  _WaitForNodeDaemon(node)
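
# The command assembled above expands to a shell snippet along these lines
# (paths are illustrative; the '!EOF.' here-document delimiter was checked
# against the file contents above, so the payload cannot terminate it early):
#
#   umask 077 &&
#   cat > '/var/lib/ganeti/server.pem' << '!EOF.' &&
#   <noded certificate>!EOF.
#   ... (same for the RAPI certificate and the confd HMAC key) ...
#   chmod 0400 <noded cert> <rapi cert> <hmac key> &&
#   daemon-util start ganeti-noded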


def MasterFailover(no_voting=False):
  """Failover the master node.

  This checks that we are not already the master, and will cause the
  current master to cease being master, and the non-master to become
  new master.

  @type no_voting: boolean
  @param no_voting: force the operation without remote nodes agreement
                      (dangerous)

  """
  sstore = ssconf.SimpleStore()

  old_master, new_master = ssconf.GetMasterAndMyself(sstore)
  node_list = sstore.GetNodeList()
  mc_list = sstore.GetMasterCandidates()

  if old_master == new_master:
    raise errors.OpPrereqError("This command must be run on the node"
                               " where you want the new master to be."
                               " %s is already the master" %
                               old_master, errors.ECODE_INVAL)

  if new_master not in mc_list:
    mc_no_master = [name for name in mc_list if name != old_master]
    raise errors.OpPrereqError("This node is not among the nodes marked"
                               " as master candidates. Only these nodes"
                               " can become masters. Current list of"
                               " master candidates is:\n"
                               "%s" % ('\n'.join(mc_no_master)),
                               errors.ECODE_STATE)

  if not no_voting:
    vote_list = GatherMasterVotes(node_list)

    if vote_list:
      voted_master = vote_list[0][0]
      if voted_master is None:
        raise errors.OpPrereqError("Cluster is inconsistent, most nodes did"
                                   " not respond.", errors.ECODE_ENVIRON)
      elif voted_master != old_master:
        raise errors.OpPrereqError("This node has a stale configuration: it"
                                   " believes the master is %s, but the other"
                                   " nodes voted for %s. Please resync the"
                                   " configuration of this node." %
                                   (old_master, voted_master),
                                   errors.ECODE_STATE)
  # end checks

  rcode = 0

  logging.info("Setting master to %s, old master: %s", new_master, old_master)

  result = rpc.RpcRunner.call_node_stop_master(old_master, True)
  msg = result.fail_msg
  if msg:
    logging.error("Could not disable the master role on the old master"
                  " %s, please disable manually: %s", old_master, msg)

  # Here we have a phase where no master should be running

  # instantiate a real config writer, as we now know we have the
  # configuration data
  cfg = config.ConfigWriter()

  cluster_info = cfg.GetClusterInfo()
  cluster_info.master_node = new_master
  # this will also regenerate the ssconf files, since we updated the
  # cluster info
  cfg.Update(cluster_info, logging.error)

  result = rpc.RpcRunner.call_node_start_master(new_master, True, no_voting)
  msg = result.fail_msg
  if msg:
    logging.error("Could not start the master role on the new master"
                  " %s, please check: %s", new_master, msg)
    rcode = 1

  return rcode


def GetMaster():
  """Returns the current master node.

  This is a separate function in bootstrap since it's needed by
  gnt-cluster, and instead of importing ssconf directly, it's better
  to abstract it in bootstrap, where we already use ssconf in other
  functions too.

  """
  sstore = ssconf.SimpleStore()

  old_master, _ = ssconf.GetMasterAndMyself(sstore)

  return old_master


def GatherMasterVotes(node_list):
  """Check the agreement on who is the master.

  This function will return a list of (node, number of votes), ordered
  by the number of votes. Errors will be denoted by the key 'None'.

  Note that the sum of votes is the number of nodes this machine
  knows, whereas the number of entries in the list could be different
  (if some nodes vote for another master).

  We remove ourselves from the list since, bugs aside, we use the same
  source of configuration information for both backend and bootstrap
  and would therefore always vote for ourselves.

  @type node_list: list
  @param node_list: the list of nodes to query for master info; the current
      node will be removed if it is in the list
  @rtype: list
  @return: list of (node, votes)

  """
  myself = utils.HostInfo().name
  try:
    node_list.remove(myself)
  except ValueError:
    pass
  if not node_list:
    # no nodes left (possibly after removing ourselves)
    return []
  results = rpc.RpcRunner.call_master_info(node_list)
  if not isinstance(results, dict):
    # this should not happen (unless there is an internal error in rpc)
    logging.critical("Can't complete rpc call, aborting master startup")
    return [(None, len(node_list))]
  votes = {}
  for node in results:
    nres = results[node]
    data = nres.payload
    msg = nres.fail_msg
    fail = False
    if msg:
      logging.warning("Error contacting node %s: %s", node, msg)
      fail = True
    elif not isinstance(data, (tuple, list)) or len(data) < 3:
      logging.warning("Invalid data received from node %s: %s", node, data)
      fail = True
    if fail:
      if None not in votes:
        votes[None] = 0
      votes[None] += 1
      continue
    master_node = data[2]
    if master_node not in votes:
      votes[master_node] = 0
    votes[master_node] += 1

  vote_list = votes.items()
  # sort first on number of votes then on name, so that None sorts
  # later when half of the nodes do not respond and the other half
  # all vote for the same master
  vote_list.sort(key=lambda x: (x[1], x[0]), reverse=True)

  return vote_list
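
# Example of the return value (a sketch with hypothetical node names): on a
# three-node cluster where the local node has already been removed from the
# query list, node2 reports node1 as master and node3 fails to answer, so the
# failed query is counted under None and the list sorts by descending votes:
#
#   GatherMasterVotes(["node2.example.com", "node3.example.com"])
#   => [("node1.example.com", 1), (None, 1)]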