#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Functions to bootstrap a new cluster.

"""

import os
import os.path
import re
import logging
import tempfile
import time

from ganeti import rpc
from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import config
from ganeti import constants
from ganeti import objects
from ganeti import ssconf
from ganeti import serializer
from ganeti import hypervisor


def _InitSSHSetup():
  """Setup the SSH configuration for the cluster.

  This generates a dsa keypair for root and adds the public key to
  the permitted hosts.

  """
  priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)

  for name in priv_key, pub_key:
    if os.path.exists(name):
      utils.CreateBackup(name)
    utils.RemoveFile(name)

  result = utils.RunCmd(["ssh-keygen", "-t", "dsa",
                         "-f", priv_key,
                         "-q", "-N", ""])
  if result.failed:
    raise errors.OpExecError("Could not generate ssh keypair, error %s" %
                             result.output)

  utils.AddAuthorizedKey(auth_keys, utils.ReadFile(pub_key))
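
# Usage sketch (illustrative, not part of the module): after _InitSSHSetup()
# runs, root's own public key is present in its authorized_keys file, so:
#
#   priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)
#   _InitSSHSetup()
#   assert utils.ReadFile(pub_key).strip() in utils.ReadFile(auth_keys)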


def GenerateSelfSignedSslCert(file_name, validity=(365 * 5)):
  """Generates a self-signed SSL certificate.

  @type file_name: str
  @param file_name: Path to output file
  @type validity: int
  @param validity: Validity for certificate in days

  """
  (fd, tmp_file_name) = tempfile.mkstemp(dir=os.path.dirname(file_name))
  try:
    try:
      # Set permissions before writing key
      os.chmod(tmp_file_name, 0600)

      result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
                             "-days", str(validity), "-nodes", "-x509",
                             "-keyout", tmp_file_name, "-out", tmp_file_name,
                             "-batch"])
      if result.failed:
        raise errors.OpExecError("Could not generate SSL certificate, command"
                                 " %s had exitcode %s and error message %s" %
                                 (result.cmd, result.exit_code, result.output))

      # Make read-only
      os.chmod(tmp_file_name, 0400)

      os.rename(tmp_file_name, file_name)
    finally:
      utils.RemoveFile(tmp_file_name)
  finally:
    os.close(fd)
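
# Example (illustrative): the private key and the certificate are written
# into the same PEM file, which appears atomically via the final rename; a
# hypothetical one-year certificate could be generated with:
#
#   GenerateSelfSignedSslCert("/tmp/example.pem", validity=365)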


def GenerateHmacKey(file_name):
  """Writes a new HMAC key.

  @type file_name: str
  @param file_name: Path to output file

  """
  utils.WriteFile(file_name, data="%s\n" % utils.GenerateSecret(), mode=0400,
                  backup=True)
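
# Usage sketch (illustrative): this writes a single line containing a fresh
# secret and leaves the file readable only by its owner; a pre-existing key
# at the same path is backed up first because of backup=True.
#
#   GenerateHmacKey("/tmp/example.hmac")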


def GenerateClusterCrypto(new_cluster_cert, new_rapi_cert, new_hmac_key,
                          rapi_cert_pem=None):
  """Updates the cluster certificates, keys and secrets.

  @type new_cluster_cert: bool
  @param new_cluster_cert: Whether to generate a new cluster certificate
  @type new_rapi_cert: bool
  @param new_rapi_cert: Whether to generate a new RAPI certificate
  @type new_hmac_key: bool
  @param new_hmac_key: Whether to generate a new HMAC key
  @type rapi_cert_pem: string
  @param rapi_cert_pem: New RAPI certificate in PEM format

  """
  # SSL certificate
  cluster_cert_exists = os.path.exists(constants.SSL_CERT_FILE)
  if new_cluster_cert or not cluster_cert_exists:
    if cluster_cert_exists:
      utils.CreateBackup(constants.SSL_CERT_FILE)

    logging.debug("Generating new cluster certificate at %s",
                  constants.SSL_CERT_FILE)
    GenerateSelfSignedSslCert(constants.SSL_CERT_FILE)

  # HMAC key
  if new_hmac_key or not os.path.exists(constants.HMAC_CLUSTER_KEY):
    logging.debug("Writing new HMAC key to %s", constants.HMAC_CLUSTER_KEY)
    GenerateHmacKey(constants.HMAC_CLUSTER_KEY)

  # RAPI
  rapi_cert_exists = os.path.exists(constants.RAPI_CERT_FILE)

  if rapi_cert_pem:
    # Assume rapi_cert_pem contains a valid PEM-formatted certificate and key
    logging.debug("Writing RAPI certificate at %s",
                  constants.RAPI_CERT_FILE)
    utils.WriteFile(constants.RAPI_CERT_FILE, data=rapi_cert_pem, backup=True)

  elif new_rapi_cert or not rapi_cert_exists:
    if rapi_cert_exists:
      utils.CreateBackup(constants.RAPI_CERT_FILE)

    logging.debug("Generating new RAPI certificate at %s",
                  constants.RAPI_CERT_FILE)
    GenerateSelfSignedSslCert(constants.RAPI_CERT_FILE)
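
# Usage sketch (illustrative): the flags are independent, so a full
# regeneration of both certificates and the HMAC key is
#
#   GenerateClusterCrypto(True, True, True)
#
# while replacing only the RAPI certificate from an existing PEM blob
# (pem_data being a hypothetical variable holding the PEM text) is
#
#   GenerateClusterCrypto(False, False, False, rapi_cert_pem=pem_data)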


def _InitGanetiServerSetup(master_name):
  """Setup the necessary configuration for the initial node daemon.

  This generates the cluster secrets (SSL certificate, RAPI certificate
  and HMAC key) and starts the node daemon.

  """
  # Generate cluster secrets
  GenerateClusterCrypto(True, False, False)

  result = utils.RunCmd([constants.DAEMON_UTIL, "start", constants.NODED])
  if result.failed:
    raise errors.OpExecError("Could not start the node daemon, command %s"
                             " had exitcode %s and error %s" %
                             (result.cmd, result.exit_code, result.output))

  _WaitForNodeDaemon(master_name)
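
# Flow note (descriptive): only new_cluster_cert is forced above; on a
# fresh node the RAPI certificate and HMAC key are still created, because
# GenerateClusterCrypto also generates any file that does not exist yet.
# The _WaitForNodeDaemon call then polls the freshly started daemon (see
# below) for up to ten seconds before giving up.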


def _WaitForNodeDaemon(node_name):
  """Wait for node daemon to become responsive.

  """
  def _CheckNodeDaemon():
    result = rpc.RpcRunner.call_version([node_name])[node_name]
    if result.fail_msg:
      raise utils.RetryAgain()

  try:
    utils.Retry(_CheckNodeDaemon, 1.0, 10.0)
  except utils.RetryTimeout:
    raise errors.OpExecError("Node daemon on %s didn't answer queries within"
                             " 10 seconds" % node_name)


def InitCluster(cluster_name, mac_prefix,
                master_netdev, file_storage_dir, candidate_pool_size,
                secondary_ip=None, vg_name=None, beparams=None,
                nicparams=None, hvparams=None, enabled_hypervisors=None,
                modify_etc_hosts=True, modify_ssh_setup=True):
  """Initialise the cluster.

  @type candidate_pool_size: int
  @param candidate_pool_size: master candidate pool size

  """
  # TODO: complete the docstring
  if config.ConfigWriter.IsCluster():
    raise errors.OpPrereqError("Cluster is already initialised",
                               errors.ECODE_STATE)

  if not enabled_hypervisors:
    raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                               " least one member", errors.ECODE_INVAL)
  invalid_hvs = set(enabled_hypervisors) - constants.HYPER_TYPES
  if invalid_hvs:
    raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                               " entries: %s" % invalid_hvs,
                               errors.ECODE_INVAL)

  hostname = utils.GetHostInfo()

  if hostname.ip.startswith("127."):
    raise errors.OpPrereqError("This host's IP resolves to the loopback"
                               " range (%s). Please fix DNS or %s." %
                               (hostname.ip, constants.ETC_HOSTS),
                               errors.ECODE_ENVIRON)

  if not utils.OwnIpAddress(hostname.ip):
    raise errors.OpPrereqError("Inconsistency: this host's name resolves"
                               " to %s,\nbut this ip address does not"
                               " belong to this host. Aborting." %
                               hostname.ip, errors.ECODE_ENVIRON)

  clustername = utils.GetHostInfo(utils.HostInfo.NormalizeName(cluster_name))

  if utils.TcpPing(clustername.ip, constants.DEFAULT_NODED_PORT,
                   timeout=5):
    raise errors.OpPrereqError("Cluster IP already active. Aborting.",
                               errors.ECODE_NOTUNIQUE)

  if secondary_ip:
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary ip given",
                                 errors.ECODE_INVAL)
    if (secondary_ip != hostname.ip and
        not utils.OwnIpAddress(secondary_ip)):
      raise errors.OpPrereqError("You gave %s as secondary IP,"
                                 " but it does not belong to this host." %
                                 secondary_ip, errors.ECODE_ENVIRON)
  else:
    secondary_ip = hostname.ip

  if vg_name is not None:
    # Check if volume group is valid
    vgstatus = utils.CheckVolumeGroupSize(utils.ListVolumeGroups(), vg_name,
                                          constants.MIN_VG_SIZE)
    if vgstatus:
      raise errors.OpPrereqError("Error: %s\nspecify --no-lvm-storage if"
                                 " you are not using lvm" % vgstatus,
                                 errors.ECODE_INVAL)

  file_storage_dir = os.path.normpath(file_storage_dir)

  if not os.path.isabs(file_storage_dir):
    raise errors.OpPrereqError("The file storage directory you passed is"
                               " not an absolute path.", errors.ECODE_INVAL)

  if not os.path.exists(file_storage_dir):
    try:
      os.makedirs(file_storage_dir, 0750)
    except OSError, err:
      raise errors.OpPrereqError("Cannot create file storage directory"
                                 " '%s': %s" % (file_storage_dir, err),
                                 errors.ECODE_ENVIRON)

  if not os.path.isdir(file_storage_dir):
    raise errors.OpPrereqError("The file storage directory '%s' is not"
                               " a directory." % file_storage_dir,
                               errors.ECODE_ENVIRON)

  if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$", mac_prefix):
    raise errors.OpPrereqError("Invalid mac prefix given '%s'" % mac_prefix,
                               errors.ECODE_INVAL)

  result = utils.RunCmd(["ip", "link", "show", "dev", master_netdev])
  if result.failed:
    raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" %
                               (master_netdev,
                                result.output.strip()), errors.ECODE_INVAL)

  dirs = [(constants.RUN_GANETI_DIR, constants.RUN_DIRS_MODE)]
  utils.EnsureDirs(dirs)

  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)
  objects.NIC.CheckParameterSyntax(nicparams)

  # hvparams is a mapping of hypervisor->hvparams dict
  for hv_name, hv_params in hvparams.iteritems():
    utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
    hv_class = hypervisor.GetHypervisor(hv_name)
    hv_class.CheckParameterSyntax(hv_params)

  # set up the inter-node certificates and secrets
  _InitGanetiServerSetup(hostname.name)

  # set up ssh config and /etc/hosts
  sshline = utils.ReadFile(constants.SSH_HOST_RSA_PUB)
  sshkey = sshline.split(" ")[1]

  if modify_etc_hosts:
    utils.AddHostToEtcHosts(hostname.name)

  if modify_ssh_setup:
    _InitSSHSetup()

  now = time.time()

  # init of cluster config file
  cluster_config = objects.Cluster(
    serial_no=1,
    rsahostkeypub=sshkey,
    highest_used_port=(constants.FIRST_DRBD_PORT - 1),
    mac_prefix=mac_prefix,
    volume_group_name=vg_name,
    tcpudp_port_pool=set(),
    master_node=hostname.name,
    master_ip=clustername.ip,
    master_netdev=master_netdev,
    cluster_name=clustername.name,
    file_storage_dir=file_storage_dir,
    enabled_hypervisors=enabled_hypervisors,
    beparams={constants.PP_DEFAULT: beparams},
    nicparams={constants.PP_DEFAULT: nicparams},
    hvparams=hvparams,
    candidate_pool_size=candidate_pool_size,
    modify_etc_hosts=modify_etc_hosts,
    modify_ssh_setup=modify_ssh_setup,
    ctime=now,
    mtime=now,
    uuid=utils.NewUUID(),
    )
  master_node_config = objects.Node(name=hostname.name,
                                    primary_ip=hostname.ip,
                                    secondary_ip=secondary_ip,
                                    serial_no=1,
                                    master_candidate=True,
                                    offline=False, drained=False,
                                    )
  InitConfig(constants.CONFIG_VERSION, cluster_config, master_node_config)
  cfg = config.ConfigWriter()
  ssh.WriteKnownHostsFile(cfg, constants.SSH_KNOWN_HOSTS_FILE)
  cfg.Update(cfg.GetClusterInfo(), logging.error)

  # start the master ip
  # TODO: Review rpc call from bootstrap
  # TODO: Warn on failed start master
  rpc.RpcRunner.call_node_start_master(hostname.name, True, False)
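
# Usage sketch (hypothetical values; in practice these come from the
# "gnt-cluster init" command line):
#
#   InitCluster("cluster.example.com", "aa:00:00", "eth0",
#               "/srv/ganeti/file-storage", 10,
#               vg_name="xenvg",
#               beparams=..., nicparams=..., hvparams=...,
#               enabled_hypervisors=["xen-pvm"])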


def InitConfig(version, cluster_config, master_node_config,
               cfg_file=constants.CLUSTER_CONF_FILE):
  """Create the initial cluster configuration.

  It will contain the current node, which will also be the master
  node, and no instances.

  @type version: int
  @param version: configuration version
  @type cluster_config: L{objects.Cluster}
  @param cluster_config: cluster configuration
  @type master_node_config: L{objects.Node}
  @param master_node_config: master node configuration
  @type cfg_file: string
  @param cfg_file: configuration file path

  """
  nodes = {
    master_node_config.name: master_node_config,
    }

  now = time.time()
  config_data = objects.ConfigData(version=version,
                                   cluster=cluster_config,
                                   nodes=nodes,
                                   instances={},
                                   serial_no=1,
                                   ctime=now, mtime=now)
  utils.WriteFile(cfg_file,
                  data=serializer.Dump(config_data.ToDict()),
                  mode=0600)
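
# Shape sketch (illustrative) of the file this writes: the serialized form
# of ConfigData.ToDict(), roughly
#
#   {"version": ..., "cluster": {...}, "nodes": {<master name>: {...}},
#    "instances": {}, "serial_no": 1, "ctime": ..., "mtime": ...}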


def FinalizeClusterDestroy(master):
  """Execute the last steps of cluster destroy

  This function shuts down all the daemons, completing the destroy
  begun in cmdlib.LUDestroyOpcode.

  """
  cfg = config.ConfigWriter()
  modify_ssh_setup = cfg.GetClusterInfo().modify_ssh_setup
  result = rpc.RpcRunner.call_node_stop_master(master, True)
  msg = result.fail_msg
  if msg:
    logging.warning("Could not disable the master role: %s", msg)
  result = rpc.RpcRunner.call_node_leave_cluster(master, modify_ssh_setup)
  msg = result.fail_msg
  if msg:
    logging.warning("Could not shutdown the node daemon and cleanup"
                    " the node: %s", msg)


def SetupNodeDaemon(cluster_name, node, ssh_key_check):
  """Add a node to the cluster.

  This function must be called before the actual opcode, and will ssh
  to the remote node, copy the needed files, and start ganeti-noded,
  allowing the master to do the rest via normal rpc calls.

  @param cluster_name: the cluster name
  @param node: the name of the new node
  @param ssh_key_check: whether to do a strict key check

  """
  sshrunner = ssh.SshRunner(cluster_name)

  noded_cert = utils.ReadFile(constants.SSL_CERT_FILE)
  rapi_cert = utils.ReadFile(constants.RAPI_CERT_FILE)
  hmac_key = utils.ReadFile(constants.HMAC_CLUSTER_KEY)

  # in the base64 pem encoding, neither '!' nor '.' are valid chars,
  # so we use this to detect an invalid certificate; as long as the
  # cert doesn't contain this, the here-document will be correctly
  # parsed by the shell sequence below. HMAC keys are hexadecimal strings,
  # so the same restrictions apply.
  for content in (noded_cert, rapi_cert, hmac_key):
    if re.search('^!EOF\.', content, re.MULTILINE):
      raise errors.OpExecError("invalid SSL certificate or HMAC key")

  if not noded_cert.endswith("\n"):
    noded_cert += "\n"
  if not rapi_cert.endswith("\n"):
    rapi_cert += "\n"
  if not hmac_key.endswith("\n"):
    hmac_key += "\n"

  # Set up the certificates and keys on the new node and (re)start the node
  # daemon there via ssh; all the variables below are sanitized at this
  # point, either by being constants or by the checks above.
  mycommand = ("umask 077 && "
               "cat > '%s' << '!EOF.' && \n"
               "%s!EOF.\n"
               "cat > '%s' << '!EOF.' && \n"
               "%s!EOF.\n"
               "cat > '%s' << '!EOF.' && \n"
               "%s!EOF.\n"
               "chmod 0400 %s %s %s && "
               "%s start %s" %
               (constants.SSL_CERT_FILE, noded_cert,
                constants.RAPI_CERT_FILE, rapi_cert,
                constants.HMAC_CLUSTER_KEY, hmac_key,
                constants.SSL_CERT_FILE, constants.RAPI_CERT_FILE,
                constants.HMAC_CLUSTER_KEY,
                constants.DAEMON_UTIL, constants.NODED))

  result = sshrunner.Run(node, 'root', mycommand, batch=False,
                         ask_key=ssh_key_check,
                         use_cluster_key=False,
                         strict_host_check=ssh_key_check)
  if result.failed:
    raise errors.OpExecError("Remote command on node %s, error: %s,"
                             " output: %s" %
                             (node, result.fail_reason, result.output))

  _WaitForNodeDaemon(node)
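
# Schematic form of the generated remote command (one quoted here-document
# per secret file):
#
#   umask 077 &&
#   cat > <cert path> << '!EOF.' && <PEM contents>!EOF.
#   ... (same for the RAPI certificate and the HMAC key) ...
#   chmod 0400 <the three files> && <daemon-util> start <noded>
#
# The quoted '!EOF.' delimiter is exactly why the contents were checked
# above for a line beginning with "!EOF.".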


def MasterFailover(no_voting=False):
  """Failover the master node.

  This checks that we are not already the master, and will cause the
  current master to cease being master, and the node this command runs
  on to become the new master.

  @type no_voting: boolean
  @param no_voting: force the operation without remote nodes' agreement
                      (dangerous)

  """
  sstore = ssconf.SimpleStore()

  old_master, new_master = ssconf.GetMasterAndMyself(sstore)
  node_list = sstore.GetNodeList()
  mc_list = sstore.GetMasterCandidates()

  if old_master == new_master:
    raise errors.OpPrereqError("This command must be run on the node"
                               " where you want the new master to be."
                               " %s is already the master" %
                               old_master, errors.ECODE_INVAL)

  if new_master not in mc_list:
    mc_no_master = [name for name in mc_list if name != old_master]
    raise errors.OpPrereqError("This node is not among the nodes marked"
                               " as master candidates. Only these nodes"
                               " can become masters. Current list of"
                               " master candidates is:\n"
                               "%s" % ('\n'.join(mc_no_master)),
                               errors.ECODE_STATE)

  if not no_voting:
    vote_list = GatherMasterVotes(node_list)

    if vote_list:
      voted_master = vote_list[0][0]
      if voted_master is None:
        raise errors.OpPrereqError("Cluster is inconsistent, most nodes did"
                                   " not respond.", errors.ECODE_ENVIRON)
      elif voted_master != old_master:
        raise errors.OpPrereqError("I have a wrong configuration, I believe"
                                   " the master is %s but the other nodes"
                                   " voted %s. Please resync the configuration"
                                   " of this node." %
                                   (old_master, voted_master),
                                   errors.ECODE_STATE)
  # end checks

  rcode = 0

  logging.info("Setting master to %s, old master: %s", new_master, old_master)

  result = rpc.RpcRunner.call_node_stop_master(old_master, True)
  msg = result.fail_msg
  if msg:
    logging.error("Could not disable the master role on the old master"
                  " %s, please disable manually: %s", old_master, msg)

  # Here we have a phase where no master should be running

  # instantiate a real config writer, as we now know we have the
  # configuration data
  cfg = config.ConfigWriter()

  cluster_info = cfg.GetClusterInfo()
  cluster_info.master_node = new_master
  # this will also regenerate the ssconf files, since we updated the
  # cluster info
  cfg.Update(cluster_info, logging.error)

  result = rpc.RpcRunner.call_node_start_master(new_master, True, no_voting)
  msg = result.fail_msg
  if msg:
    logging.error("Could not start the master role on the new master"
                  " %s, please check: %s", new_master, msg)
    rcode = 1

  return rcode
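
# Usage sketch: run on the node that should take over the master role. With
# voting enabled (the default), the other nodes must agree on who the
# current master is before the switch happens:
#
#   rcode = MasterFailover()               # vote, then fail over
#   rcode = MasterFailover(no_voting=True) # dangerous: skip the vote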


def GetMaster():
  """Returns the current master node.

  This is a separate function in bootstrap since it's needed by
  gnt-cluster, and instead of importing ssconf directly, it's better
  to abstract it in bootstrap, where we do use ssconf in other
  functions too.

  """
  sstore = ssconf.SimpleStore()

  old_master, _ = ssconf.GetMasterAndMyself(sstore)

  return old_master


def GatherMasterVotes(node_list):
  """Check the agreement on who is the master.

  This function will return a list of (node, number of votes), ordered
  by the number of votes. Errors will be denoted by the key 'None'.

  Note that the sum of votes is the number of nodes this machine
  knows, whereas the number of entries in the list could be different
  (if some nodes vote for another master).

  We remove ourselves from the list since we know that (bugs aside),
  as we use the same source of configuration information for both
  backend and bootstrap, we'll always vote for ourselves.

  @type node_list: list
  @param node_list: the list of nodes to query for master info; the current
      node will be removed if it is in the list
  @rtype: list
  @return: list of (node, votes)

  """
  myself = utils.HostInfo().name
  try:
    node_list.remove(myself)
  except ValueError:
    pass
  if not node_list:
    # no nodes left (possibly after removing myself)
    return []
  results = rpc.RpcRunner.call_master_info(node_list)
  if not isinstance(results, dict):
    # this should not happen (unless internal error in rpc)
    logging.critical("Can't complete rpc call, aborting master startup")
    return [(None, len(node_list))]
  votes = {}
  for node in results:
    nres = results[node]
    data = nres.payload
    msg = nres.fail_msg
    fail = False
    if msg:
      logging.warning("Error contacting node %s: %s", node, msg)
      fail = True
    elif not isinstance(data, (tuple, list)) or len(data) < 3:
      logging.warning("Invalid data received from node %s: %s", node, data)
      fail = True
    if fail:
      if None not in votes:
        votes[None] = 0
      votes[None] += 1
      continue
    master_node = data[2]
    if master_node not in votes:
      votes[master_node] = 0
    votes[master_node] += 1

  vote_list = [v for v in votes.items()]
  # sort first on number of votes then on name, since we want None
  # sorted later if we have half of the nodes not responding, and
  # half voting all for the same master
  vote_list.sort(key=lambda x: (x[1], x[0]), reverse=True)

  return vote_list
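
# Worked example (hypothetical responses): if two nodes report "node1" as
# master and a third node fails to answer, votes ends up as
# {"node1": 2, None: 1} and the function returns [("node1", 2), (None, 1)].
# At equal vote counts None still sorts after real node names (Python 2
# orders None before strings, and the sort is reversed), which is what the
# comment above relies on.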