Statistics
| Branch: | Tag: | Revision:

root / lib / bootstrap.py @ 87622829

History | View | Annotate | Download (15 kB)

1 a0c9f010 Michael Hanselmann
#
2 a0c9f010 Michael Hanselmann
#
3 a0c9f010 Michael Hanselmann
4 a0c9f010 Michael Hanselmann
# Copyright (C) 2006, 2007, 2008 Google Inc.
5 a0c9f010 Michael Hanselmann
#
6 a0c9f010 Michael Hanselmann
# This program is free software; you can redistribute it and/or modify
7 a0c9f010 Michael Hanselmann
# it under the terms of the GNU General Public License as published by
8 a0c9f010 Michael Hanselmann
# the Free Software Foundation; either version 2 of the License, or
9 a0c9f010 Michael Hanselmann
# (at your option) any later version.
10 a0c9f010 Michael Hanselmann
#
11 a0c9f010 Michael Hanselmann
# This program is distributed in the hope that it will be useful, but
12 a0c9f010 Michael Hanselmann
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 a0c9f010 Michael Hanselmann
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 a0c9f010 Michael Hanselmann
# General Public License for more details.
15 a0c9f010 Michael Hanselmann
#
16 a0c9f010 Michael Hanselmann
# You should have received a copy of the GNU General Public License
17 a0c9f010 Michael Hanselmann
# along with this program; if not, write to the Free Software
18 a0c9f010 Michael Hanselmann
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 a0c9f010 Michael Hanselmann
# 02110-1301, USA.
20 a0c9f010 Michael Hanselmann
21 a0c9f010 Michael Hanselmann
22 a0c9f010 Michael Hanselmann
"""Functions to bootstrap a new cluster.
23 a0c9f010 Michael Hanselmann

24 a0c9f010 Michael Hanselmann
"""
25 a0c9f010 Michael Hanselmann
26 a0c9f010 Michael Hanselmann
import os
27 a0c9f010 Michael Hanselmann
import os.path
28 a0c9f010 Michael Hanselmann
import sha
29 a0c9f010 Michael Hanselmann
import re
30 b1b6ea87 Iustin Pop
import logging
31 a0c9f010 Michael Hanselmann
32 a0c9f010 Michael Hanselmann
from ganeti import rpc
33 a0c9f010 Michael Hanselmann
from ganeti import ssh
34 a0c9f010 Michael Hanselmann
from ganeti import utils
35 a0c9f010 Michael Hanselmann
from ganeti import errors
36 a0c9f010 Michael Hanselmann
from ganeti import config
37 a0c9f010 Michael Hanselmann
from ganeti import constants
38 b9eeeb02 Michael Hanselmann
from ganeti import objects
39 a0c9f010 Michael Hanselmann
from ganeti import ssconf
40 a0c9f010 Michael Hanselmann
41 a0c9f010 Michael Hanselmann
def _InitSSHSetup(node):
  """Create a fresh SSH keypair for the Ganeti user and authorize it.

  Existing private/public key files are backed up and removed, a new
  DSA keypair with an empty passphrase is generated via ssh-keygen, and
  the new public key is appended to the user's authorized keys file.

  Args:
    node: the name of this host as a fqdn (not used by this function)

  Raises:
    errors.OpExecError: if ssh-keygen reports a failure

  """
  (private_path, public_path,
   authorized_path) = ssh.GetUserFiles(constants.GANETI_RUNAS)

  # back up and remove any previous key files before generating new ones
  for key_path in (private_path, public_path):
    if os.path.exists(key_path):
      utils.CreateBackup(key_path)
    utils.RemoveFile(key_path)

  keygen_cmd = ["ssh-keygen", "-t", "dsa",
                "-f", private_path,
                "-q", "-N", ""]
  keygen = utils.RunCmd(keygen_cmd)
  if keygen.failed:
    raise errors.OpExecError("Could not generate ssh keypair, error %s" %
                             keygen.output)

  pub_file = open(public_path, 'r')
  try:
    # at most 8192 bytes of the public key file are read
    public_key = pub_file.read(8192)
    utils.AddAuthorizedKey(authorized_path, public_key)
  finally:
    pub_file.close()
71 a0c9f010 Michael Hanselmann
72 a0c9f010 Michael Hanselmann
73 d23ef431 Michael Hanselmann
def _InitGanetiServerSetup():
  """Setup the necessary configuration for the initial node daemon.

  This generates the SSL certificate used by the node daemon (private
  key and certificate are both written to constants.SSL_CERT_FILE),
  restricts that file to owner read-only and then restarts the node
  daemon through its init script.

  @raise errors.OpExecError: if the certificate generation or the
      restart of the node daemon fails

  """
  # "-keyout" and "-out" point to the same file on purpose: the key and
  # the certificate are kept together in a single PEM file
  result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
                         "-days", str(365*5), "-nodes", "-x509",
                         "-keyout", constants.SSL_CERT_FILE,
                         "-out", constants.SSL_CERT_FILE, "-batch"])
  if result.failed:
    raise errors.OpExecError("could not generate server ssl cert, command"
                             " %s had exitcode %s and error message %s" %
                             (result.cmd, result.exit_code, result.output))

  # the file contains the unencrypted private key ("-nodes"), so only
  # its owner may read it; 0o400 is the octal spelling accepted by both
  # Python 2.6+ and Python 3
  os.chmod(constants.SSL_CERT_FILE, 0o400)

  result = utils.RunCmd([constants.NODE_INITD_SCRIPT, "restart"])

  if result.failed:
    raise errors.OpExecError("Could not start the node daemon, command %s"
                             " had exitcode %s and error %s" %
                             (result.cmd, result.exit_code, result.output))
97 a0c9f010 Michael Hanselmann
98 a0c9f010 Michael Hanselmann
99 4342e89b Alexander Schreiber
def InitCluster(cluster_name, mac_prefix, def_bridge,
                master_netdev, file_storage_dir,
                secondary_ip=None,
                vg_name=None, beparams=None, hvparams=None,
                enabled_hypervisors=None, default_hypervisor=None):
  """Initialise the cluster.

  The function first validates the environment and the given
  parameters, then generates the node daemon certificate and the SSH
  keys, writes the initial configuration (with this host as the only
  node, which is also the master) and finally asks the node daemon to
  start the master role.

  @param cluster_name: name of the cluster; it is resolved through
      utils.HostInfo and its IP must not yet answer on the node daemon
      port
  @param mac_prefix: MAC address prefix made of three colon-separated
      pairs of lowercase hexadecimal digits (e.g. "aa:00:00")
  @param def_bridge: stored as the default bridge of the cluster
  @param master_netdev: stored as the master network device; it must
      be accepted by "ip link show dev"
  @param file_storage_dir: absolute path of the file storage
      directory; it is created (mode 0750) if it does not exist
  @param secondary_ip: optional secondary IP of this node; when not
      given, the IP this host's name resolves to is used
  @param vg_name: name of the volume group to check against
      constants.MIN_VG_SIZE, or None to skip the check
  @param beparams: stored in the cluster configuration under the
      constants.BEGR_DEFAULT key
  @param hvparams: stored unchanged in the cluster configuration
  @param enabled_hypervisors: stored unchanged in the cluster
      configuration
  @param default_hypervisor: stored unchanged in the cluster
      configuration
  @raise errors.OpPrereqError: if one of the prerequisite checks fails
  @raise errors.OpExecError: if _InitGanetiServerSetup or
      _InitSSHSetup fail

  """
  if config.ConfigWriter.IsCluster():
    raise errors.OpPrereqError("Cluster is already initialised")

  hostname = utils.HostInfo()

  if hostname.ip.startswith("127."):
    raise errors.OpPrereqError("This host's IP resolves to the private"
                               " range (%s). Please fix DNS or %s." %
                               (hostname.ip, constants.ETC_HOSTS))

  if not utils.OwnIpAddress(hostname.ip):
    raise errors.OpPrereqError("Inconsistency: this host's name resolves"
                               " to %s,\nbut this ip address does not"
                               " belong to this host."
                               " Aborting." % hostname.ip)

  clustername = utils.HostInfo(cluster_name)

  # an answer on the node daemon port means the cluster IP is in use
  if utils.TcpPing(clustername.ip, constants.DEFAULT_NODED_PORT,
                   timeout=5):
    raise errors.OpPrereqError("Cluster IP already active. Aborting.")

  if secondary_ip:
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary ip given")
    if (secondary_ip != hostname.ip and
        not utils.OwnIpAddress(secondary_ip)):
      raise errors.OpPrereqError("You gave %s as secondary IP,"
                                 " but it does not belong to this host." %
                                 secondary_ip)
  else:
    secondary_ip = hostname.ip

  if vg_name is not None:
    # Check if volume group is valid
    vgstatus = utils.CheckVolumeGroupSize(utils.ListVolumeGroups(), vg_name,
                                          constants.MIN_VG_SIZE)
    if vgstatus:
      raise errors.OpPrereqError("Error: %s\nspecify --no-lvm-storage if"
                                 " you are not using lvm" % vgstatus)

  file_storage_dir = os.path.normpath(file_storage_dir)

  if not os.path.isabs(file_storage_dir):
    raise errors.OpPrereqError("The file storage directory you passed is"
                               " not an absolute path.")

  if not os.path.exists(file_storage_dir):
    try:
      os.makedirs(file_storage_dir, 0o750)
    except OSError as err:
      raise errors.OpPrereqError("Cannot create file storage directory"
                                 " '%s': %s" %
                                 (file_storage_dir, err))

  if not os.path.isdir(file_storage_dir):
    raise errors.OpPrereqError("The file storage directory '%s' is not"
                               " a directory." % file_storage_dir)

  # only hexadecimal digits are valid in a MAC address, so letters
  # beyond 'f' must be rejected
  if not re.match("^[0-9a-f]{2}:[0-9a-f]{2}:[0-9a-f]{2}$", mac_prefix):
    raise errors.OpPrereqError("Invalid mac prefix given '%s'" % mac_prefix)

  result = utils.RunCmd(["ip", "link", "show", "dev", master_netdev])
  if result.failed:
    raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" %
                               (master_netdev,
                                result.output.strip()))

  if not (os.path.isfile(constants.NODE_INITD_SCRIPT) and
          os.access(constants.NODE_INITD_SCRIPT, os.X_OK)):
    raise errors.OpPrereqError("Init.d script '%s' missing or not"
                               " executable." % constants.NODE_INITD_SCRIPT)

  # all checks passed; from here on the system is being modified

  # set up the node daemon certificate and (re)start the node daemon
  _InitGanetiServerSetup()

  # set up ssh config and /etc/hosts
  f = open(constants.SSH_HOST_RSA_PUB, 'r')
  try:
    sshline = f.read()
  finally:
    f.close()
  # the second space-separated field of the public host key file is
  # stored in the configuration as the cluster's rsa host key
  sshkey = sshline.split(" ")[1]

  utils.AddHostToEtcHosts(hostname.name)
  _InitSSHSetup(hostname.name)

  # init of cluster config file
  cluster_config = objects.Cluster(
    serial_no=1,
    rsahostkeypub=sshkey,
    highest_used_port=(constants.FIRST_DRBD_PORT - 1),
    mac_prefix=mac_prefix,
    volume_group_name=vg_name,
    default_bridge=def_bridge,
    tcpudp_port_pool=set(),
    master_node=hostname.name,
    master_ip=clustername.ip,
    master_netdev=master_netdev,
    cluster_name=clustername.name,
    file_storage_dir=file_storage_dir,
    enabled_hypervisors=enabled_hypervisors,
    default_hypervisor=default_hypervisor,
    beparams={constants.BEGR_DEFAULT: beparams},
    hvparams=hvparams,
    )
  master_node_config = objects.Node(name=hostname.name,
                                    primary_ip=hostname.ip,
                                    secondary_ip=secondary_ip)

  cfg = InitConfig(constants.CONFIG_VERSION,
                   cluster_config, master_node_config)
  ssh.WriteKnownHostsFile(cfg, constants.SSH_KNOWN_HOSTS_FILE)

  # start the master ip
  # TODO: Review rpc call from bootstrap
  rpc.RpcRunner.call_node_start_master(hostname.name, True)
224 b3f1cf6f Iustin Pop
225 b1b6ea87 Iustin Pop
226 02f99608 Oleksiy Mishchenko
def InitConfig(version, cluster_config, master_node_config,
               cfg_file=constants.CLUSTER_CONF_FILE):
  """Build and save the very first cluster configuration.

  The configuration holds exactly one node, the given master node, and
  no instances at all.

  @type version: int
  @param version: Configuration version
  @type cluster_config: objects.Cluster
  @param cluster_config: Cluster configuration
  @type master_node_config: objects.Node
  @param master_node_config: Master node configuration
  @type cfg_file: string
  @param cfg_file: Configuration file path

  @rtype: ssconf.SimpleConfigWriter
  @return: the config writer instance, after it has been saved

  """
  initial_data = objects.ConfigData(
    version=version,
    cluster=cluster_config,
    # the master is the only node known at this point
    nodes={master_node_config.name: master_node_config},
    instances={},
    serial_no=1)

  writer = ssconf.SimpleConfigWriter.FromDict(initial_data.ToDict(), cfg_file)
  writer.Save()
  return writer
259 02f99608 Oleksiy Mishchenko
260 02f99608 Oleksiy Mishchenko
261 140aa4a8 Iustin Pop
def FinalizeClusterDestroy(master):
  """Run the final steps of a cluster destroy.

  This completes the destroy begun in cmdlib.LUDestroyOpcode: the given
  node is asked to stop its master role and afterwards to leave the
  cluster. A failure of either call is only logged as a warning.

  @param master: the node both rpc calls are sent to

  """
  master_stopped = rpc.RpcRunner.call_node_stop_master(master, True)
  if not master_stopped:
    logging.warning("Could not disable the master role")

  node_left = rpc.RpcRunner.call_node_leave_cluster(master)
  if not node_left:
    logging.warning("Could not shutdown the node daemon and cleanup the node")
272 140aa4a8 Iustin Pop
273 140aa4a8 Iustin Pop
274 87622829 Iustin Pop
def SetupNodeDaemon(cluster_name, node, ssh_key_check):
  """Add a node to the cluster.

  This function must be called before the actual opcode, and will ssh
  to the remote node, copy the needed files, and start ganeti-noded,
  allowing the master to do the rest via normal rpc calls.

  @param cluster_name: the cluster name
  @param node: the name of the new node
  @param ssh_key_check: whether to do a strict key check
  @raise errors.OpExecError: if the local SSL certificate cannot be
      embedded safely in the remote command, or if the remote command
      fails

  """
  sshrunner = ssh.SshRunner(cluster_name)
  gntpem = utils.ReadFile(constants.SSL_CERT_FILE)
  # in the base64 pem encoding, neither '!' nor '.' are valid chars,
  # so we use this to detect an invalid certificate; as long as the
  # cert doesn't contain this, the here-document will be correctly
  # parsed by the shell sequence below
  # (raw string: the backslash is meant for the regex engine only)
  if re.search(r'^!EOF\.', gntpem, re.MULTILINE):
    raise errors.OpExecError("invalid PEM encoding in the SSL certificate")
  # the here-document terminator must start on a line of its own
  if not gntpem.endswith("\n"):
    raise errors.OpExecError("PEM must end with newline")

  # write the certificate on the remote node and restart its node
  # daemon in a single ssh session;
  # note that all the below variables are sanitized at this point,
  # either by being constants or by the checks above
  mycommand = ("umask 077 && "
               "cat > '%s' << '!EOF.' && \n"
               "%s!EOF.\n%s restart" %
               (constants.SSL_CERT_FILE, gntpem,
                constants.NODE_INITD_SCRIPT))

  result = sshrunner.Run(node, 'root', mycommand, batch=False,
                         ask_key=ssh_key_check,
                         use_cluster_key=False,
                         strict_host_check=ssh_key_check)
  if result.failed:
    raise errors.OpExecError("Remote command on node %s, error: %s,"
                             " output: %s" %
                             (node, result.fail_reason, result.output))
315 827f753e Guido Trotter
316 b1b6ea87 Iustin Pop
317 b1b6ea87 Iustin Pop
def MasterFailover():
  """Make the node this runs on the new master of the cluster.

  After checking that this node is not the master already and that the
  other nodes agree with the local configuration on who the current
  master is, the old master is told to stop its master role, the
  configuration is updated and distributed, and the master role is
  started on this node.

  @rtype: int
  @return: 0 on success, 1 if the master role could not be started on
      the new master
  @raise errors.OpPrereqError: if this node already is the master or if
      the vote of the other nodes does not confirm the configured master

  """
  writer = ssconf.SimpleConfigWriter()

  candidate = utils.HostInfo().name
  current = writer.GetMasterNode()
  known_nodes = writer.GetNodeList()

  if candidate == current:
    raise errors.OpPrereqError("This commands must be run on the node"
                               " where you want the new master to be."
                               " %s is already the master" %
                               current)

  ballots = GatherMasterVotes(known_nodes)

  # an empty result means there was nobody else to ask
  if ballots:
    winner = ballots[0][0]
    if winner is None:
      raise errors.OpPrereqError("Cluster is inconsistent, most nodes did not"
                                 " respond.")
    if winner != current:
      raise errors.OpPrereqError("I have wrong configuration, I believe the"
                                 " master is %s but the other nodes voted for"
                                 " %s. Please resync the configuration of"
                                 " this node." % (current, winner))
  # end checks

  logging.info("Setting master to %s, old master: %s", candidate, current)

  # a failure here is reported but does not abort the failover
  old_stopped = rpc.RpcRunner.call_node_stop_master(current, True)
  if not old_stopped:
    logging.error("Could not disable the master role on the old master"
                  " %s, please disable manually", current)

  writer.SetMasterNode(candidate)
  writer.Save()

  # Here we have a phase where no master should be running

  distributed = rpc.RpcRunner.call_upload_file(writer.GetNodeList(),
                                               constants.CLUSTER_CONF_FILE)
  if not distributed:
    logging.error("Could not distribute the new configuration"
                  " to the other nodes, please check.")

  new_started = rpc.RpcRunner.call_node_start_master(candidate, True)
  if not new_started:
    logging.error("Could not start the master role on the new master"
                  " %s, please check", candidate)
    return 1

  return 0
376 d7cdb55d Iustin Pop
377 d7cdb55d Iustin Pop
378 d7cdb55d Iustin Pop
def GatherMasterVotes(node_list):
  """Check the agreement on who is the master.

  This function will return a list of (node, number of votes), ordered
  by the number of votes (highest first). Errors will be denoted by the
  key 'None'.

  Every answer of the rpc call counts as exactly one vote, so the sum
  of the votes is the number of answers, whereas the number of entries
  in the list could be different (if some nodes vote for another
  master).

  We remove ourselves from the list since we know that (bugs aside)
  since we use the same source for configuration information for both
  backend and bootstrap, we'll always vote for ourselves.

  @type node_list: list
  @param node_list: the list of nodes to query for master info; the current
      node will be removed if it is in the list (the list is modified
      in place)
  @rtype: list
  @return: list of (node, votes)

  """
  myself = utils.HostInfo().name
  try:
    node_list.remove(myself)
  except ValueError:
    # we were not in the list to begin with
    pass
  if not node_list:
    # no nodes left (eventually after removing myself)
    return []
  results = rpc.RpcRunner.call_master_info(node_list)
  if not isinstance(results, dict):
    # this should not happen (unless internal error in rpc)
    logging.critical("Can't complete rpc call, aborting master startup")
    return [(None, len(node_list))]

  votes = {}
  for node in results:
    answer = results[node]
    if not isinstance(answer, (tuple, list)) or len(answer) < 3:
      # here the rpc layer should have already logged errors
      voted_for = None
    else:
      # the third element of the answer is the master the node believes in
      voted_for = answer[2]
    votes[voted_for] = votes.get(voted_for, 0) + 1

  vote_list = list(votes.items())
  # sort first on number of votes then on name, since we want None
  # sorted later if we have the half of the nodes not responding, and
  # half voting all for the same master; None is ranked explicitly
  # instead of being compared with the node names, as such a comparison
  # is only defined in Python 2
  vote_list.sort(key=lambda item: (item[1], item[0] is not None,
                                   item[0] or ""),
                 reverse=True)

  return vote_list