Statistics
| Branch: | Tag: | Revision:

root / lib / bootstrap.py @ 6b93ec9d

History | View | Annotate | Download (17.1 kB)

1 a0c9f010 Michael Hanselmann
#
2 a0c9f010 Michael Hanselmann
#
3 a0c9f010 Michael Hanselmann
4 a0c9f010 Michael Hanselmann
# Copyright (C) 2006, 2007, 2008 Google Inc.
5 a0c9f010 Michael Hanselmann
#
6 a0c9f010 Michael Hanselmann
# This program is free software; you can redistribute it and/or modify
7 a0c9f010 Michael Hanselmann
# it under the terms of the GNU General Public License as published by
8 a0c9f010 Michael Hanselmann
# the Free Software Foundation; either version 2 of the License, or
9 a0c9f010 Michael Hanselmann
# (at your option) any later version.
10 a0c9f010 Michael Hanselmann
#
11 a0c9f010 Michael Hanselmann
# This program is distributed in the hope that it will be useful, but
12 a0c9f010 Michael Hanselmann
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 a0c9f010 Michael Hanselmann
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 a0c9f010 Michael Hanselmann
# General Public License for more details.
15 a0c9f010 Michael Hanselmann
#
16 a0c9f010 Michael Hanselmann
# You should have received a copy of the GNU General Public License
17 a0c9f010 Michael Hanselmann
# along with this program; if not, write to the Free Software
18 a0c9f010 Michael Hanselmann
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 a0c9f010 Michael Hanselmann
# 02110-1301, USA.
20 a0c9f010 Michael Hanselmann
21 a0c9f010 Michael Hanselmann
22 a0c9f010 Michael Hanselmann
"""Functions to bootstrap a new cluster.
23 a0c9f010 Michael Hanselmann

24 a0c9f010 Michael Hanselmann
"""
25 a0c9f010 Michael Hanselmann
26 a0c9f010 Michael Hanselmann
import os
27 a0c9f010 Michael Hanselmann
import os.path
28 a0c9f010 Michael Hanselmann
import sha
29 a0c9f010 Michael Hanselmann
import re
30 b1b6ea87 Iustin Pop
import logging
31 c4415fd5 Michael Hanselmann
import tempfile
32 a0c9f010 Michael Hanselmann
33 a0c9f010 Michael Hanselmann
from ganeti import rpc
34 a0c9f010 Michael Hanselmann
from ganeti import ssh
35 a0c9f010 Michael Hanselmann
from ganeti import utils
36 a0c9f010 Michael Hanselmann
from ganeti import errors
37 a0c9f010 Michael Hanselmann
from ganeti import config
38 a0c9f010 Michael Hanselmann
from ganeti import constants
39 b9eeeb02 Michael Hanselmann
from ganeti import objects
40 a0c9f010 Michael Hanselmann
from ganeti import ssconf
41 a0c9f010 Michael Hanselmann
42 e38220e4 Michael Hanselmann
43 531baf8e Iustin Pop
def _InitSSHSetup():
  """Create a fresh SSH keypair for the Ganeti user and authorize it.

  The key file paths are obtained from L{ssh.GetUserFiles} for
  C{constants.GANETI_RUNAS}. An existing private or public key file is
  backed up, both paths are removed, and a new DSA keypair with an empty
  passphrase is generated via C{ssh-keygen}. The public key (at most
  8192 bytes of it) is then handed to L{utils.AddAuthorizedKey} together
  with the authorized keys file.

  @raise errors.OpExecError: if C{ssh-keygen} reports a failure

  """
  (priv_key, pub_key, auth_keys) = ssh.GetUserFiles(constants.GANETI_RUNAS)

  # move old key material out of the way before generating the new pair
  for key_file in (priv_key, pub_key):
    if os.path.exists(key_file):
      utils.CreateBackup(key_file)
    utils.RemoveFile(key_file)

  keygen_cmd = ["ssh-keygen", "-t", "dsa", "-f", priv_key, "-q", "-N", ""]
  keygen = utils.RunCmd(keygen_cmd)
  if keygen.failed:
    raise errors.OpExecError("Could not generate ssh keypair, error %s" %
                             keygen.output)

  pub_fd = open(pub_key, 'r')
  try:
    pub_data = pub_fd.read(8192)
    utils.AddAuthorizedKey(auth_keys, pub_data)
  finally:
    pub_fd.close()
69 a0c9f010 Michael Hanselmann
70 a0c9f010 Michael Hanselmann
71 40a97d80 Michael Hanselmann
def _GenerateSelfSignedSslCert(file_name, validity=(365 * 5)):
72 40a97d80 Michael Hanselmann
  """Generates a self-signed SSL certificate.
73 a0c9f010 Michael Hanselmann

74 40a97d80 Michael Hanselmann
  @type file_name: str
75 40a97d80 Michael Hanselmann
  @param file_name: Path to output file
76 40a97d80 Michael Hanselmann
  @type validity: int
77 40a97d80 Michael Hanselmann
  @param validity: Validity for certificate in days
78 a0c9f010 Michael Hanselmann

79 a0c9f010 Michael Hanselmann
  """
80 c4415fd5 Michael Hanselmann
  (fd, tmp_file_name) = tempfile.mkstemp(dir=os.path.dirname(file_name))
81 c4415fd5 Michael Hanselmann
  try:
82 c4415fd5 Michael Hanselmann
    # Set permissions before writing key
83 c4415fd5 Michael Hanselmann
    os.chmod(tmp_file_name, 0600)
84 c4415fd5 Michael Hanselmann
85 c4415fd5 Michael Hanselmann
    result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
86 c4415fd5 Michael Hanselmann
                           "-days", str(validity), "-nodes", "-x509",
87 c4415fd5 Michael Hanselmann
                           "-keyout", tmp_file_name, "-out", tmp_file_name,
88 c4415fd5 Michael Hanselmann
                           "-batch"])
89 c4415fd5 Michael Hanselmann
    if result.failed:
90 c4415fd5 Michael Hanselmann
      raise errors.OpExecError("Could not generate SSL certificate, command"
91 c4415fd5 Michael Hanselmann
                               " %s had exitcode %s and error message %s" %
92 c4415fd5 Michael Hanselmann
                               (result.cmd, result.exit_code, result.output))
93 c4415fd5 Michael Hanselmann
94 c4415fd5 Michael Hanselmann
    # Make read-only
95 c4415fd5 Michael Hanselmann
    os.chmod(tmp_file_name, 0400)
96 c4415fd5 Michael Hanselmann
97 c4415fd5 Michael Hanselmann
    os.rename(tmp_file_name, file_name)
98 c4415fd5 Michael Hanselmann
  finally:
99 c4415fd5 Michael Hanselmann
    utils.RemoveFile(tmp_file_name)
100 40a97d80 Michael Hanselmann
101 40a97d80 Michael Hanselmann
102 40a97d80 Michael Hanselmann
def _InitGanetiServerSetup():
  """Prepare the SSL certificates and restart the local node daemon.

  A new self-signed certificate is always written to
  C{constants.SSL_CERT_FILE}; the one at C{constants.RAPI_CERT_FILE} is
  only generated when no file exists at that path yet. Afterwards the
  node daemon init script is invoked with C{restart}.

  @raise errors.OpExecError: if the init script reports a failure

  """
  _GenerateSelfSignedSslCert(constants.SSL_CERT_FILE)

  # an already present RAPI certificate is left untouched
  rapi_cert_present = os.path.exists(constants.RAPI_CERT_FILE)
  if not rapi_cert_present:
    _GenerateSelfSignedSslCert(constants.RAPI_CERT_FILE)

  restart_cmd = [constants.NODE_INITD_SCRIPT, "restart"]
  restart = utils.RunCmd(restart_cmd)
  if not restart.failed:
    return

  raise errors.OpExecError("Could not start the node daemon, command %s"
                           " had exitcode %s and error %s" %
                           (restart.cmd, restart.exit_code, restart.output))
121 a0c9f010 Michael Hanselmann
122 a0c9f010 Michael Hanselmann
123 4342e89b Alexander Schreiber
def InitCluster(cluster_name, mac_prefix, def_bridge,
124 ce735215 Guido Trotter
                master_netdev, file_storage_dir, candidate_pool_size,
125 ce735215 Guido Trotter
                secondary_ip=None, vg_name=None, beparams=None, hvparams=None,
126 02691904 Alexander Schreiber
                enabled_hypervisors=None, default_hypervisor=None):
127 a0c9f010 Michael Hanselmann
  """Initialise the cluster.
128 a0c9f010 Michael Hanselmann

129 ce735215 Guido Trotter
  @type candidate_pool_size: int
130 ce735215 Guido Trotter
  @param candidate_pool_size: master candidate pool size
131 ce735215 Guido Trotter

132 a0c9f010 Michael Hanselmann
  """
133 ce735215 Guido Trotter
  # TODO: complete the docstring
134 a0c9f010 Michael Hanselmann
  if config.ConfigWriter.IsCluster():
135 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Cluster is already initialised")
136 a0c9f010 Michael Hanselmann
137 a0c9f010 Michael Hanselmann
  hostname = utils.HostInfo()
138 a0c9f010 Michael Hanselmann
139 a0c9f010 Michael Hanselmann
  if hostname.ip.startswith("127."):
140 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("This host's IP resolves to the private"
141 a0c9f010 Michael Hanselmann
                               " range (%s). Please fix DNS or %s." %
142 a0c9f010 Michael Hanselmann
                               (hostname.ip, constants.ETC_HOSTS))
143 a0c9f010 Michael Hanselmann
144 caad16e2 Iustin Pop
  if not utils.OwnIpAddress(hostname.ip):
145 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Inconsistency: this host's name resolves"
146 a0c9f010 Michael Hanselmann
                               " to %s,\nbut this ip address does not"
147 a0c9f010 Michael Hanselmann
                               " belong to this host."
148 a0c9f010 Michael Hanselmann
                               " Aborting." % hostname.ip)
149 a0c9f010 Michael Hanselmann
150 a0c9f010 Michael Hanselmann
  clustername = utils.HostInfo(cluster_name)
151 a0c9f010 Michael Hanselmann
152 a0c9f010 Michael Hanselmann
  if utils.TcpPing(clustername.ip, constants.DEFAULT_NODED_PORT,
153 a0c9f010 Michael Hanselmann
                   timeout=5):
154 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Cluster IP already active. Aborting.")
155 a0c9f010 Michael Hanselmann
156 a0c9f010 Michael Hanselmann
  if secondary_ip:
157 a0c9f010 Michael Hanselmann
    if not utils.IsValidIP(secondary_ip):
158 a0c9f010 Michael Hanselmann
      raise errors.OpPrereqError("Invalid secondary ip given")
159 a0c9f010 Michael Hanselmann
    if (secondary_ip != hostname.ip and
160 caad16e2 Iustin Pop
        not utils.OwnIpAddress(secondary_ip)):
161 a0c9f010 Michael Hanselmann
      raise errors.OpPrereqError("You gave %s as secondary IP,"
162 a0c9f010 Michael Hanselmann
                                 " but it does not belong to this host." %
163 a0c9f010 Michael Hanselmann
                                 secondary_ip)
164 b9eeeb02 Michael Hanselmann
  else:
165 b9eeeb02 Michael Hanselmann
    secondary_ip = hostname.ip
166 a0c9f010 Michael Hanselmann
167 a0c9f010 Michael Hanselmann
  if vg_name is not None:
168 a0c9f010 Michael Hanselmann
    # Check if volume group is valid
169 a0c9f010 Michael Hanselmann
    vgstatus = utils.CheckVolumeGroupSize(utils.ListVolumeGroups(), vg_name,
170 a0c9f010 Michael Hanselmann
                                          constants.MIN_VG_SIZE)
171 a0c9f010 Michael Hanselmann
    if vgstatus:
172 a0c9f010 Michael Hanselmann
      raise errors.OpPrereqError("Error: %s\nspecify --no-lvm-storage if"
173 a0c9f010 Michael Hanselmann
                                 " you are not using lvm" % vgstatus)
174 a0c9f010 Michael Hanselmann
175 a0c9f010 Michael Hanselmann
  file_storage_dir = os.path.normpath(file_storage_dir)
176 a0c9f010 Michael Hanselmann
177 a0c9f010 Michael Hanselmann
  if not os.path.isabs(file_storage_dir):
178 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("The file storage directory you passed is"
179 a0c9f010 Michael Hanselmann
                               " not an absolute path.")
180 a0c9f010 Michael Hanselmann
181 a0c9f010 Michael Hanselmann
  if not os.path.exists(file_storage_dir):
182 a0c9f010 Michael Hanselmann
    try:
183 a0c9f010 Michael Hanselmann
      os.makedirs(file_storage_dir, 0750)
184 a0c9f010 Michael Hanselmann
    except OSError, err:
185 a0c9f010 Michael Hanselmann
      raise errors.OpPrereqError("Cannot create file storage directory"
186 a0c9f010 Michael Hanselmann
                                 " '%s': %s" %
187 a0c9f010 Michael Hanselmann
                                 (file_storage_dir, err))
188 a0c9f010 Michael Hanselmann
189 a0c9f010 Michael Hanselmann
  if not os.path.isdir(file_storage_dir):
190 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("The file storage directory '%s' is not"
191 a0c9f010 Michael Hanselmann
                               " a directory." % file_storage_dir)
192 a0c9f010 Michael Hanselmann
193 a0c9f010 Michael Hanselmann
  if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$", mac_prefix):
194 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Invalid mac prefix given '%s'" % mac_prefix)
195 a0c9f010 Michael Hanselmann
196 a0c9f010 Michael Hanselmann
  result = utils.RunCmd(["ip", "link", "show", "dev", master_netdev])
197 a0c9f010 Michael Hanselmann
  if result.failed:
198 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" %
199 a0c9f010 Michael Hanselmann
                               (master_netdev,
200 a0c9f010 Michael Hanselmann
                                result.output.strip()))
201 a0c9f010 Michael Hanselmann
202 a0c9f010 Michael Hanselmann
  if not (os.path.isfile(constants.NODE_INITD_SCRIPT) and
203 a0c9f010 Michael Hanselmann
          os.access(constants.NODE_INITD_SCRIPT, os.X_OK)):
204 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Init.d script '%s' missing or not"
205 a0c9f010 Michael Hanselmann
                               " executable." % constants.NODE_INITD_SCRIPT)
206 a0c9f010 Michael Hanselmann
207 d4b72030 Guido Trotter
  utils.CheckBEParams(beparams)
208 d4b72030 Guido Trotter
209 a0c9f010 Michael Hanselmann
  # set up the inter-node password and certificate
210 d23ef431 Michael Hanselmann
  _InitGanetiServerSetup()
211 a0c9f010 Michael Hanselmann
212 a0c9f010 Michael Hanselmann
  # set up ssh config and /etc/hosts
213 a0c9f010 Michael Hanselmann
  f = open(constants.SSH_HOST_RSA_PUB, 'r')
214 a0c9f010 Michael Hanselmann
  try:
215 a0c9f010 Michael Hanselmann
    sshline = f.read()
216 a0c9f010 Michael Hanselmann
  finally:
217 a0c9f010 Michael Hanselmann
    f.close()
218 a0c9f010 Michael Hanselmann
  sshkey = sshline.split(" ")[1]
219 a0c9f010 Michael Hanselmann
220 a0c9f010 Michael Hanselmann
  utils.AddHostToEtcHosts(hostname.name)
221 531baf8e Iustin Pop
  _InitSSHSetup()
222 a0c9f010 Michael Hanselmann
223 a0c9f010 Michael Hanselmann
  # init of cluster config file
224 b9eeeb02 Michael Hanselmann
  cluster_config = objects.Cluster(
225 b9eeeb02 Michael Hanselmann
    serial_no=1,
226 b9eeeb02 Michael Hanselmann
    rsahostkeypub=sshkey,
227 b9eeeb02 Michael Hanselmann
    highest_used_port=(constants.FIRST_DRBD_PORT - 1),
228 b9eeeb02 Michael Hanselmann
    mac_prefix=mac_prefix,
229 b9eeeb02 Michael Hanselmann
    volume_group_name=vg_name,
230 b9eeeb02 Michael Hanselmann
    default_bridge=def_bridge,
231 b9eeeb02 Michael Hanselmann
    tcpudp_port_pool=set(),
232 f6bd6e98 Michael Hanselmann
    master_node=hostname.name,
233 f6bd6e98 Michael Hanselmann
    master_ip=clustername.ip,
234 f6bd6e98 Michael Hanselmann
    master_netdev=master_netdev,
235 f6bd6e98 Michael Hanselmann
    cluster_name=clustername.name,
236 f6bd6e98 Michael Hanselmann
    file_storage_dir=file_storage_dir,
237 ea3a925f Alexander Schreiber
    enabled_hypervisors=enabled_hypervisors,
238 02691904 Alexander Schreiber
    default_hypervisor=default_hypervisor,
239 ea3a925f Alexander Schreiber
    beparams={constants.BEGR_DEFAULT: beparams},
240 ea3a925f Alexander Schreiber
    hvparams=hvparams,
241 ce735215 Guido Trotter
    candidate_pool_size=candidate_pool_size,
242 b9eeeb02 Michael Hanselmann
    )
243 b9eeeb02 Michael Hanselmann
  master_node_config = objects.Node(name=hostname.name,
244 b9eeeb02 Michael Hanselmann
                                    primary_ip=hostname.ip,
245 b9222f32 Guido Trotter
                                    secondary_ip=secondary_ip,
246 c044f32c Guido Trotter
                                    serial_no=1,
247 c044f32c Guido Trotter
                                    master_candidate=True,
248 fc0fe88c Iustin Pop
                                    offline=False,
249 c044f32c Guido Trotter
                                    )
250 a0c9f010 Michael Hanselmann
251 05cc153f Guido Trotter
  sscfg = InitConfig(constants.CONFIG_VERSION,
252 05cc153f Guido Trotter
                     cluster_config, master_node_config)
253 05cc153f Guido Trotter
  ssh.WriteKnownHostsFile(sscfg, constants.SSH_KNOWN_HOSTS_FILE)
254 05cc153f Guido Trotter
  cfg = config.ConfigWriter()
255 05cc153f Guido Trotter
  cfg.Update(cfg.GetClusterInfo())
256 827f753e Guido Trotter
257 b3f1cf6f Iustin Pop
  # start the master ip
258 b3f1cf6f Iustin Pop
  # TODO: Review rpc call from bootstrap
259 fda5f19f Michael Hanselmann
  rpc.RpcRunner.call_node_start_master(hostname.name, True)
260 b3f1cf6f Iustin Pop
261 b1b6ea87 Iustin Pop
262 02f99608 Oleksiy Mishchenko
def InitConfig(version, cluster_config, master_node_config,
               cfg_file=constants.CLUSTER_CONF_FILE):
  """Build and save the very first configuration of a cluster.

  The configuration holds exactly one node, the given master node
  (keyed by its name), an empty instance dictionary and a serial number
  of 1.

  @type version: int
  @param version: configuration version
  @type cluster_config: L{objects.Cluster}
  @param cluster_config: cluster configuration
  @type master_node_config: L{objects.Node}
  @param master_node_config: master node configuration
  @type cfg_file: string
  @param cfg_file: configuration file path

  @rtype: L{ssconf.SimpleConfigWriter}
  @return: the config writer, after its C{Save} method has been called

  """
  initial_data = objects.ConfigData(version=version,
                                    cluster=cluster_config,
                                    nodes={master_node_config.name:
                                           master_node_config},
                                    instances={},
                                    serial_no=1)

  writer = ssconf.SimpleConfigWriter.FromDict(initial_data.ToDict(), cfg_file)
  writer.Save()

  return writer
295 02f99608 Oleksiy Mishchenko
296 02f99608 Oleksiy Mishchenko
297 140aa4a8 Iustin Pop
def FinalizeClusterDestroy(master):
  """Run the final steps of a cluster destroy on the master node.

  Two RPC calls are made to the given node: first to stop the master
  role, then to make the node leave the cluster. A failure of either
  call is only logged as a warning; no exception is raised here.

  @param master: the name of the master node

  """
  stop_res = rpc.RpcRunner.call_node_stop_master(master, True)
  if not (not stop_res.failed and stop_res.data):
    logging.warning("Could not disable the master role")

  leave_res = rpc.RpcRunner.call_node_leave_cluster(master)
  if not (not leave_res.failed and leave_res.data):
    logging.warning("Could not shutdown the node daemon and cleanup the node")
310 140aa4a8 Iustin Pop
311 140aa4a8 Iustin Pop
312 87622829 Iustin Pop
def SetupNodeDaemon(cluster_name, node, ssh_key_check):
  """Copy the certificates to a new node and restart its node daemon.

  The local node daemon and RAPI certificates are read and sent to the
  remote node inside a single shell command run over ssh as root: the
  command writes both files via here-documents, makes them read-only
  and restarts the node daemon through its init script.

  @param cluster_name: the cluster name
  @param node: the name of the new node
  @param ssh_key_check: whether to do a strict key check
  @raise errors.OpExecError: if a certificate contains the here-document
      terminator or if the remote command fails

  """
  runner = ssh.SshRunner(cluster_name)

  noded_pem = utils.ReadFile(constants.SSL_CERT_FILE)
  rapi_pem = utils.ReadFile(constants.RAPI_CERT_FILE)

  # '!' and '.' are not part of the base64 PEM alphabet, hence a line
  # starting with the here-document terminator can only come from a
  # broken certificate; refusing it keeps the shell sequence below
  # parseable
  for pem in (noded_pem, rapi_pem):
    if re.search('^!EOF\.', pem, re.MULTILINE):
      raise errors.OpExecError("invalid PEM encoding in the SSL certificate")

  # the terminator must start on a line of its own
  if not noded_pem.endswith("\n"):
    noded_pem = noded_pem + "\n"
  if not rapi_pem.endswith("\n"):
    rapi_pem = rapi_pem + "\n"

  # everything interpolated here is either a constant or has been
  # checked above
  cmd_args = (constants.SSL_CERT_FILE, noded_pem,
              constants.RAPI_CERT_FILE, rapi_pem,
              constants.SSL_CERT_FILE, constants.RAPI_CERT_FILE,
              constants.NODE_INITD_SCRIPT)
  remote_cmd = ("umask 077 && "
                "cat > '%s' << '!EOF.' && \n"
                "%s!EOF.\n"
                "cat > '%s' << '!EOF.' && \n"
                "%s!EOF.\n"
                "chmod 0400 %s %s && "
                "%s restart" % cmd_args)

  outcome = runner.Run(node, 'root', remote_cmd, batch=False,
                       ask_key=ssh_key_check,
                       use_cluster_key=False,
                       strict_host_check=ssh_key_check)
  if not outcome.failed:
    return

  raise errors.OpExecError("Remote command on node %s, error: %s,"
                           " output: %s" %
                           (node, outcome.fail_reason, outcome.output))
366 827f753e Guido Trotter
367 b1b6ea87 Iustin Pop
368 b1b6ea87 Iustin Pop
def MasterFailover():
  """Make the current node the new master of the cluster.

  Before anything is changed it is verified that this node is not
  already the master, that it is a master candidate, and that the other
  nodes (if any answered) agree on who the current master is. Then the
  master role is stopped on the old master, the configuration is updated
  with the new master node and the master role is started here.

  @rtype: int
  @return: 0 on success, 1 if the master role could not be started on
      the new master
  @raise errors.OpPrereqError: if one of the preliminary checks fails

  """
  sstore = ssconf.SimpleStore()

  (old_master, new_master) = ssconf.GetMasterAndMyself(sstore)
  node_list = sstore.GetNodeList()
  mc_list = sstore.GetMasterCandidates()

  if old_master == new_master:
    raise errors.OpPrereqError("This commands must be run on the node"
                               " where you want the new master to be."
                               " %s is already the master" %
                               old_master)

  if new_master not in mc_list:
    other_candidates = [mc for mc in mc_list if mc != old_master]
    raise errors.OpPrereqError("This node is not among the nodes marked"
                               " as master candidates. Only these nodes"
                               " can become masters. Current list of"
                               " master candidates is:\n"
                               "%s" % ('\n'.join(other_candidates)))

  vote_list = GatherMasterVotes(node_list)

  if vote_list:
    # the first entry is the one with the most votes
    winner = vote_list[0][0]
    if winner is None:
      raise errors.OpPrereqError("Cluster is inconsistent, most nodes did not"
                                 " respond.")
    if winner != old_master:
      raise errors.OpPrereqError("I have wrong configuration, I believe the"
                                 " master is %s but the other nodes voted for"
                                 " %s. Please resync the configuration of"
                                 " this node." % (old_master, winner))
  # end checks

  logging.info("Setting master to %s, old master: %s", new_master, old_master)

  stop_result = rpc.RpcRunner.call_node_stop_master(old_master, True)
  if stop_result.failed or not stop_result.data:
    # not fatal: the failover continues and the admin is told to clean up
    logging.error("Could not disable the master role on the old master"
                 " %s, please disable manually", old_master)

  # Here we have a phase where no master should be running

  # a real config writer can be instantiated now, the configuration data
  # is known to be available
  cfg = config.ConfigWriter()

  cluster_info = cfg.GetClusterInfo()
  cluster_info.master_node = new_master
  # since the cluster info was changed, this update also regenerates the
  # ssconf files
  cfg.Update(cluster_info)

  start_result = rpc.RpcRunner.call_node_start_master(new_master, True)
  if start_result.failed or not start_result.data:
    logging.error("Could not start the master role on the new master"
                  " %s, please check", new_master)
    return 1

  return 0
438 d7cdb55d Iustin Pop
439 d7cdb55d Iustin Pop
440 d7cdb55d Iustin Pop
def GatherMasterVotes(node_list):
  """Ask the other nodes who they think the master is.

  The result is a list of (node, number of votes) pairs with the most
  voted entry first. Nodes whose answer could not be used (failed call
  or unexpected data) are counted under the key C{None}.

  The votes add up to the number of nodes that were queried, while the
  number of entries in the result depends on how many distinct masters
  were named.

  The current node does not take part in the vote: backend and bootstrap
  share the same source of configuration information, so (bugs aside) it
  would always vote for what we already believe.

  @type node_list: list
  @param node_list: the list of nodes to query for master info; the current
      node will be removed from it (the list is modified in place) if it
      is in the list
  @rtype: list
  @return: list of (node, votes); empty if no nodes are left to query

  """
  myself = utils.HostInfo().name
  if myself in node_list:
    node_list.remove(myself)

  if not node_list:
    # nothing left to ask (possibly only after removing myself)
    return []

  results = rpc.RpcRunner.call_master_info(node_list)
  if not isinstance(results, dict):
    # should not happen, unless there is an internal error in rpc
    logging.critical("Can't complete rpc call, aborting master startup")
    return [(None, len(node_list))]

  tally = {}
  for answer in results.values():
    payload = answer.data
    usable = (not answer.failed and
              isinstance(payload, (tuple, list)) and
              len(payload) >= 3)
    if usable:
      # the third element of the answer is taken as the master name
      choice = payload[2]
    else:
      # the rpc layer should have already logged errors here
      choice = None
    tally[choice] = tally.get(choice, 0) + 1

  # order by number of votes first and by name second: when half of the
  # nodes did not respond and the other half agree on one master, None
  # has to come after that master
  return sorted(tally.items(), key=lambda entry: (entry[1], entry[0]),
                reverse=True)