Statistics
| Branch: | Tag: | Revision:

root / lib / bootstrap.py @ ea3a925f

History | View | Annotate | Download (15.9 kB)

1 a0c9f010 Michael Hanselmann
#
2 a0c9f010 Michael Hanselmann
#
3 a0c9f010 Michael Hanselmann
4 a0c9f010 Michael Hanselmann
# Copyright (C) 2006, 2007, 2008 Google Inc.
5 a0c9f010 Michael Hanselmann
#
6 a0c9f010 Michael Hanselmann
# This program is free software; you can redistribute it and/or modify
7 a0c9f010 Michael Hanselmann
# it under the terms of the GNU General Public License as published by
8 a0c9f010 Michael Hanselmann
# the Free Software Foundation; either version 2 of the License, or
9 a0c9f010 Michael Hanselmann
# (at your option) any later version.
10 a0c9f010 Michael Hanselmann
#
11 a0c9f010 Michael Hanselmann
# This program is distributed in the hope that it will be useful, but
12 a0c9f010 Michael Hanselmann
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 a0c9f010 Michael Hanselmann
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 a0c9f010 Michael Hanselmann
# General Public License for more details.
15 a0c9f010 Michael Hanselmann
#
16 a0c9f010 Michael Hanselmann
# You should have received a copy of the GNU General Public License
17 a0c9f010 Michael Hanselmann
# along with this program; if not, write to the Free Software
18 a0c9f010 Michael Hanselmann
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 a0c9f010 Michael Hanselmann
# 02110-1301, USA.
20 a0c9f010 Michael Hanselmann
21 a0c9f010 Michael Hanselmann
22 a0c9f010 Michael Hanselmann
"""Functions to bootstrap a new cluster.
23 a0c9f010 Michael Hanselmann

24 a0c9f010 Michael Hanselmann
"""
25 a0c9f010 Michael Hanselmann
26 a0c9f010 Michael Hanselmann
import os
27 a0c9f010 Michael Hanselmann
import os.path
28 a0c9f010 Michael Hanselmann
import sha
29 a0c9f010 Michael Hanselmann
import re
30 b1b6ea87 Iustin Pop
import logging
31 a0c9f010 Michael Hanselmann
32 a0c9f010 Michael Hanselmann
from ganeti import rpc
33 a0c9f010 Michael Hanselmann
from ganeti import ssh
34 a0c9f010 Michael Hanselmann
from ganeti import utils
35 a0c9f010 Michael Hanselmann
from ganeti import errors
36 a0c9f010 Michael Hanselmann
from ganeti import config
37 a0c9f010 Michael Hanselmann
from ganeti import constants
38 b9eeeb02 Michael Hanselmann
from ganeti import objects
39 a0c9f010 Michael Hanselmann
from ganeti import ssconf
40 a0c9f010 Michael Hanselmann
41 72737a7f Iustin Pop
from ganeti.rpc import RpcRunner
42 a0c9f010 Michael Hanselmann
43 a0c9f010 Michael Hanselmann
def _InitSSHSetup(node):
  """Create a fresh SSH keypair for the cluster user on this host.

  The private and public key files returned by ssh.GetUserFiles for
  constants.GANETI_RUNAS are backed up (when they exist) and removed, a
  new dsa keypair with an empty passphrase is generated via ssh-keygen,
  and the new public key is added to the authorized keys file returned
  by the same call.

  Args:
    node: the name of this host as a fqdn (not used by the function body)

  Raises:
    errors.OpExecError: if ssh-keygen reports a failure

  """
  user_files = ssh.GetUserFiles(constants.GANETI_RUNAS)
  key_path, pubkey_path, authorized_path = user_files

  # keep a backup of any previous key file, then remove it unconditionally
  for old_file in (key_path, pubkey_path):
    if os.path.exists(old_file):
      utils.CreateBackup(old_file)
    utils.RemoveFile(old_file)

  keygen_cmd = ["ssh-keygen", "-t", "dsa", "-f", key_path, "-q", "-N", ""]
  keygen = utils.RunCmd(keygen_cmd)
  if keygen.failed:
    raise errors.OpExecError("Could not generate ssh keypair, error %s" %
                             keygen.output)

  pubkey_file = open(pubkey_path, 'r')
  try:
    # at most the first 8192 bytes of the public key file are used
    pubkey_data = pubkey_file.read(8192)
    utils.AddAuthorizedKey(authorized_path, pubkey_data)
  finally:
    pubkey_file.close()
73 a0c9f010 Michael Hanselmann
74 a0c9f010 Michael Hanselmann
75 d23ef431 Michael Hanselmann
def _InitGanetiServerSetup():
  """Setup the necessary configuration for the initial node daemon.

  This writes a freshly generated secret into the cluster password file,
  generates a self-signed SSL certificate (private key and certificate
  are both written to constants.SSL_CERT_FILE) and finally restarts the
  node daemon through its init script.

  Raises:
    errors.OpExecError: if the certificate cannot be generated or the
      init script reports a failure

  """
  # Create pseudo random password
  randpass = utils.GenerateSecret()

  # and write it into the config file
  utils.WriteFile(constants.CLUSTER_PASSWORD_FILE,
                  data="%s\n" % randpass, mode=0o400)

  # The private key is written into SSL_CERT_FILE by openssl itself. Run
  # it under a restrictive umask so the file is never created with group
  # or world permissions; relying only on the chmod below would leave a
  # window in which other local users could read the key.
  old_umask = os.umask(0o077)
  try:
    result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
                           "-days", str(365*5), "-nodes", "-x509",
                           "-keyout", constants.SSL_CERT_FILE,
                           "-out", constants.SSL_CERT_FILE, "-batch"])
  finally:
    os.umask(old_umask)

  if result.failed:
    raise errors.OpExecError("could not generate server ssl cert, command"
                             " %s had exitcode %s and error message %s" %
                             (result.cmd, result.exit_code, result.output))

  # drop the owner write bit as well, the file is read-only from now on
  os.chmod(constants.SSL_CERT_FILE, 0o400)

  result = utils.RunCmd([constants.NODE_INITD_SCRIPT, "restart"])

  if result.failed:
    raise errors.OpExecError("Could not start the node daemon, command %s"
                             " had exitcode %s and error %s" %
                             (result.cmd, result.exit_code, result.output))
106 a0c9f010 Michael Hanselmann
107 a0c9f010 Michael Hanselmann
108 a0c9f010 Michael Hanselmann
def InitCluster(cluster_name, hypervisor_type, mac_prefix, def_bridge,
                master_netdev, file_storage_dir,
                secondary_ip=None,
                vg_name=None, beparams=None, hvparams=None,
                enabled_hypervisors=None):
  """Initialise the cluster.

  A series of prerequisite checks is run first. Afterwards the node
  daemon credentials and the ssh setup are created, the initial
  configuration is written, the known hosts file is generated and the
  master role is started on this host.

  @param cluster_name: name of the cluster; it is resolved through
      utils.HostInfo, the resulting IP becomes the master IP and must
      not yet answer on constants.DEFAULT_NODED_PORT
  @param hypervisor_type: must be one of constants.HYPER_TYPES; for
      constants.HT_XEN_HVM the file constants.VNC_PASSWORD_FILE has to
      exist already
  @type mac_prefix: string
  @param mac_prefix: three colon-separated pairs of lowercase
      hexadecimal digits
  @param def_bridge: stored as the default bridge of the cluster
  @param master_netdev: network device for the master role; checked
      with "ip link show dev"
  @type file_storage_dir: string
  @param file_storage_dir: absolute path of the file storage directory;
      it is created with mode 0750 if it does not exist
  @param secondary_ip: optional secondary IP of this node; it must be a
      valid IP owned by this host and defaults to the primary IP
  @param vg_name: volume group to validate against
      constants.MIN_VG_SIZE; None skips the volume group check
  @param beparams: stored in the cluster object under the
      constants.BEGR_DEFAULT key
  @param hvparams: stored unchanged in the cluster object
  @param enabled_hypervisors: stored unchanged in the cluster object

  @raise errors.OpPrereqError: if one of the prerequisite checks fails

  """
  if config.ConfigWriter.IsCluster():
    raise errors.OpPrereqError("Cluster is already initialised")

  if hypervisor_type == constants.HT_XEN_HVM:
    if not os.path.exists(constants.VNC_PASSWORD_FILE):
      # note the leading space: the two literals are concatenated
      raise errors.OpPrereqError("Please prepare the cluster VNC"
                                 " password file %s" %
                                 constants.VNC_PASSWORD_FILE)

  hostname = utils.HostInfo()

  if hostname.ip.startswith("127."):
    raise errors.OpPrereqError("This host's IP resolves to the private"
                               " range (%s). Please fix DNS or %s." %
                               (hostname.ip, constants.ETC_HOSTS))

  if not utils.OwnIpAddress(hostname.ip):
    raise errors.OpPrereqError("Inconsistency: this host's name resolves"
                               " to %s,\nbut this ip address does not"
                               " belong to this host."
                               " Aborting." % hostname.ip)

  clustername = utils.HostInfo(cluster_name)

  if utils.TcpPing(clustername.ip, constants.DEFAULT_NODED_PORT,
                   timeout=5):
    raise errors.OpPrereqError("Cluster IP already active. Aborting.")

  if secondary_ip:
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary ip given")
    if (secondary_ip != hostname.ip and
        not utils.OwnIpAddress(secondary_ip)):
      raise errors.OpPrereqError("You gave %s as secondary IP,"
                                 " but it does not belong to this host." %
                                 secondary_ip)
  else:
    secondary_ip = hostname.ip

  if vg_name is not None:
    # Check if volume group is valid
    vgstatus = utils.CheckVolumeGroupSize(utils.ListVolumeGroups(), vg_name,
                                          constants.MIN_VG_SIZE)
    if vgstatus:
      raise errors.OpPrereqError("Error: %s\nspecify --no-lvm-storage if"
                                 " you are not using lvm" % vgstatus)

  file_storage_dir = os.path.normpath(file_storage_dir)

  if not os.path.isabs(file_storage_dir):
    raise errors.OpPrereqError("The file storage directory you passed is"
                               " not an absolute path.")

  if not os.path.exists(file_storage_dir):
    try:
      os.makedirs(file_storage_dir, 0o750)
    except OSError as err:
      raise errors.OpPrereqError("Cannot create file storage directory"
                                 " '%s': %s" %
                                 (file_storage_dir, err))

  if not os.path.isdir(file_storage_dir):
    raise errors.OpPrereqError("The file storage directory '%s' is not"
                               " a directory." % file_storage_dir)

  # only hexadecimal digits are valid in a MAC address
  if not re.match("^[0-9a-f]{2}:[0-9a-f]{2}:[0-9a-f]{2}$", mac_prefix):
    raise errors.OpPrereqError("Invalid mac prefix given '%s'" % mac_prefix)

  if hypervisor_type not in constants.HYPER_TYPES:
    raise errors.OpPrereqError("Invalid hypervisor type given '%s'" %
                               hypervisor_type)

  result = utils.RunCmd(["ip", "link", "show", "dev", master_netdev])
  if result.failed:
    raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" %
                               (master_netdev,
                                result.output.strip()))

  if not (os.path.isfile(constants.NODE_INITD_SCRIPT) and
          os.access(constants.NODE_INITD_SCRIPT, os.X_OK)):
    raise errors.OpPrereqError("Init.d script '%s' missing or not"
                               " executable." % constants.NODE_INITD_SCRIPT)

  # set up the inter-node password and certificate
  _InitGanetiServerSetup()

  # set up ssh config and /etc/hosts
  f = open(constants.SSH_HOST_RSA_PUB, 'r')
  try:
    sshline = f.read()
  finally:
    f.close()
  # the second space-separated field of the public key line is the key
  sshkey = sshline.split(" ")[1]

  utils.AddHostToEtcHosts(hostname.name)
  _InitSSHSetup(hostname.name)

  # init of cluster config file
  cluster_config = objects.Cluster(
    serial_no=1,
    rsahostkeypub=sshkey,
    highest_used_port=(constants.FIRST_DRBD_PORT - 1),
    mac_prefix=mac_prefix,
    volume_group_name=vg_name,
    default_bridge=def_bridge,
    tcpudp_port_pool=set(),
    hypervisor=hypervisor_type,
    master_node=hostname.name,
    master_ip=clustername.ip,
    master_netdev=master_netdev,
    cluster_name=clustername.name,
    file_storage_dir=file_storage_dir,
    enabled_hypervisors=enabled_hypervisors,
    beparams={constants.BEGR_DEFAULT: beparams},
    hvparams=hvparams,
    )
  master_node_config = objects.Node(name=hostname.name,
                                    primary_ip=hostname.ip,
                                    secondary_ip=secondary_ip)

  cfg = InitConfig(constants.CONFIG_VERSION,
                   cluster_config, master_node_config)
  ssh.WriteKnownHostsFile(cfg, constants.SSH_KNOWN_HOSTS_FILE)

  # start the master ip
  # TODO: Review rpc call from bootstrap
  RpcRunner.call_node_start_master(hostname.name, True)
243 b3f1cf6f Iustin Pop
244 b1b6ea87 Iustin Pop
245 02f99608 Oleksiy Mishchenko
def InitConfig(version, cluster_config, master_node_config,
               cfg_file=constants.CLUSTER_CONF_FILE):
  """Build and save the very first cluster configuration.

  The configuration holds exactly one node, the given master node, and
  an empty instance dictionary; its serial number is set to 1.

  @type version: int
  @param version: Configuration version
  @type cluster_config: objects.Cluster
  @param cluster_config: Cluster configuration
  @type master_node_config: objects.Node
  @param master_node_config: Master node configuration
  @type cfg_file: string
  @param cfg_file: Configuration file path

  @rtype: ssconf.SimpleConfigWriter
  @returns: Initialized config instance, already saved

  """
  # the master is the only node known at this point, keyed by its name
  node_map = {}
  node_map[master_node_config.name] = master_node_config

  initial_data = objects.ConfigData(version=version,
                                    cluster=cluster_config,
                                    nodes=node_map,
                                    instances={},
                                    serial_no=1)

  writer = ssconf.SimpleConfigWriter.FromDict(initial_data.ToDict(),
                                              cfg_file)
  writer.Save()
  return writer
278 02f99608 Oleksiy Mishchenko
279 02f99608 Oleksiy Mishchenko
280 140aa4a8 Iustin Pop
def FinalizeClusterDestroy(master):
  """Run the final steps of a cluster destroy on the master node.

  The master role is stopped first, then the node is told to leave the
  cluster. A failure of either call is only logged as a warning, it
  does not abort the function.

  @param master: name of the master node both rpc calls are sent to

  """
  master_stopped = RpcRunner.call_node_stop_master(master, True)
  if not master_stopped:
    logging.warning("Could not disable the master role")

  node_left = RpcRunner.call_node_leave_cluster(master)
  if not node_left:
    logging.warning("Could not shutdown the node daemon and cleanup the node")
291 140aa4a8 Iustin Pop
292 140aa4a8 Iustin Pop
293 c4b6c29c Michael Hanselmann
def SetupNodeDaemon(node, ssh_key_check):
  """Push the node daemon credentials to a node and restart its daemon.

  The function connects to the node via ssh as root, writes the cluster
  password file and the SSL certificate there and runs the node daemon
  init script with "restart".

  Args:
    node: fully qualified domain name for the new node
    ssh_key_check: passed to the ssh runner as both the ask_key and the
      strict_host_check argument

  Returns:
    0 on success

  Raises:
    errors.OpExecError: if the local password or certificate fail the
      sanity checks, or if the remote command fails

  """
  cluster_cfg = ssconf.SimpleConfigReader()
  runner = ssh.SshRunner(cluster_cfg.GetClusterName())

  # the password ends up inside single quotes in a shell command, hence
  # only a conservative character set is accepted
  node_password = utils.GetNodeDaemonPassword()
  if re.match('^[a-zA-Z0-9.]{1,64}$', node_password) is None:
    raise errors.OpExecError("ganeti password corruption detected")

  cert_file = open(constants.SSL_CERT_FILE)
  try:
    cert_pem = cert_file.read(8192)
  finally:
    cert_file.close()

  # The certificate is sent as a here-document terminated by '!EOF.';
  # neither '!' nor '.' are valid chars in the base64 pem encoding, so a
  # line starting with the terminator means the certificate is invalid
  # (and would break the parsing of the shell sequence below).
  if re.search(r'^!EOF\.', cert_pem, re.MULTILINE) is not None:
    raise errors.OpExecError("invalid PEM encoding in the SSL certificate")
  if not cert_pem.endswith("\n"):
    raise errors.OpExecError("PEM must end with newline")

  # Everything substituted below is either a constant or has been
  # checked above.
  template = ("umask 077 && "
              "echo '%s' > '%s' && "
              "cat > '%s' << '!EOF.' && \n"
              "%s!EOF.\n%s restart")
  remote_cmd = template % (node_password, constants.CLUSTER_PASSWORD_FILE,
                           constants.SSL_CERT_FILE, cert_pem,
                           constants.NODE_INITD_SCRIPT)

  outcome = runner.Run(node, 'root', remote_cmd, batch=False,
                       ask_key=ssh_key_check,
                       use_cluster_key=False,
                       strict_host_check=ssh_key_check)
  if outcome.failed:
    raise errors.OpExecError("Remote command on node %s, error: %s,"
                             " output: %s" %
                             (node, outcome.fail_reason, outcome.output))

  return 0
345 827f753e Guido Trotter
346 b1b6ea87 Iustin Pop
347 b1b6ea87 Iustin Pop
def MasterFailover():
  """Make the local node the new master of the cluster.

  The function refuses to run on the node that is already the master
  and, if other nodes answer the vote, requires that their most voted
  master matches the master known locally. It then stops the master
  role on the old master, records this node as master in the
  configuration, uploads the configuration file to the nodes and starts
  the master role locally.

  Failures to stop the old master or to distribute the configuration
  are logged but do not change the return value.

  @rtype: int
  @return: 0 if the master role was started on this node, 1 otherwise
  @raise errors.OpPrereqError: if this node is already the master or
      the vote does not confirm the locally known master

  """
  writer = ssconf.SimpleConfigWriter()

  new_master = utils.HostInfo().name
  old_master = writer.GetMasterNode()
  known_nodes = writer.GetNodeList()

  if new_master == old_master:
    raise errors.OpPrereqError("This commands must be run on the node"
                               " where you want the new master to be."
                               " %s is already the master" %
                               old_master)

  # an empty vote list means there is nobody else to ask
  votes = GatherMasterVotes(known_nodes)
  if votes:
    top_choice = votes[0][0]
    if top_choice is None:
      raise errors.OpPrereqError("Cluster is inconsistent, most nodes did not"
                                 " respond.")
    if top_choice != old_master:
      raise errors.OpPrereqError("I have wrong configuration, I believe the"
                                 " master is %s but the other nodes voted for"
                                 " %s. Please resync the configuration of"
                                 " this node." % (old_master, top_choice))
  # end checks

  logging.info("Setting master to %s, old master: %s", new_master, old_master)

  old_stopped = RpcRunner.call_node_stop_master(old_master, True)
  if not old_stopped:
    logging.error("Could not disable the master role on the old master"
                  " %s, please disable manually", old_master)

  writer.SetMasterNode(new_master)
  writer.Save()

  # Here we have a phase where no master should be running

  uploaded = RpcRunner.call_upload_file(writer.GetNodeList(),
                                        constants.CLUSTER_CONF_FILE)
  if not uploaded:
    logging.error("Could not distribute the new configuration"
                  " to the other nodes, please check.")

  if RpcRunner.call_node_start_master(new_master, True):
    return 0

  logging.error("Could not start the master role on the new master"
                " %s, please check", new_master)
  return 1
406 d7cdb55d Iustin Pop
407 d7cdb55d Iustin Pop
408 d7cdb55d Iustin Pop
def GatherMasterVotes(node_list):
  """Check the agreement on who is the master.

  This function will return a list of (node, number of votes), ordered
  by the number of votes, highest first. Nodes whose answer could not
  be used are counted under the key None.

  Note that the sum of votes is the number of nodes this machine
  knows, whereas the number of entries in the list could be different
  (if some nodes vote for another master).

  We remove ourselves from the list since we know that (bugs aside)
  since we use the same source for configuration information for both
  backend and bootstrap, we'll always vote for ourselves.

  @type node_list: list
  @param node_list: the list of nodes to query for master info; the
      list is modified in place, the current node will be removed if it
      is in the list
  @rtype: list
  @return: list of (node, votes); empty if there is no node left to
      query after removing the current one

  """
  myself = utils.HostInfo().name
  try:
    node_list.remove(myself)
  except ValueError:
    pass
  if not node_list:
    # no nodes left (eventually after removing myself)
    return []

  results = rpc.RpcRunner.call_master_info(node_list)
  if not isinstance(results, dict):
    # this should not happen (unless internal error in rpc)
    logging.critical("Can't complete rpc call, aborting master startup")
    return [(None, len(node_list))]

  votes = {}
  for answer in results.values():
    if isinstance(answer, (tuple, list)) and len(answer) >= 3:
      master_node = answer[2]
    else:
      # here the rpc layer should have already logged errors
      master_node = None
    votes[master_node] = votes.get(master_node, 0) + 1

  vote_list = list(votes.items())
  # sort first on number of votes then on name; on a tie None must come
  # after a real name (half of the nodes not responding, half voting all
  # for the same master). This is spelled out in the key instead of
  # relying on the implicit ordering between None and strings.
  vote_list.sort(key=lambda x: (x[1], x[0] is not None, x[0] or ""),
                 reverse=True)

  return vote_list