Statistics
| Branch: | Tag: | Revision:

root / lib / bootstrap.py @ 44caf5a8

History | View | Annotate | Download (20.8 kB)

1 a0c9f010 Michael Hanselmann
#
2 a0c9f010 Michael Hanselmann
#
3 a0c9f010 Michael Hanselmann
4 a0c9f010 Michael Hanselmann
# Copyright (C) 2006, 2007, 2008 Google Inc.
5 a0c9f010 Michael Hanselmann
#
6 a0c9f010 Michael Hanselmann
# This program is free software; you can redistribute it and/or modify
7 a0c9f010 Michael Hanselmann
# it under the terms of the GNU General Public License as published by
8 a0c9f010 Michael Hanselmann
# the Free Software Foundation; either version 2 of the License, or
9 a0c9f010 Michael Hanselmann
# (at your option) any later version.
10 a0c9f010 Michael Hanselmann
#
11 a0c9f010 Michael Hanselmann
# This program is distributed in the hope that it will be useful, but
12 a0c9f010 Michael Hanselmann
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 a0c9f010 Michael Hanselmann
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 a0c9f010 Michael Hanselmann
# General Public License for more details.
15 a0c9f010 Michael Hanselmann
#
16 a0c9f010 Michael Hanselmann
# You should have received a copy of the GNU General Public License
17 a0c9f010 Michael Hanselmann
# along with this program; if not, write to the Free Software
18 a0c9f010 Michael Hanselmann
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 a0c9f010 Michael Hanselmann
# 02110-1301, USA.
20 a0c9f010 Michael Hanselmann
21 a0c9f010 Michael Hanselmann
22 a0c9f010 Michael Hanselmann
"""Functions to bootstrap a new cluster.
23 a0c9f010 Michael Hanselmann

24 a0c9f010 Michael Hanselmann
"""
25 a0c9f010 Michael Hanselmann
26 a0c9f010 Michael Hanselmann
import os
27 a0c9f010 Michael Hanselmann
import os.path
28 a0c9f010 Michael Hanselmann
import re
29 b1b6ea87 Iustin Pop
import logging
30 c4415fd5 Michael Hanselmann
import tempfile
31 d693c864 Iustin Pop
import time
32 a0c9f010 Michael Hanselmann
33 a0c9f010 Michael Hanselmann
from ganeti import rpc
34 a0c9f010 Michael Hanselmann
from ganeti import ssh
35 a0c9f010 Michael Hanselmann
from ganeti import utils
36 a0c9f010 Michael Hanselmann
from ganeti import errors
37 a0c9f010 Michael Hanselmann
from ganeti import config
38 a0c9f010 Michael Hanselmann
from ganeti import constants
39 b9eeeb02 Michael Hanselmann
from ganeti import objects
40 a0c9f010 Michael Hanselmann
from ganeti import ssconf
41 a33848a5 Guido Trotter
from ganeti import serializer
42 a5728081 Guido Trotter
from ganeti import hypervisor
43 a0c9f010 Michael Hanselmann
44 e38220e4 Michael Hanselmann
45 531baf8e Iustin Pop
def _InitSSHSetup():
  """Create a fresh SSH keypair for the Ganeti user and authorize it.

  The key file locations are obtained from L{ssh.GetUserFiles} for
  C{constants.GANETI_RUNAS}. Existing private/public key files are
  backed up and removed, a new passphrase-less DSA keypair is generated
  with C{ssh-keygen}, and the new public key is appended to the user's
  authorized keys file.

  @raise errors.OpExecError: if C{ssh-keygen} fails

  """
  (private_path, public_path, authorized_path) = \
    ssh.GetUserFiles(constants.GANETI_RUNAS)

  # Back up (if present) and remove old key files before generating new ones
  for key_path in (private_path, public_path):
    if os.path.exists(key_path):
      utils.CreateBackup(key_path)
    utils.RemoveFile(key_path)

  # "-N ''" requests an empty passphrase, "-q" silences ssh-keygen
  keygen_cmd = ["ssh-keygen", "-t", "dsa", "-f", private_path,
                "-q", "-N", ""]
  keygen = utils.RunCmd(keygen_cmd)
  if keygen.failed:
    raise errors.OpExecError("Could not generate ssh keypair, error %s" %
                             keygen.output)

  new_public_key = utils.ReadFile(public_path)
  utils.AddAuthorizedKey(authorized_path, new_public_key)
67 a0c9f010 Michael Hanselmann
68 a0c9f010 Michael Hanselmann
69 cd34faf2 Michael Hanselmann
def GenerateSelfSignedSslCert(file_name, validity=(365 * 5)):
70 40a97d80 Michael Hanselmann
  """Generates a self-signed SSL certificate.
71 a0c9f010 Michael Hanselmann

72 40a97d80 Michael Hanselmann
  @type file_name: str
73 40a97d80 Michael Hanselmann
  @param file_name: Path to output file
74 40a97d80 Michael Hanselmann
  @type validity: int
75 40a97d80 Michael Hanselmann
  @param validity: Validity for certificate in days
76 a0c9f010 Michael Hanselmann

77 a0c9f010 Michael Hanselmann
  """
78 c4415fd5 Michael Hanselmann
  (fd, tmp_file_name) = tempfile.mkstemp(dir=os.path.dirname(file_name))
79 c4415fd5 Michael Hanselmann
  try:
80 88828491 Michael Hanselmann
    try:
81 88828491 Michael Hanselmann
      # Set permissions before writing key
82 88828491 Michael Hanselmann
      os.chmod(tmp_file_name, 0600)
83 88828491 Michael Hanselmann
84 88828491 Michael Hanselmann
      result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
85 88828491 Michael Hanselmann
                             "-days", str(validity), "-nodes", "-x509",
86 88828491 Michael Hanselmann
                             "-keyout", tmp_file_name, "-out", tmp_file_name,
87 88828491 Michael Hanselmann
                             "-batch"])
88 88828491 Michael Hanselmann
      if result.failed:
89 88828491 Michael Hanselmann
        raise errors.OpExecError("Could not generate SSL certificate, command"
90 88828491 Michael Hanselmann
                                 " %s had exitcode %s and error message %s" %
91 88828491 Michael Hanselmann
                                 (result.cmd, result.exit_code, result.output))
92 88828491 Michael Hanselmann
93 88828491 Michael Hanselmann
      # Make read-only
94 88828491 Michael Hanselmann
      os.chmod(tmp_file_name, 0400)
95 88828491 Michael Hanselmann
96 88828491 Michael Hanselmann
      os.rename(tmp_file_name, file_name)
97 88828491 Michael Hanselmann
    finally:
98 88828491 Michael Hanselmann
      utils.RemoveFile(tmp_file_name)
99 c4415fd5 Michael Hanselmann
  finally:
100 88828491 Michael Hanselmann
    os.close(fd)
101 40a97d80 Michael Hanselmann
102 40a97d80 Michael Hanselmann
103 c008906b Michael Hanselmann
def GenerateHmacKey(file_name):
104 c008906b Michael Hanselmann
  """Writes a new HMAC key.
105 c008906b Michael Hanselmann

106 c008906b Michael Hanselmann
  @type file_name: str
107 c008906b Michael Hanselmann
  @param file_name: Path to output file
108 c008906b Michael Hanselmann

109 c008906b Michael Hanselmann
  """
110 e2e92ea0 Guido Trotter
  utils.WriteFile(file_name, data="%s\n" % utils.GenerateSecret(), mode=0400)
111 c008906b Michael Hanselmann
112 c008906b Michael Hanselmann
113 8f215968 Michael Hanselmann
def _InitGanetiServerSetup(master_name):
  """Generate the server credentials and start the initial node daemon.

  A new self-signed certificate is always written to
  C{constants.SSL_CERT_FILE}. The RAPI certificate and the cluster HMAC
  key are only generated when their files do not exist yet. Afterwards
  the node daemon is started through C{constants.DAEMON_UTIL} and this
  function waits until it answers on C{master_name}.

  @param master_name: name of the node on which the daemon is queried
  @raise errors.OpExecError: if the node daemon cannot be started

  """
  GenerateSelfSignedSslCert(constants.SSL_CERT_FILE)

  # These files are never overwritten when already present
  optional_files = [
    (constants.RAPI_CERT_FILE, GenerateSelfSignedSslCert),
    (constants.HMAC_CLUSTER_KEY, GenerateHmacKey),
    ]
  for (path, generate_fn) in optional_files:
    if not os.path.exists(path):
      generate_fn(path)

  start = utils.RunCmd([constants.DAEMON_UTIL, "start", constants.NODED])
  if start.failed:
    raise errors.OpExecError("Could not start the node daemon, command %s"
                             " had exitcode %s and error %s" %
                             (start.cmd, start.exit_code, start.output))

  _WaitForNodeDaemon(master_name)
136 5627f375 Michael Hanselmann
137 5627f375 Michael Hanselmann
138 5627f375 Michael Hanselmann
def _WaitForNodeDaemon(node_name):
  """Block until the node daemon on a node answers a version query.

  The version RPC is retried via L{utils.Retry} (called with 1.0 and
  10.0; presumably the delay and the overall timeout in seconds, which
  matches the error message below).

  @param node_name: name of the node to query
  @raise errors.OpExecError: if L{utils.Retry} times out

  """
  def _ProbeDaemon():
    answer = rpc.RpcRunner.call_version([node_name])[node_name]
    if answer.fail_msg:
      # Any failure means "not ready yet"; ask Retry to call us again
      raise utils.RetryAgain()

  try:
    utils.Retry(_ProbeDaemon, 1.0, 10.0)
  except utils.RetryTimeout:
    raise errors.OpExecError("Node daemon on %s didn't answer queries within"
                             " 10 seconds" % node_name)
152 5627f375 Michael Hanselmann
153 a0c9f010 Michael Hanselmann
154 ec0652ad Guido Trotter
def InitCluster(cluster_name, mac_prefix,
155 ce735215 Guido Trotter
                master_netdev, file_storage_dir, candidate_pool_size,
156 b6a30b0d Guido Trotter
                secondary_ip=None, vg_name=None, beparams=None,
157 b6a30b0d Guido Trotter
                nicparams=None, hvparams=None, enabled_hypervisors=None,
158 b989b9d9 Ken Wehr
                modify_etc_hosts=True, modify_ssh_setup=True):
159 a0c9f010 Michael Hanselmann
  """Initialise the cluster.
160 a0c9f010 Michael Hanselmann

161 ce735215 Guido Trotter
  @type candidate_pool_size: int
162 ce735215 Guido Trotter
  @param candidate_pool_size: master candidate pool size
163 ce735215 Guido Trotter

164 a0c9f010 Michael Hanselmann
  """
165 ce735215 Guido Trotter
  # TODO: complete the docstring
166 a0c9f010 Michael Hanselmann
  if config.ConfigWriter.IsCluster():
167 debac808 Iustin Pop
    raise errors.OpPrereqError("Cluster is already initialised",
168 debac808 Iustin Pop
                               errors.ECODE_STATE)
169 a0c9f010 Michael Hanselmann
170 b119bccb Guido Trotter
  if not enabled_hypervisors:
171 b119bccb Guido Trotter
    raise errors.OpPrereqError("Enabled hypervisors list must contain at"
172 debac808 Iustin Pop
                               " least one member", errors.ECODE_INVAL)
173 b119bccb Guido Trotter
  invalid_hvs = set(enabled_hypervisors) - constants.HYPER_TYPES
174 b119bccb Guido Trotter
  if invalid_hvs:
175 b119bccb Guido Trotter
    raise errors.OpPrereqError("Enabled hypervisors contains invalid"
176 debac808 Iustin Pop
                               " entries: %s" % invalid_hvs,
177 debac808 Iustin Pop
                               errors.ECODE_INVAL)
178 b119bccb Guido Trotter
179 104f4ca1 Iustin Pop
  hostname = utils.GetHostInfo()
180 a0c9f010 Michael Hanselmann
181 a0c9f010 Michael Hanselmann
  if hostname.ip.startswith("127."):
182 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("This host's IP resolves to the private"
183 a0c9f010 Michael Hanselmann
                               " range (%s). Please fix DNS or %s." %
184 debac808 Iustin Pop
                               (hostname.ip, constants.ETC_HOSTS),
185 debac808 Iustin Pop
                               errors.ECODE_ENVIRON)
186 a0c9f010 Michael Hanselmann
187 caad16e2 Iustin Pop
  if not utils.OwnIpAddress(hostname.ip):
188 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Inconsistency: this host's name resolves"
189 a0c9f010 Michael Hanselmann
                               " to %s,\nbut this ip address does not"
190 debac808 Iustin Pop
                               " belong to this host. Aborting." %
191 debac808 Iustin Pop
                               hostname.ip, errors.ECODE_ENVIRON)
192 a0c9f010 Michael Hanselmann
193 44caf5a8 Iustin Pop
  clustername = utils.GetHostInfo(utils.HostInfo.NormalizeName(cluster_name))
194 a0c9f010 Michael Hanselmann
195 a0c9f010 Michael Hanselmann
  if utils.TcpPing(clustername.ip, constants.DEFAULT_NODED_PORT,
196 a0c9f010 Michael Hanselmann
                   timeout=5):
197 debac808 Iustin Pop
    raise errors.OpPrereqError("Cluster IP already active. Aborting.",
198 debac808 Iustin Pop
                               errors.ECODE_NOTUNIQUE)
199 a0c9f010 Michael Hanselmann
200 a0c9f010 Michael Hanselmann
  if secondary_ip:
201 a0c9f010 Michael Hanselmann
    if not utils.IsValidIP(secondary_ip):
202 debac808 Iustin Pop
      raise errors.OpPrereqError("Invalid secondary ip given",
203 debac808 Iustin Pop
                                 errors.ECODE_INVAL)
204 a0c9f010 Michael Hanselmann
    if (secondary_ip != hostname.ip and
205 caad16e2 Iustin Pop
        not utils.OwnIpAddress(secondary_ip)):
206 a0c9f010 Michael Hanselmann
      raise errors.OpPrereqError("You gave %s as secondary IP,"
207 a0c9f010 Michael Hanselmann
                                 " but it does not belong to this host." %
208 debac808 Iustin Pop
                                 secondary_ip, errors.ECODE_ENVIRON)
209 b9eeeb02 Michael Hanselmann
  else:
210 b9eeeb02 Michael Hanselmann
    secondary_ip = hostname.ip
211 a0c9f010 Michael Hanselmann
212 a0c9f010 Michael Hanselmann
  if vg_name is not None:
213 a0c9f010 Michael Hanselmann
    # Check if volume group is valid
214 a0c9f010 Michael Hanselmann
    vgstatus = utils.CheckVolumeGroupSize(utils.ListVolumeGroups(), vg_name,
215 a0c9f010 Michael Hanselmann
                                          constants.MIN_VG_SIZE)
216 a0c9f010 Michael Hanselmann
    if vgstatus:
217 a0c9f010 Michael Hanselmann
      raise errors.OpPrereqError("Error: %s\nspecify --no-lvm-storage if"
218 debac808 Iustin Pop
                                 " you are not using lvm" % vgstatus,
219 debac808 Iustin Pop
                                 errors.ECODE_INVAL)
220 a0c9f010 Michael Hanselmann
221 a0c9f010 Michael Hanselmann
  file_storage_dir = os.path.normpath(file_storage_dir)
222 a0c9f010 Michael Hanselmann
223 a0c9f010 Michael Hanselmann
  if not os.path.isabs(file_storage_dir):
224 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("The file storage directory you passed is"
225 debac808 Iustin Pop
                               " not an absolute path.", errors.ECODE_INVAL)
226 a0c9f010 Michael Hanselmann
227 a0c9f010 Michael Hanselmann
  if not os.path.exists(file_storage_dir):
228 a0c9f010 Michael Hanselmann
    try:
229 a0c9f010 Michael Hanselmann
      os.makedirs(file_storage_dir, 0750)
230 a0c9f010 Michael Hanselmann
    except OSError, err:
231 a0c9f010 Michael Hanselmann
      raise errors.OpPrereqError("Cannot create file storage directory"
232 debac808 Iustin Pop
                                 " '%s': %s" % (file_storage_dir, err),
233 debac808 Iustin Pop
                                 errors.ECODE_ENVIRON)
234 a0c9f010 Michael Hanselmann
235 a0c9f010 Michael Hanselmann
  if not os.path.isdir(file_storage_dir):
236 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("The file storage directory '%s' is not"
237 debac808 Iustin Pop
                               " a directory." % file_storage_dir,
238 debac808 Iustin Pop
                               errors.ECODE_ENVIRON)
239 a0c9f010 Michael Hanselmann
240 a0c9f010 Michael Hanselmann
  if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$", mac_prefix):
241 debac808 Iustin Pop
    raise errors.OpPrereqError("Invalid mac prefix given '%s'" % mac_prefix,
242 debac808 Iustin Pop
                               errors.ECODE_INVAL)
243 a0c9f010 Michael Hanselmann
244 a0c9f010 Michael Hanselmann
  result = utils.RunCmd(["ip", "link", "show", "dev", master_netdev])
245 a0c9f010 Michael Hanselmann
  if result.failed:
246 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" %
247 a0c9f010 Michael Hanselmann
                               (master_netdev,
248 debac808 Iustin Pop
                                result.output.strip()), errors.ECODE_INVAL)
249 a0c9f010 Michael Hanselmann
250 9dae41ad Guido Trotter
  dirs = [(constants.RUN_GANETI_DIR, constants.RUN_DIRS_MODE)]
251 9dae41ad Guido Trotter
  utils.EnsureDirs(dirs)
252 9dae41ad Guido Trotter
253 a5728081 Guido Trotter
  utils.ForceDictType(beparams, constants.BES_PARAMETER_TYPES)
254 b6a30b0d Guido Trotter
  utils.ForceDictType(nicparams, constants.NICS_PARAMETER_TYPES)
255 b6a30b0d Guido Trotter
  objects.NIC.CheckParameterSyntax(nicparams)
256 b6a30b0d Guido Trotter
257 a5728081 Guido Trotter
  # hvparams is a mapping of hypervisor->hvparams dict
258 a5728081 Guido Trotter
  for hv_name, hv_params in hvparams.iteritems():
259 a5728081 Guido Trotter
    utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
260 a5728081 Guido Trotter
    hv_class = hypervisor.GetHypervisor(hv_name)
261 a5728081 Guido Trotter
    hv_class.CheckParameterSyntax(hv_params)
262 d4b72030 Guido Trotter
263 a0c9f010 Michael Hanselmann
  # set up the inter-node password and certificate
264 8f215968 Michael Hanselmann
  _InitGanetiServerSetup(hostname.name)
265 a0c9f010 Michael Hanselmann
266 a0c9f010 Michael Hanselmann
  # set up ssh config and /etc/hosts
267 13998ef2 Michael Hanselmann
  sshline = utils.ReadFile(constants.SSH_HOST_RSA_PUB)
268 a0c9f010 Michael Hanselmann
  sshkey = sshline.split(" ")[1]
269 a0c9f010 Michael Hanselmann
270 b86a6bcd Guido Trotter
  if modify_etc_hosts:
271 b86a6bcd Guido Trotter
    utils.AddHostToEtcHosts(hostname.name)
272 b86a6bcd Guido Trotter
273 b989b9d9 Ken Wehr
  if modify_ssh_setup:
274 b989b9d9 Ken Wehr
    _InitSSHSetup()
275 a0c9f010 Michael Hanselmann
276 430b923c Iustin Pop
  now = time.time()
277 430b923c Iustin Pop
278 a0c9f010 Michael Hanselmann
  # init of cluster config file
279 b9eeeb02 Michael Hanselmann
  cluster_config = objects.Cluster(
280 b9eeeb02 Michael Hanselmann
    serial_no=1,
281 b9eeeb02 Michael Hanselmann
    rsahostkeypub=sshkey,
282 b9eeeb02 Michael Hanselmann
    highest_used_port=(constants.FIRST_DRBD_PORT - 1),
283 b9eeeb02 Michael Hanselmann
    mac_prefix=mac_prefix,
284 b9eeeb02 Michael Hanselmann
    volume_group_name=vg_name,
285 b9eeeb02 Michael Hanselmann
    tcpudp_port_pool=set(),
286 f6bd6e98 Michael Hanselmann
    master_node=hostname.name,
287 f6bd6e98 Michael Hanselmann
    master_ip=clustername.ip,
288 f6bd6e98 Michael Hanselmann
    master_netdev=master_netdev,
289 f6bd6e98 Michael Hanselmann
    cluster_name=clustername.name,
290 f6bd6e98 Michael Hanselmann
    file_storage_dir=file_storage_dir,
291 ea3a925f Alexander Schreiber
    enabled_hypervisors=enabled_hypervisors,
292 4ef7f423 Guido Trotter
    beparams={constants.PP_DEFAULT: beparams},
293 b6a30b0d Guido Trotter
    nicparams={constants.PP_DEFAULT: nicparams},
294 ea3a925f Alexander Schreiber
    hvparams=hvparams,
295 ce735215 Guido Trotter
    candidate_pool_size=candidate_pool_size,
296 022c3a0b Guido Trotter
    modify_etc_hosts=modify_etc_hosts,
297 b989b9d9 Ken Wehr
    modify_ssh_setup=modify_ssh_setup,
298 430b923c Iustin Pop
    ctime=now,
299 430b923c Iustin Pop
    mtime=now,
300 430b923c Iustin Pop
    uuid=utils.NewUUID(),
301 b9eeeb02 Michael Hanselmann
    )
302 b9eeeb02 Michael Hanselmann
  master_node_config = objects.Node(name=hostname.name,
303 b9eeeb02 Michael Hanselmann
                                    primary_ip=hostname.ip,
304 b9222f32 Guido Trotter
                                    secondary_ip=secondary_ip,
305 c044f32c Guido Trotter
                                    serial_no=1,
306 c044f32c Guido Trotter
                                    master_candidate=True,
307 af64c0ea Iustin Pop
                                    offline=False, drained=False,
308 c044f32c Guido Trotter
                                    )
309 9e1333b9 Guido Trotter
  InitConfig(constants.CONFIG_VERSION, cluster_config, master_node_config)
310 05cc153f Guido Trotter
  cfg = config.ConfigWriter()
311 9e1333b9 Guido Trotter
  ssh.WriteKnownHostsFile(cfg, constants.SSH_KNOWN_HOSTS_FILE)
312 a4eae71f Michael Hanselmann
  cfg.Update(cfg.GetClusterInfo(), logging.error)
313 827f753e Guido Trotter
314 b3f1cf6f Iustin Pop
  # start the master ip
315 b3f1cf6f Iustin Pop
  # TODO: Review rpc call from bootstrap
316 b726aff0 Iustin Pop
  # TODO: Warn on failed start master
317 3583908a Guido Trotter
  rpc.RpcRunner.call_node_start_master(hostname.name, True, False)
318 b3f1cf6f Iustin Pop
319 b1b6ea87 Iustin Pop
320 02f99608 Oleksiy Mishchenko
def InitConfig(version, cluster_config, master_node_config,
321 02f99608 Oleksiy Mishchenko
               cfg_file=constants.CLUSTER_CONF_FILE):
322 7b3a8fb5 Iustin Pop
  """Create the initial cluster configuration.
323 7b3a8fb5 Iustin Pop

324 7b3a8fb5 Iustin Pop
  It will contain the current node, which will also be the master
325 7b3a8fb5 Iustin Pop
  node, and no instances.
326 7b3a8fb5 Iustin Pop

327 7b3a8fb5 Iustin Pop
  @type version: int
328 c41eea6e Iustin Pop
  @param version: configuration version
329 c41eea6e Iustin Pop
  @type cluster_config: L{objects.Cluster}
330 c41eea6e Iustin Pop
  @param cluster_config: cluster configuration
331 c41eea6e Iustin Pop
  @type master_node_config: L{objects.Node}
332 c41eea6e Iustin Pop
  @param master_node_config: master node configuration
333 c41eea6e Iustin Pop
  @type cfg_file: string
334 c41eea6e Iustin Pop
  @param cfg_file: configuration file path
335 c41eea6e Iustin Pop

336 7b3a8fb5 Iustin Pop
  """
337 7b3a8fb5 Iustin Pop
  nodes = {
338 7b3a8fb5 Iustin Pop
    master_node_config.name: master_node_config,
339 7b3a8fb5 Iustin Pop
    }
340 7b3a8fb5 Iustin Pop
341 d693c864 Iustin Pop
  now = time.time()
342 7b3a8fb5 Iustin Pop
  config_data = objects.ConfigData(version=version,
343 7b3a8fb5 Iustin Pop
                                   cluster=cluster_config,
344 7b3a8fb5 Iustin Pop
                                   nodes=nodes,
345 7b3a8fb5 Iustin Pop
                                   instances={},
346 d693c864 Iustin Pop
                                   serial_no=1,
347 d693c864 Iustin Pop
                                   ctime=now, mtime=now)
348 a33848a5 Guido Trotter
  utils.WriteFile(cfg_file,
349 a33848a5 Guido Trotter
                  data=serializer.Dump(config_data.ToDict()),
350 a33848a5 Guido Trotter
                  mode=0600)
351 02f99608 Oleksiy Mishchenko
352 02f99608 Oleksiy Mishchenko
353 140aa4a8 Iustin Pop
def FinalizeClusterDestroy(master):
  """Run the final steps of a cluster destroy on the master node.

  The master role is stopped on the given node, and the node is then
  told to leave the cluster. Failures of either RPC call are logged as
  warnings and do not abort the procedure. This completes the destroy
  begun in cmdlib.LUDestroyOpcode.

  @param master: the node on which both RPC calls are executed

  """
  # Read the SSH setting before anything is shut down
  cluster = config.ConfigWriter().GetClusterInfo()
  ssh_was_modified = cluster.modify_ssh_setup

  stop_error = rpc.RpcRunner.call_node_stop_master(master, True).fail_msg
  if stop_error:
    logging.warning("Could not disable the master role: %s", stop_error)

  leave_result = rpc.RpcRunner.call_node_leave_cluster(master,
                                                       ssh_was_modified)
  leave_error = leave_result.fail_msg
  if leave_error:
    logging.warning("Could not shutdown the node daemon and cleanup"
                    " the node: %s", leave_error)
371 140aa4a8 Iustin Pop
372 140aa4a8 Iustin Pop
373 87622829 Iustin Pop
def SetupNodeDaemon(cluster_name, node, ssh_key_check):
  """Push the cluster credentials to a new node and start its daemon.

  This function must be called before the actual node-add opcode. It
  connects to the new node via ssh as root, writes the node daemon
  certificate, the RAPI certificate and the HMAC key using shell
  here-documents, makes them read-only and starts the node daemon.
  After that the master can talk to the node through normal rpc calls.

  @param cluster_name: the cluster name
  @param node: the name of the new node
  @param ssh_key_check: whether to do a strict key check
  @raise errors.OpExecError: if one of the files contains the
      here-document terminator or if the remote command fails

  """
  runner = ssh.SshRunner(cluster_name)

  # Files to transfer; each is written to the same path on the new node
  remote_paths = [
    constants.SSL_CERT_FILE,
    constants.RAPI_CERT_FILE,
    constants.HMAC_CLUSTER_KEY,
    ]
  payloads = [utils.ReadFile(path) for path in remote_paths]

  # Neither '!' nor '.' occurs in base64 PEM data or in hexadecimal HMAC
  # keys, so a line starting with the terminator means the content is
  # invalid; valid content cannot end the here-document prematurely
  for data in payloads:
    if re.search('^!EOF\.', data, re.MULTILINE):
      raise errors.OpExecError("invalid SSL certificate or HMAC key")

  # One here-document per file; the terminator has to start on its own
  # line, hence the content is newline-terminated if needed
  heredocs = []
  for (path, data) in zip(remote_paths, payloads):
    if not data.endswith("\n"):
      data += "\n"
    heredocs.append("cat > '%s' << '!EOF.' && \n"
                    "%s!EOF.\n" % (path, data))

  # All interpolated values are either constants or were checked above
  remote_cmd = ("umask 077 && " +
                "".join(heredocs) +
                "chmod 0400 %s %s %s && " % tuple(remote_paths) +
                "%s start %s" % (constants.DAEMON_UTIL, constants.NODED))

  outcome = runner.Run(node, 'root', remote_cmd, batch=False,
                       ask_key=ssh_key_check,
                       use_cluster_key=False,
                       strict_host_check=ssh_key_check)
  if outcome.failed:
    raise errors.OpExecError("Remote command on node %s, error: %s,"
                             " output: %s" %
                             (node, outcome.fail_reason, outcome.output))

  _WaitForNodeDaemon(node)
437 5627f375 Michael Hanselmann
438 b1b6ea87 Iustin Pop
439 8e2524c3 Guido Trotter
def MasterFailover(no_voting=False):
  """Make the local node the new master of the cluster.

  The function verifies that this node is not the master already and
  that it is a master candidate. Unless voting is disabled, it also
  verifies that the other nodes agree on who the current master is.
  Then the master role is stopped on the old master, the configuration
  is updated to name this node as master, and the master role is
  started here.

  @type no_voting: boolean
  @param no_voting: force the operation without remote nodes agreement
                      (dangerous)
  @rtype: int
  @return: 0 on success, 1 if the master role could not be started on
      the new master
  @raise errors.OpPrereqError: if one of the checks fails

  """
  store = ssconf.SimpleStore()

  (current_master, this_node) = ssconf.GetMasterAndMyself(store)
  all_nodes = store.GetNodeList()
  candidates = store.GetMasterCandidates()

  if current_master == this_node:
    raise errors.OpPrereqError("This commands must be run on the node"
                               " where you want the new master to be."
                               " %s is already the master" %
                               current_master, errors.ECODE_INVAL)

  if this_node not in candidates:
    other_candidates = [name for name in candidates
                        if name != current_master]
    raise errors.OpPrereqError("This node is not among the nodes marked"
                               " as master candidates. Only these nodes"
                               " can become masters. Current list of"
                               " master candidates is:\n"
                               "%s" % ('\n'.join(other_candidates)),
                               errors.ECODE_STATE)

  # Without voting there is nothing to compare against
  if no_voting:
    votes = []
  else:
    votes = GatherMasterVotes(all_nodes)

  if votes:
    # The first entry is the choice with the most votes
    top_choice = votes[0][0]
    if top_choice is None:
      raise errors.OpPrereqError("Cluster is inconsistent, most nodes did"
                                 " not respond.", errors.ECODE_ENVIRON)
    if top_choice != current_master:
      raise errors.OpPrereqError("I have a wrong configuration, I believe"
                                 " the master is %s but the other nodes"
                                 " voted %s. Please resync the configuration"
                                 " of this node." %
                                 (current_master, top_choice),
                                 errors.ECODE_STATE)
  # end checks

  logging.info("Setting master to %s, old master: %s", this_node,
               current_master)

  # A failure to stop the old master is reported but does not abort
  stop_result = rpc.RpcRunner.call_node_stop_master(current_master, True)
  stop_error = stop_result.fail_msg
  if stop_error:
    logging.error("Could not disable the master role on the old master"
                  " %s, please disable manually: %s", current_master,
                  stop_error)

  # Here we have a phase where no master should be running

  # instantiate a real config writer, as we now know we have the
  # configuration data
  cfg = config.ConfigWriter()

  cluster = cfg.GetClusterInfo()
  cluster.master_node = this_node
  # this will also regenerate the ssconf files, since we updated the
  # cluster info
  cfg.Update(cluster, logging.error)

  start_result = rpc.RpcRunner.call_node_start_master(this_node, True,
                                                      no_voting)
  start_error = start_result.fail_msg
  if start_error:
    logging.error("Could not start the master role on the new master"
                  " %s, please check: %s", this_node, start_error)
    return 1

  return 0
519 d7cdb55d Iustin Pop
520 d7cdb55d Iustin Pop
521 8eb148ae Iustin Pop
def GetMaster():
  """Return the master node as recorded in the simple store.

  gnt-cluster needs this value; rather than having it import ssconf
  directly, the lookup is wrapped here, in a module that already uses
  ssconf for other purposes.

  @return: the first element of the (master, myself) pair computed by
      L{ssconf.GetMasterAndMyself} on a fresh L{ssconf.SimpleStore}

  """
  # only the master half of the pair is of interest to the caller
  (master_name, _) = ssconf.GetMasterAndMyself(ssconf.SimpleStore())
  return master_name
535 8eb148ae Iustin Pop
536 8eb148ae Iustin Pop
537 d7cdb55d Iustin Pop
def GatherMasterVotes(node_list):
  """Check the agreement on who is the master.

  This function will return a list of (node, number of votes), ordered
  by decreasing number of votes. Errors (an RPC failure message or an
  invalid payload from a node) are counted under the key C{None}.

  Note that the sum of votes is the number of nodes this machine
  knows, whereas the number of entries in the list could be different
  (if some nodes vote for another master).

  We remove ourselves from the list since we know that (bugs aside)
  since we use the same source for configuration information for both
  backend and bootstrap, we'll always vote for ourselves.

  @type node_list: list
  @param node_list: the list of nodes to query for master info; the current
      node will be removed if it is in the list (the list is modified
      in place)
  @rtype: list
  @return: list of (node, votes); an empty list if there is no node
      left to query, or a single C{(None, len(node_list))} entry if the
      RPC layer did not return a dictionary

  """
  myself = utils.HostInfo().name
  try:
    node_list.remove(myself)
  except ValueError:
    # we were not part of the list, so there is nothing to remove
    pass
  if not node_list:
    # no nodes left (eventually after removing myself)
    return []

  results = rpc.RpcRunner.call_master_info(node_list)
  if not isinstance(results, dict):
    # this should not happen (unless internal error in rpc)
    logging.critical("Can't complete rpc call, aborting master startup")
    return [(None, len(node_list))]

  votes = {}
  for node, nres in results.items():
    data = nres.payload
    msg = nres.fail_msg
    if msg:
      logging.warning("Error contacting node %s: %s", node, msg)
      voted_for = None
    elif not isinstance(data, (tuple, list)) or len(data) < 3:
      logging.warning("Invalid data received from node %s: %s", node, data)
      voted_for = None
    else:
      # the third element of the payload is the master the node reports
      voted_for = data[2]
    votes[voted_for] = votes.get(voted_for, 0) + 1

  # Sort first on the number of votes (highest first), then on the name,
  # with the error key None placed after every real name: we want None
  # sorted later if we have half of the nodes not responding and half
  # voting all for the same master. The key never compares None with a
  # node name directly, as such a comparison raises TypeError on Python 3.
  return sorted(votes.items(),
                key=lambda item: (item[1], item[0] is not None,
                                  item[0] or ""),
                reverse=True)