Statistics
| Branch: | Tag: | Revision:

root / lib / bootstrap.py @ 5a9c3f46

History | View | Annotate | Download (16 kB)

1 a0c9f010 Michael Hanselmann
#
2 a0c9f010 Michael Hanselmann
#
3 a0c9f010 Michael Hanselmann
4 a0c9f010 Michael Hanselmann
# Copyright (C) 2006, 2007, 2008 Google Inc.
5 a0c9f010 Michael Hanselmann
#
6 a0c9f010 Michael Hanselmann
# This program is free software; you can redistribute it and/or modify
7 a0c9f010 Michael Hanselmann
# it under the terms of the GNU General Public License as published by
8 a0c9f010 Michael Hanselmann
# the Free Software Foundation; either version 2 of the License, or
9 a0c9f010 Michael Hanselmann
# (at your option) any later version.
10 a0c9f010 Michael Hanselmann
#
11 a0c9f010 Michael Hanselmann
# This program is distributed in the hope that it will be useful, but
12 a0c9f010 Michael Hanselmann
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 a0c9f010 Michael Hanselmann
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 a0c9f010 Michael Hanselmann
# General Public License for more details.
15 a0c9f010 Michael Hanselmann
#
16 a0c9f010 Michael Hanselmann
# You should have received a copy of the GNU General Public License
17 a0c9f010 Michael Hanselmann
# along with this program; if not, write to the Free Software
18 a0c9f010 Michael Hanselmann
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 a0c9f010 Michael Hanselmann
# 02110-1301, USA.
20 a0c9f010 Michael Hanselmann
21 a0c9f010 Michael Hanselmann
22 a0c9f010 Michael Hanselmann
"""Functions to bootstrap a new cluster.
23 a0c9f010 Michael Hanselmann

24 a0c9f010 Michael Hanselmann
"""
25 a0c9f010 Michael Hanselmann
26 a0c9f010 Michael Hanselmann
import os
27 a0c9f010 Michael Hanselmann
import os.path
28 a0c9f010 Michael Hanselmann
import sha
29 a0c9f010 Michael Hanselmann
import re
30 b1b6ea87 Iustin Pop
import logging
31 a0c9f010 Michael Hanselmann
32 a0c9f010 Michael Hanselmann
from ganeti import rpc
33 a0c9f010 Michael Hanselmann
from ganeti import ssh
34 a0c9f010 Michael Hanselmann
from ganeti import utils
35 a0c9f010 Michael Hanselmann
from ganeti import errors
36 a0c9f010 Michael Hanselmann
from ganeti import config
37 a0c9f010 Michael Hanselmann
from ganeti import constants
38 b9eeeb02 Michael Hanselmann
from ganeti import objects
39 a0c9f010 Michael Hanselmann
from ganeti import ssconf
40 a0c9f010 Michael Hanselmann
41 531baf8e Iustin Pop
def _InitSSHSetup():
  """Create a fresh SSH keypair for the Ganeti run-as user.

  The key file paths come from L{ssh.GetUserFiles}. An existing private
  or public key file is backed up and then removed, a new DSA keypair
  with an empty passphrase is generated via C{ssh-keygen}, and the new
  public key (at most the first 8192 bytes of the file) is passed to
  L{utils.AddAuthorizedKey} together with the authorized keys path.

  @raise errors.OpExecError: if C{ssh-keygen} fails

  """
  private_path, public_path, authorized_path = \
    ssh.GetUserFiles(constants.GANETI_RUNAS)

  # ssh-keygen must not find old key files at the target path
  for key_path in (private_path, public_path):
    if os.path.exists(key_path):
      utils.CreateBackup(key_path)
    utils.RemoveFile(key_path)

  keygen_cmd = ["ssh-keygen", "-t", "dsa",
                "-f", private_path,
                "-q", "-N", ""]
  keygen = utils.RunCmd(keygen_cmd)
  if keygen.failed:
    raise errors.OpExecError("Could not generate ssh keypair, error %s" %
                             keygen.output)

  pub_file = open(public_path, 'r')
  try:
    pub_data = pub_file.read(8192)
  finally:
    pub_file.close()

  utils.AddAuthorizedKey(authorized_path, pub_data)
67 a0c9f010 Michael Hanselmann
68 a0c9f010 Michael Hanselmann
69 d23ef431 Michael Hanselmann
def _InitGanetiServerSetup():
70 a0c9f010 Michael Hanselmann
  """Setup the necessary configuration for the initial node daemon.
71 a0c9f010 Michael Hanselmann

72 a0c9f010 Michael Hanselmann
  This creates the nodepass file containing the shared password for
73 a0c9f010 Michael Hanselmann
  the cluster and also generates the SSL certificate.
74 a0c9f010 Michael Hanselmann

75 a0c9f010 Michael Hanselmann
  """
76 a0c9f010 Michael Hanselmann
  result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
77 a0c9f010 Michael Hanselmann
                         "-days", str(365*5), "-nodes", "-x509",
78 a0c9f010 Michael Hanselmann
                         "-keyout", constants.SSL_CERT_FILE,
79 a0c9f010 Michael Hanselmann
                         "-out", constants.SSL_CERT_FILE, "-batch"])
80 a0c9f010 Michael Hanselmann
  if result.failed:
81 a0c9f010 Michael Hanselmann
    raise errors.OpExecError("could not generate server ssl cert, command"
82 a0c9f010 Michael Hanselmann
                             " %s had exitcode %s and error message %s" %
83 a0c9f010 Michael Hanselmann
                             (result.cmd, result.exit_code, result.output))
84 a0c9f010 Michael Hanselmann
85 a0c9f010 Michael Hanselmann
  os.chmod(constants.SSL_CERT_FILE, 0400)
86 a0c9f010 Michael Hanselmann
87 a0c9f010 Michael Hanselmann
  result = utils.RunCmd([constants.NODE_INITD_SCRIPT, "restart"])
88 a0c9f010 Michael Hanselmann
89 a0c9f010 Michael Hanselmann
  if result.failed:
90 a0c9f010 Michael Hanselmann
    raise errors.OpExecError("Could not start the node daemon, command %s"
91 a0c9f010 Michael Hanselmann
                             " had exitcode %s and error %s" %
92 a0c9f010 Michael Hanselmann
                             (result.cmd, result.exit_code, result.output))
93 a0c9f010 Michael Hanselmann
94 a0c9f010 Michael Hanselmann
95 4342e89b Alexander Schreiber
def InitCluster(cluster_name, mac_prefix, def_bridge,
96 ce735215 Guido Trotter
                master_netdev, file_storage_dir, candidate_pool_size,
97 ce735215 Guido Trotter
                secondary_ip=None, vg_name=None, beparams=None, hvparams=None,
98 02691904 Alexander Schreiber
                enabled_hypervisors=None, default_hypervisor=None):
99 a0c9f010 Michael Hanselmann
  """Initialise the cluster.
100 a0c9f010 Michael Hanselmann

101 ce735215 Guido Trotter
  @type candidate_pool_size: int
102 ce735215 Guido Trotter
  @param candidate_pool_size: master candidate pool size
103 ce735215 Guido Trotter

104 a0c9f010 Michael Hanselmann
  """
105 ce735215 Guido Trotter
  # TODO: complete the docstring
106 a0c9f010 Michael Hanselmann
  if config.ConfigWriter.IsCluster():
107 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Cluster is already initialised")
108 a0c9f010 Michael Hanselmann
109 a0c9f010 Michael Hanselmann
  hostname = utils.HostInfo()
110 a0c9f010 Michael Hanselmann
111 a0c9f010 Michael Hanselmann
  if hostname.ip.startswith("127."):
112 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("This host's IP resolves to the private"
113 a0c9f010 Michael Hanselmann
                               " range (%s). Please fix DNS or %s." %
114 a0c9f010 Michael Hanselmann
                               (hostname.ip, constants.ETC_HOSTS))
115 a0c9f010 Michael Hanselmann
116 caad16e2 Iustin Pop
  if not utils.OwnIpAddress(hostname.ip):
117 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Inconsistency: this host's name resolves"
118 a0c9f010 Michael Hanselmann
                               " to %s,\nbut this ip address does not"
119 a0c9f010 Michael Hanselmann
                               " belong to this host."
120 a0c9f010 Michael Hanselmann
                               " Aborting." % hostname.ip)
121 a0c9f010 Michael Hanselmann
122 a0c9f010 Michael Hanselmann
  clustername = utils.HostInfo(cluster_name)
123 a0c9f010 Michael Hanselmann
124 a0c9f010 Michael Hanselmann
  if utils.TcpPing(clustername.ip, constants.DEFAULT_NODED_PORT,
125 a0c9f010 Michael Hanselmann
                   timeout=5):
126 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Cluster IP already active. Aborting.")
127 a0c9f010 Michael Hanselmann
128 a0c9f010 Michael Hanselmann
  if secondary_ip:
129 a0c9f010 Michael Hanselmann
    if not utils.IsValidIP(secondary_ip):
130 a0c9f010 Michael Hanselmann
      raise errors.OpPrereqError("Invalid secondary ip given")
131 a0c9f010 Michael Hanselmann
    if (secondary_ip != hostname.ip and
132 caad16e2 Iustin Pop
        not utils.OwnIpAddress(secondary_ip)):
133 a0c9f010 Michael Hanselmann
      raise errors.OpPrereqError("You gave %s as secondary IP,"
134 a0c9f010 Michael Hanselmann
                                 " but it does not belong to this host." %
135 a0c9f010 Michael Hanselmann
                                 secondary_ip)
136 b9eeeb02 Michael Hanselmann
  else:
137 b9eeeb02 Michael Hanselmann
    secondary_ip = hostname.ip
138 a0c9f010 Michael Hanselmann
139 a0c9f010 Michael Hanselmann
  if vg_name is not None:
140 a0c9f010 Michael Hanselmann
    # Check if volume group is valid
141 a0c9f010 Michael Hanselmann
    vgstatus = utils.CheckVolumeGroupSize(utils.ListVolumeGroups(), vg_name,
142 a0c9f010 Michael Hanselmann
                                          constants.MIN_VG_SIZE)
143 a0c9f010 Michael Hanselmann
    if vgstatus:
144 a0c9f010 Michael Hanselmann
      raise errors.OpPrereqError("Error: %s\nspecify --no-lvm-storage if"
145 a0c9f010 Michael Hanselmann
                                 " you are not using lvm" % vgstatus)
146 a0c9f010 Michael Hanselmann
147 a0c9f010 Michael Hanselmann
  file_storage_dir = os.path.normpath(file_storage_dir)
148 a0c9f010 Michael Hanselmann
149 a0c9f010 Michael Hanselmann
  if not os.path.isabs(file_storage_dir):
150 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("The file storage directory you passed is"
151 a0c9f010 Michael Hanselmann
                               " not an absolute path.")
152 a0c9f010 Michael Hanselmann
153 a0c9f010 Michael Hanselmann
  if not os.path.exists(file_storage_dir):
154 a0c9f010 Michael Hanselmann
    try:
155 a0c9f010 Michael Hanselmann
      os.makedirs(file_storage_dir, 0750)
156 a0c9f010 Michael Hanselmann
    except OSError, err:
157 a0c9f010 Michael Hanselmann
      raise errors.OpPrereqError("Cannot create file storage directory"
158 a0c9f010 Michael Hanselmann
                                 " '%s': %s" %
159 a0c9f010 Michael Hanselmann
                                 (file_storage_dir, err))
160 a0c9f010 Michael Hanselmann
161 a0c9f010 Michael Hanselmann
  if not os.path.isdir(file_storage_dir):
162 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("The file storage directory '%s' is not"
163 a0c9f010 Michael Hanselmann
                               " a directory." % file_storage_dir)
164 a0c9f010 Michael Hanselmann
165 a0c9f010 Michael Hanselmann
  if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$", mac_prefix):
166 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Invalid mac prefix given '%s'" % mac_prefix)
167 a0c9f010 Michael Hanselmann
168 a0c9f010 Michael Hanselmann
  result = utils.RunCmd(["ip", "link", "show", "dev", master_netdev])
169 a0c9f010 Michael Hanselmann
  if result.failed:
170 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" %
171 a0c9f010 Michael Hanselmann
                               (master_netdev,
172 a0c9f010 Michael Hanselmann
                                result.output.strip()))
173 a0c9f010 Michael Hanselmann
174 a0c9f010 Michael Hanselmann
  if not (os.path.isfile(constants.NODE_INITD_SCRIPT) and
175 a0c9f010 Michael Hanselmann
          os.access(constants.NODE_INITD_SCRIPT, os.X_OK)):
176 a0c9f010 Michael Hanselmann
    raise errors.OpPrereqError("Init.d script '%s' missing or not"
177 a0c9f010 Michael Hanselmann
                               " executable." % constants.NODE_INITD_SCRIPT)
178 a0c9f010 Michael Hanselmann
179 d4b72030 Guido Trotter
  utils.CheckBEParams(beparams)
180 d4b72030 Guido Trotter
181 a0c9f010 Michael Hanselmann
  # set up the inter-node password and certificate
182 d23ef431 Michael Hanselmann
  _InitGanetiServerSetup()
183 a0c9f010 Michael Hanselmann
184 a0c9f010 Michael Hanselmann
  # set up ssh config and /etc/hosts
185 a0c9f010 Michael Hanselmann
  f = open(constants.SSH_HOST_RSA_PUB, 'r')
186 a0c9f010 Michael Hanselmann
  try:
187 a0c9f010 Michael Hanselmann
    sshline = f.read()
188 a0c9f010 Michael Hanselmann
  finally:
189 a0c9f010 Michael Hanselmann
    f.close()
190 a0c9f010 Michael Hanselmann
  sshkey = sshline.split(" ")[1]
191 a0c9f010 Michael Hanselmann
192 a0c9f010 Michael Hanselmann
  utils.AddHostToEtcHosts(hostname.name)
193 531baf8e Iustin Pop
  _InitSSHSetup()
194 a0c9f010 Michael Hanselmann
195 a0c9f010 Michael Hanselmann
  # init of cluster config file
196 b9eeeb02 Michael Hanselmann
  cluster_config = objects.Cluster(
197 b9eeeb02 Michael Hanselmann
    serial_no=1,
198 b9eeeb02 Michael Hanselmann
    rsahostkeypub=sshkey,
199 b9eeeb02 Michael Hanselmann
    highest_used_port=(constants.FIRST_DRBD_PORT - 1),
200 b9eeeb02 Michael Hanselmann
    mac_prefix=mac_prefix,
201 b9eeeb02 Michael Hanselmann
    volume_group_name=vg_name,
202 b9eeeb02 Michael Hanselmann
    default_bridge=def_bridge,
203 b9eeeb02 Michael Hanselmann
    tcpudp_port_pool=set(),
204 f6bd6e98 Michael Hanselmann
    master_node=hostname.name,
205 f6bd6e98 Michael Hanselmann
    master_ip=clustername.ip,
206 f6bd6e98 Michael Hanselmann
    master_netdev=master_netdev,
207 f6bd6e98 Michael Hanselmann
    cluster_name=clustername.name,
208 f6bd6e98 Michael Hanselmann
    file_storage_dir=file_storage_dir,
209 ea3a925f Alexander Schreiber
    enabled_hypervisors=enabled_hypervisors,
210 02691904 Alexander Schreiber
    default_hypervisor=default_hypervisor,
211 ea3a925f Alexander Schreiber
    beparams={constants.BEGR_DEFAULT: beparams},
212 ea3a925f Alexander Schreiber
    hvparams=hvparams,
213 ce735215 Guido Trotter
    candidate_pool_size=candidate_pool_size,
214 b9eeeb02 Michael Hanselmann
    )
215 b9eeeb02 Michael Hanselmann
  master_node_config = objects.Node(name=hostname.name,
216 b9eeeb02 Michael Hanselmann
                                    primary_ip=hostname.ip,
217 b9222f32 Guido Trotter
                                    secondary_ip=secondary_ip,
218 c044f32c Guido Trotter
                                    serial_no=1,
219 c044f32c Guido Trotter
                                    master_candidate=True,
220 fc0fe88c Iustin Pop
                                    offline=False,
221 c044f32c Guido Trotter
                                    )
222 a0c9f010 Michael Hanselmann
223 05cc153f Guido Trotter
  sscfg = InitConfig(constants.CONFIG_VERSION,
224 05cc153f Guido Trotter
                     cluster_config, master_node_config)
225 05cc153f Guido Trotter
  ssh.WriteKnownHostsFile(sscfg, constants.SSH_KNOWN_HOSTS_FILE)
226 05cc153f Guido Trotter
  cfg = config.ConfigWriter()
227 05cc153f Guido Trotter
  cfg.Update(cfg.GetClusterInfo())
228 827f753e Guido Trotter
229 b3f1cf6f Iustin Pop
  # start the master ip
230 b3f1cf6f Iustin Pop
  # TODO: Review rpc call from bootstrap
231 fda5f19f Michael Hanselmann
  rpc.RpcRunner.call_node_start_master(hostname.name, True)
232 b3f1cf6f Iustin Pop
233 b1b6ea87 Iustin Pop
234 02f99608 Oleksiy Mishchenko
def InitConfig(version, cluster_config, master_node_config,
               cfg_file=constants.CLUSTER_CONF_FILE):
  """Build and save the first configuration of a new cluster.

  The configuration holds the given cluster object, exactly one node
  (the given master node, keyed by its name) and no instances.

  @type version: int
  @param version: configuration version
  @type cluster_config: L{objects.Cluster}
  @param cluster_config: cluster configuration
  @type master_node_config: L{objects.Node}
  @param master_node_config: master node configuration
  @type cfg_file: string
  @param cfg_file: configuration file path

  @rtype: L{ssconf.SimpleConfigWriter}
  @return: the config writer instance, after it has been saved

  """
  initial_data = objects.ConfigData(
    version=version,
    cluster=cluster_config,
    nodes={master_node_config.name: master_node_config},
    instances={},
    serial_no=1)

  writer = ssconf.SimpleConfigWriter.FromDict(initial_data.ToDict(),
                                              cfg_file)
  writer.Save()
  return writer
267 02f99608 Oleksiy Mishchenko
268 02f99608 Oleksiy Mishchenko
269 140aa4a8 Iustin Pop
def FinalizeClusterDestroy(master):
  """Run the final steps of a cluster destroy on the master node.

  Two rpc calls are sent to the given node: first one to stop the
  master role, then one to make the node leave the cluster. A failure
  of either call is only logged as a warning.

  @param master: the name of the master node

  """
  stop_result = rpc.RpcRunner.call_node_stop_master(master, True)
  if stop_result.failed or not stop_result.data:
    logging.warning("Could not disable the master role")

  leave_result = rpc.RpcRunner.call_node_leave_cluster(master)
  if leave_result.failed or not leave_result.data:
    logging.warning("Could not shutdown the node daemon and cleanup the node")
282 140aa4a8 Iustin Pop
283 140aa4a8 Iustin Pop
284 87622829 Iustin Pop
def SetupNodeDaemon(cluster_name, node, ssh_key_check):
  """Prepare a new node so that its node daemon can be reached.

  The local SSL certificate file is written to the same path on the
  remote node through an ssh session as root, and the remote node
  daemon init script is then called with C{restart}.

  @param cluster_name: the cluster name
  @param node: the name of the new node
  @param ssh_key_check: whether to do a strict key check
  @raise errors.OpExecError: if the certificate cannot be embedded
      safely in the remote command or if the remote command fails

  """
  runner = ssh.SshRunner(cluster_name)
  cert_pem = utils.ReadFile(constants.SSL_CERT_FILE)

  # The certificate is sent as a shell here-document terminated by
  # '!EOF.'; base64 PEM data contains neither '!' nor '.', so a line
  # starting with the terminator can only mean a broken certificate
  if re.search('^!EOF\.', cert_pem, re.MULTILINE):
    raise errors.OpExecError("invalid PEM encoding in the SSL certificate")
  # the terminator has to start on a line of its own
  if not cert_pem.endswith("\n"):
    raise errors.OpExecError("PEM must end with newline")

  # every value substituted below is either a constant or has been
  # validated above
  remote_cmd = ("umask 077 && "
                "cat > '%s' << '!EOF.' && \n"
                "%s!EOF.\n%s restart" %
                (constants.SSL_CERT_FILE, cert_pem,
                 constants.NODE_INITD_SCRIPT))

  run_result = runner.Run(node, 'root', remote_cmd, batch=False,
                          ask_key=ssh_key_check,
                          use_cluster_key=False,
                          strict_host_check=ssh_key_check)
  if run_result.failed:
    raise errors.OpExecError("Remote command on node %s, error: %s,"
                             " output: %s" %
                             (node, run_result.fail_reason,
                              run_result.output))
325 827f753e Guido Trotter
326 b1b6ea87 Iustin Pop
327 b1b6ea87 Iustin Pop
def MasterFailover():
  """Failover the master node.

  This checks that we are not already the master, and will cause the
  current master to cease being master, and the non-master to become
  new master.

  Before anything is changed, the function verifies that this node is
  a master candidate and that the other nodes agree on who the current
  master is (see L{GatherMasterVotes}).

  @rtype: int
  @return: 0 if the master role was started on this node, 1 if the
      start of the master role failed
  @raise errors.OpPrereqError: if one of the checks fails

  """
  sstore = ssconf.SimpleStore()

  old_master, new_master = ssconf.GetMasterAndMyself(sstore)
  node_list = sstore.GetNodeList()
  mc_list = sstore.GetMasterCandidates()

  if old_master == new_master:
    raise errors.OpPrereqError("This command must be run on the node"
                               " where you want the new master to be."
                               " %s is already the master" %
                               old_master)

  if new_master not in mc_list:
    mc_no_master = [name for name in mc_list if name != old_master]
    raise errors.OpPrereqError("This node is not among the nodes marked"
                               " as master candidates. Only these nodes"
                               " can become masters. Current list of"
                               " master candidates is:\n"
                               "%s" % ('\n'.join(mc_no_master)))

  vote_list = GatherMasterVotes(node_list)

  if vote_list:
    # the first entry is the one with the most votes; a None name
    # stands for nodes which did not give a valid answer
    voted_master = vote_list[0][0]
    if voted_master is None:
      raise errors.OpPrereqError("Cluster is inconsistent, most nodes did not"
                                 " respond.")
    elif voted_master != old_master:
      raise errors.OpPrereqError("I have wrong configuration, I believe the"
                                 " master is %s but the other nodes voted for"
                                 " %s. Please resync the configuration of"
                                 " this node." % (old_master, voted_master))
  # end checks

  rcode = 0

  logging.info("Setting master to %s, old master: %s", new_master, old_master)

  result = rpc.RpcRunner.call_node_stop_master(old_master, True)
  if result.failed or not result.data:
    # not fatal: the failover continues even if the old master could
    # not be stopped
    logging.error("Could not disable the master role on the old master"
                  " %s, please disable manually", old_master)

  # Here we have a phase where no master should be running

  # instantiate a real config writer, as we now know we have the
  # configuration data
  cfg = config.ConfigWriter()

  cluster_info = cfg.GetClusterInfo()
  cluster_info.master_node = new_master
  # this will also regenerate the ssconf files, since we updated the
  # cluster info
  cfg.Update(cluster_info)

  result = rpc.RpcRunner.call_node_start_master(new_master, True)
  if result.failed or not result.data:
    logging.error("Could not start the master role on the new master"
                  " %s, please check", new_master)
    rcode = 1

  return rcode
397 d7cdb55d Iustin Pop
398 d7cdb55d Iustin Pop
399 d7cdb55d Iustin Pop
def GatherMasterVotes(node_list):
  """Ask the other nodes who they think the master is.

  The result is a list of (node name, number of votes) pairs, sorted
  in descending order by number of votes and then by name. Nodes whose
  rpc call failed or returned unusable data are counted under the name
  None.

  The local node is not queried: it is removed from the list, since it
  reads the same configuration source as this code and would therefore
  (bugs aside) always vote for the master we already know about.

  Note that the sum of all votes equals the number of queried nodes,
  while the number of entries in the result depends on how many
  different answers were given.

  @type node_list: list
  @param node_list: the list of nodes to query for master info; the
      current node will be removed from it (in place) if present
  @rtype: list
  @return: list of (node, votes); empty if there is no node to query

  """
  own_name = utils.HostInfo().name
  if own_name in node_list:
    node_list.remove(own_name)

  if not node_list:
    # nobody else to ask (possibly after removing ourselves)
    return []

  results = rpc.RpcRunner.call_master_info(node_list)
  if not isinstance(results, dict):
    # this should not happen (unless internal error in rpc)
    logging.critical("Can't complete rpc call, aborting master startup")
    return [(None, len(node_list))]

  tally = {}
  for node_name in results:
    reply = results[node_name]
    payload = reply.data
    if (reply.failed or not isinstance(payload, (tuple, list)) or
        len(payload) < 3):
      # the rpc layer should have already logged the error; count the
      # node as an invalid vote
      voted_for = None
    else:
      # the third field of the answer is used as the master's name
      voted_for = payload[2]
    tally[voted_for] = tally.get(voted_for, 0) + 1

  # sort first on number of votes then on name, since we want None
  # sorted later if half of the nodes are not responding and the other
  # half all vote for the same master
  return sorted(tally.items(),
                key=lambda entry: (entry[1], entry[0]),
                reverse=True)