Statistics
| Branch: | Tag: | Revision:

root / lib / bootstrap.py @ 42a999d1

History | View | Annotate | Download (9.9 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Functions to bootstrap a new cluster.
23

24
"""
25

    
26
import os
27
import os.path
28
import sha
29
import re
30

    
31
from ganeti import rpc
32
from ganeti import ssh
33
from ganeti import utils
34
from ganeti import errors
35
from ganeti import config
36
from ganeti import constants
37
from ganeti import ssconf
38

    
39

    
40
def _InitSSHSetup(node):
  """Create a fresh SSH keypair for the Ganeti user and authorize it.

  Any existing private/public key file is backed up and removed, a new
  passphrase-less dsa keypair is generated with ssh-keygen, and the new
  public key is added to the user's authorized keys file.

  Args:
    node: the name of this host as a fqdn (not used by the current
          implementation)

  Raises:
    errors.OpExecError: if ssh-keygen fails

  """
  priv_key, pub_key, auth_keys = ssh.GetUserFiles(constants.GANETI_RUNAS)

  # keep a backup of any previous key before removing it
  for keyfile in (priv_key, pub_key):
    if os.path.exists(keyfile):
      utils.CreateBackup(keyfile)
    utils.RemoveFile(keyfile)

  keygen_cmd = ["ssh-keygen", "-t", "dsa", "-f", priv_key, "-q", "-N", ""]
  keygen = utils.RunCmd(keygen_cmd)
  if keygen.failed:
    raise errors.OpExecError("Could not generate ssh keypair, error %s" %
                             keygen.output)

  pub_file = open(pub_key, 'r')
  try:
    pub_data = pub_file.read(8192)
  finally:
    pub_file.close()

  utils.AddAuthorizedKey(auth_keys, pub_data)
70

    
71

    
72
def _InitGanetiServerSetup(ss):
73
  """Setup the necessary configuration for the initial node daemon.
74

75
  This creates the nodepass file containing the shared password for
76
  the cluster and also generates the SSL certificate.
77

78
  Args:
79
    ss: A WritableSimpleStore
80

81
  """
82
  # Create pseudo random password
83
  randpass = sha.new(os.urandom(64)).hexdigest()
84
  # and write it into sstore
85
  ss.SetKey(ss.SS_NODED_PASS, randpass)
86

    
87
  result = utils.RunCmd(["openssl", "req", "-new", "-newkey", "rsa:1024",
88
                         "-days", str(365*5), "-nodes", "-x509",
89
                         "-keyout", constants.SSL_CERT_FILE,
90
                         "-out", constants.SSL_CERT_FILE, "-batch"])
91
  if result.failed:
92
    raise errors.OpExecError("could not generate server ssl cert, command"
93
                             " %s had exitcode %s and error message %s" %
94
                             (result.cmd, result.exit_code, result.output))
95

    
96
  os.chmod(constants.SSL_CERT_FILE, 0400)
97

    
98
  result = utils.RunCmd([constants.NODE_INITD_SCRIPT, "restart"])
99

    
100
  if result.failed:
101
    raise errors.OpExecError("Could not start the node daemon, command %s"
102
                             " had exitcode %s and error %s" %
103
                             (result.cmd, result.exit_code, result.output))
104

    
105

    
106
def InitCluster(cluster_name, hypervisor_type, mac_prefix, def_bridge,
107
                master_netdev, file_storage_dir,
108
                secondary_ip=None,
109
                vg_name=None):
110
  """Initialise the cluster.
111

112
  """
113
  if config.ConfigWriter.IsCluster():
114
    raise errors.OpPrereqError("Cluster is already initialised")
115

    
116
  if hypervisor_type == constants.HT_XEN_HVM31:
117
    if not os.path.exists(constants.VNC_PASSWORD_FILE):
118
      raise errors.OpPrereqError("Please prepare the cluster VNC"
119
                                 "password file %s" %
120
                                 constants.VNC_PASSWORD_FILE)
121

    
122
  hostname = utils.HostInfo()
123

    
124
  if hostname.ip.startswith("127."):
125
    raise errors.OpPrereqError("This host's IP resolves to the private"
126
                               " range (%s). Please fix DNS or %s." %
127
                               (hostname.ip, constants.ETC_HOSTS))
128

    
129
  if not utils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT,
130
                       source=constants.LOCALHOST_IP_ADDRESS):
131
    raise errors.OpPrereqError("Inconsistency: this host's name resolves"
132
                               " to %s,\nbut this ip address does not"
133
                               " belong to this host."
134
                               " Aborting." % hostname.ip)
135

    
136
  clustername = utils.HostInfo(cluster_name)
137

    
138
  if utils.TcpPing(clustername.ip, constants.DEFAULT_NODED_PORT,
139
                   timeout=5):
140
    raise errors.OpPrereqError("Cluster IP already active. Aborting.")
141

    
142
  if secondary_ip:
143
    if not utils.IsValidIP(secondary_ip):
144
      raise errors.OpPrereqError("Invalid secondary ip given")
145
    if (secondary_ip != hostname.ip and
146
        (not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
147
                           source=constants.LOCALHOST_IP_ADDRESS))):
148
      raise errors.OpPrereqError("You gave %s as secondary IP,"
149
                                 " but it does not belong to this host." %
150
                                 secondary_ip)
151

    
152
  if vg_name is not None:
153
    # Check if volume group is valid
154
    vgstatus = utils.CheckVolumeGroupSize(utils.ListVolumeGroups(), vg_name,
155
                                          constants.MIN_VG_SIZE)
156
    if vgstatus:
157
      raise errors.OpPrereqError("Error: %s\nspecify --no-lvm-storage if"
158
                                 " you are not using lvm" % vgstatus)
159

    
160
  file_storage_dir = os.path.normpath(file_storage_dir)
161

    
162
  if not os.path.isabs(file_storage_dir):
163
    raise errors.OpPrereqError("The file storage directory you passed is"
164
                               " not an absolute path.")
165

    
166
  if not os.path.exists(file_storage_dir):
167
    try:
168
      os.makedirs(file_storage_dir, 0750)
169
    except OSError, err:
170
      raise errors.OpPrereqError("Cannot create file storage directory"
171
                                 " '%s': %s" %
172
                                 (file_storage_dir, err))
173

    
174
  if not os.path.isdir(file_storage_dir):
175
    raise errors.OpPrereqError("The file storage directory '%s' is not"
176
                               " a directory." % file_storage_dir)
177

    
178
  if not re.match("^[0-9a-z]{2}:[0-9a-z]{2}:[0-9a-z]{2}$", mac_prefix):
179
    raise errors.OpPrereqError("Invalid mac prefix given '%s'" % mac_prefix)
180

    
181
  if hypervisor_type not in constants.HYPER_TYPES:
182
    raise errors.OpPrereqError("Invalid hypervisor type given '%s'" %
183
                               hypervisor_type)
184

    
185
  result = utils.RunCmd(["ip", "link", "show", "dev", master_netdev])
186
  if result.failed:
187
    raise errors.OpPrereqError("Invalid master netdev given (%s): '%s'" %
188
                               (master_netdev,
189
                                result.output.strip()))
190

    
191
  if not (os.path.isfile(constants.NODE_INITD_SCRIPT) and
192
          os.access(constants.NODE_INITD_SCRIPT, os.X_OK)):
193
    raise errors.OpPrereqError("Init.d script '%s' missing or not"
194
                               " executable." % constants.NODE_INITD_SCRIPT)
195

    
196
  # set up the simple store
197
  ss = ssconf.WritableSimpleStore()
198
  ss.SetKey(ss.SS_HYPERVISOR, hypervisor_type)
199
  ss.SetKey(ss.SS_MASTER_NODE, hostname.name)
200
  ss.SetKey(ss.SS_MASTER_IP, clustername.ip)
201
  ss.SetKey(ss.SS_MASTER_NETDEV, master_netdev)
202
  ss.SetKey(ss.SS_CLUSTER_NAME, clustername.name)
203
  ss.SetKey(ss.SS_FILE_STORAGE_DIR, file_storage_dir)
204
  ss.SetKey(ss.SS_CONFIG_VERSION, constants.CONFIG_VERSION)
205

    
206
  # set up the inter-node password and certificate
207
  _InitGanetiServerSetup(ss)
208

    
209
  # start the master ip
210
  # TODO: Review rpc call from bootstrap
211
  rpc.call_node_start_master(hostname.name)
212

    
213
  # set up ssh config and /etc/hosts
214
  f = open(constants.SSH_HOST_RSA_PUB, 'r')
215
  try:
216
    sshline = f.read()
217
  finally:
218
    f.close()
219
  sshkey = sshline.split(" ")[1]
220

    
221
  utils.AddHostToEtcHosts(hostname.name)
222
  _InitSSHSetup(hostname.name)
223

    
224
  # init of cluster config file
225
  cfg = config.ConfigWriter()
226
  cfg.InitConfig(hostname.name, hostname.ip, secondary_ip, sshkey,
227
                 mac_prefix, vg_name, def_bridge)
228

    
229
  ssh.WriteKnownHostsFile(cfg, ss, constants.SSH_KNOWN_HOSTS_FILE)
230

    
231
def SetupNodeDaemon(node):
  """Prepare a new node so that it can be added to the cluster.

  This function must be called before the actual opcode. It connects to
  the remote node via ssh, writes the node daemon password and the SSL
  certificate there and restarts the node daemon through its init.d
  script, allowing the master to do the rest via normal rpc calls.

  Args:
    node: fully qualified domain name for the new node

  Returns:
    0 on success

  Raises:
    errors.OpExecError: if the local password or certificate look
                        invalid, or if the remote command fails

  """
  store = ssconf.SimpleStore()
  runner = ssh.SshRunner(store)

  noded_pass = store.GetNodeDaemonPassword()
  if re.match('^[a-zA-Z0-9.]{1,64}$', noded_pass) is None:
    raise errors.OpExecError("ganeti password corruption detected")

  cert_file = open(constants.SSL_CERT_FILE)
  try:
    cert_pem = cert_file.read(8192)
  finally:
    cert_file.close()

  # neither '!' nor '.' are valid characters of the base64 pem encoding;
  # a certificate containing the here-document terminator is therefore
  # invalid, and rejecting it keeps the shell sequence below parseable
  if re.search(r'^!EOF\.', cert_pem, re.MULTILINE) is not None:
    raise errors.OpExecError("invalid PEM encoding in the SSL certificate")
  if not cert_pem.endswith("\n"):
    raise errors.OpExecError("PEM must end with newline")

  # write password and certificate on the remote side and restart the
  # node daemon; everything substituted into the command is either a
  # constant or has been checked above
  cmd_template = ("umask 077 && "
                  "echo '%s' > '%s' && "
                  "cat > '%s' << '!EOF.' && \n"
                  "%s!EOF.\n%s restart")
  cmd_args = (noded_pass,
              store.KeyToFilename(store.SS_NODED_PASS),
              constants.SSL_CERT_FILE,
              cert_pem,
              constants.NODE_INITD_SCRIPT)
  remote_cmd = cmd_template % cmd_args

  outcome = runner.Run(node, 'root', remote_cmd, batch=False, ask_key=True)
  if outcome.failed:
    raise errors.OpExecError("Remote command on node %s, error: %s,"
                             " output: %s" %
                             (node, outcome.fail_reason, outcome.output))

  return 0
280