root / lib / cmdlib / backup.py @ 4869595d
History | View | Annotate | Download (16.4 kB)
1 |
#
|
---|---|
2 |
#
|
3 |
|
4 |
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
|
5 |
#
|
6 |
# This program is free software; you can redistribute it and/or modify
|
7 |
# it under the terms of the GNU General Public License as published by
|
8 |
# the Free Software Foundation; either version 2 of the License, or
|
9 |
# (at your option) any later version.
|
10 |
#
|
11 |
# This program is distributed in the hope that it will be useful, but
|
12 |
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
# General Public License for more details.
|
15 |
#
|
16 |
# You should have received a copy of the GNU General Public License
|
17 |
# along with this program; if not, write to the Free Software
|
18 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
19 |
# 02110-1301, USA.
|
20 |
|
21 |
|
22 |
"""Logical units dealing with backup operations."""
|
23 |
|
24 |
import OpenSSL |
25 |
import logging |
26 |
|
27 |
from ganeti import compat |
28 |
from ganeti import constants |
29 |
from ganeti import errors |
30 |
from ganeti import locking |
31 |
from ganeti import masterd |
32 |
from ganeti import utils |
33 |
|
34 |
from ganeti.cmdlib.base import NoHooksLU, LogicalUnit |
35 |
from ganeti.cmdlib.common import CheckNodeOnline, \ |
36 |
ExpandNodeUuidAndName
|
37 |
from ganeti.cmdlib.instance_storage import StartInstanceDisks, \ |
38 |
ShutdownInstanceDisks
|
39 |
from ganeti.cmdlib.instance_utils import GetClusterDomainSecret, \ |
40 |
BuildInstanceHookEnvByObject, CheckNodeNotDrained, RemoveInstance |
41 |
|
42 |
|
43 |
class LUBackupPrepare(NoHooksLU):
  """Logical unit gathering the data needed before exporting an instance.

  For remote exports this creates an X509 key/certificate pair on the
  instance's primary node and returns it, signed with the cluster domain
  secret, together with a handshake value. Other modes need no preparation.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks; delegates to the inherited instance helper.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Look up the instance, check its primary node and load the secret.

    """
    instance = self.cfg.GetInstanceInfoByName(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    CheckNodeOnline(self, self.instance.primary_node)

    # Kept on the LU so that Exec can sign the data it hands out
    self._cds = GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Create and return the export credentials.

    Returns None unless the opcode asks for a remote export; in that case a
    dictionary with the keys "handshake", "x509_key_name" and "x509_ca" is
    returned.

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      # Nothing to prepare for other export modes
      return None

    salt = utils.GenerateSecret(8)

    node_name = self.cfg.GetNodeName(self.instance.primary_node)
    feedback_fn("Generating X509 certificate on %s" % node_name)

    result = self.rpc.call_x509_cert_create(self.instance.primary_node,
                                            constants.RIE_CERT_VALIDITY)
    result.Raise("Can't create X509 key and certificate on %s" %
                 self.cfg.GetNodeName(result.node))

    (name, cert_pem) = result.payload

    pem_type = OpenSSL.crypto.FILETYPE_PEM
    cert = OpenSSL.crypto.load_certificate(pem_type, cert_pem)

    # Computed in the same order as the entries of the returned dictionary
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
90 |
|
91 |
|
92 |
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  Two export modes are handled. In local mode (constants.EXPORT_MODE_LOCAL)
  the opcode's target node is a node of this cluster. In remote mode
  (constants.EXPORT_MODE_REMOTE) the opcode's target_node carries one
  destination entry per instance disk, and an X509 key name plus a signed
  destination CA must be supplied.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Copies the X509 key name and destination CA from the opcode onto the LU
    and, for remote exports, raises errors.OpPrereqError if either of them
    is missing.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declare the locks needed by the export.

    The instance is handled by the inherited _ExpandAndLockInstance helper.
    Local exports additionally resolve the target node and request all node
    locks plus a shared node allocation lock.

    """
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      (self.op.target_node_uuid, self.op.target_node) = \
        ExpandNodeUuidAndName(self.cfg, self.op.target_node_uuid,
                              self.op.target_node)
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

      # Allocations should be stopped while this LU runs with node locks, but
      # it doesn't have to be exclusive
      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration; intentionally does nothing."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    Returns a dictionary holding the export-specific variables set below,
    updated with the result of BuildInstanceHookEnvByObject for the
    instance.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    # Instance variables are applied last, so they win on a key collision
    env.update(BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The list holds the master node and the instance's primary node, plus
    the target node for local exports. The same list is returned twice
    (presumably once for the pre-phase and once for the post-phase hooks;
    confirm against the LogicalUnit base class).

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node_uuid)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    For local exports the destination node must be online and not drained.
    For remote exports the number of destination entries must match the
    number of instance disks, and the X509 key name, the destination CA and
    every per-disk destination entry are verified using the cluster domain
    secret. Instances with file-based disks are rejected in every mode.

    Validation failures are reported as errors.OpPrereqError; an unknown
    export mode raises errors.ProgrammerError.

    """
    self.instance = self.cfg.GetInstanceInfoByName(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    CheckNodeOnline(self, self.instance.primary_node)

    # An instance marked as up may only be removed if the export also shuts
    # it down first
    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node_uuid)
      assert self.dst_node is not None

      CheckNodeOnline(self, self.dst_node.uuid)
      CheckNodeNotDrained(self, self.dst_node.uuid)

      # Remote-only attributes are reset for local exports
      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      # In remote mode target_node holds one destination entry per disk
      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node),
                                    self.op.instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = GetClusterDomainSecret()

      # Check X509 key name
      try:
        # Expected to be a (name, HMAC digest, salt) triple
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type in constants.DTS_FILEBASED:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    Only valid for non-remote exports, as the destination node is needed.
    Nodes whose export list could not be queried are skipped silently;
    failed removals are reported through LogWarning and do not abort the
    cleanup.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    node_uuids = self.cfg.GetNodeList()
    # The export that was just written to the destination node must survive
    node_uuids.remove(self.dst_node.uuid)

    # On one-node clusters the node list will be empty after the removal;
    # if we proceeded, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if node_uuids:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(node_uuids)
      for node_uuid in exportlist:
        if exportlist[node_uuid].fail_msg:
          continue
        if iname in exportlist[node_uuid].payload:
          msg = self.rpc.call_export_remove(node_uuid, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname,
                            self.cfg.GetNodeName(node_uuid), msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    The steps are, in order: optionally shut down the instance, activate
    its disks if they are not marked active, create the snapshots,
    optionally restart the instance, run the local or remote export, clean
    up the export helper, deactivate disks that were activated here, and
    finally remove the instance and/or older local exports if requested.

    Returns the tuple (fin_resu, dresults) as produced by the export
    helper; dresults holds one boolean per instance disk. Raises
    errors.OpExecError if the instance cannot be restarted or if the
    finalization or any disk export failed.

    """
    assert self.op.mode in constants.EXPORT_MODES

    src_node_uuid = self.instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % self.instance.name)
      result = self.rpc.call_instance_shutdown(src_node_uuid, self.instance,
                                               self.op.shutdown_timeout,
                                               self.op.reason)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (self.instance.name,
                                 self.cfg.GetNodeName(src_node_uuid)))

    # Remembered so that only disks activated here are deactivated again
    activate_disks = not self.instance.disks_active

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % self.instance.name)
      StartInstanceDisks(self, self.instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     self.instance)

      helper.CreateSnapshots()
      try:
        # The snapshots exist now, so an instance that was only shut down
        # for the export (and is not going to be removed) is started again
        # before the data transfer
        if (self.op.shutdown and
            self.instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % self.instance.name)
          result = self.rpc.call_instance_start(src_node_uuid,
                                                (self.instance, None, None),
                                                False, self.op.reason)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            ShutdownInstanceDisks(self, self.instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node,
                                                    self.op.compress)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          # The CA verified in CheckPrereq is passed on in PEM format
          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     self.op.compress,
                                                     timeouts)
      finally:
        # Runs whether or not the export succeeded
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(self.instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % self.instance.name)
        ShutdownInstanceDisks(self, self.instance)

    if not (compat.all(dresults) and fin_resu):
      # Collect a description of everything that went wrong
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % self.instance.name)
      RemoveInstance(self, feedback_fn, self.instance,
                     self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
|
394 |
|
395 |
|
396 |
class LUBackupRemove(NoHooksLU):
  """Logical unit deleting every export stored for a given instance name.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks and a shared node allocation lock.

    """
    locks = {}

    # We need all nodes to be locked in order for RemoveExport to work, but
    # we don't need to lock the instance itself, as nothing will happen to it
    # (and we can remove exports also for a removed instance)
    locks[locking.LEVEL_NODE] = locking.ALL_SET

    # Removing backups is quick, so blocking allocations is justified
    locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    self.needed_locks = locks

    # Allocations should be stopped while this LU runs with node locks, but it
    # doesn't have to be exclusive
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1

  def Exec(self, feedback_fn):
    """Remove the instance's exports from every locked node.

    Nodes that cannot be queried are reported through LogWarning and
    skipped; a failed removal is only written to the log. If the instance
    name could not be expanded and no export was found, a hint about using
    the fully qualified name is sent through feedback_fn.

    """
    requested_name = self.op.instance_name
    (_, inst_name) = self.cfg.ExpandInstanceName(requested_name)

    # If the instance was not found we'll try with the name that was passed
    # in. This will only work if it was an FQDN, though.
    name_unresolved = not inst_name
    if name_unresolved:
      inst_name = requested_name

    node_uuids = self.owned_locks(locking.LEVEL_NODE)
    exports_by_node = self.rpc.call_export_list(node_uuids)

    found = False
    for node_uuid in exports_by_node:
      node_result = exports_by_node[node_uuid]

      query_error = node_result.fail_msg
      if query_error:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        self.cfg.GetNodeName(node_uuid), query_error)
        continue

      if inst_name not in node_result.payload:
        continue

      found = True
      remove_error = \
        self.rpc.call_export_remove(node_uuid, inst_name).fail_msg
      if remove_error:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", inst_name,
                      self.cfg.GetNodeName(node_uuid), remove_error)

    if name_unresolved and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
|