root / lib / cmdlib / backup.py @ 7ecd5e87
History | View | Annotate | Download (18 kB)
1 |
#
|
---|---|
2 |
#
|
3 |
|
4 |
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
|
5 |
#
|
6 |
# This program is free software; you can redistribute it and/or modify
|
7 |
# it under the terms of the GNU General Public License as published by
|
8 |
# the Free Software Foundation; either version 2 of the License, or
|
9 |
# (at your option) any later version.
|
10 |
#
|
11 |
# This program is distributed in the hope that it will be useful, but
|
12 |
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 |
# General Public License for more details.
|
15 |
#
|
16 |
# You should have received a copy of the GNU General Public License
|
17 |
# along with this program; if not, write to the Free Software
|
18 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
19 |
# 02110-1301, USA.
|
20 |
|
21 |
|
22 |
"""Logical units dealing with backup operations."""
|
23 |
|
24 |
import OpenSSL |
25 |
import logging |
26 |
|
27 |
from ganeti import compat |
28 |
from ganeti import constants |
29 |
from ganeti import errors |
30 |
from ganeti import locking |
31 |
from ganeti import masterd |
32 |
from ganeti import qlang |
33 |
from ganeti import query |
34 |
from ganeti import utils |
35 |
|
36 |
from ganeti.cmdlib.base import _QueryBase, NoHooksLU, LogicalUnit |
37 |
from ganeti.cmdlib.common import _GetWantedNodes, _ShareAll, \ |
38 |
_CheckNodeOnline, _ExpandNodeName |
39 |
from ganeti.cmdlib.instance_utils import _GetClusterDomainSecret, \ |
40 |
_BuildInstanceHookEnvByObject, _CheckNodeNotDrained, _StartInstanceDisks, \ |
41 |
_ShutdownInstanceDisks, _RemoveInstance |
42 |
|
43 |
|
44 |
class _ExportQuery(_QueryBase):
  """Query implementation listing the exports found on cluster nodes.

  """
  FIELDS = query.EXPORT_FIELDS

  #: Rows are sorted by node name; since one node can yield several rows
  #: (one per export), the node name is not a unique key for this query
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    """Computes the wanted node names and declares the locks on C{lu}.

    When no names were requested all nodes are wanted. Locks are only
    declared (in shared mode) if the query was asked to use locking.

    """
    lu.needed_locks = {}

    # "wanted" and "do_locking" interact with _QueryBase._GetNames
    query_all = not self.names
    if query_all:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = self.use_locking
    if not self.do_locking:
      return

    lu.share_locks = _ShareAll()

    wanted_locks = {locking.LEVEL_NODE: self.wanted}
    if query_all:
      # Querying every node also requires the node allocation lock
      wanted_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
    lu.needed_locks = wanted_locks

  def DeclareLocks(self, lu, level):
    """Nothing to declare at this stage.

    """
    pass

  def _GetQueryData(self, lu):
    """Collects the exports present on the selected nodes.

    @return: list of C{(node, export name)} tuples; a node whose RPC call
      failed contributes a single C{(node, None)} entry

    """
    # Locking is not used
    # TODO
    assert (not compat.any(lu.glm.is_owned(lock_level)
                           for lock_level in locking.LEVELS
                           if lock_level != locking.LEVEL_CLUSTER) and
            not self.do_locking and not self.use_locking)

    node_names = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
    per_node = lu.rpc.call_export_list(node_names)

    rows = []
    for (node_name, node_result) in per_node.items():
      if node_result.fail_msg:
        rows.append((node_name, None))
        continue

      for export_name in node_result.payload:
        rows.append((node_name, export_name))

    return rows
|
95 |
|
96 |
|
97 |
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Builds the export query object from the opcode parameters.

    """
    node_filter = qlang.MakeSimpleFilter("node", self.op.nodes)
    wanted_fields = ["node", "export"]
    self.expq = _ExportQuery(node_filter, wanted_fields, self.op.use_locking)

  def ExpandNames(self):
    """Delegates name expansion to the export query.

    """
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegates lock declaration to the export query.

    """
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and groups the export names by node.

    @return: dictionary mapping a node name to the list of export names
      reported for it, or to C{False} if the query returned no export name
      (C{None}) for that node

    """
    by_node = {}

    for (node_name, export_name) in self.expq.OldStyleQuery(self):
      if export_name is not None:
        by_node.setdefault(node_name, []).append(export_name)
        continue

      # A missing export name marks the node itself as failed
      by_node[node_name] = False

    return by_node
|
123 |
|
124 |
|
125 |
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expands the instance name and declares its lock.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance, verifies that its primary node is online and
    reads the cluster domain secret.

    """
    wanted_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(wanted_name)

    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @return: C{None} unless the export mode is remote; for remote exports a
      dictionary with the keys C{handshake}, C{x509_key_name} and C{x509_ca}

    """
    inst = self.instance

    # Only remote exports need any preparation
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    salt = utils.GenerateSecret(8)
    pnode = inst.primary_node

    feedback_fn("Generating X509 certificate on %s" % pnode)
    create_res = self.rpc.call_x509_cert_create(pnode,
                                                constants.RIE_CERT_VALIDITY)
    create_res.Raise("Can't create X509 key and certificate on %s" %
                     create_res.node)

    (key_name, cert_pem) = create_res.payload

    pem_format = OpenSSL.crypto.FILETYPE_PEM
    cert = OpenSSL.crypto.load_certificate(pem_format, cert_pem)

    # Everything handed back is derived from the cluster domain secret
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
174 |
|
175 |
|
176 |
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  Two export modes are handled. In local mode (C{EXPORT_MODE_LOCAL}) the
  opcode's C{target_node} names the destination node. In remote mode
  (C{EXPORT_MODE_REMOTE}) C{target_node} carries one destination entry per
  instance disk, and an X509 key name plus a destination CA are required.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Copies the X509 parameters from the opcode and, for remote exports,
    makes sure both of them were given.

    @raise errors.OpPrereqError: if the mode is remote and the X509 key name
      or the destination X509 CA is missing

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declares the instance lock and, for local exports, all node locks.

    """
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

      # Allocations should be stopped while this LU runs with node locks, but
      # it doesn't have to be exclusive
      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @return: dictionary with the export-specific variables, updated with the
      result of L{_BuildInstanceHookEnvByObject} for the instance

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple containing the same node list twice: the master node, the
      instance's primary node and, for local exports only, the target node

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    # In remote mode target_node holds per-disk destination data (see
    # CheckPrereq), not a node name, so it is only added for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    For local exports the destination node is looked up and stored in
    C{self.dst_node}. For remote exports the X509 key name, the destination
    CA and the per-disk destination information are verified and the results
    stored in C{self.dest_x509_ca} and C{self.dest_disk_info}.

    @raise errors.OpPrereqError: if any of the checks fails
    @raise errors.ProgrammerError: if the export mode is neither local nor
      remote

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    # Removing an instance marked as up is only accepted if the export shuts
    # it down first
    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      # The attributes only needed for remote exports are reset
      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      # One destination entry is expected per instance disk
      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    Nodes whose export list can't be queried are skipped; a failed removal
    is reported as a warning only.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    The steps are, in order: optionally shut down the instance, activate its
    disks if it is not marked as up, create the snapshots, restart the
    instance if it was shut down only for the export, run the local or remote
    export, and finally remove the instance and/or older exports as requested.

    @return: tuple of C{(fin_resu, dresults)} as returned by the export
      helper; C{dresults} holds one boolean per instance disk
    @raise errors.OpExecError: if the instance can't be restarted or if the
      export (finalization or any disk) failed

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout,
                                               self.op.reason)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    # Outer try/finally: disks activated above are always deactivated again
    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      # Inner try/finally: the helper is always cleaned up once the
      # snapshots were created
      try:
        # Restart the instance only if it was shut down just for the export
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False,
                                                 self.op.reason)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    # Collect what went wrong and report it in a single error
    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
|
479 |
|
480 |
|
481 |
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares locks on all nodes and on node allocations.

    """
    all_nodes = locking.ALL_SET
    self.needed_locks = {
      # Removing backups is quick, so blocking allocations is justified
      locking.LEVEL_NODE_ALLOC: all_nodes,

      # We need all nodes to be locked in order for RemoveExport to work, but
      # we don't need to lock the instance itself, as nothing will happen to it
      # (and we can remove exports also for a removed instance)
      locking.LEVEL_NODE: all_nodes,
      }

    # Allocations should be stopped while this LU runs with node locks, but it
    # doesn't have to be exclusive
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1

  def Exec(self, feedback_fn):
    """Remove any export.

    Every locked node is asked for its export list and the export named
    after the instance is removed wherever it is found. Failures on
    individual nodes are reported but do not abort the operation.

    """
    export_name = self.cfg.ExpandInstanceName(self.op.instance_name)

    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    name_unresolved = not export_name
    if name_unresolved:
      export_name = self.op.instance_name

    node_names = self.owned_locks(locking.LEVEL_NODE)
    per_node = self.rpc.call_export_list(node_names)

    removed_any = False
    for node_name in per_node:
      list_result = per_node[node_name]

      query_err = list_result.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node_name, query_err)
        continue

      if export_name not in list_result.payload:
        continue

      removed_any = True
      remove_err = self.rpc.call_export_remove(node_name, export_name).fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", export_name, node_name, remove_err)

    if name_unresolved and not removed_any:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
|