Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib / backup.py @ 4869595d

History | View | Annotate | Download (16.4 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Logical units dealing with backup operations."""
23

    
24
import OpenSSL
25
import logging
26

    
27
from ganeti import compat
28
from ganeti import constants
29
from ganeti import errors
30
from ganeti import locking
31
from ganeti import masterd
32
from ganeti import utils
33

    
34
from ganeti.cmdlib.base import NoHooksLU, LogicalUnit
35
from ganeti.cmdlib.common import CheckNodeOnline, \
36
  ExpandNodeUuidAndName
37
from ganeti.cmdlib.instance_storage import StartInstanceDisks, \
38
  ShutdownInstanceDisks
39
from ganeti.cmdlib.instance_utils import GetClusterDomainSecret, \
40
  BuildInstanceHookEnvByObject, CheckNodeNotDrained, RemoveInstance
41

    
42

    
43
class LUBackupPrepare(NoHooksLU):
  """Collects the data a client needs before starting an instance export.

  Only remote exports require preparation; for them an X509 key and
  certificate are created on the instance's primary node and returned
  together with the remote-export handshake.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Delegates name expansion and locking to the instance helper.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance named in the opcode, runs the online check for
    its primary node and fetches the cluster domain secret for L{Exec}.

    """
    inst = self.cfg.GetInstanceInfoByName(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    CheckNodeOnline(self, inst.primary_node)

    self._cds = GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @param feedback_fn: callable used to report progress messages
    @return: C{None} unless the export mode is remote; for remote exports
        a dict with the keys C{handshake}, C{x509_key_name} (a tuple of
        key name, HMAC of the name and salt) and C{x509_ca} (the signed
        certificate)

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      # Other export modes need no preparation
      return None

    salt = utils.GenerateSecret(8)
    secret = self._cds
    primary_uuid = self.instance.primary_node

    feedback_fn("Generating X509 certificate on %s" %
                self.cfg.GetNodeName(primary_uuid))
    rpc_result = self.rpc.call_x509_cert_create(primary_uuid,
                                                constants.RIE_CERT_VALIDITY)
    rpc_result.Raise("Can't create X509 key and certificate on %s" %
                     self.cfg.GetNodeName(rpc_result.node))

    (key_name, pem_data) = rpc_result.payload
    certificate = \
      OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem_data)

    # Evaluated in the same order as the entries of the returned dict
    handshake = masterd.instance.ComputeRemoteExportHandshake(secret)
    key_name_hmac = utils.Sha1Hmac(secret, key_name, salt=salt)
    signed_ca = utils.SignX509Certificate(certificate, secret, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
90

    
91

    
92
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  Two export modes are handled. In local mode C{self.op.target_node}
  names a node of this cluster that receives the export. In remote mode
  C{self.op.target_node} is a sequence with one destination record per
  instance disk, and the transfer is secured with the X509 key name and
  destination CA passed in the opcode.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Copies the X509 key name and the destination CA from the opcode and,
    for remote exports, requires both of them to be set.

    @raise errors.OpPrereqError: if a remote export lacks the X509 key
        name or the destination X509 CA

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand names and declare the locks needed for the export.

    The instance is always expanded and locked; local exports additionally
    resolve the target node and request all node locks plus a shared
    node-allocation lock.

    """
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      (self.op.target_node_uuid, self.op.target_node) = \
        ExpandNodeUuidAndName(self.cfg, self.op.target_node_uuid,
                              self.op.target_node)
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know
      # where the previous export might be, and in this LU we search for it
      # and remove it from its current node. In the future we could fix this
      # by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

      # Allocations should be stopped while this LU runs with node locks, but
      # it doesn't have to be exclusive
      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @return: dict with the export-specific variables (C{EXPORT_MODE},
        C{EXPORT_NODE}, C{EXPORT_DO_SHUTDOWN}, C{SHUTDOWN_TIMEOUT},
        C{REMOVE_INSTANCE}) updated with the instance hook environment

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple with the same node list used twice: the master node,
        the instance's primary node and, for local exports, the target
        node

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node_uuid)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid. For remote
    exports it additionally verifies the HMAC of the X509 key name, loads
    and verifies the signed destination CA and validates the per-disk
    destination information against the cluster domain secret.

    @raise errors.OpPrereqError: if any of the checks fails
    @raise errors.ProgrammerError: if the export mode is not handled

    """
    self.instance = self.cfg.GetInstanceInfoByName(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node_uuid)
      assert self.dst_node is not None

      CheckNodeOnline(self, self.dst_node.uuid)
      CheckNodeNotDrained(self, self.dst_node.uuid)

      # Attributes only meaningful for remote exports
      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      # One destination record is expected per instance disk
      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node),
                                    self.op.instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError) as err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error as err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError as err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type in constants.DTS_FILEBASED:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    Failures to list or remove exports are not fatal: nodes that cannot
    be queried are skipped and failed removals only produce a warning.

    @param feedback_fn: callable used to report progress messages

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    # Build a filtered list instead of mutating the one returned by the
    # configuration
    dst_uuid = self.dst_node.uuid
    node_uuids = [uuid for uuid in self.cfg.GetNodeList() if uuid != dst_uuid]

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if node_uuids:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(node_uuids)
      for node_uuid in exportlist:
        if exportlist[node_uuid].fail_msg:
          continue
        if iname in exportlist[node_uuid].payload:
          msg = self.rpc.call_export_remove(node_uuid, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname,
                            self.cfg.GetNodeName(node_uuid), msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    The instance is optionally shut down, its disks are activated if
    needed, snapshots are created and exported (locally or remotely), and
    afterwards the instance is optionally removed and, for local exports,
    older exports on other nodes are cleaned up.

    @param feedback_fn: callable used to report progress messages
    @return: tuple of (finalization result, list with one boolean per
        disk)
    @raise errors.OpExecError: if the instance cannot be restarted after
        the snapshot, or if finalization or any disk export failed

    """
    assert self.op.mode in constants.EXPORT_MODES

    src_node_uuid = self.instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % self.instance.name)
      result = self.rpc.call_instance_shutdown(src_node_uuid, self.instance,
                                               self.op.shutdown_timeout,
                                               self.op.reason)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (self.instance.name,
                                 self.cfg.GetNodeName(src_node_uuid)))

    # Remember whether this LU activated the disks, so that only then they
    # are deactivated again at the end
    activate_disks = not self.instance.disks_active

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % self.instance.name)
      StartInstanceDisks(self, self.instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     self.instance)

      helper.CreateSnapshots()
      try:
        # With the snapshots taken, an instance that was shut down only for
        # the export can be started again before the data is transferred
        if (self.op.shutdown and
            self.instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % self.instance.name)
          result = self.rpc.call_instance_start(src_node_uuid,
                                                (self.instance, None, None),
                                                False, self.op.reason)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            ShutdownInstanceDisks(self, self.instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node,
                                                    self.op.compress)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     self.op.compress,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(self.instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % self.instance.name)
        ShutdownInstanceDisks(self, self.instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % self.instance.name)
      RemoveInstance(self, feedback_fn, self.instance,
                     self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
394

    
395

    
396
class LUBackupRemove(NoHooksLU):
  """Deletes every export that belongs to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares the node and node-allocation locks used by this LU.

    """
    wanted = {}

    # All nodes have to be locked for the export removal to work. The
    # instance itself is left unlocked: nothing happens to it, and exports
    # of an already removed instance can be deleted as well.
    wanted[locking.LEVEL_NODE] = locking.ALL_SET

    # Removing backups is quick, so blocking allocations is justified
    wanted[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

    self.needed_locks = wanted

    # Allocations should be stopped while this LU runs with node locks, but
    # a shared lock is sufficient for that
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1

  def Exec(self, feedback_fn):
    """Remove any export.

    Queries all locked nodes for their exports and removes the ones that
    match the instance name. Nodes that cannot be queried are skipped with
    a warning; failed removals are logged.

    @param feedback_fn: callable used to report messages to the caller

    """
    (_, inst_name) = self.cfg.ExpandInstanceName(self.op.instance_name)

    # An unknown instance is looked up under the name exactly as given,
    # which can only match if that name was an FQDN.
    fqdn_warn = not inst_name
    if fqdn_warn:
      inst_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)

    found = False
    for node_uuid in exportlist:
      node_result = exportlist[node_uuid]

      query_error = node_result.fail_msg
      if query_error:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        self.cfg.GetNodeName(node_uuid), query_error)
        continue

      if inst_name not in node_result.payload:
        continue

      found = True
      remove_error = self.rpc.call_export_remove(node_uuid,
                                                 inst_name).fail_msg
      if remove_error:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", inst_name,
                      self.cfg.GetNodeName(node_uuid), remove_error)

    if found or not fqdn_warn:
      return

    feedback_fn("Export not found. If trying to remove an export belonging"
                " to a deleted instance please use its Fully Qualified"
                " Domain Name.")