Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib / backup.py @ 8701dfb0

History | View | Annotate | Download (18 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Logical units dealing with backup operations."""
23

    
24
import OpenSSL
25
import logging
26

    
27
from ganeti import compat
28
from ganeti import constants
29
from ganeti import errors
30
from ganeti import locking
31
from ganeti import masterd
32
from ganeti import qlang
33
from ganeti import query
34
from ganeti import utils
35

    
36
from ganeti.cmdlib.base import _QueryBase, NoHooksLU, LogicalUnit
37
from ganeti.cmdlib.common import _GetWantedNodes, _ShareAll, \
38
  _CheckNodeOnline, _ExpandNodeName
39
from ganeti.cmdlib.instance_utils import _GetClusterDomainSecret, \
40
  _BuildInstanceHookEnvByObject, _CheckNodeNotDrained, _StartInstanceDisks, \
41
  _ShutdownInstanceDisks, _RemoveInstance
42

    
43

    
44
class _ExportQuery(_QueryBase):
  """Query helper producing (node, export name) pairs.

  """
  FIELDS = query.EXPORT_FIELDS

  #: Sorting is done on the node name, which is not a unique key here
  SORT_FIELD = "node"

  def ExpandNames(self, lu):
    """Work out the wanted nodes and, if locking was requested, the locks.

    @param lu: the logical unit whose C{needed_locks} and C{share_locks}
      are filled in

    """
    lu.needed_locks = {}

    # self.wanted and self.do_locking are consumed by _QueryBase._GetNames
    all_nodes = not self.names
    if all_nodes:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = self.use_locking

    if not self.do_locking:
      return

    lu.share_locks = _ShareAll()
    wanted_locks = {
      locking.LEVEL_NODE: self.wanted,
      }
    if all_nodes:
      # No explicit node names were given, so the allocation lock is
      # requested as well
      wanted_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
    lu.needed_locks = wanted_locks

  def DeclareLocks(self, lu, level):
    """Nothing to declare beyond what L{ExpandNames} already set up.

    """

  def _GetQueryData(self, lu):
    """Collects the exports present on the queried nodes.

    @param lu: the logical unit providing configuration and RPC access
    @return: list of C{(node, export_name)} tuples; a node whose RPC call
      failed contributes a single C{(node, None)} entry

    """
    # Locking is not used
    # TODO
    assert (not compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) and
            not self.do_locking and not self.use_locking)

    node_names = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
    per_node = lu.rpc.call_export_list(node_names)

    export_rows = []
    for (node, node_result) in per_node.items():
      if not node_result.fail_msg:
        for expname in node_result.payload:
          export_rows.append((node, expname))
      else:
        export_rows.append((node, None))

    return export_rows
95

    
96

    
97
class LUBackupQuery(NoHooksLU):
  """Logical unit listing the exports found on nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Build the export query from the opcode's node list.

    """
    node_filter = qlang.MakeSimpleFilter("node", self.op.nodes)
    wanted_fields = ["node", "export"]
    self.expq = _ExportQuery(node_filter, wanted_fields, self.op.use_locking)

  def ExpandNames(self):
    """Delegate name expansion to the export query.

    """
    self.expq.ExpandNames(self)

  def DeclareLocks(self, level):
    """Delegate lock declaration to the export query.

    """
    self.expq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and group the export names by node.

    @return: dictionary keyed by node; the value is the list of export
      names, or C{False} when the query row carried no export name

    """
    exports_by_node = {}

    for (node, expname) in self.expq.OldStyleQuery(self):
      if expname is not None:
        exports_by_node.setdefault(node, []).append(expname)
        continue

      # A missing export name marks a node that could not be queried
      exports_by_node[node] = False

    return exports_by_node
123

    
124

    
125
class LUBackupPrepare(NoHooksLU):
  """Gathers the information a caller needs before exporting an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the locked instance, runs the node-online check on its primary
    node and stores the cluster domain secret for L{Exec}.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @return: C{None} unless the export mode is remote; for remote exports a
      dictionary with the keys C{handshake}, C{x509_key_name} and C{x509_ca}

    """
    inst = self.instance

    # Only remote exports need any preparation
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % inst.primary_node)
    create_result = self.rpc.call_x509_cert_create(inst.primary_node,
                                                   constants.RIE_CERT_VALIDITY)
    create_result.Raise("Can't create X509 key and certificate on %s" %
                        create_result.node)

    (name, cert_pem) = create_result.payload
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # Computed in the same order as the keys appear in the returned dict
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    name_hmac = utils.Sha1Hmac(self._cds, name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (name, name_hmac, salt),
      "x509_ca": signed_ca,
      }
174

    
175

    
176
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  Two export modes are handled. In local mode C{op.target_node} is a node
  name of this cluster; in remote mode it is a list with one destination
  entry per instance disk, accompanied by an X509 key name and a destination
  X509 CA.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    @raise errors.OpPrereqError: if a remote export lacks the X509 key name
      or the destination X509 CA

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Lock the instance and, for local exports, all nodes.

    """
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know
      # where the previous export might be, and in this LU we search for it
      # and remove it from its current node. In the future we could fix this
      # by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

      # Allocations should be stopped while this LU runs with node locks, but
      # it doesn't have to be exclusive
      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    @return: dictionary with the export-specific variables merged with the
      instance hook environment

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two node lists (the same list is used for both); it
      holds the master node, the instance's primary node and, for local
      exports only, the target node

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid. For remote
    exports it additionally validates the X509 key name HMAC, the destination
    CA and the per-disk destination information.

    @raise errors.OpPrereqError: if any of the checks fails
    @raise errors.ProgrammerError: if the export mode is not handled

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      # Remote-only attributes are cleared for local exports
      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      # One destination entry is expected per instance disk
      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError) as err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error as err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError as err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    Failures to list or remove exports on a node are not fatal: nodes that
    cannot be queried are skipped and removal errors are only warned about.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    The instance is optionally shut down, its disks are activated if it is
    not marked as up, snapshots are created and then exported in the
    requested mode. After a successful export the instance is removed if
    requested and, for local exports, older exports on other nodes are
    cleaned up.

    @return: tuple of the finalization result and the list of per-disk
      boolean results
    @raise errors.OpExecError: if the instance cannot be restarted or if
      finalization or any disk export failed

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout,
                                               self.op.reason)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        # With the snapshots taken, an instance that was only shut down for
        # the export is started again before the data is transferred
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False,
                                                self.op.reason)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      # Only deactivate disks that this method activated itself
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
479

    
480

    
481
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request locks on all nodes and a shared node allocation lock.

    """
    self.needed_locks = {
      # We need all nodes to be locked in order for RemoveExport to work, but
      # we don't need to lock the instance itself, as nothing will happen to it
      # (and we can remove exports also for a removed instance)
      locking.LEVEL_NODE: locking.ALL_SET,

      # Removing backups is quick, so blocking allocations is justified
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
      }

    # Allocations should be stopped while this LU runs with node locks, but it
    # doesn't have to be exclusive
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1

  def Exec(self, feedback_fn):
    """Remove any export.

    Every locked node is asked for its export list and the export named
    after the instance is removed wherever it is present. Nodes that cannot
    be queried and removals that fail are reported as warnings and do not
    abort the operation.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)
          # Also tell the user; otherwise a failed removal is only visible
          # in the log, unlike the query failure reported above
          self.LogWarning("Could not remove export for instance %s"
                          " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")