Revision d5cafd31 lib/cmdlib.py

--- a/lib/cmdlib.py
+++ b/lib/cmdlib.py
@@ -5951,6 +5951,17 @@
     self.needed_locks[locking.LEVEL_NODE] = []
     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
 
+    ignore_consistency = self.op.ignore_consistency
+    shutdown_timeout = self.op.shutdown_timeout
+    self._migrater = TLMigrateInstance(self, self.op.instance_name,
+                                       cleanup=False,
+                                       iallocator=self.op.iallocator,
+                                       target_node=self.op.target_node,
+                                       failover=True,
+                                       ignore_consistency=ignore_consistency,
+                                       shutdown_timeout=shutdown_timeout)
+    self.tasklets = [self._migrater]
+
   def DeclareLocks(self, level):
     if level == locking.LEVEL_NODE:
       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
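
With this hunk, LUInstanceFailover no longer implements failover itself; it only builds a TLMigrateInstance tasklet with `failover=True` and lets the tasklet machinery run it. A minimal, self-contained sketch of that delegation pattern (simplified stand-in classes, not the real Ganeti LogicalUnit/Tasklet code; only the wiring mirrors the diff):

```python
# A minimal sketch of the LU-delegates-to-tasklet pattern used above.
# MigrateTasklet and FailoverLU are simplified stand-ins, not the real
# Ganeti classes.

class MigrateTasklet(object):
  """Stand-in for TLMigrateInstance: one code path drives both operations."""

  def __init__(self, instance_name, cleanup=False, iallocator=None,
               target_node=None, failover=False, fallback=False,
               ignore_consistency=False, shutdown_timeout=120):
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.iallocator = iallocator
    self.target_node = target_node
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout

  def CheckPrereq(self):
    pass  # prerequisite checks would live here

  def Exec(self, feedback_fn):
    verb = "Failover" if self.failover else "Migrating"
    feedback_fn("%s instance %s" % (verb, self.instance_name))


class FailoverLU(object):
  """Stand-in for LUInstanceFailover: it only assembles the tasklet list."""

  def __init__(self, instance_name, iallocator=None, target_node=None,
               ignore_consistency=False, shutdown_timeout=120):
    self._migrater = MigrateTasklet(instance_name,
                                    cleanup=False,
                                    iallocator=iallocator,
                                    target_node=target_node,
                                    failover=True,
                                    ignore_consistency=ignore_consistency,
                                    shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]


def feedback(msg):
  print(msg)

lu = FailoverLU("instance1.example.com", target_node="node2.example.com")
for tasklet in lu.tasklets:  # the LU processor would drive this loop
  tasklet.CheckPrereq()
  tasklet.Exec(feedback)
```
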
@@ -5970,13 +5981,14 @@
     This runs on master, primary and secondary nodes of the instance.
 
     """
-    instance = self.instance
+    instance = self._migrater.instance
     source_node = instance.primary_node
+    target_node = self._migrater.target_node
     env = {
       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
       "OLD_PRIMARY": source_node,
-      "NEW_PRIMARY": self.op.target_node,
+      "NEW_PRIMARY": target_node,
       }
 
     if instance.disk_template in constants.DTS_INT_MIRROR:
@@ -5993,171 +6005,9 @@
     """Build hooks nodes.
 
     """
-    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
-    return (nl, nl + [self.instance.primary_node])
-
-  def CheckPrereq(self):
-    """Check prerequisites.
-
-    This checks that the instance is in the cluster.
-
-    """
-    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
-    assert self.instance is not None, \
-      "Cannot retrieve locked instance %s" % self.op.instance_name
-
-    bep = self.cfg.GetClusterInfo().FillBE(instance)
-    if instance.disk_template not in constants.DTS_MIRRORED:
-      raise errors.OpPrereqError("Instance's disk layout is not"
-                                 " mirrored, cannot failover.",
-                                 errors.ECODE_STATE)
-
-    if instance.disk_template in constants.DTS_EXT_MIRROR:
-      _CheckIAllocatorOrNode(self, "iallocator", "target_node")
-      if self.op.iallocator:
-        self._RunAllocator()
-        # Release all unnecessary node locks
-        nodes_keep = [instance.primary_node, self.op.target_node]
-        nodes_rel = [node for node in self.acquired_locks[locking.LEVEL_NODE]
-                     if node not in nodes_keep]
-        self.context.glm.release(locking.LEVEL_NODE, nodes_rel)
-        self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
-
-      # self.op.target_node is already populated, either directly or by the
-      # iallocator run
-      target_node = self.op.target_node
-
-    else:
-      secondary_nodes = instance.secondary_nodes
-      if not secondary_nodes:
-        raise errors.ConfigurationError("No secondary node but using"
-                                        " %s disk template" %
-                                        instance.disk_template)
-      target_node = secondary_nodes[0]
-
-      if self.op.iallocator or (self.op.target_node and
-                                self.op.target_node != target_node):
-        raise errors.OpPrereqError("Instances with disk template %s cannot"
-                                   " be failed over to arbitrary nodes"
-                                   " (neither an iallocator nor a target"
-                                   " node can be passed)" %
-                                   instance.disk_template, errors.ECODE_INVAL)
-    _CheckNodeOnline(self, target_node)
-    _CheckNodeNotDrained(self, target_node)
-
-    # Save target_node so that we can use it in BuildHooksEnv
-    self.op.target_node = target_node
-
-    if instance.admin_up:
-      # check memory requirements on the secondary node
-      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
-                           instance.name, bep[constants.BE_MEMORY],
-                           instance.hypervisor)
-    else:
-      self.LogInfo("Not checking memory on the secondary node as"
-                   " instance will not be started")
-
-    # check bridge existance
-    _CheckInstanceBridgesExist(self, instance, node=target_node)
-
-  def Exec(self, feedback_fn):
-    """Failover an instance.
-
-    The failover is done by shutting it down on its present node and
-    starting it on the secondary.
-
-    """
-    instance = self.instance
-    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
-
-    source_node = instance.primary_node
-    target_node = self.op.target_node
-
-    if instance.admin_up:
-      feedback_fn("* checking disk consistency between source and target")
-      for dev in instance.disks:
-        # for drbd, these are drbd over lvm
-        if not _CheckDiskConsistency(self, dev, target_node, False):
-          if not self.op.ignore_consistency:
-            raise errors.OpExecError("Disk %s is degraded on target node,"
-                                     " aborting failover." % dev.iv_name)
-    else:
-      feedback_fn("* not checking disk consistency as instance is not running")
-
-    feedback_fn("* shutting down instance on source node")
-    logging.info("Shutting down instance %s on node %s",
-                 instance.name, source_node)
-
-    result = self.rpc.call_instance_shutdown(source_node, instance,
-                                             self.op.shutdown_timeout)
-    msg = result.fail_msg
-    if msg:
-      if self.op.ignore_consistency or primary_node.offline:
-        self.proc.LogWarning("Could not shutdown instance %s on node %s."
-                             " Proceeding anyway. Please make sure node"
-                             " %s is down. Error details: %s",
-                             instance.name, source_node, source_node, msg)
-      else:
-        raise errors.OpExecError("Could not shutdown instance %s on"
-                                 " node %s: %s" %
-                                 (instance.name, source_node, msg))
-
-    feedback_fn("* deactivating the instance's disks on source node")
-    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
-      raise errors.OpExecError("Can't shut down the instance's disks.")
-
-    instance.primary_node = target_node
-    # distribute new instance config to the other nodes
-    self.cfg.Update(instance, feedback_fn)
-
-    # Only start the instance if it's marked as up
-    if instance.admin_up:
-      feedback_fn("* activating the instance's disks on target node")
-      logging.info("Starting instance %s on node %s",
-                   instance.name, target_node)
-
-      disks_ok, _ = _AssembleInstanceDisks(self, instance,
-                                           ignore_secondaries=True)
-      if not disks_ok:
-        _ShutdownInstanceDisks(self, instance)
-        raise errors.OpExecError("Can't activate the instance's disks")
-
-      feedback_fn("* starting the instance on the target node")
-      result = self.rpc.call_instance_start(target_node, instance, None, None)
-      msg = result.fail_msg
-      if msg:
-        _ShutdownInstanceDisks(self, instance)
-        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
-                                 (instance.name, target_node, msg))
-
-  def _RunAllocator(self):
-    """Run the allocator based on input opcode.
-
-    """
-    ial = IAllocator(self.cfg, self.rpc,
-                     mode=constants.IALLOCATOR_MODE_RELOC,
-                     name=self.instance.name,
-                     # TODO See why hail breaks with a single node below
-                     relocate_from=[self.instance.primary_node,
-                                    self.instance.primary_node],
-                     )
-
-    ial.Run(self.op.iallocator)
-
-    if not ial.success:
-      raise errors.OpPrereqError("Can't compute nodes using"
-                                 " iallocator '%s': %s" %
-                                 (self.op.iallocator, ial.info),
-                                 errors.ECODE_NORES)
-    if len(ial.result) != ial.required_nodes:
-      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
-                                 " of nodes (%s), required %s" %
-                                 (self.op.iallocator, len(ial.result),
-                                  ial.required_nodes), errors.ECODE_FAULT)
-    self.op.target_node = ial.result[0]
-    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
-                 self.instance.name, self.op.iallocator,
-                 utils.CommaJoin(ial.result))
+    instance = self._migrater.instance
+    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
+    return (nl, nl + [instance.primary_node])
 
 
 class LUInstanceMigrate(LogicalUnit):
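
The removed CheckPrereq/Exec/_RunAllocator bodies now live in the TLMigrateInstance tasklet; the LU's BuildHooksNodes only derives its node lists from the tasklet's instance object. A standalone sketch of that node-list computation (hypothetical helper and a simplified instance object, not the Ganeti config API):

```python
# Hypothetical, simplified illustration of the hook node lists built above:
# pre-hooks run on master + secondaries, post-hooks additionally on the
# (old) primary node.
import collections

FakeInstance = collections.namedtuple("FakeInstance",
                                      ["primary_node", "secondary_nodes"])

def build_hooks_nodes(master_node, instance):
  nl = [master_node] + list(instance.secondary_nodes)
  return (nl, nl + [instance.primary_node])

inst = FakeInstance(primary_node="node1", secondary_nodes=("node2",))
pre, post = build_hooks_nodes("master", inst)
assert pre == ["master", "node2"]
assert post == ["master", "node2", "node1"]
```
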
@@ -6181,8 +6031,11 @@
     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
 
     self._migrater = TLMigrateInstance(self, self.op.instance_name,
-                                       self.op.cleanup, self.op.iallocator,
-                                       self.op.target_node)
+                                       cleanup=self.op.cleanup,
+                                       iallocator=self.op.iallocator,
+                                       target_node=self.op.target_node,
+                                       failover=False,
+                                       fallback=self.op.allow_failover)
     self.tasklets = [self._migrater]
 
   def DeclareLocks(self, level):
@@ -6442,8 +6295,9 @@
       logging.debug("Migrating instance %s", inst.name)
       names.append(inst.name)
 
-      tasklets.append(TLMigrateInstance(self, inst.name, False,
-                                        self.op.iallocator, None))
+      tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
+                                        iallocator=self.op.iallocator,
+                                        taget_node=None))
 
       if inst.disk_template in constants.DTS_EXT_MIRROR:
         # We need to lock all nodes, as the iallocator will choose the
@@ -6490,10 +6344,28 @@
   @type live: boolean
   @ivar live: whether the migration will be done live or non-live;
       this variable is initalized only after CheckPrereq has run
+  @type cleanup: boolean
+  @ivar cleanup: Wheater we cleanup from a failed migration
+  @type iallocator: string
+  @ivar iallocator: The iallocator used to determine target_node
+  @type target_node: string
+  @ivar target_node: If given, the target_node to reallocate the instance to
+  @type failover: boolean
+  @ivar failover: Whether operation results in failover or migration
+  @type fallback: boolean
+  @ivar fallback: Whether fallback to failover is allowed if migration not
+                  possible
+  @type ignore_consistency: boolean
+  @ivar ignore_consistency: Wheter we should ignore consistency between source
+                            and target node
+  @type shutdown_timeout: int
+  @ivar shutdown_timeout: In case of failover timeout of the shutdown
 
   """
-  def __init__(self, lu, instance_name, cleanup,
-               iallocator=None, target_node=None):
+  def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
+               target_node=None, failover=False, fallback=False,
+               ignore_consistency=False,
+               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.
 
    """
@@ -6505,6 +6377,10 @@
     self.live = False # will be overridden later
     self.iallocator = iallocator
     self.target_node = target_node
+    self.failover = failover
+    self.fallback = fallback
+    self.ignore_consistency = ignore_consistency
+    self.shutdown_timeout = shutdown_timeout
 
   def CheckPrereq(self):
     """Check prerequisites.
@@ -6517,9 +6393,19 @@
     assert instance is not None
     self.instance = instance
 
+    if (not self.cleanup and not instance.admin_up and not self.failover and
+        self.fallback):
+      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
+                      " to failover")
+      self.failover = True
+
     if instance.disk_template not in constants.DTS_MIRRORED:
+      if self.failover:
+        text = "failovers"
+      else:
+        text = "migrations"
       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
-                                 " migrations" % instance.disk_template,
+                                 " %s" % (instance.disk_template, text),
                                  errors.ECODE_STATE)
 
     if instance.disk_template in constants.DTS_EXT_MIRROR:
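
The new check above degrades a requested migration to a failover when the instance is administratively down and the caller allowed falling back. Expressed as a standalone predicate (a sketch of the condition from the hunk, not the actual method):

```python
# Sketch of the decision introduced above: a migration of an instance that
# is marked down degrades to a failover, but only if fallback is allowed
# and we are neither cleaning up nor already failing over.
def should_switch_to_failover(cleanup, admin_up, failover, fallback):
  return not cleanup and not admin_up and not failover and fallback

assert should_switch_to_failover(False, False, False, True) is True
assert should_switch_to_failover(False, True, False, True) is False    # instance is up
assert should_switch_to_failover(False, False, False, False) is False  # no fallback allowed
assert should_switch_to_failover(True, False, False, True) is False    # cleanup run
```
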
@@ -6547,31 +6433,47 @@
                                         " %s disk template" %
                                         instance.disk_template)
       target_node = secondary_nodes[0]
-      if self.lu.op.iallocator or (self.lu.op.target_node and
-                                   self.lu.op.target_node != target_node):
+      if self.iallocator or (self.target_node and
+                             self.target_node != target_node):
+        if self.failover:
+          text = "failed over"
+        else:
+          text = "migrated"
         raise errors.OpPrereqError("Instances with disk template %s cannot"
-                                   " be migrated over to arbitrary nodes"
+                                   " be %s over to arbitrary nodes"
                                    " (neither an iallocator nor a target"
                                    " node can be passed)" %
-                                   instance.disk_template, errors.ECODE_INVAL)
+                                   (text, instance.disk_template),
+                                   errors.ECODE_INVAL)
 
     i_be = self.cfg.GetClusterInfo().FillBE(instance)
 
     # check memory requirements on the secondary node
-    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
-                         instance.name, i_be[constants.BE_MEMORY],
-                         instance.hypervisor)
+    if not self.failover or instance.admin_up:
+      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
+                           instance.name, i_be[constants.BE_MEMORY],
+                           instance.hypervisor)
+    else:
+      self.lu.LogInfo("Not checking memory on the secondary node as"
+                      " instance will not be started")
 
     # check bridge existance
     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
 
     if not self.cleanup:
       _CheckNodeNotDrained(self.lu, target_node)
-      result = self.rpc.call_instance_migratable(instance.primary_node,
-                                                 instance)
-      result.Raise("Can't migrate, please use failover",
-                   prereq=True, ecode=errors.ECODE_STATE)
+      if not self.failover:
+        result = self.rpc.call_instance_migratable(instance.primary_node,
+                                                   instance)
+        if result.fail_msg and self.fallback:
+          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
+                          " failover")
+          self.failover = True
+        else:
+          result.Raise("Can't migrate, please use failover",
+                       prereq=True, ecode=errors.ECODE_STATE)
 
+    assert not (self.failover and self.cleanup)
 
   def _RunAllocator(self):
     """Run the allocator based on input opcode.
@@ -6602,24 +6504,29 @@
                  self.instance_name, self.iallocator,
                  utils.CommaJoin(ial.result))
 
-    if self.lu.op.live is not None and self.lu.op.mode is not None:
-      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
-                                 " parameters are accepted",
-                                 errors.ECODE_INVAL)
-    if self.lu.op.live is not None:
-      if self.lu.op.live:
-        self.lu.op.mode = constants.HT_MIGRATION_LIVE
-      else:
-        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
-      # reset the 'live' parameter to None so that repeated
-      # invocations of CheckPrereq do not raise an exception
-      self.lu.op.live = None
-    elif self.lu.op.mode is None:
-      # read the default value from the hypervisor
-      i_hv = self.cfg.GetClusterInfo().FillHV(self.instance, skip_globals=False)
-      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
-
-    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
+    if not self.failover:
+      if self.lu.op.live is not None and self.lu.op.mode is not None:
+        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
+                                   " parameters are accepted",
+                                   errors.ECODE_INVAL)
+      if self.lu.op.live is not None:
+        if self.lu.op.live:
+          self.lu.op.mode = constants.HT_MIGRATION_LIVE
+        else:
+          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
+        # reset the 'live' parameter to None so that repeated
+        # invocations of CheckPrereq do not raise an exception
+        self.lu.op.live = None
+      elif self.lu.op.mode is None:
+        # read the default value from the hypervisor
+        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
+                                                skip_globals=False)
+        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
+
+      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
+    else:
+      # Failover is never live
+      self.live = False
 
   def _WaitUntilSync(self):
     """Poll with custom rpc for disk sync.
@@ -6885,14 +6792,82 @@
 
     self.feedback_fn("* done")
 
+  def _ExecFailover(self):
+    """Failover an instance.
+
+    The failover is done by shutting it down on its present node and
+    starting it on the secondary.
+
+    """
+    instance = self.instance
+    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
+
+    source_node = instance.primary_node
+    target_node = self.target_node
+
+    if instance.admin_up:
+      self.feedback_fn("* checking disk consistency between source and target")
+      for dev in instance.disks:
+        # for drbd, these are drbd over lvm
+        if not _CheckDiskConsistency(self, dev, target_node, False):
+          if not self.ignore_consistency:
+            raise errors.OpExecError("Disk %s is degraded on target node,"
+                                     " aborting failover." % dev.iv_name)
+    else:
+      self.feedback_fn("* not checking disk consistency as instance is not"
+                       " running")
+
+    self.feedback_fn("* shutting down instance on source node")
+    logging.info("Shutting down instance %s on node %s",
+                 instance.name, source_node)
+
+    result = self.rpc.call_instance_shutdown(source_node, instance,
+                                             self.shutdown_timeout)
+    msg = result.fail_msg
+    if msg:
+      if self.ignore_consistency or primary_node.offline:
+        self.lu.LogWarning("Could not shutdown instance %s on node %s."
+                           " Proceeding anyway. Please make sure node"
+                           " %s is down. Error details: %s",
+                           instance.name, source_node, source_node, msg)
+      else:
+        raise errors.OpExecError("Could not shutdown instance %s on"
+                                 " node %s: %s" %
+                                 (instance.name, source_node, msg))
+
+    self.feedback_fn("* deactivating the instance's disks on source node")
+    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
+      raise errors.OpExecError("Can't shut down the instance's disks.")
+
+    instance.primary_node = target_node
+    # distribute new instance config to the other nodes
+    self.cfg.Update(instance, self.feedback_fn)
+
+    # Only start the instance if it's marked as up
+    if instance.admin_up:
+      self.feedback_fn("* activating the instance's disks on target node")
+      logging.info("Starting instance %s on node %s",
+                   instance.name, target_node)
+
+      disks_ok, _ = _AssembleInstanceDisks(self, instance,
+                                           ignore_secondaries=True)
+      if not disks_ok:
+        _ShutdownInstanceDisks(self, instance)
+        raise errors.OpExecError("Can't activate the instance's disks")
+
+      self.feedback_fn("* starting the instance on the target node")
+      result = self.rpc.call_instance_start(target_node, instance, None, None)
+      msg = result.fail_msg
+      if msg:
+        _ShutdownInstanceDisks(self, instance)
+        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
+                                 (instance.name, target_node, msg))
+
   def Exec(self, feedback_fn):
     """Perform the migration.
 
     """
-    feedback_fn("Migrating instance %s" % self.instance.name)
-
     self.feedback_fn = feedback_fn
-
     self.source_node = self.instance.primary_node
 
     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
@@ -6907,10 +6882,16 @@
       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
       }
 
-    if self.cleanup:
-      return self._ExecCleanup()
+    if self.failover:
+      feedback_fn("Failover instance %s" % self.instance.name)
+      self._ExecFailover()
     else:
-      return self._ExecMigration()
+      feedback_fn("Migrating instance %s" % self.instance.name)
+
+      if self.cleanup:
+        return self._ExecCleanup()
+      else:
+        return self._ExecMigration()
 
 
 def _CreateBlockDev(lu, node, instance, device, force_create,
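
Exec now dispatches on the tasklet's flags: failover first, otherwise cleanup or a regular migration. A compact sketch of that dispatch with stubbed-out operations (note the hunk above does not return the failover result; the stub does, so the asserts can check it):

```python
# Sketch of the Exec() dispatch above, with stubbed-out operations.
class FakeMigrater(object):
  def __init__(self, failover=False, cleanup=False):
    self.failover = failover
    self.cleanup = cleanup

  def _ExecFailover(self):
    return "failover"

  def _ExecCleanup(self):
    return "cleanup"

  def _ExecMigration(self):
    return "migration"

  def Exec(self, feedback_fn):
    if self.failover:
      feedback_fn("Failover instance")
      return self._ExecFailover()
    else:
      feedback_fn("Migrating instance")
      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()

noop = lambda msg: None
assert FakeMigrater(failover=True).Exec(noop) == "failover"
assert FakeMigrater(cleanup=True).Exec(noop) == "cleanup"
assert FakeMigrater().Exec(noop) == "migration"
```
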
