Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib / instance_migration.py @ 87e25be1

History | View | Annotate | Download (35.8 kB)

1 87e25be1 Thomas Thrainer
#
2 87e25be1 Thomas Thrainer
#
3 87e25be1 Thomas Thrainer
4 87e25be1 Thomas Thrainer
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
5 87e25be1 Thomas Thrainer
#
6 87e25be1 Thomas Thrainer
# This program is free software; you can redistribute it and/or modify
7 87e25be1 Thomas Thrainer
# it under the terms of the GNU General Public License as published by
8 87e25be1 Thomas Thrainer
# the Free Software Foundation; either version 2 of the License, or
9 87e25be1 Thomas Thrainer
# (at your option) any later version.
10 87e25be1 Thomas Thrainer
#
11 87e25be1 Thomas Thrainer
# This program is distributed in the hope that it will be useful, but
12 87e25be1 Thomas Thrainer
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 87e25be1 Thomas Thrainer
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 87e25be1 Thomas Thrainer
# General Public License for more details.
15 87e25be1 Thomas Thrainer
#
16 87e25be1 Thomas Thrainer
# You should have received a copy of the GNU General Public License
17 87e25be1 Thomas Thrainer
# along with this program; if not, write to the Free Software
18 87e25be1 Thomas Thrainer
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 87e25be1 Thomas Thrainer
# 02110-1301, USA.
20 87e25be1 Thomas Thrainer
21 87e25be1 Thomas Thrainer
22 87e25be1 Thomas Thrainer
"""Logical units dealing with instance migration and failover."""
23 87e25be1 Thomas Thrainer
24 87e25be1 Thomas Thrainer
import logging
25 87e25be1 Thomas Thrainer
import time
26 87e25be1 Thomas Thrainer
27 87e25be1 Thomas Thrainer
from ganeti import constants
28 87e25be1 Thomas Thrainer
from ganeti import errors
29 87e25be1 Thomas Thrainer
from ganeti import locking
30 87e25be1 Thomas Thrainer
from ganeti.masterd import iallocator
31 87e25be1 Thomas Thrainer
from ganeti import utils
32 87e25be1 Thomas Thrainer
from ganeti.cmdlib.base import LogicalUnit, Tasklet
33 87e25be1 Thomas Thrainer
from ganeti.cmdlib.common import _ExpandInstanceName, \
34 87e25be1 Thomas Thrainer
  _CheckIAllocatorOrNode, _ExpandNodeName
35 87e25be1 Thomas Thrainer
from ganeti.cmdlib.instance_storage import _CheckDiskConsistency, \
36 87e25be1 Thomas Thrainer
  _ExpandCheckDisks, _ShutdownInstanceDisks, _AssembleInstanceDisks
37 87e25be1 Thomas Thrainer
from ganeti.cmdlib.instance_utils import _BuildInstanceHookEnvByObject, \
38 87e25be1 Thomas Thrainer
  _CheckTargetNodeIPolicy, _ReleaseLocks, _CheckNodeNotDrained, \
39 87e25be1 Thomas Thrainer
  _CopyLockList, _CheckNodeFreeMemory, _CheckInstanceBridgesExist
40 87e25be1 Thomas Thrainer
41 87e25be1 Thomas Thrainer
import ganeti.masterd.instance
42 87e25be1 Thomas Thrainer
43 87e25be1 Thomas Thrainer
44 87e25be1 Thomas Thrainer
def _ExpandNamesForMigration(lu):
45 87e25be1 Thomas Thrainer
  """Expands names for use with L{TLMigrateInstance}.
46 87e25be1 Thomas Thrainer

47 87e25be1 Thomas Thrainer
  @type lu: L{LogicalUnit}
48 87e25be1 Thomas Thrainer

49 87e25be1 Thomas Thrainer
  """
50 87e25be1 Thomas Thrainer
  if lu.op.target_node is not None:
51 87e25be1 Thomas Thrainer
    lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
52 87e25be1 Thomas Thrainer
53 87e25be1 Thomas Thrainer
  lu.needed_locks[locking.LEVEL_NODE] = []
54 87e25be1 Thomas Thrainer
  lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
55 87e25be1 Thomas Thrainer
56 87e25be1 Thomas Thrainer
  lu.needed_locks[locking.LEVEL_NODE_RES] = []
57 87e25be1 Thomas Thrainer
  lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
58 87e25be1 Thomas Thrainer
59 87e25be1 Thomas Thrainer
  # The node allocation lock is actually only needed for externally replicated
60 87e25be1 Thomas Thrainer
  # instances (e.g. sharedfile or RBD) and if an iallocator is used.
61 87e25be1 Thomas Thrainer
  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
62 87e25be1 Thomas Thrainer
63 87e25be1 Thomas Thrainer
64 87e25be1 Thomas Thrainer
def _DeclareLocksForMigration(lu, level):
65 87e25be1 Thomas Thrainer
  """Declares locks for L{TLMigrateInstance}.
66 87e25be1 Thomas Thrainer

67 87e25be1 Thomas Thrainer
  @type lu: L{LogicalUnit}
68 87e25be1 Thomas Thrainer
  @param level: Lock level
69 87e25be1 Thomas Thrainer

70 87e25be1 Thomas Thrainer
  """
71 87e25be1 Thomas Thrainer
  if level == locking.LEVEL_NODE_ALLOC:
72 87e25be1 Thomas Thrainer
    assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
73 87e25be1 Thomas Thrainer
74 87e25be1 Thomas Thrainer
    instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
75 87e25be1 Thomas Thrainer
76 87e25be1 Thomas Thrainer
    # Node locks are already declared here rather than at LEVEL_NODE as we need
77 87e25be1 Thomas Thrainer
    # the instance object anyway to declare the node allocation lock.
78 87e25be1 Thomas Thrainer
    if instance.disk_template in constants.DTS_EXT_MIRROR:
79 87e25be1 Thomas Thrainer
      if lu.op.target_node is None:
80 87e25be1 Thomas Thrainer
        lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
81 87e25be1 Thomas Thrainer
        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
82 87e25be1 Thomas Thrainer
      else:
83 87e25be1 Thomas Thrainer
        lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
84 87e25be1 Thomas Thrainer
                                               lu.op.target_node]
85 87e25be1 Thomas Thrainer
      del lu.recalculate_locks[locking.LEVEL_NODE]
86 87e25be1 Thomas Thrainer
    else:
87 87e25be1 Thomas Thrainer
      lu._LockInstancesNodes() # pylint: disable=W0212
88 87e25be1 Thomas Thrainer
89 87e25be1 Thomas Thrainer
  elif level == locking.LEVEL_NODE:
90 87e25be1 Thomas Thrainer
    # Node locks are declared together with the node allocation lock
91 87e25be1 Thomas Thrainer
    assert (lu.needed_locks[locking.LEVEL_NODE] or
92 87e25be1 Thomas Thrainer
            lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
93 87e25be1 Thomas Thrainer
94 87e25be1 Thomas Thrainer
  elif level == locking.LEVEL_NODE_RES:
95 87e25be1 Thomas Thrainer
    # Copy node locks
96 87e25be1 Thomas Thrainer
    lu.needed_locks[locking.LEVEL_NODE_RES] = \
97 87e25be1 Thomas Thrainer
      _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
98 87e25be1 Thomas Thrainer
99 87e25be1 Thomas Thrainer
100 87e25be1 Thomas Thrainer
class LUInstanceFailover(LogicalUnit):
101 87e25be1 Thomas Thrainer
  """Failover an instance.
102 87e25be1 Thomas Thrainer

103 87e25be1 Thomas Thrainer
  """
104 87e25be1 Thomas Thrainer
  HPATH = "instance-failover"
105 87e25be1 Thomas Thrainer
  HTYPE = constants.HTYPE_INSTANCE
106 87e25be1 Thomas Thrainer
  REQ_BGL = False
107 87e25be1 Thomas Thrainer
108 87e25be1 Thomas Thrainer
  def CheckArguments(self):
109 87e25be1 Thomas Thrainer
    """Check the arguments.
110 87e25be1 Thomas Thrainer

111 87e25be1 Thomas Thrainer
    """
112 87e25be1 Thomas Thrainer
    self.iallocator = getattr(self.op, "iallocator", None)
113 87e25be1 Thomas Thrainer
    self.target_node = getattr(self.op, "target_node", None)
114 87e25be1 Thomas Thrainer
115 87e25be1 Thomas Thrainer
  def ExpandNames(self):
116 87e25be1 Thomas Thrainer
    self._ExpandAndLockInstance()
117 87e25be1 Thomas Thrainer
    _ExpandNamesForMigration(self)
118 87e25be1 Thomas Thrainer
119 87e25be1 Thomas Thrainer
    self._migrater = \
120 87e25be1 Thomas Thrainer
      TLMigrateInstance(self, self.op.instance_name, False, True, False,
121 87e25be1 Thomas Thrainer
                        self.op.ignore_consistency, True,
122 87e25be1 Thomas Thrainer
                        self.op.shutdown_timeout, self.op.ignore_ipolicy)
123 87e25be1 Thomas Thrainer
124 87e25be1 Thomas Thrainer
    self.tasklets = [self._migrater]
125 87e25be1 Thomas Thrainer
126 87e25be1 Thomas Thrainer
  def DeclareLocks(self, level):
127 87e25be1 Thomas Thrainer
    _DeclareLocksForMigration(self, level)
128 87e25be1 Thomas Thrainer
129 87e25be1 Thomas Thrainer
  def BuildHooksEnv(self):
130 87e25be1 Thomas Thrainer
    """Build hooks env.
131 87e25be1 Thomas Thrainer

132 87e25be1 Thomas Thrainer
    This runs on master, primary and secondary nodes of the instance.
133 87e25be1 Thomas Thrainer

134 87e25be1 Thomas Thrainer
    """
135 87e25be1 Thomas Thrainer
    instance = self._migrater.instance
136 87e25be1 Thomas Thrainer
    source_node = instance.primary_node
137 87e25be1 Thomas Thrainer
    target_node = self.op.target_node
138 87e25be1 Thomas Thrainer
    env = {
139 87e25be1 Thomas Thrainer
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
140 87e25be1 Thomas Thrainer
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
141 87e25be1 Thomas Thrainer
      "OLD_PRIMARY": source_node,
142 87e25be1 Thomas Thrainer
      "NEW_PRIMARY": target_node,
143 87e25be1 Thomas Thrainer
      }
144 87e25be1 Thomas Thrainer
145 87e25be1 Thomas Thrainer
    if instance.disk_template in constants.DTS_INT_MIRROR:
146 87e25be1 Thomas Thrainer
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
147 87e25be1 Thomas Thrainer
      env["NEW_SECONDARY"] = source_node
148 87e25be1 Thomas Thrainer
    else:
149 87e25be1 Thomas Thrainer
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
150 87e25be1 Thomas Thrainer
151 87e25be1 Thomas Thrainer
    env.update(_BuildInstanceHookEnvByObject(self, instance))
152 87e25be1 Thomas Thrainer
153 87e25be1 Thomas Thrainer
    return env
154 87e25be1 Thomas Thrainer
155 87e25be1 Thomas Thrainer
  def BuildHooksNodes(self):
156 87e25be1 Thomas Thrainer
    """Build hooks nodes.
157 87e25be1 Thomas Thrainer

158 87e25be1 Thomas Thrainer
    """
159 87e25be1 Thomas Thrainer
    instance = self._migrater.instance
160 87e25be1 Thomas Thrainer
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
161 87e25be1 Thomas Thrainer
    return (nl, nl + [instance.primary_node])
162 87e25be1 Thomas Thrainer
163 87e25be1 Thomas Thrainer
164 87e25be1 Thomas Thrainer
class LUInstanceMigrate(LogicalUnit):
165 87e25be1 Thomas Thrainer
  """Migrate an instance.
166 87e25be1 Thomas Thrainer

167 87e25be1 Thomas Thrainer
  This is migration without shutting down, compared to the failover,
168 87e25be1 Thomas Thrainer
  which is done with shutdown.
169 87e25be1 Thomas Thrainer

170 87e25be1 Thomas Thrainer
  """
171 87e25be1 Thomas Thrainer
  HPATH = "instance-migrate"
172 87e25be1 Thomas Thrainer
  HTYPE = constants.HTYPE_INSTANCE
173 87e25be1 Thomas Thrainer
  REQ_BGL = False
174 87e25be1 Thomas Thrainer
175 87e25be1 Thomas Thrainer
  def ExpandNames(self):
176 87e25be1 Thomas Thrainer
    self._ExpandAndLockInstance()
177 87e25be1 Thomas Thrainer
    _ExpandNamesForMigration(self)
178 87e25be1 Thomas Thrainer
179 87e25be1 Thomas Thrainer
    self._migrater = \
180 87e25be1 Thomas Thrainer
      TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
181 87e25be1 Thomas Thrainer
                        False, self.op.allow_failover, False,
182 87e25be1 Thomas Thrainer
                        self.op.allow_runtime_changes,
183 87e25be1 Thomas Thrainer
                        constants.DEFAULT_SHUTDOWN_TIMEOUT,
184 87e25be1 Thomas Thrainer
                        self.op.ignore_ipolicy)
185 87e25be1 Thomas Thrainer
186 87e25be1 Thomas Thrainer
    self.tasklets = [self._migrater]
187 87e25be1 Thomas Thrainer
188 87e25be1 Thomas Thrainer
  def DeclareLocks(self, level):
189 87e25be1 Thomas Thrainer
    _DeclareLocksForMigration(self, level)
190 87e25be1 Thomas Thrainer
191 87e25be1 Thomas Thrainer
  def BuildHooksEnv(self):
192 87e25be1 Thomas Thrainer
    """Build hooks env.
193 87e25be1 Thomas Thrainer

194 87e25be1 Thomas Thrainer
    This runs on master, primary and secondary nodes of the instance.
195 87e25be1 Thomas Thrainer

196 87e25be1 Thomas Thrainer
    """
197 87e25be1 Thomas Thrainer
    instance = self._migrater.instance
198 87e25be1 Thomas Thrainer
    source_node = instance.primary_node
199 87e25be1 Thomas Thrainer
    target_node = self.op.target_node
200 87e25be1 Thomas Thrainer
    env = _BuildInstanceHookEnvByObject(self, instance)
201 87e25be1 Thomas Thrainer
    env.update({
202 87e25be1 Thomas Thrainer
      "MIGRATE_LIVE": self._migrater.live,
203 87e25be1 Thomas Thrainer
      "MIGRATE_CLEANUP": self.op.cleanup,
204 87e25be1 Thomas Thrainer
      "OLD_PRIMARY": source_node,
205 87e25be1 Thomas Thrainer
      "NEW_PRIMARY": target_node,
206 87e25be1 Thomas Thrainer
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
207 87e25be1 Thomas Thrainer
      })
208 87e25be1 Thomas Thrainer
209 87e25be1 Thomas Thrainer
    if instance.disk_template in constants.DTS_INT_MIRROR:
210 87e25be1 Thomas Thrainer
      env["OLD_SECONDARY"] = target_node
211 87e25be1 Thomas Thrainer
      env["NEW_SECONDARY"] = source_node
212 87e25be1 Thomas Thrainer
    else:
213 87e25be1 Thomas Thrainer
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
214 87e25be1 Thomas Thrainer
215 87e25be1 Thomas Thrainer
    return env
216 87e25be1 Thomas Thrainer
217 87e25be1 Thomas Thrainer
  def BuildHooksNodes(self):
218 87e25be1 Thomas Thrainer
    """Build hooks nodes.
219 87e25be1 Thomas Thrainer

220 87e25be1 Thomas Thrainer
    """
221 87e25be1 Thomas Thrainer
    instance = self._migrater.instance
222 87e25be1 Thomas Thrainer
    snodes = list(instance.secondary_nodes)
223 87e25be1 Thomas Thrainer
    nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
224 87e25be1 Thomas Thrainer
    return (nl, nl)
225 87e25be1 Thomas Thrainer
226 87e25be1 Thomas Thrainer
227 87e25be1 Thomas Thrainer
class TLMigrateInstance(Tasklet):
228 87e25be1 Thomas Thrainer
  """Tasklet class for instance migration.
229 87e25be1 Thomas Thrainer

230 87e25be1 Thomas Thrainer
  @type live: boolean
231 87e25be1 Thomas Thrainer
  @ivar live: whether the migration will be done live or non-live;
232 87e25be1 Thomas Thrainer
      this variable is initialized only after CheckPrereq has run
233 87e25be1 Thomas Thrainer
  @type cleanup: boolean
234 87e25be1 Thomas Thrainer
  @ivar cleanup: Whether we clean up after a failed migration
235 87e25be1 Thomas Thrainer
  @type iallocator: string
236 87e25be1 Thomas Thrainer
  @ivar iallocator: The iallocator used to determine target_node
237 87e25be1 Thomas Thrainer
  @type target_node: string
238 87e25be1 Thomas Thrainer
  @ivar target_node: If given, the target_node to reallocate the instance to
239 87e25be1 Thomas Thrainer
  @type failover: boolean
240 87e25be1 Thomas Thrainer
  @ivar failover: Whether operation results in failover or migration
241 87e25be1 Thomas Thrainer
  @type fallback: boolean
242 87e25be1 Thomas Thrainer
  @ivar fallback: Whether fallback to failover is allowed if migration not
243 87e25be1 Thomas Thrainer
                  possible
244 87e25be1 Thomas Thrainer
  @type ignore_consistency: boolean
245 87e25be1 Thomas Thrainer
  @ivar ignore_consistency: Whether we should ignore consistency between source
246 87e25be1 Thomas Thrainer
                            and target node
247 87e25be1 Thomas Thrainer
  @type shutdown_timeout: int
248 87e25be1 Thomas Thrainer
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
249 87e25be1 Thomas Thrainer
  @type ignore_ipolicy: bool
250 87e25be1 Thomas Thrainer
  @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
251 87e25be1 Thomas Thrainer

252 87e25be1 Thomas Thrainer
  """
253 87e25be1 Thomas Thrainer
254 87e25be1 Thomas Thrainer
  # Constants
255 87e25be1 Thomas Thrainer
  _MIGRATION_POLL_INTERVAL = 1      # seconds
256 87e25be1 Thomas Thrainer
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
257 87e25be1 Thomas Thrainer
258 87e25be1 Thomas Thrainer
  def __init__(self, lu, instance_name, cleanup, failover, fallback,
259 87e25be1 Thomas Thrainer
               ignore_consistency, allow_runtime_changes, shutdown_timeout,
260 87e25be1 Thomas Thrainer
               ignore_ipolicy):
261 87e25be1 Thomas Thrainer
    """Initializes this class.
262 87e25be1 Thomas Thrainer

263 87e25be1 Thomas Thrainer
    """
264 87e25be1 Thomas Thrainer
    Tasklet.__init__(self, lu)
265 87e25be1 Thomas Thrainer
266 87e25be1 Thomas Thrainer
    # Parameters
267 87e25be1 Thomas Thrainer
    self.instance_name = instance_name
268 87e25be1 Thomas Thrainer
    self.cleanup = cleanup
269 87e25be1 Thomas Thrainer
    self.live = False # will be overridden later
270 87e25be1 Thomas Thrainer
    self.failover = failover
271 87e25be1 Thomas Thrainer
    self.fallback = fallback
272 87e25be1 Thomas Thrainer
    self.ignore_consistency = ignore_consistency
273 87e25be1 Thomas Thrainer
    self.shutdown_timeout = shutdown_timeout
274 87e25be1 Thomas Thrainer
    self.ignore_ipolicy = ignore_ipolicy
275 87e25be1 Thomas Thrainer
    self.allow_runtime_changes = allow_runtime_changes
276 87e25be1 Thomas Thrainer
277 87e25be1 Thomas Thrainer
  def CheckPrereq(self):
278 87e25be1 Thomas Thrainer
    """Check prerequisites.
279 87e25be1 Thomas Thrainer

280 87e25be1 Thomas Thrainer
    This checks that the instance is in the cluster.
281 87e25be1 Thomas Thrainer

282 87e25be1 Thomas Thrainer
    """
283 87e25be1 Thomas Thrainer
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
284 87e25be1 Thomas Thrainer
    instance = self.cfg.GetInstanceInfo(instance_name)
285 87e25be1 Thomas Thrainer
    assert instance is not None
286 87e25be1 Thomas Thrainer
    self.instance = instance
287 87e25be1 Thomas Thrainer
    cluster = self.cfg.GetClusterInfo()
288 87e25be1 Thomas Thrainer
289 87e25be1 Thomas Thrainer
    if (not self.cleanup and
290 87e25be1 Thomas Thrainer
        not instance.admin_state == constants.ADMINST_UP and
291 87e25be1 Thomas Thrainer
        not self.failover and self.fallback):
292 87e25be1 Thomas Thrainer
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
293 87e25be1 Thomas Thrainer
                      " switching to failover")
294 87e25be1 Thomas Thrainer
      self.failover = True
295 87e25be1 Thomas Thrainer
296 87e25be1 Thomas Thrainer
    if instance.disk_template not in constants.DTS_MIRRORED:
297 87e25be1 Thomas Thrainer
      if self.failover:
298 87e25be1 Thomas Thrainer
        text = "failovers"
299 87e25be1 Thomas Thrainer
      else:
300 87e25be1 Thomas Thrainer
        text = "migrations"
301 87e25be1 Thomas Thrainer
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
302 87e25be1 Thomas Thrainer
                                 " %s" % (instance.disk_template, text),
303 87e25be1 Thomas Thrainer
                                 errors.ECODE_STATE)
304 87e25be1 Thomas Thrainer
305 87e25be1 Thomas Thrainer
    if instance.disk_template in constants.DTS_EXT_MIRROR:
306 87e25be1 Thomas Thrainer
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
307 87e25be1 Thomas Thrainer
308 87e25be1 Thomas Thrainer
      if self.lu.op.iallocator:
309 87e25be1 Thomas Thrainer
        assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
310 87e25be1 Thomas Thrainer
        self._RunAllocator()
311 87e25be1 Thomas Thrainer
      else:
312 87e25be1 Thomas Thrainer
        # We set self.target_node as it is required by
313 87e25be1 Thomas Thrainer
        # BuildHooksEnv
314 87e25be1 Thomas Thrainer
        self.target_node = self.lu.op.target_node
315 87e25be1 Thomas Thrainer
316 87e25be1 Thomas Thrainer
      # Check that the target node is correct in terms of instance policy
317 87e25be1 Thomas Thrainer
      nodeinfo = self.cfg.GetNodeInfo(self.target_node)
318 87e25be1 Thomas Thrainer
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
319 87e25be1 Thomas Thrainer
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
320 87e25be1 Thomas Thrainer
                                                              group_info)
321 87e25be1 Thomas Thrainer
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
322 87e25be1 Thomas Thrainer
                              ignore=self.ignore_ipolicy)
323 87e25be1 Thomas Thrainer
324 87e25be1 Thomas Thrainer
      # self.target_node is already populated, either directly or by the
325 87e25be1 Thomas Thrainer
      # iallocator run
326 87e25be1 Thomas Thrainer
      target_node = self.target_node
327 87e25be1 Thomas Thrainer
      if self.target_node == instance.primary_node:
328 87e25be1 Thomas Thrainer
        raise errors.OpPrereqError("Cannot migrate instance %s"
329 87e25be1 Thomas Thrainer
                                   " to its primary (%s)" %
330 87e25be1 Thomas Thrainer
                                   (instance.name, instance.primary_node),
331 87e25be1 Thomas Thrainer
                                   errors.ECODE_STATE)
332 87e25be1 Thomas Thrainer
333 87e25be1 Thomas Thrainer
      if len(self.lu.tasklets) == 1:
334 87e25be1 Thomas Thrainer
        # It is safe to release locks only when we're the only tasklet
335 87e25be1 Thomas Thrainer
        # in the LU
336 87e25be1 Thomas Thrainer
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
337 87e25be1 Thomas Thrainer
                      keep=[instance.primary_node, self.target_node])
338 87e25be1 Thomas Thrainer
        _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
339 87e25be1 Thomas Thrainer
340 87e25be1 Thomas Thrainer
    else:
341 87e25be1 Thomas Thrainer
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
342 87e25be1 Thomas Thrainer
343 87e25be1 Thomas Thrainer
      secondary_nodes = instance.secondary_nodes
344 87e25be1 Thomas Thrainer
      if not secondary_nodes:
345 87e25be1 Thomas Thrainer
        raise errors.ConfigurationError("No secondary node but using"
346 87e25be1 Thomas Thrainer
                                        " %s disk template" %
347 87e25be1 Thomas Thrainer
                                        instance.disk_template)
348 87e25be1 Thomas Thrainer
      target_node = secondary_nodes[0]
349 87e25be1 Thomas Thrainer
      if self.lu.op.iallocator or (self.lu.op.target_node and
350 87e25be1 Thomas Thrainer
                                   self.lu.op.target_node != target_node):
351 87e25be1 Thomas Thrainer
        if self.failover:
352 87e25be1 Thomas Thrainer
          text = "failed over"
353 87e25be1 Thomas Thrainer
        else:
354 87e25be1 Thomas Thrainer
          text = "migrated"
355 87e25be1 Thomas Thrainer
        raise errors.OpPrereqError("Instances with disk template %s cannot"
356 87e25be1 Thomas Thrainer
                                   " be %s to arbitrary nodes"
357 87e25be1 Thomas Thrainer
                                   " (neither an iallocator nor a target"
358 87e25be1 Thomas Thrainer
                                   " node can be passed)" %
359 87e25be1 Thomas Thrainer
                                   (instance.disk_template, text),
360 87e25be1 Thomas Thrainer
                                   errors.ECODE_INVAL)
361 87e25be1 Thomas Thrainer
      nodeinfo = self.cfg.GetNodeInfo(target_node)
362 87e25be1 Thomas Thrainer
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
363 87e25be1 Thomas Thrainer
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
364 87e25be1 Thomas Thrainer
                                                              group_info)
365 87e25be1 Thomas Thrainer
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
366 87e25be1 Thomas Thrainer
                              ignore=self.ignore_ipolicy)
367 87e25be1 Thomas Thrainer
368 87e25be1 Thomas Thrainer
    i_be = cluster.FillBE(instance)
369 87e25be1 Thomas Thrainer
370 87e25be1 Thomas Thrainer
    # check memory requirements on the secondary node
371 87e25be1 Thomas Thrainer
    if (not self.cleanup and
372 87e25be1 Thomas Thrainer
         (not self.failover or instance.admin_state == constants.ADMINST_UP)):
373 87e25be1 Thomas Thrainer
      self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
374 87e25be1 Thomas Thrainer
                                               "migrating instance %s" %
375 87e25be1 Thomas Thrainer
                                               instance.name,
376 87e25be1 Thomas Thrainer
                                               i_be[constants.BE_MINMEM],
377 87e25be1 Thomas Thrainer
                                               instance.hypervisor)
378 87e25be1 Thomas Thrainer
    else:
379 87e25be1 Thomas Thrainer
      self.lu.LogInfo("Not checking memory on the secondary node as"
380 87e25be1 Thomas Thrainer
                      " instance will not be started")
381 87e25be1 Thomas Thrainer
382 87e25be1 Thomas Thrainer
    # check if failover must be forced instead of migration
383 87e25be1 Thomas Thrainer
    if (not self.cleanup and not self.failover and
384 87e25be1 Thomas Thrainer
        i_be[constants.BE_ALWAYS_FAILOVER]):
385 87e25be1 Thomas Thrainer
      self.lu.LogInfo("Instance configured to always failover; fallback"
386 87e25be1 Thomas Thrainer
                      " to failover")
387 87e25be1 Thomas Thrainer
      self.failover = True
388 87e25be1 Thomas Thrainer
389 87e25be1 Thomas Thrainer
    # check bridge existence
390 87e25be1 Thomas Thrainer
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
391 87e25be1 Thomas Thrainer
392 87e25be1 Thomas Thrainer
    if not self.cleanup:
393 87e25be1 Thomas Thrainer
      _CheckNodeNotDrained(self.lu, target_node)
394 87e25be1 Thomas Thrainer
      if not self.failover:
395 87e25be1 Thomas Thrainer
        result = self.rpc.call_instance_migratable(instance.primary_node,
396 87e25be1 Thomas Thrainer
                                                   instance)
397 87e25be1 Thomas Thrainer
        if result.fail_msg and self.fallback:
398 87e25be1 Thomas Thrainer
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
399 87e25be1 Thomas Thrainer
                          " failover")
400 87e25be1 Thomas Thrainer
          self.failover = True
401 87e25be1 Thomas Thrainer
        else:
402 87e25be1 Thomas Thrainer
          result.Raise("Can't migrate, please use failover",
403 87e25be1 Thomas Thrainer
                       prereq=True, ecode=errors.ECODE_STATE)
404 87e25be1 Thomas Thrainer
405 87e25be1 Thomas Thrainer
    assert not (self.failover and self.cleanup)
406 87e25be1 Thomas Thrainer
407 87e25be1 Thomas Thrainer
    if not self.failover:
408 87e25be1 Thomas Thrainer
      if self.lu.op.live is not None and self.lu.op.mode is not None:
409 87e25be1 Thomas Thrainer
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
410 87e25be1 Thomas Thrainer
                                   " parameters are accepted",
411 87e25be1 Thomas Thrainer
                                   errors.ECODE_INVAL)
412 87e25be1 Thomas Thrainer
      if self.lu.op.live is not None:
413 87e25be1 Thomas Thrainer
        if self.lu.op.live:
414 87e25be1 Thomas Thrainer
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
415 87e25be1 Thomas Thrainer
        else:
416 87e25be1 Thomas Thrainer
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
417 87e25be1 Thomas Thrainer
        # reset the 'live' parameter to None so that repeated
418 87e25be1 Thomas Thrainer
        # invocations of CheckPrereq do not raise an exception
419 87e25be1 Thomas Thrainer
        self.lu.op.live = None
420 87e25be1 Thomas Thrainer
      elif self.lu.op.mode is None:
421 87e25be1 Thomas Thrainer
        # read the default value from the hypervisor
422 87e25be1 Thomas Thrainer
        i_hv = cluster.FillHV(self.instance, skip_globals=False)
423 87e25be1 Thomas Thrainer
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
424 87e25be1 Thomas Thrainer
425 87e25be1 Thomas Thrainer
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
426 87e25be1 Thomas Thrainer
    else:
427 87e25be1 Thomas Thrainer
      # Failover is never live
428 87e25be1 Thomas Thrainer
      self.live = False
429 87e25be1 Thomas Thrainer
430 87e25be1 Thomas Thrainer
    if not (self.failover or self.cleanup):
431 87e25be1 Thomas Thrainer
      remote_info = self.rpc.call_instance_info(instance.primary_node,
432 87e25be1 Thomas Thrainer
                                                instance.name,
433 87e25be1 Thomas Thrainer
                                                instance.hypervisor)
434 87e25be1 Thomas Thrainer
      remote_info.Raise("Error checking instance on node %s" %
435 87e25be1 Thomas Thrainer
                        instance.primary_node)
436 87e25be1 Thomas Thrainer
      instance_running = bool(remote_info.payload)
437 87e25be1 Thomas Thrainer
      if instance_running:
438 87e25be1 Thomas Thrainer
        self.current_mem = int(remote_info.payload["memory"])
439 87e25be1 Thomas Thrainer
440 87e25be1 Thomas Thrainer
  def _RunAllocator(self):
441 87e25be1 Thomas Thrainer
    """Run the allocator based on input opcode.
442 87e25be1 Thomas Thrainer

443 87e25be1 Thomas Thrainer
    """
444 87e25be1 Thomas Thrainer
    assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
445 87e25be1 Thomas Thrainer
446 87e25be1 Thomas Thrainer
    # FIXME: add a self.ignore_ipolicy option
447 87e25be1 Thomas Thrainer
    req = iallocator.IAReqRelocate(name=self.instance_name,
448 87e25be1 Thomas Thrainer
                                   relocate_from=[self.instance.primary_node])
449 87e25be1 Thomas Thrainer
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
450 87e25be1 Thomas Thrainer
451 87e25be1 Thomas Thrainer
    ial.Run(self.lu.op.iallocator)
452 87e25be1 Thomas Thrainer
453 87e25be1 Thomas Thrainer
    if not ial.success:
454 87e25be1 Thomas Thrainer
      raise errors.OpPrereqError("Can't compute nodes using"
455 87e25be1 Thomas Thrainer
                                 " iallocator '%s': %s" %
456 87e25be1 Thomas Thrainer
                                 (self.lu.op.iallocator, ial.info),
457 87e25be1 Thomas Thrainer
                                 errors.ECODE_NORES)
458 87e25be1 Thomas Thrainer
    self.target_node = ial.result[0]
459 87e25be1 Thomas Thrainer
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
460 87e25be1 Thomas Thrainer
                    self.instance_name, self.lu.op.iallocator,
461 87e25be1 Thomas Thrainer
                    utils.CommaJoin(ial.result))
462 87e25be1 Thomas Thrainer
463 87e25be1 Thomas Thrainer
  def _WaitUntilSync(self):
    """Block until all nodes report their disks as synchronized.

    The C{drbd_wait_sync} RPC is issued to all involved nodes in a loop. As
    long as at least one node reports that it is not done, the lowest
    percentage reported by any node is sent to the feedback function (only if
    it is below 100) and the RPC is repeated after a two second pause.

    A failed per-node result is escalated through its C{Raise} method
    (presumably as L{errors.OpExecError}, which is what callers in this class
    catch).

    """
    self.feedback_fn("* wait until resync is done")
    while True:
      results = self.rpc.call_drbd_wait_sync(self.all_nodes, self.nodes_ip,
                                             (self.instance.disks,
                                              self.instance))
      synced = True
      lowest = 100
      for (node, nres) in results.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        (done, percent) = nres.payload
        if not done:
          synced = False
        # nodes may report no percentage at all; those are ignored here
        if percent is not None:
          lowest = min(lowest, percent)

      if synced:
        return

      if lowest < 100:
        self.feedback_fn("   - progress: %.1f%%" % lowest)
      time.sleep(2)
488 87e25be1 Thomas Thrainer
489 87e25be1 Thomas Thrainer
  def _EnsureSecondary(self, node):
    """Switch the instance's disks on the given node to secondary mode.

    Every disk of the instance gets its ID set for C{node} in the
    configuration, then the block devices are closed on that node via the
    C{blockdev_close} RPC.

    @param node: the node to demote; it is passed unchanged to the
        configuration and RPC layers

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    inst = self.instance
    for disk in inst.disks:
      self.cfg.SetDiskID(disk, node)

    close_result = self.rpc.call_blockdev_close(node, inst.name, inst.disks)
    close_result.Raise("Cannot change disk to secondary on node %s" % node)
501 87e25be1 Thomas Thrainer
502 87e25be1 Thomas Thrainer
  def _GoStandalone(self):
    """Disconnect the instance's disks from the network on all nodes.

    Issues the C{drbd_disconnect_net} RPC to every node in C{self.all_nodes}
    and escalates any per-node failure through the result's C{Raise} method.

    """
    self.feedback_fn("* changing into standalone mode")
    per_node = self.rpc.call_drbd_disconnect_net(self.all_nodes,
                                                 self.nodes_ip,
                                                 self.instance.disks)
    for (node, nres) in per_node.items():
      nres.Raise("Cannot disconnect disks node %s" % node)
511 87e25be1 Thomas Thrainer
512 87e25be1 Thomas Thrainer
  def _GoReconnect(self, multimaster):
    """Re-attach the instance's disks to the network on all nodes.

    @param multimaster: if true, the disks are announced as going into
        dual-master mode, otherwise single-master; the value is forwarded
        unchanged to the C{drbd_attach_net} RPC

    """
    mode = "single-master"
    if multimaster:
      mode = "dual-master"
    self.feedback_fn("* changing disks into %s mode" % mode)

    inst = self.instance
    per_node = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                             (inst.disks, inst),
                                             inst.name, multimaster)
    for (node, nres) in per_node.items():
      nres.Raise("Cannot change disks config on node %s" % node)
526 87e25be1 Thomas Thrainer
527 87e25be1 Thomas Thrainer
  def _ExecCleanup(self):
    """Bring the instance back to a consistent state after a failed migration.

    Steps performed:
      - ask both nodes which instances they run and require that this
        instance runs on exactly one of them
      - if it runs on the target node, record that node as the new primary
        in the configuration
      - for internally mirrored disk templates: demote the node which does
        not run the instance, wait for sync (errors ignored), disconnect
        from the network, reconnect in single-master mode and wait for sync
        again

    @raise errors.OpExecError: if the instance is found on both nodes or on
        none of them

    """
    inst = self.instance
    dst = self.target_node
    src = self.source_node

    # find out where the instance really is
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    running = self.rpc.call_instance_list(self.all_nodes, [inst.hypervisor])
    for (node, nres) in running.items():
      nres.Raise("Can't contact node %s" % node)

    on_source = inst.name in running[src].payload
    on_target = inst.name in running[dst].payload

    if on_source and on_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not on_source and not on_target:
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    # from here on exactly one of on_source/on_target is true
    if not on_target:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % src)
      to_demote = dst
    else:
      # the migration did go through, only the configuration is stale
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % dst)
      inst.primary_node = dst
      self.cfg.Update(inst, self.feedback_fn)
      to_demote = src

    if inst.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(to_demote)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # deliberately ignored: a standalone device is unable to sync, and
        # the reconnect below takes care of that situation
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")
592 87e25be1 Thomas Thrainer
593 87e25be1 Thomas Thrainer
  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    Nothing is done for disk templates in C{constants.DTS_EXT_MIRROR}. For
    all other templates the target node is demoted to secondary, the disks
    are disconnected, re-attached in single-master mode and a resync is
    awaited.

    This is a best-effort operation: an L{errors.OpExecError} raised by any
    of the steps is reported as a warning and not propagated, so that the
    caller can still raise the original migration error.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError as err:
      # "as" works on Python 2.6+ and 3.x, unlike the old comma form; the
      # error is handed over as a format argument so that the message is only
      # interpolated once, by LogWarning itself
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'", err)
610 87e25be1 Thomas Thrainer
611 87e25be1 Thomas Thrainer
  def _AbortMigration(self):
    """Ask the hypervisor on both nodes to abort a started migration.

    The migration is finalized as unsuccessful first on the target node and
    then on the source node. Failures of either call are only logged, never
    raised.

    """
    inst = self.instance
    dst = self.target_node
    src = self.source_node
    info = self.migration_info

    dst_err = self.rpc.call_instance_finalize_migration_dst(dst, inst, info,
                                                            False).fail_msg
    if dst_err:
      # No exception here: the source node still has to be handled and the
      # caller still has to try to revert the disk status.
      logging.error("Aborting migration failed on target node %s: %s",
                    dst, dst_err)

    src_err = self.rpc.call_instance_finalize_migration_src(src, inst, False,
                                                            self.live).fail_msg
    if src_err:
      logging.error("Aborting migration failed on source node %s: %s",
                    src, src_err)
637 87e25be1 Thomas Thrainer
638 87e25be1 Thomas Thrainer
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - warn about differing hypervisor versions on source and target
      - verify the disks are consistent on the target node
      - balloon the instance memory down if the target lacks free memory
        (only when runtime changes are allowed)
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance and poll until the memory transfer finished
      - finalize the migration on source and target node
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    The disk mode changes are skipped for disk templates in
    C{constants.DTS_EXT_MIRROR}; for the C{rbd} and C{ext} templates the
    disks are shut down on the source node after a successful migration.

    @raise errors.OpExecError: if a precondition is not met or any step of
        the migration fails; when the failure happens after the disks were
        switched, an abort of the migration and a revert of the disk status
        are attempted first

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, [self.instance.hypervisor], False)
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    (_, _, (src_info, )) = nodeinfo[source_node].payload
    (_, _, (dst_info, )) = nodeinfo[target_node].payload

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    if self.current_mem > self.tgt_free_mem:
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (instance.name, target_node,
                                  self.tgt_free_mem, self.current_mem))
      # shrink the instance so that it fits into the target's free memory
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # kept on self as well, since _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        # Without an RPC error the failure was reported via the status only;
        # fill in the reason before logging so the log entry is not empty.
        if not msg:
          msg = "hypervisor returned failure"
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if ms.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      # Progress can only be computed when both figures are known and the
      # total is non-zero; otherwise skip the report instead of crashing.
      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None and
          ms.total_ram):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # back to single-master, with the old primary as the new secondary
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    # If the instance's disk template is `rbd' or `ext' and there was a
    # successful migration, unmap the device from the source node.
    if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
      disks = _ExpandCheckDisks(instance, instance.disks)
      self.feedback_fn("* unmapping instance's disks from %s" % source_node)
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
        msg = result.fail_msg
        if msg:
          # only logged: the migration itself has already succeeded
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name, source_node, msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name, source_node)

    self.feedback_fn("* done")
821 87e25be1 Thomas Thrainer
822 87e25be1 Thomas Thrainer
  def _ExecFailover(self):
    """Fail the instance over to the target node.

    The instance is shut down on its current primary node, its disks are
    deactivated, the configuration is updated to make the target node the
    primary and, if the instance is marked as up, its disks are activated
    and it is started on the target node.

    @raise errors.OpExecError: if a disk is degraded on the target node
        (unless the primary node is offline or consistency is ignored), if
        the shutdown fails (same exceptions), or if the disks cannot be shut
        down or activated, or the instance cannot be started

    """
    inst = self.instance
    src_name = inst.primary_node
    src_info = self.cfg.GetNodeInfo(src_name)
    dst_name = self.target_node

    if inst.admin_state != constants.ADMINST_UP:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")
    else:
      self.feedback_fn("* checking disk consistency between source and target")
      for (idx, disk) in enumerate(inst.disks):
        # for drbd, these are drbd over lvm
        if _CheckDiskConsistency(self.lu, inst, disk, dst_name, False):
          continue
        if src_info.offline:
          self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                           " target node %s" %
                           (src_info.name, idx, dst_name))
        elif not self.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover" % idx)

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s", inst.name, src_name)

    shutdown_err = self.rpc.call_instance_shutdown(src_name, inst,
                                                   self.shutdown_timeout,
                                                   self.lu.op.reason).fail_msg
    if shutdown_err:
      if not (self.ignore_consistency or src_info.offline):
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, src_name, shutdown_err))
      self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                         " proceeding anyway; please make sure node"
                         " %s is down; error details: %s",
                         inst.name, src_name, src_name, shutdown_err)

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, inst, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    # make the target the new primary and push the configuration out
    inst.primary_node = dst_name
    self.cfg.Update(inst, self.feedback_fn)

    # an instance which is not marked as up is left stopped
    if inst.admin_state != constants.ADMINST_UP:
      return

    self.feedback_fn("* activating the instance's disks on target node %s" %
                     dst_name)
    logging.info("Starting instance %s on node %s", inst.name, dst_name)

    (disks_ok, _) = _AssembleInstanceDisks(self.lu, inst,
                                           ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self.lu, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    self.feedback_fn("* starting the instance on the target node %s" %
                     dst_name)
    start_err = self.rpc.call_instance_start(dst_name, (inst, None, None),
                                             False, self.lu.op.reason).fail_msg
    if start_err:
      _ShutdownInstanceDisks(self.lu, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, dst_name, start_err))
901 87e25be1 Thomas Thrainer
902 87e25be1 Thomas Thrainer
  def Exec(self, feedback_fn):
    """Run the failover, the cleanup or the migration.

    Records the feedback function, the source node, the list of involved
    nodes and their secondary IPs on the object, then dispatches according
    to C{self.failover} and C{self.cleanup}.

    @param feedback_fn: callable receiving progress messages; it is stored
        as C{self.feedback_fn} for use by the helper methods
    @return: the result of the cleanup or the migration helper; nothing for
        a failover

    """
    self.feedback_fn = feedback_fn
    inst = self.instance
    self.source_node = inst.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if inst.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = inst.secondary_nodes[0]
    # For all other templates self.target_node has already been populated,
    # either directly or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    secondary_ips = {}
    for (name, node) in self.cfg.GetMultiNodeInfo(self.all_nodes):
      secondary_ips[name] = node.secondary_ip
    self.nodes_ip = secondary_ips

    if self.failover:
      feedback_fn("Failover instance %s" % inst.name)
      self._ExecFailover()
      return None

    feedback_fn("Migrating instance %s" % inst.name)
    if self.cleanup:
      return self._ExecCleanup()
    return self._ExecMigration()