Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib / instance_migration.py @ bc0a2284

History | View | Annotate | Download (35.8 kB)

1 87e25be1 Thomas Thrainer
#
2 87e25be1 Thomas Thrainer
#
3 87e25be1 Thomas Thrainer
4 87e25be1 Thomas Thrainer
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
5 87e25be1 Thomas Thrainer
#
6 87e25be1 Thomas Thrainer
# This program is free software; you can redistribute it and/or modify
7 87e25be1 Thomas Thrainer
# it under the terms of the GNU General Public License as published by
8 87e25be1 Thomas Thrainer
# the Free Software Foundation; either version 2 of the License, or
9 87e25be1 Thomas Thrainer
# (at your option) any later version.
10 87e25be1 Thomas Thrainer
#
11 87e25be1 Thomas Thrainer
# This program is distributed in the hope that it will be useful, but
12 87e25be1 Thomas Thrainer
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 87e25be1 Thomas Thrainer
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 87e25be1 Thomas Thrainer
# General Public License for more details.
15 87e25be1 Thomas Thrainer
#
16 87e25be1 Thomas Thrainer
# You should have received a copy of the GNU General Public License
17 87e25be1 Thomas Thrainer
# along with this program; if not, write to the Free Software
18 87e25be1 Thomas Thrainer
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 87e25be1 Thomas Thrainer
# 02110-1301, USA.
20 87e25be1 Thomas Thrainer
21 87e25be1 Thomas Thrainer
22 87e25be1 Thomas Thrainer
"""Logical units dealing with instance migration and failover."""
23 87e25be1 Thomas Thrainer
24 87e25be1 Thomas Thrainer
import logging
25 87e25be1 Thomas Thrainer
import time
26 87e25be1 Thomas Thrainer
27 87e25be1 Thomas Thrainer
from ganeti import constants
28 87e25be1 Thomas Thrainer
from ganeti import errors
29 87e25be1 Thomas Thrainer
from ganeti import locking
30 87e25be1 Thomas Thrainer
from ganeti.masterd import iallocator
31 87e25be1 Thomas Thrainer
from ganeti import utils
32 87e25be1 Thomas Thrainer
from ganeti.cmdlib.base import LogicalUnit, Tasklet
33 5eacbcae Thomas Thrainer
from ganeti.cmdlib.common import ExpandInstanceName, \
34 5eacbcae Thomas Thrainer
  CheckIAllocatorOrNode, ExpandNodeName
35 5eacbcae Thomas Thrainer
from ganeti.cmdlib.instance_storage import CheckDiskConsistency, \
36 5eacbcae Thomas Thrainer
  ExpandCheckDisks, ShutdownInstanceDisks, AssembleInstanceDisks
37 5eacbcae Thomas Thrainer
from ganeti.cmdlib.instance_utils import BuildInstanceHookEnvByObject, \
38 5eacbcae Thomas Thrainer
  CheckTargetNodeIPolicy, ReleaseLocks, CheckNodeNotDrained, \
39 5eacbcae Thomas Thrainer
  CopyLockList, CheckNodeFreeMemory, CheckInstanceBridgesExist
40 87e25be1 Thomas Thrainer
41 87e25be1 Thomas Thrainer
import ganeti.masterd.instance
42 87e25be1 Thomas Thrainer
43 87e25be1 Thomas Thrainer
44 87e25be1 Thomas Thrainer
def _ExpandNamesForMigration(lu):
  """Prepares names and lock declarations for L{TLMigrateInstance}.

  If the opcode carries a target node, its name is replaced by the result
  of L{ExpandNodeName}. The node and node resource lock declarations are
  reset to empty lists and marked for recalculation using
  C{constants.LOCKS_REPLACE}; the node allocation lock declaration is
  reset to an empty list.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit whose C{op}, C{needed_locks} and
      C{recalculate_locks} are updated in place

  """
  requested_target = lu.op.target_node
  if requested_target is not None:
    lu.op.target_node = ExpandNodeName(lu.cfg, requested_target)

  # Both node-level lock sets start out empty and are replaced later on
  for lock_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
    lu.needed_locks[lock_level] = []
    lu.recalculate_locks[lock_level] = constants.LOCKS_REPLACE

  # The node allocation lock is actually only needed for externally replicated
  # instances (e.g. sharedfile or RBD) and if an iallocator is used.
  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
62 87e25be1 Thomas Thrainer
63 87e25be1 Thomas Thrainer
64 87e25be1 Thomas Thrainer
def _DeclareLocksForMigration(lu, level):
  """Declares locks for L{TLMigrateInstance}.

  The work is done per lock level:
    - node allocation level: looks up the instance and declares the node
      locks (and, if no target node was given for an externally mirrored
      instance, the node allocation lock) at the same time
    - node level: only verifies that node locks have been declared
    - node resource level: copies the node lock declaration

  Any other level is ignored.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit whose lock declarations are updated in place
  @param level: Lock level

  """
  if level == locking.LEVEL_NODE_ALLOC:
    assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)

    inst = lu.cfg.GetInstanceInfo(lu.op.instance_name)

    # Node locks are already declared here rather than at LEVEL_NODE as we need
    # the instance object anyway to declare the node allocation lock.
    if inst.disk_template not in constants.DTS_EXT_MIRROR:
      lu._LockInstancesNodes() # pylint: disable=W0212
      return

    if lu.op.target_node is None:
      # No target given: every node and the allocation lock are requested
      lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
    else:
      lu.needed_locks[locking.LEVEL_NODE] = [inst.primary_node,
                                             lu.op.target_node]

    # The node locks are final, hence no recalculation must take place
    del lu.recalculate_locks[locking.LEVEL_NODE]
    return

  if level == locking.LEVEL_NODE:
    # Node locks are declared together with the node allocation lock
    assert (lu.needed_locks[locking.LEVEL_NODE] or
            lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
    return

  if level == locking.LEVEL_NODE_RES:
    # Copy node locks
    node_locks = lu.needed_locks[locking.LEVEL_NODE]
    lu.needed_locks[locking.LEVEL_NODE_RES] = CopyLockList(node_locks)
98 87e25be1 Thomas Thrainer
99 87e25be1 Thomas Thrainer
100 87e25be1 Thomas Thrainer
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The actual work is delegated to a single L{TLMigrateInstance} tasklet
  which is created with its C{failover} flag set.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Stores the opcode's C{iallocator} and C{target_node} values on the
    logical unit; a missing attribute is recorded as C{None}.

    """
    for attr_name in ("iallocator", "target_node"):
      setattr(self, attr_name, getattr(self.op, attr_name, None))

  def ExpandNames(self):
    """Expands the instance name and sets up the failover tasklet.

    """
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=False,
                        failover=True,
                        fallback=False,
                        ignore_consistency=self.op.ignore_consistency,
                        allow_runtime_changes=True,
                        shutdown_timeout=self.op.shutdown_timeout,
                        ignore_ipolicy=self.op.ignore_ipolicy)

    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declares the locks for the given level.

    @param level: Lock level

    """
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    The secondary node variables are only filled in for internally
    mirrored disk templates; otherwise they are empty strings. The
    generic instance environment is merged in last.

    @return: the hooks environment

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node

    if inst.disk_template in constants.DTS_INT_MIRROR:
      old_secondary = inst.secondary_nodes[0]
      new_secondary = old_primary
    else:
      old_secondary = new_secondary = ""

    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "NEW_PRIMARY": self.op.target_node,
      "OLD_SECONDARY": old_secondary,
      "NEW_SECONDARY": new_secondary,
      }

    env.update(BuildInstanceHookEnvByObject(self, inst))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two node lists; the first one holds the master node and the
        instance's secondary nodes, the second one additionally ends with
        the instance's primary node

    """
    inst = self._migrater.instance

    first_nodes = [self.cfg.GetMasterNode()]
    first_nodes.extend(inst.secondary_nodes)
    second_nodes = first_nodes + [inst.primary_node]

    return (first_nodes, second_nodes)
162 87e25be1 Thomas Thrainer
163 87e25be1 Thomas Thrainer
164 87e25be1 Thomas Thrainer
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The actual work is delegated to a single
  L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expands the instance name and sets up the migration tasklet.

    """
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        failover=False,
                        fallback=self.op.allow_failover,
                        ignore_consistency=False,
                        allow_runtime_changes=self.op.allow_runtime_changes,
                        shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
                        ignore_ipolicy=self.op.ignore_ipolicy)

    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declares the locks for the given level.

    @param level: Lock level

    """
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    Starts from the generic instance environment and adds the migration
    specific variables. The secondary node variables are only filled in
    for internally mirrored disk templates; otherwise they are C{None}.

    @return: the hooks environment

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node
    new_primary = self.op.target_node

    if inst.disk_template in constants.DTS_INT_MIRROR:
      # Primary and secondary simply swap their roles
      old_secondary = new_primary
      new_secondary = old_primary
    else:
      old_secondary = new_secondary = None

    env = BuildInstanceHookEnvByObject(self, inst)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": old_primary,
      "NEW_PRIMARY": new_primary,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      "OLD_SECONDARY": old_secondary,
      "NEW_SECONDARY": new_secondary,
      })

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same node list twice; it holds the master node, the
        instance's primary node and the instance's secondary nodes

    """
    inst = self._migrater.instance

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    hook_nodes.extend(inst.secondary_nodes)

    return (hook_nodes, hook_nodes)
225 87e25be1 Thomas Thrainer
226 87e25be1 Thomas Thrainer
227 87e25be1 Thomas Thrainer
class TLMigrateInstance(Tasklet):
228 87e25be1 Thomas Thrainer
  """Tasklet class for instance migration.
229 87e25be1 Thomas Thrainer

230 87e25be1 Thomas Thrainer
  @type live: boolean
231 87e25be1 Thomas Thrainer
  @ivar live: whether the migration will be done live or non-live;
232 87e25be1 Thomas Thrainer
      this variable is initialized only after CheckPrereq has run
233 87e25be1 Thomas Thrainer
  @type cleanup: boolean
234 87e25be1 Thomas Thrainer
  @ivar cleanup: Whether we clean up from a failed migration
235 87e25be1 Thomas Thrainer
  @type iallocator: string
236 87e25be1 Thomas Thrainer
  @ivar iallocator: The iallocator used to determine target_node
237 87e25be1 Thomas Thrainer
  @type target_node: string
238 87e25be1 Thomas Thrainer
  @ivar target_node: If given, the target_node to reallocate the instance to
239 87e25be1 Thomas Thrainer
  @type failover: boolean
240 87e25be1 Thomas Thrainer
  @ivar failover: Whether operation results in failover or migration
241 87e25be1 Thomas Thrainer
  @type fallback: boolean
242 87e25be1 Thomas Thrainer
  @ivar fallback: Whether fallback to failover is allowed if migration not
243 87e25be1 Thomas Thrainer
                  possible
244 87e25be1 Thomas Thrainer
  @type ignore_consistency: boolean
245 87e25be1 Thomas Thrainer
  @ivar ignore_consistency: Whether we should ignore consistency between source
246 87e25be1 Thomas Thrainer
                            and target node
247 87e25be1 Thomas Thrainer
  @type shutdown_timeout: int
248 87e25be1 Thomas Thrainer
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
249 87e25be1 Thomas Thrainer
  @type ignore_ipolicy: bool
250 87e25be1 Thomas Thrainer
  @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
251 87e25be1 Thomas Thrainer

252 87e25be1 Thomas Thrainer
  """
253 87e25be1 Thomas Thrainer
254 87e25be1 Thomas Thrainer
  # Constants
255 87e25be1 Thomas Thrainer
  _MIGRATION_POLL_INTERVAL = 1      # seconds
256 87e25be1 Thomas Thrainer
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
257 87e25be1 Thomas Thrainer
258 87e25be1 Thomas Thrainer
  def __init__(self, lu, instance_name, cleanup, failover, fallback,
               ignore_consistency, allow_runtime_changes, shutdown_timeout,
               ignore_ipolicy):
    """Initializes this class.

    All arguments except C{lu} are stored unchanged as attributes of the
    same name; C{lu} is handed to the L{Tasklet} constructor.

    @param lu: Logical unit this tasklet belongs to
    @param instance_name: Name of the instance to operate on
    @param cleanup: Value for the C{cleanup} attribute
    @param failover: Value for the C{failover} attribute
    @param fallback: Value for the C{fallback} attribute
    @param ignore_consistency: Value for the C{ignore_consistency} attribute
    @param allow_runtime_changes: Value for the C{allow_runtime_changes}
        attribute
    @param shutdown_timeout: Value for the C{shutdown_timeout} attribute
    @param ignore_ipolicy: Value for the C{ignore_ipolicy} attribute

    """
    Tasklet.__init__(self, lu)

    # What to operate on
    self.instance_name = instance_name

    # Kind of operation
    self.cleanup = cleanup
    self.failover = failover
    self.fallback = fallback

    # Placeholder only, the real value is computed later
    self.live = False

    # Tunables
    self.ignore_consistency = ignore_consistency
    self.allow_runtime_changes = allow_runtime_changes
    self.shutdown_timeout = shutdown_timeout
    self.ignore_ipolicy = ignore_ipolicy
276 87e25be1 Thomas Thrainer
277 87e25be1 Thomas Thrainer
  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the instance, determines the target node, verifies that the
    operation can be carried out and decides whether it will be a live
    migration, a non-live migration or a failover.

    Attributes written: C{instance} and C{live} always; C{failover} when a
    migration is turned into a failover; C{target_node} for externally
    mirrored disk templates; C{tgt_free_mem} when the target node's memory
    is checked; C{current_mem} when a migration finds the instance
    running. The opcode's C{mode} and C{live} fields may be rewritten.

    @raise errors.OpPrereqError: if the disk template is not mirrored, the
        target node is the primary node, a target node or iallocator was
        given for an internally mirrored instance, or both C{live} and
        C{mode} were passed
    @raise errors.ConfigurationError: if an internally mirrored instance
        has no secondary node

    """
    inst_name = ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(inst_name)
    assert instance is not None
    self.instance = instance
    cluster = self.cfg.GetClusterInfo()

    def _VerifyTargetIPolicy(node_name):
      # Check that the target node is correct in terms of instance policy
      node = self.cfg.GetNodeInfo(node_name)
      group = self.cfg.GetNodeGroup(node.group)
      policy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
      CheckTargetNodeIPolicy(self.lu, policy, instance, node, self.cfg,
                             ignore=self.ignore_ipolicy)

    if (not (self.cleanup or
             instance.admin_state == constants.ADMINST_UP or
             self.failover) and
        self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if not self.failover:
        operation = "migrations"
      else:
        operation = "failovers"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, operation),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if not self.lu.op.iallocator:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node
      else:
        assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
        self._RunAllocator()

      _VerifyTargetIPolicy(self.target_node)

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        nodes_to_keep = [instance.primary_node, self.target_node]
        ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=nodes_to_keep)
        ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    else:
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)

      secondaries = instance.secondary_nodes
      if not secondaries:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondaries[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if not self.failover:
          outcome = "migrated"
        else:
          outcome = "failed over"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, outcome),
                                   errors.ECODE_INVAL)

      _VerifyTargetIPolicy(target_node)

    i_be = cluster.FillBE(instance)

    # check memory requirements on the secondary node
    if (self.cleanup or
        (self.failover and
         not instance.admin_state == constants.ADMINST_UP)):
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")
    else:
      self.tgt_free_mem = CheckNodeFreeMemory(
          self.lu, target_node, "migrating instance %s" % instance.name,
          i_be[constants.BE_MINMEM], instance.hypervisor,
          self.cfg.GetClusterInfo().hvparams[instance.hypervisor])

    # check if failover must be forced instead of migration
    if (not (self.cleanup or self.failover) and
        i_be[constants.BE_ALWAYS_FAILOVER]):
      self.lu.LogInfo("Instance configured to always failover; fallback"
                      " to failover")
      self.failover = True

    # check bridge existence
    CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        migratable = self.rpc.call_instance_migratable(instance.primary_node,
                                                       instance)
        if migratable.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          migratable.Raise("Can't migrate, please use failover",
                           prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if self.failover:
      # Failover is never live
      self.live = False
    else:
      op = self.lu.op
      if op.live is not None and op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if op.live is not None:
        if op.live:
          op.mode = constants.HT_MIGRATION_LIVE
        else:
          op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        op.live = None
      elif op.mode is None:
        # read the default value from the hypervisor
        i_hv = cluster.FillHV(self.instance, skip_globals=False)
        op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = op.mode == constants.HT_MIGRATION_LIVE

    if self.failover or self.cleanup:
      return

    # Only a real migration needs to know about the running instance
    remote_info = self.rpc.call_instance_info(
        instance.primary_node, instance.name, instance.hypervisor,
        cluster.hvparams[instance.hypervisor])
    remote_info.Raise("Error checking instance on node %s" %
                      instance.primary_node)
    if remote_info.payload:
      self.current_mem = int(remote_info.payload["memory"])
438 87e25be1 Thomas Thrainer
439 87e25be1 Thomas Thrainer
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    Issues a relocation request for the instance, moving it away from its
    primary node, and stores the first node of the result in
    C{self.target_node}.

    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)

    # FIXME: add a self.ignore_ipolicy option
    relocate_req = \
      iallocator.IAReqRelocate(name=self.instance_name,
                               relocate_from=[self.instance.primary_node])
    ial = iallocator.IAllocator(self.cfg, self.rpc, relocate_req)

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      failure_msg = ("Can't compute nodes using iallocator '%s': %s" %
                     (self.lu.op.iallocator, ial.info))
      raise errors.OpPrereqError(failure_msg, errors.ECODE_NORES)

    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))
461 87e25be1 Thomas Thrainer
462 87e25be1 Thomas Thrainer
  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call. All nodes are queried until
    every one of them reports completion; between two rounds the lowest
    reported percentage is shown (if below 100) and the method sleeps for
    two seconds.

    """
    self.feedback_fn("* wait until resync is done")

    while True:
      sync_result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                                 self.nodes_ip,
                                                 (self.instance.disks,
                                                  self.instance))
      everything_synced = True
      lowest_percent = 100

      for node, nres in sync_result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        (node_done, node_percent) = nres.payload
        if not node_done:
          everything_synced = False
        if node_percent is not None:
          lowest_percent = min(lowest_percent, node_percent)

      if everything_synced:
        break

      if lowest_percent < 100:
        self.feedback_fn("   - progress: %.1f%%" % lowest_percent)
      time.sleep(2)
487 87e25be1 Thomas Thrainer
488 87e25be1 Thomas Thrainer
  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    Sets the disk IDs of all instance disks for the given node and then
    closes the block devices there.

    @param node: the node to be switched to secondary mode

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    inst = self.instance
    for disk in inst.disks:
      self.cfg.SetDiskID(disk, node)

    close_result = self.rpc.call_blockdev_close(node, inst.name, inst.disks)
    close_result.Raise("Cannot change disk to secondary on node %s" % node)
500 87e25be1 Thomas Thrainer
501 87e25be1 Thomas Thrainer
  def _GoStandalone(self):
    """Detach the instance's disks from the network on all nodes.

    Sends the C{drbd_disconnect_net} RPC to every node in C{self.all_nodes}
    and raises (through the RPC result) if any of them failed.

    """
    self.feedback_fn("* changing into standalone mode")
    replies = self.rpc.call_drbd_disconnect_net(self.all_nodes,
                                                self.nodes_ip,
                                                self.instance.disks)
    for (node_name, reply) in replies.items():
      reply.Raise("Cannot disconnect disks node %s" % node_name)
510 87e25be1 Thomas Thrainer
511 87e25be1 Thomas Thrainer
  def _GoReconnect(self, multimaster):
    """Re-attach the instance's disks to the network on all nodes.

    @param multimaster: passed on to the C{drbd_attach_net} RPC; when true
        the disks are announced as going into dual-master mode, otherwise
        into single-master mode

    """
    mode = "dual-master" if multimaster else "single-master"
    self.feedback_fn("* changing disks into %s mode" % mode)

    inst = self.instance
    replies = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                            (inst.disks, inst),
                                            inst.name, multimaster)
    for (node_name, reply) in replies.items():
      reply.Raise("Cannot change disks config on node %s" % node_name)
525 87e25be1 Thomas Thrainer
526 87e25be1 Thomas Thrainer
  def _ExecCleanup(self):
    """Bring instance and disks back to a sane state after a failed migration.

    Steps performed:
      - ask both nodes for their instance list and make sure the instance
        runs on exactly one of them
      - if it runs on the target node, record that node as the new primary
        in the configuration
      - for disk templates in C{constants.DTS_INT_MIRROR} only: switch the
        disks on the node not running the instance to secondary, wait for
        sync (errors ignored), disconnect from the network, reconnect in
        single-master mode and wait for sync again

    @raise errors.OpExecError: if the instance is reported as running on
        both nodes or on neither of them

    """
    inst = self.instance
    src_node = self.source_node
    dst_node = self.target_node

    # Find out where the instance really is
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    hvparams = self.cfg.GetClusterInfo().hvparams
    listing = self.rpc.call_instance_list(self.all_nodes, [inst.hypervisor],
                                          hvparams)
    for (node_name, node_result) in listing.items():
      node_result.Raise("Can't contact node %s" % node_name)

    on_source = inst.name in listing[src_node].payload
    on_target = inst.name in listing[dst_node].payload

    if on_source and on_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not on_source and not on_target:
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    # From here on exactly one of on_source/on_target is true
    if on_source:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % src_node)
      node_to_demote = dst_node
    else:
      # The migration went through after all; only the config is stale
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % dst_node)
      inst.primary_node = dst_node
      self.cfg.Update(inst, self.feedback_fn)
      node_to_demote = src_node

    if inst.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(node_to_demote)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # Deliberately ignored: a standalone device cannot sync, and the
        # disconnect/reconnect below takes care of that case
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")
593 87e25be1 Thomas Thrainer
594 87e25be1 Thomas Thrainer
  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    Nothing is done for disk templates in C{constants.DTS_EXT_MIRROR}. For
    all others the target node is switched back to secondary and the disks
    are disconnected, reconnected in single-master mode and resynced.

    This is a best-effort operation: an L{errors.OpExecError} raised by any
    of the steps is reported as a warning and not propagated, so that the
    caller can still raise the original migration error.

    """
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(self.target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError as err:
      # Format arguments are handed to LogWarning instead of pre-formatting
      # the message, matching the other LogWarning call in this class
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'", str(err))
611 87e25be1 Thomas Thrainer
612 87e25be1 Thomas Thrainer
  def _AbortMigration(self):
    """Tell both nodes that the started migration is being aborted.

    The destination-side finalization RPC is sent to the target node and
    the source-side one to the source node, both with the success flag set
    to C{False}. Failures of either call are only logged.

    """
    inst = self.instance
    src_node = self.source_node
    dst_node = self.target_node
    mig_info = self.migration_info

    dst_result = self.rpc.call_instance_finalize_migration_dst(dst_node,
                                                               inst,
                                                               mig_info,
                                                               False)
    dst_error = dst_result.fail_msg
    if dst_error:
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
      logging.error("Aborting migration failed on target node %s: %s",
                    dst_node, dst_error)

    src_result = self.rpc.call_instance_finalize_migration_src(src_node,
                                                               inst,
                                                               False,
                                                               self.live)
    src_error = src_result.fail_msg
    if src_error:
      logging.error("Aborting migration failed on source node %s: %s",
                    src_node, src_error)
638 87e25be1 Thomas Thrainer
639 87e25be1 Thomas Thrainer
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - warn the user if source and target node report different hypervisor
        versions
      - check that all disks are consistent on the target node
      - balloon the instance down to the target's free memory, if it would
        not fit otherwise and runtime changes are allowed
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    The disk mode changes are skipped for disk templates in
    C{constants.DTS_EXT_MIRROR}. For the C{DT_RBD} and C{DT_EXT} templates
    the disks are shut down on the source node after a successful migration.

    If accepting the instance, starting the migration or the memory transfer
    fails, the migration is aborted on both nodes and the disk status is
    reverted before the error is raised.

    @raise errors.OpExecError: if a disk is degraded on the target node, if
        the instance does not fit and ballooning is not allowed, if the
        migration information cannot be fetched, or if the migration or its
        finalization fails

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    hvspecs = [(instance.hypervisor,
                self.cfg.GetClusterInfo().hvparams[instance.hypervisor])]
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, hvspecs, False)
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    (_, _, (src_info, )) = nodeinfo[source_node].payload
    (_, _, (dst_info, )) = nodeinfo[target_node].payload

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      if not CheckDiskConsistency(self.lu, instance, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    if self.current_mem > self.tgt_free_mem:
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (instance.name, target_node,
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # Kept on self as well, since _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    cluster = self.cfg.GetClusterInfo()
    result = self.rpc.call_instance_migrate(
        source_node, cluster.cluster_name, instance, self.nodes_ip[target_node],
        self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        # The RPC itself may have succeeded while the hypervisor reports a
        # failed status; fill in a reason first so the log entry is not empty
        if not msg:
          msg = "hypervisor returned failure"
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if ms.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      # A missing or zero total would make the division below raise and
      # abort this method while the migration is still running, so the
      # progress line is skipped in that case
      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None and
          ms.total_ram):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    # If the instance's disk template is `rbd' or `ext' and there was a
    # successful migration, unmap the device from the source node.
    if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
      disks = ExpandCheckDisks(instance, instance.disks)
      self.feedback_fn("* unmapping instance's disks from %s" % source_node)
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
        msg = result.fail_msg
        if msg:
          # Only logged: the migration itself has already succeeded
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name, source_node, msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name, source_node)

    self.feedback_fn("* done")
825 87e25be1 Thomas Thrainer
826 87e25be1 Thomas Thrainer
  def _ExecFailover(self):
    """Fail the instance over to the target node.

    The instance is shut down on its current primary node, its disks are
    deactivated, the target node is recorded as the new primary and, if the
    instance is administratively up, disks and instance are started there.

    Degraded disks on the target and a failed shutdown on the source are
    tolerated if the primary node is offline; C{self.ignore_consistency}
    additionally tolerates both cases.

    @raise errors.OpExecError: if a disk is degraded on the target node, the
        instance or its disks cannot be shut down, or the disks or the
        instance cannot be started on the target node

    """
    inst = self.instance
    old_primary = self.cfg.GetNodeInfo(inst.primary_node)

    src_node = inst.primary_node
    dst_node = self.target_node

    if not inst.disks_active:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")
    else:
      self.feedback_fn("* checking disk consistency between source and target")
      for (idx, dev) in enumerate(inst.disks):
        # for drbd, these are drbd over lvm
        if CheckDiskConsistency(self.lu, inst, dev, dst_node, False):
          continue
        if old_primary.offline:
          self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                           " target node %s" %
                           (old_primary.name, idx, dst_node))
        elif not self.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover" % idx)

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, src_node)

    shutdown_result = self.rpc.call_instance_shutdown(src_node, inst,
                                                      self.shutdown_timeout,
                                                      self.lu.op.reason)
    shutdown_error = shutdown_result.fail_msg
    if shutdown_error:
      if not (self.ignore_consistency or old_primary.offline):
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, src_node, shutdown_error))
      self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                         " proceeding anyway; please make sure node"
                         " %s is down; error details: %s",
                         inst.name, src_node, src_node, shutdown_error)

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not ShutdownInstanceDisks(self.lu, inst, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    inst.primary_node = dst_node
    # distribute new instance config to the other nodes
    self.cfg.Update(inst, self.feedback_fn)

    # Only start the instance if it's marked as up
    if inst.admin_state != constants.ADMINST_UP:
      return

    self.feedback_fn("* activating the instance's disks on target node %s" %
                     dst_node)
    logging.info("Starting instance %s on node %s",
                 inst.name, dst_node)

    (disks_ok, _) = AssembleInstanceDisks(self.lu, inst,
                                          ignore_secondaries=True)
    if not disks_ok:
      ShutdownInstanceDisks(self.lu, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    self.feedback_fn("* starting the instance on the target node %s" %
                     dst_node)
    start_result = self.rpc.call_instance_start(dst_node, (inst, None, None),
                                                False, self.lu.op.reason)
    start_error = start_result.fail_msg
    if start_error:
      # Do not leave the disks active if the instance could not be started
      ShutdownInstanceDisks(self.lu, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, dst_node, start_error))
905 87e25be1 Thomas Thrainer
906 87e25be1 Thomas Thrainer
  def Exec(self, feedback_fn):
    """Run the failover, migration cleanup or migration.

    Records the feedback function, the source/target nodes and the
    secondary IPs of both nodes on C{self}, then dispatches on
    C{self.failover} and C{self.cleanup}.

    @param feedback_fn: function used to report progress to the user
    @return: the result of the cleanup or migration; nothing for failover

    """
    self.feedback_fn = feedback_fn
    inst = self.instance
    self.source_node = inst.primary_node

    # For all other disk templates self.target_node has been populated
    # either directly, or through an iallocator.
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if inst.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = inst.secondary_nodes[0]

    self.all_nodes = [self.source_node, self.target_node]
    secondary_ips = {}
    for (name, node) in self.cfg.GetMultiNodeInfo(self.all_nodes):
      secondary_ips[name] = node.secondary_ip
    self.nodes_ip = secondary_ips

    if not self.failover:
      feedback_fn("Migrating instance %s" % inst.name)
      if self.cleanup:
        return self._ExecCleanup()
      return self._ExecMigration()

    feedback_fn("Failover instance %s" % inst.name)
    self._ExecFailover()