Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib / instance_migration.py @ 3ad780e4

History | View | Annotate | Download (36.1 kB)

1 87e25be1 Thomas Thrainer
#
2 87e25be1 Thomas Thrainer
#
3 87e25be1 Thomas Thrainer
4 87e25be1 Thomas Thrainer
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
5 87e25be1 Thomas Thrainer
#
6 87e25be1 Thomas Thrainer
# This program is free software; you can redistribute it and/or modify
7 87e25be1 Thomas Thrainer
# it under the terms of the GNU General Public License as published by
8 87e25be1 Thomas Thrainer
# the Free Software Foundation; either version 2 of the License, or
9 87e25be1 Thomas Thrainer
# (at your option) any later version.
10 87e25be1 Thomas Thrainer
#
11 87e25be1 Thomas Thrainer
# This program is distributed in the hope that it will be useful, but
12 87e25be1 Thomas Thrainer
# WITHOUT ANY WARRANTY; without even the implied warranty of
13 87e25be1 Thomas Thrainer
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 87e25be1 Thomas Thrainer
# General Public License for more details.
15 87e25be1 Thomas Thrainer
#
16 87e25be1 Thomas Thrainer
# You should have received a copy of the GNU General Public License
17 87e25be1 Thomas Thrainer
# along with this program; if not, write to the Free Software
18 87e25be1 Thomas Thrainer
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 87e25be1 Thomas Thrainer
# 02110-1301, USA.
20 87e25be1 Thomas Thrainer
21 87e25be1 Thomas Thrainer
22 87e25be1 Thomas Thrainer
"""Logical units dealing with instance migration and failover."""
23 87e25be1 Thomas Thrainer
24 87e25be1 Thomas Thrainer
import logging
25 87e25be1 Thomas Thrainer
import time
26 87e25be1 Thomas Thrainer
27 87e25be1 Thomas Thrainer
from ganeti import constants
28 87e25be1 Thomas Thrainer
from ganeti import errors
29 87e25be1 Thomas Thrainer
from ganeti import locking
30 87e25be1 Thomas Thrainer
from ganeti.masterd import iallocator
31 87e25be1 Thomas Thrainer
from ganeti import utils
32 87e25be1 Thomas Thrainer
from ganeti.cmdlib.base import LogicalUnit, Tasklet
33 5eacbcae Thomas Thrainer
from ganeti.cmdlib.common import ExpandInstanceName, \
34 5eacbcae Thomas Thrainer
  CheckIAllocatorOrNode, ExpandNodeName
35 5eacbcae Thomas Thrainer
from ganeti.cmdlib.instance_storage import CheckDiskConsistency, \
36 5eacbcae Thomas Thrainer
  ExpandCheckDisks, ShutdownInstanceDisks, AssembleInstanceDisks
37 5eacbcae Thomas Thrainer
from ganeti.cmdlib.instance_utils import BuildInstanceHookEnvByObject, \
38 5eacbcae Thomas Thrainer
  CheckTargetNodeIPolicy, ReleaseLocks, CheckNodeNotDrained, \
39 5eacbcae Thomas Thrainer
  CopyLockList, CheckNodeFreeMemory, CheckInstanceBridgesExist
40 87e25be1 Thomas Thrainer
41 87e25be1 Thomas Thrainer
import ganeti.masterd.instance
42 87e25be1 Thomas Thrainer
43 87e25be1 Thomas Thrainer
44 87e25be1 Thomas Thrainer
def _ExpandNamesForMigration(lu):
  """Prepares the initial lock declarations for L{TLMigrateInstance}.

  If the opcode names a target node, that name is expanded through
  L{ExpandNodeName}. The node and node resource lock levels start out empty
  and are flagged with C{constants.LOCKS_REPLACE}; the node allocation lock
  level starts out empty as well.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit whose opcode and lock declarations are updated

  """
  wanted_target = lu.op.target_node
  if wanted_target is not None:
    lu.op.target_node = ExpandNodeName(lu.cfg, wanted_target)

  # Both node-based levels start empty and are marked for replacement
  for lock_level in (locking.LEVEL_NODE, locking.LEVEL_NODE_RES):
    lu.needed_locks[lock_level] = []
    lu.recalculate_locks[lock_level] = constants.LOCKS_REPLACE

  # The node allocation lock is actually only needed for externally replicated
  # instances (e.g. sharedfile or RBD) and if an iallocator is used.
  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
62 87e25be1 Thomas Thrainer
63 87e25be1 Thomas Thrainer
64 87e25be1 Thomas Thrainer
def _DeclareLocksForMigration(lu, level):
  """Declares locks for L{TLMigrateInstance}.

  At the node allocation level the node locks are decided as well: for
  externally mirrored disk templates either all nodes plus the node
  allocation lock (no target node given) or just the primary and the target
  node are requested; for every other template the decision is delegated to
  C{lu._LockInstancesNodes}. The node level only verifies that a declaration
  exists, and the node resource level copies the node level declaration.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit whose lock declarations are updated
  @param level: Lock level

  """
  if level == locking.LEVEL_NODE_ALLOC:
    inst_name = lu.op.instance_name
    assert inst_name in lu.owned_locks(locking.LEVEL_INSTANCE)

    inst = lu.cfg.GetInstanceInfo(inst_name)

    # Node locks are already declared here rather than at LEVEL_NODE as we need
    # the instance object anyway to declare the node allocation lock.
    if inst.disk_template not in constants.DTS_EXT_MIRROR:
      lu._LockInstancesNodes() # pylint: disable=W0212
      return

    target = lu.op.target_node
    if target is None:
      # No explicit target: request every node and the allocation lock
      lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
    else:
      lu.needed_locks[locking.LEVEL_NODE] = [inst.primary_node, target]

    # The explicit declaration above must not be recalculated
    del lu.recalculate_locks[locking.LEVEL_NODE]
    return

  if level == locking.LEVEL_NODE:
    # Node locks are declared together with the node allocation lock
    assert (lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET or
            lu.needed_locks[locking.LEVEL_NODE])
    return

  if level == locking.LEVEL_NODE_RES:
    # Copy node locks
    node_locks = lu.needed_locks[locking.LEVEL_NODE]
    lu.needed_locks[locking.LEVEL_NODE_RES] = CopyLockList(node_locks)
98 87e25be1 Thomas Thrainer
99 87e25be1 Thomas Thrainer
100 87e25be1 Thomas Thrainer
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The work itself is carried out by a single L{TLMigrateInstance} tasklet
  which is created in failover mode.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Stores the optional C{iallocator} and C{target_node} opcode parameters
    on the LU, using C{None} when the opcode does not carry them.

    """
    opcode = self.op
    self.iallocator = getattr(opcode, "iallocator", None)
    self.target_node = getattr(opcode, "target_node", None)

  def ExpandNames(self):
    """Expands the instance name and sets up the failover tasklet.

    """
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    opcode = self.op
    tasklet = TLMigrateInstance(self, opcode.instance_name, opcode.cleanup,
                                failover=True, fallback=False,
                                ignore_consistency=opcode.ignore_consistency,
                                allow_runtime_changes=True,
                                shutdown_timeout=opcode.shutdown_timeout,
                                ignore_ipolicy=opcode.ignore_ipolicy)

    self._migrater = tasklet
    self.tasklets = [tasklet]

  def DeclareLocks(self, level):
    """Declares the locks for the given level.

    """
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    migrater = self._migrater
    instance = migrater.instance
    old_primary = instance.primary_node
    new_primary = migrater.target_node

    # Only internally mirrored templates have a secondary role to report
    if instance.disk_template in constants.DTS_INT_MIRROR:
      old_secondary = instance.secondary_nodes[0]
      new_secondary = old_primary
    else:
      old_secondary = new_secondary = ""

    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "NEW_PRIMARY": new_primary,
      "FAILOVER_CLEANUP": self.op.cleanup,
      "OLD_SECONDARY": old_secondary,
      "NEW_SECONDARY": new_secondary,
      }

    # The generic instance environment is merged in last
    env.update(BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of two node lists; the first holds the master node, the
        secondary nodes and the target node, the second is the same list
        with the primary node appended

    """
    instance = self._migrater.instance

    first_nodes = [self.cfg.GetMasterNode()]
    first_nodes.extend(instance.secondary_nodes)
    first_nodes.append(self._migrater.target_node)

    second_nodes = first_nodes + [instance.primary_node]
    return (first_nodes, second_nodes)
164 87e25be1 Thomas Thrainer
165 87e25be1 Thomas Thrainer
166 87e25be1 Thomas Thrainer
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The work itself is carried out by a single
  L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expands the instance name and sets up the migration tasklet.

    """
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

    opcode = self.op
    tasklet = \
      TLMigrateInstance(self, opcode.instance_name, opcode.cleanup,
                        failover=False, fallback=opcode.allow_failover,
                        ignore_consistency=False,
                        allow_runtime_changes=opcode.allow_runtime_changes,
                        shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
                        ignore_ipolicy=opcode.ignore_ipolicy)

    self._migrater = tasklet
    self.tasklets = [tasklet]

  def DeclareLocks(self, level):
    """Declares the locks for the given level.

    """
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    migrater = self._migrater
    instance = migrater.instance
    old_primary = instance.primary_node
    new_primary = migrater.target_node

    env = BuildInstanceHookEnvByObject(self, instance)

    # Only internally mirrored templates have a secondary role to report
    if instance.disk_template in constants.DTS_INT_MIRROR:
      old_secondary = new_primary
      new_secondary = old_primary
    else:
      old_secondary = new_secondary = ""

    env.update({
      "MIGRATE_LIVE": migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": old_primary,
      "NEW_PRIMARY": new_primary,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      "OLD_SECONDARY": old_secondary,
      "NEW_SECONDARY": new_secondary,
      })

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple holding the same node list twice; the list consists of
        the master node, the primary node, the secondary nodes and the
        target node

    """
    instance = self._migrater.instance

    hook_nodes = [self.cfg.GetMasterNode(), instance.primary_node]
    hook_nodes.extend(instance.secondary_nodes)
    hook_nodes.append(self._migrater.target_node)

    return (hook_nodes, hook_nodes)
228 87e25be1 Thomas Thrainer
229 87e25be1 Thomas Thrainer
230 87e25be1 Thomas Thrainer
class TLMigrateInstance(Tasklet):
231 87e25be1 Thomas Thrainer
  """Tasklet class for instance migration.
232 87e25be1 Thomas Thrainer

233 87e25be1 Thomas Thrainer
  @type live: boolean
234 87e25be1 Thomas Thrainer
  @ivar live: whether the migration will be done live or non-live;
235 87e25be1 Thomas Thrainer
      this variable is initialized only after CheckPrereq has run
236 87e25be1 Thomas Thrainer
  @type cleanup: boolean
237 87e25be1 Thomas Thrainer
  @ivar cleanup: Whether we clean up from a failed migration
238 87e25be1 Thomas Thrainer
  @type iallocator: string
239 87e25be1 Thomas Thrainer
  @ivar iallocator: The iallocator used to determine target_node
240 87e25be1 Thomas Thrainer
  @type target_node: string
241 87e25be1 Thomas Thrainer
  @ivar target_node: If given, the target_node to reallocate the instance to
242 87e25be1 Thomas Thrainer
  @type failover: boolean
243 87e25be1 Thomas Thrainer
  @ivar failover: Whether operation results in failover or migration
244 87e25be1 Thomas Thrainer
  @type fallback: boolean
245 87e25be1 Thomas Thrainer
  @ivar fallback: Whether fallback to failover is allowed if migration not
246 87e25be1 Thomas Thrainer
                  possible
247 87e25be1 Thomas Thrainer
  @type ignore_consistency: boolean
248 87e25be1 Thomas Thrainer
  @ivar ignore_consistency: Whether we should ignore consistency between source
249 87e25be1 Thomas Thrainer
                            and target node
250 87e25be1 Thomas Thrainer
  @type shutdown_timeout: int
251 87e25be1 Thomas Thrainer
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
252 87e25be1 Thomas Thrainer
  @type ignore_ipolicy: bool
253 87e25be1 Thomas Thrainer
  @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
254 87e25be1 Thomas Thrainer

255 87e25be1 Thomas Thrainer
  """
256 87e25be1 Thomas Thrainer
257 87e25be1 Thomas Thrainer
  # Constants
258 87e25be1 Thomas Thrainer
  _MIGRATION_POLL_INTERVAL = 1      # seconds
259 87e25be1 Thomas Thrainer
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
260 87e25be1 Thomas Thrainer
261 87e25be1 Thomas Thrainer
  def __init__(self, lu, instance_name, cleanup, failover, fallback,
               ignore_consistency, allow_runtime_changes, shutdown_timeout,
               ignore_ipolicy):
    """Initializes this class.

    All arguments except C{lu} are stored unchanged as attributes of the
    same name; C{lu} is handed to the L{Tasklet} constructor.

    @param lu: the logical unit on whose behalf the tasklet runs
    @param instance_name: name of the instance to operate on
    @param cleanup: stored as C{self.cleanup}
    @param failover: stored as C{self.failover}
    @param fallback: stored as C{self.fallback}
    @param ignore_consistency: stored as C{self.ignore_consistency}
    @param allow_runtime_changes: stored as C{self.allow_runtime_changes}
    @param shutdown_timeout: stored as C{self.shutdown_timeout}
    @param ignore_ipolicy: stored as C{self.ignore_ipolicy}

    """
    Tasklet.__init__(self, lu)

    # Target of the operation
    self.instance_name = instance_name

    # Operation mode
    self.cleanup = cleanup
    self.failover = failover
    self.fallback = fallback

    # Behaviour switches
    self.ignore_consistency = ignore_consistency
    self.ignore_ipolicy = ignore_ipolicy
    self.allow_runtime_changes = allow_runtime_changes
    self.shutdown_timeout = shutdown_timeout

    # Placeholder value, will be overridden later
    self.live = False
279 87e25be1 Thomas Thrainer
280 87e25be1 Thomas Thrainer
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, determines the target
    node and decides whether the operation is executed as a migration or as
    a failover.

    The method stores C{self.instance}, C{self.target_node} and C{self.live};
    C{self.tgt_free_mem} is set only when the memory check is performed and
    C{self.current_mem} only when a migration is going to happen and the
    instance was reported as running. C{self.failover} may be switched to
    C{True} and C{self.lu.op.mode}/C{self.lu.op.live} may be rewritten.

    @raise errors.OpPrereqError: if the disk template is not mirrored, if the
        target node equals the primary node, if an iallocator or a differing
        target node is passed for a template that is not externally
        mirrored, or if both 'live' and 'mode' are given
    @raise errors.ConfigurationError: if a template that is not externally
        mirrored has no secondary node

    """
    instance_name = ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance
    cluster = self.cfg.GetClusterInfo()

    # A migration request for an instance which is not marked up is turned
    # into a failover, provided the caller allowed the fallback
    if (not self.cleanup and
        not instance.admin_state == constants.ADMINST_UP and
        not self.failover and self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      # Externally mirrored: the target comes from the iallocator or from the
      # opcode
      CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # Check that the target node is correct in terms of instance policy
      nodeinfo = self.cfg.GetNodeInfo(self.target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
                             ignore=self.ignore_ipolicy)

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        ReleaseLocks(self.lu, locking.LEVEL_NODE,
                     keep=[instance.primary_node, self.target_node])
        ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    else:
      # Mirrored, but not externally: the only possible target is the first
      # secondary node
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)

      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      self.target_node = target_node = secondary_nodes[0]
      # Reject an iallocator, or a target node other than the secondary
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)
      # Check that the target node is correct in terms of instance policy
      nodeinfo = self.cfg.GetNodeInfo(target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
                             ignore=self.ignore_ipolicy)

    i_be = cluster.FillBE(instance)

    # check memory requirements on the secondary node; skipped for cleanups
    # and for failovers of instances which are not marked up
    if (not self.cleanup and
         (not self.failover or instance.admin_state == constants.ADMINST_UP)):
      self.tgt_free_mem = CheckNodeFreeMemory(self.lu, target_node,
                                              "migrating instance %s" %
                                              instance.name,
                                              i_be[constants.BE_MINMEM],
                                              instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check if failover must be forced instead of migration
    if (not self.cleanup and not self.failover and
        i_be[constants.BE_ALWAYS_FAILOVER]):
      self.lu.LogInfo("Instance configured to always failover; fallback"
                      " to failover")
      self.failover = True

    # check bridge existence
    CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        # Ask the primary node whether the instance can be migrated; a
        # failure either triggers the fallback or is raised as an error
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      # Derive the migration mode from 'live', 'mode' or the hypervisor
      # default, in that order
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = cluster.FillHV(self.instance, skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

    if not (self.failover or self.cleanup):
      # Real migration: remember the memory reported for the instance by its
      # primary node; an empty payload is treated as "not running"
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking instance on node %s" %
                        instance.primary_node)
      instance_running = bool(remote_info.payload)
      if instance_running:
        self.current_mem = int(remote_info.payload["memory"])
442 87e25be1 Thomas Thrainer
443 87e25be1 Thomas Thrainer
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    A relocation request for the instance, away from its current primary
    node, is handed to the iallocator named in the opcode. The first entry
    of the result is stored in C{self.target_node}.

    @raise errors.OpPrereqError: if the iallocator run was not successful

    """
    assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)

    allocator_name = self.lu.op.iallocator
    source_nodes = [self.instance.primary_node]

    # FIXME: add a self.ignore_ipolicy option
    request = iallocator.IAReqRelocate(name=self.instance_name,
                                       relocate_from=source_nodes)
    runner = iallocator.IAllocator(self.cfg, self.rpc, request)

    runner.Run(allocator_name)

    if not runner.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (allocator_name, runner.info),
                                 errors.ECODE_NORES)

    chosen_nodes = runner.result
    self.target_node = chosen_nodes[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, allocator_name,
                    utils.CommaJoin(chosen_nodes))
465 87e25be1 Thomas Thrainer
466 87e25be1 Thomas Thrainer
  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call. The call is repeated every two
    seconds until every node reports completion; while waiting, the lowest
    percentage reported by any node is passed to the feedback function.

    """
    self.feedback_fn("* wait until resync is done")

    while True:
      replies = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                             self.nodes_ip,
                                             (self.instance.disks,
                                              self.instance))
      everything_synced = True
      lowest_percent = 100

      for node, reply in replies.items():
        reply.Raise("Cannot resync disks on node %s" % node)
        (node_done, node_percent) = reply.payload
        if not node_done:
          everything_synced = False
        if node_percent is not None:
          lowest_percent = min(lowest_percent, node_percent)

      if everything_synced:
        return

      # Report progress only if at least one node gave a figure below 100
      if lowest_percent < 100:
        self.feedback_fn("   - progress: %.1f%%" % lowest_percent)
      time.sleep(2)
491 87e25be1 Thomas Thrainer
492 87e25be1 Thomas Thrainer
  def _EnsureSecondary(self, node):
    """Switch the instance's disks on the given node to secondary mode.

    @param node: the node whose disks should be demoted; it is passed
        unchanged to the configuration and RPC layers

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    disks = self.instance.disks
    # set the disk IDs for this node before issuing the RPC
    for disk in disks:
      self.cfg.SetDiskID(disk, node)

    close_result = self.rpc.call_blockdev_close(node, self.instance.name,
                                                disks)
    close_result.Raise("Cannot change disk to secondary on node %s" % node)
504 87e25be1 Thomas Thrainer
505 87e25be1 Thomas Thrainer
  def _GoStandalone(self):
    """Detach the instance's disks from the network on all nodes.

    Issues the C{drbd_disconnect_net} RPC to every node in
    C{self.all_nodes}; a failure on any node is raised via the
    per-node result's C{Raise} method.

    """
    self.feedback_fn("* changing into standalone mode")
    per_node = self.rpc.call_drbd_disconnect_net(self.all_nodes,
                                                 self.nodes_ip,
                                                 self.instance.disks)
    for (node_name, node_result) in per_node.items():
      node_result.Raise("Cannot disconnect disks node %s" % node_name)
514 87e25be1 Thomas Thrainer
515 87e25be1 Thomas Thrainer
  def _GoReconnect(self, multimaster):
    """Re-attach the instance's disks to the network.

    @param multimaster: when true the disks are attached in dual-master
        mode, otherwise in single-master mode; the value is forwarded
        as-is to the C{drbd_attach_net} RPC

    """
    mode = "single-master"
    if multimaster:
      mode = "dual-master"
    self.feedback_fn("* changing disks into %s mode" % mode)

    disks_spec = (self.instance.disks, self.instance)
    per_node = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                             disks_spec, self.instance.name,
                                             multimaster)
    for (node_name, node_result) in per_node.items():
      node_result.Raise("Cannot change disks config on node %s" % node_name)
529 87e25be1 Thomas Thrainer
530 87e25be1 Thomas Thrainer
  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    The disk steps are only executed for disk templates contained in
    C{constants.DTS_INT_MIRROR}.

    @raise errors.OpExecError: if the instance is found running on both
        nodes or on neither of them, or if one of the disk steps fails
        (a failure of the first sync wait is logged and ignored)

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore here errors, since if the device is standalone, it
        # won't be able to sync; the failure is still recorded in the log
        # so that it can be inspected afterwards
        logging.info("Ignoring disk sync failure while cleaning up the"
                     " migration of instance %s", instance.name,
                     exc_info=True)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")
595 87e25be1 Thomas Thrainer
596 87e25be1 Thomas Thrainer
  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    Nothing is done for disk templates contained in
    C{constants.DTS_EXT_MIRROR}. For all other templates the target
    node is demoted to secondary, the disks are disconnected and
    re-attached in single-master mode, and the resync is awaited.

    This is a best-effort operation: an L{errors.OpExecError} raised by
    any of the steps is reported to the user as a warning and not
    propagated.

    """
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(self.target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError as err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))
613 87e25be1 Thomas Thrainer
614 87e25be1 Thomas Thrainer
  def _AbortMigration(self):
    """Ask the hypervisor code on both nodes to abort a started migration.

    The finalize-migration RPC is sent with a false success flag, first
    to the target node and then to the source node. Failures of either
    call are only logged, never raised.

    """
    inst = self.instance
    dst_node = self.target_node
    src_node = self.source_node
    mig_info = self.migration_info

    dst_result = self.rpc.call_instance_finalize_migration_dst(dst_node,
                                                               inst,
                                                               mig_info,
                                                               False)
    dst_error = dst_result.fail_msg
    if dst_error:
      # No exception is raised here: the caller still has to try to
      # revert the disk status, even if this step failed.
      logging.error("Aborting migration failed on target node %s: %s",
                    dst_node, dst_error)

    src_result = self.rpc.call_instance_finalize_migration_src(
      src_node, inst, False, self.live)
    src_error = src_result.fail_msg
    if src_error:
      logging.error("Aborting migration failed on source node %s: %s",
                    src_node, src_error)
640 87e25be1 Thomas Thrainer
641 87e25be1 Thomas Thrainer
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - warn about a hypervisor version mismatch between the two nodes
      - check the disk consistency on the target node
      - balloon the instance memory down if the target node has too
        little free memory (only if runtime changes are allowed)
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    The disk mode changes are skipped for disk templates contained in
    C{constants.DTS_EXT_MIRROR}; for the C{rbd} and C{ext} templates the
    block devices are unmapped from the source node after a successful
    migration.

    @raise errors.OpExecError: if a precondition fails, or if the
        migration or its finalization fails; when the migration itself
        fails, the migration is aborted and a revert of the disk status
        is attempted before raising

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, [instance.hypervisor], False)
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    (_, _, (src_info, )) = nodeinfo[source_node].payload
    (_, _, (dst_info, )) = nodeinfo[target_node].payload

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      if not CheckDiskConsistency(self.lu, instance, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    if self.current_mem > self.tgt_free_mem:
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (instance.name, target_node,
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    # kept on self as well, as _AbortMigration reads it from there
    self.migration_info = migration_info = result.payload

    if instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    # This fills physical_id slot that may be missing on newly created disks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        # Without an RPC error the failure was reported by the hypervisor
        # through the status; use a meaningful message for both the log
        # entry and the exception instead of logging an empty one.
        if not msg:
          msg = "hypervisor returned failure"
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if ms.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      # Progress is only reported when it can be computed: a missing or
      # zero total would otherwise make the division fail and abort the
      # job in the middle of the memory transfer.
      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None and ms.total_ram):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    # If the instance's disk template is `rbd' or `ext' and there was a
    # successful migration, unmap the device from the source node.
    if instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
      disks = ExpandCheckDisks(instance, instance.disks)
      self.feedback_fn("* unmapping instance's disks from %s" % source_node)
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
        msg = result.fail_msg
        if msg:
          # only logged: the migration itself has already succeeded
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name, source_node, msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name, source_node)

    self.feedback_fn("* done")
827 87e25be1 Thomas Thrainer
828 87e25be1 Thomas Thrainer
  def _ExecFailover(self):
    """Fail an instance over to the target node.

    The instance is shut down on its current primary node, its disks
    are deactivated, the configuration is updated to make the target
    node the new primary and, if the instance is administratively up,
    its disks are activated and it is started on the target node.

    @raise errors.OpExecError: if a disk is degraded on the target node
        (unless the primary node is offline or consistency is ignored),
        if the shutdown fails (same exceptions apply), or if the disks
        cannot be shut down or activated, or the instance cannot be
        started

    """
    inst = self.instance
    old_primary = self.cfg.GetNodeInfo(inst.primary_node)

    src_name = inst.primary_node
    dst_name = self.target_node

    if not inst.disks_active:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")
    else:
      self.feedback_fn("* checking disk consistency between source and target")
      for (disk_idx, disk) in enumerate(inst.disks):
        # for drbd, these are drbd over lvm
        if CheckDiskConsistency(self.lu, inst, disk, dst_name, False):
          continue
        if old_primary.offline:
          self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                           " target node %s" %
                           (old_primary.name, disk_idx, dst_name))
        elif not self.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover" % disk_idx)

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, src_name)

    shutdown_result = self.rpc.call_instance_shutdown(src_name, inst,
                                                      self.shutdown_timeout,
                                                      self.lu.op.reason)
    shutdown_error = shutdown_result.fail_msg
    if shutdown_error:
      if not (self.ignore_consistency or old_primary.offline):
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, src_name, shutdown_error))
      self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                         " proceeding anyway; please make sure node"
                         " %s is down; error details: %s",
                         inst.name, src_name, src_name, shutdown_error)

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not ShutdownInstanceDisks(self.lu, inst, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    # make the target node the new primary and distribute the new
    # instance config to the other nodes
    inst.primary_node = dst_name
    self.cfg.Update(inst, self.feedback_fn)

    # The instance is only started if it's marked as up
    if inst.admin_state != constants.ADMINST_UP:
      return

    self.feedback_fn("* activating the instance's disks on target node %s" %
                     dst_name)
    logging.info("Starting instance %s on node %s",
                 inst.name, dst_name)

    (disks_ok, _) = AssembleInstanceDisks(self.lu, inst,
                                          ignore_secondaries=True)
    if not disks_ok:
      ShutdownInstanceDisks(self.lu, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    self.feedback_fn("* starting the instance on the target node %s" %
                     dst_name)
    start_result = self.rpc.call_instance_start(dst_name, (inst, None, None),
                                                False, self.lu.op.reason)
    start_error = start_result.fail_msg
    if start_error:
      # do not leave the disks active if the instance could not be started
      ShutdownInstanceDisks(self.lu, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, dst_name, start_error))
907 87e25be1 Thomas Thrainer
908 87e25be1 Thomas Thrainer
  def Exec(self, feedback_fn):
    """Run the failover, cleanup or migration of the instance.

    Stores the feedback function, determines the source and target
    nodes and their secondary IPs, and then dispatches to
    L{_ExecFailover}, L{_ExecCleanup} or L{_ExecMigration} depending on
    the C{failover} and C{cleanup} attributes.

    @param feedback_fn: callable used to report progress messages

    """
    self.feedback_fn = feedback_fn
    inst = self.instance
    self.source_node = inst.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    # For other disk templates self.target_node has already been
    # populated, either directly or through an iallocator.
    if inst.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = inst.secondary_nodes[0]

    self.all_nodes = [self.source_node, self.target_node]
    secondary_ips = {}
    for (name, node) in self.cfg.GetMultiNodeInfo(self.all_nodes):
      secondary_ips[name] = node.secondary_ip
    self.nodes_ip = secondary_ips

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
      return None

    feedback_fn("Migrating instance %s" % self.instance.name)
    if self.cleanup:
      return self._ExecCleanup()
    return self._ExecMigration()