Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib / __init__.py @ 7352d33b

History | View | Annotate | Download (434.3 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import time
33
import logging
34
import copy
35
import OpenSSL
36
import itertools
37
import operator
38

    
39
from ganeti import utils
40
from ganeti import errors
41
from ganeti import hypervisor
42
from ganeti import locking
43
from ganeti import constants
44
from ganeti import objects
45
from ganeti import compat
46
from ganeti import masterd
47
from ganeti import netutils
48
from ganeti import query
49
from ganeti import qlang
50
from ganeti import opcodes
51
from ganeti import ht
52
from ganeti import rpc
53
from ganeti import pathutils
54
from ganeti import network
55
from ganeti.masterd import iallocator
56

    
57
from ganeti.cmdlib.base import ResultWithJobs, LogicalUnit, NoHooksLU, \
58
  Tasklet, _QueryBase
59
from ganeti.cmdlib.common import _ExpandInstanceName, _ExpandItemName, \
60
  _ExpandNodeName, _ShareAll, _CheckNodeGroupInstances, _GetWantedNodes, \
61
  _GetWantedInstances, _RunPostHook, _RedistributeAncillaryFiles, \
62
  _MergeAndVerifyHvState, _MergeAndVerifyDiskState, _GetUpdatedIPolicy, \
63
  _ComputeNewInstanceViolations, _GetUpdatedParams, _CheckOSParams, \
64
  _CheckHVParams, _AdjustCandidatePool, _CheckNodePVs, \
65
  _ComputeIPolicyInstanceViolation, _AnnotateDiskParams, _SupportsOob, \
66
  _ComputeIPolicySpecViolation
67

    
68
from ganeti.cmdlib.cluster import LUClusterActivateMasterIp, \
69
  LUClusterDeactivateMasterIp, LUClusterConfigQuery, LUClusterDestroy, \
70
  LUClusterPostInit, _ClusterQuery, LUClusterQuery, LUClusterRedistConf, \
71
  LUClusterRename, LUClusterRepairDiskSizes, LUClusterSetParams, \
72
  LUClusterVerify, LUClusterVerifyConfig, LUClusterVerifyGroup, \
73
  LUClusterVerifyDisks
74
from ganeti.cmdlib.tags import LUTagsGet, LUTagsSearch, LUTagsSet, LUTagsDel
75
from ganeti.cmdlib.network import LUNetworkAdd, LUNetworkRemove, \
76
  LUNetworkSetParams, _NetworkQuery, LUNetworkQuery, LUNetworkConnect, \
77
  LUNetworkDisconnect
78
from ganeti.cmdlib.test import LUTestDelay, LUTestJqueue, LUTestAllocator
79

    
80
import ganeti.masterd.instance # pylint: disable=W0611
81

    
82

    
83
# States of instance
84
INSTANCE_DOWN = [constants.ADMINST_DOWN]
85
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
86
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
87

    
88
#: Instance status in which an instance can be marked as offline/online
89
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
90
  constants.ADMINST_OFFLINE,
91
  ]))
92

    
93

    
94
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
95
                              cur_group_uuid):
96
  """Checks if node groups for locked instances are still correct.
97

98
  @type cfg: L{config.ConfigWriter}
99
  @param cfg: Cluster configuration
100
  @type instances: dict; string as key, L{objects.Instance} as value
101
  @param instances: Dictionary, instance name as key, instance object as value
102
  @type owned_groups: iterable of string
103
  @param owned_groups: List of owned groups
104
  @type owned_nodes: iterable of string
105
  @param owned_nodes: List of owned nodes
106
  @type cur_group_uuid: string or None
107
  @param cur_group_uuid: Optional group UUID to check against instance's groups
108

109
  """
110
  for (name, inst) in instances.items():
111
    assert owned_nodes.issuperset(inst.all_nodes), \
112
      "Instance %s's nodes changed while we kept the lock" % name
113

    
114
    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
115

    
116
    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
117
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
118

    
119

    
120
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
121
                             primary_only=False):
122
  """Checks if the owned node groups are still correct for an instance.
123

124
  @type cfg: L{config.ConfigWriter}
125
  @param cfg: The cluster configuration
126
  @type instance_name: string
127
  @param instance_name: Instance name
128
  @type owned_groups: set or frozenset
129
  @param owned_groups: List of currently owned node groups
130
  @type primary_only: boolean
131
  @param primary_only: Whether to check node groups for only the primary node
132

133
  """
134
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
135

    
136
  if not owned_groups.issuperset(inst_groups):
137
    raise errors.OpPrereqError("Instance %s's node groups changed since"
138
                               " locks were acquired, current groups are"
139
                               " are '%s', owning groups '%s'; retry the"
140
                               " operation" %
141
                               (instance_name,
142
                                utils.CommaJoin(inst_groups),
143
                                utils.CommaJoin(owned_groups)),
144
                               errors.ECODE_STATE)
145

    
146
  return inst_groups
147

    
148

    
149
def _IsExclusiveStorageEnabledNode(cfg, node):
150
  """Whether exclusive_storage is in effect for the given node.
151

152
  @type cfg: L{config.ConfigWriter}
153
  @param cfg: The cluster configuration
154
  @type node: L{objects.Node}
155
  @param node: The node
156
  @rtype: bool
157
  @return: The effective value of exclusive_storage
158

159
  """
160
  return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
161

    
162

    
163
def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
164
  """Whether exclusive_storage is in effect for the given node.
165

166
  @type cfg: L{config.ConfigWriter}
167
  @param cfg: The cluster configuration
168
  @type nodename: string
169
  @param nodename: The node
170
  @rtype: bool
171
  @return: The effective value of exclusive_storage
172
  @raise errors.OpPrereqError: if no node exists with the given name
173

174
  """
175
  ni = cfg.GetNodeInfo(nodename)
176
  if ni is None:
177
    raise errors.OpPrereqError("Invalid node name %s" % nodename,
178
                               errors.ECODE_NOENT)
179
  return _IsExclusiveStorageEnabledNode(cfg, ni)
180

    
181

    
182
def _CopyLockList(names):
183
  """Makes a copy of a list of lock names.
184

185
  Handles L{locking.ALL_SET} correctly.
186

187
  """
188
  if names == locking.ALL_SET:
189
    return locking.ALL_SET
190
  else:
191
    return names[:]
192

    
193

    
194
def _ReleaseLocks(lu, level, names=None, keep=None):
195
  """Releases locks owned by an LU.
196

197
  @type lu: L{LogicalUnit}
198
  @param level: Lock level
199
  @type names: list or None
200
  @param names: Names of locks to release
201
  @type keep: list or None
202
  @param keep: Names of locks to retain
203

204
  """
205
  assert not (keep is not None and names is not None), \
206
         "Only one of the 'names' and the 'keep' parameters can be given"
207

    
208
  if names is not None:
209
    should_release = names.__contains__
210
  elif keep:
211
    should_release = lambda name: name not in keep
212
  else:
213
    should_release = None
214

    
215
  owned = lu.owned_locks(level)
216
  if not owned:
217
    # Not owning any lock at this level, do nothing
218
    pass
219

    
220
  elif should_release:
221
    retain = []
222
    release = []
223

    
224
    # Determine which locks to release
225
    for name in owned:
226
      if should_release(name):
227
        release.append(name)
228
      else:
229
        retain.append(name)
230

    
231
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
232

    
233
    # Release just some locks
234
    lu.glm.release(level, names=release)
235

    
236
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
237
  else:
238
    # Release everything
239
    lu.glm.release(level)
240

    
241
    assert not lu.glm.is_owned(level), "No locks should be owned"
242

    
243

    
244
def _MapInstanceDisksToNodes(instances):
245
  """Creates a map from (node, volume) to instance name.
246

247
  @type instances: list of L{objects.Instance}
248
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
249

250
  """
251
  return dict(((node, vol), inst.name)
252
              for inst in instances
253
              for (node, vols) in inst.MapLVsByNode().items()
254
              for vol in vols)
255

    
256

    
257
def _CheckOutputFields(static, dynamic, selected):
258
  """Checks whether all selected fields are valid.
259

260
  @type static: L{utils.FieldSet}
261
  @param static: static fields set
262
  @type dynamic: L{utils.FieldSet}
263
  @param dynamic: dynamic fields set
264

265
  """
266
  f = utils.FieldSet()
267
  f.Extend(static)
268
  f.Extend(dynamic)
269

    
270
  delta = f.NonMatching(selected)
271
  if delta:
272
    raise errors.OpPrereqError("Unknown output fields selected: %s"
273
                               % ",".join(delta), errors.ECODE_INVAL)
274

    
275

    
276
def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
277
  """Make sure that none of the given paramters is global.
278

279
  If a global parameter is found, an L{errors.OpPrereqError} exception is
280
  raised. This is used to avoid setting global parameters for individual nodes.
281

282
  @type params: dictionary
283
  @param params: Parameters to check
284
  @type glob_pars: dictionary
285
  @param glob_pars: Forbidden parameters
286
  @type kind: string
287
  @param kind: Kind of parameters (e.g. "node")
288
  @type bad_levels: string
289
  @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
290
      "instance")
291
  @type good_levels: strings
292
  @param good_levels: Level(s) at which the parameters are allowed (e.g.
293
      "cluster or group")
294

295
  """
296
  used_globals = glob_pars.intersection(params)
297
  if used_globals:
298
    msg = ("The following %s parameters are global and cannot"
299
           " be customized at %s level, please modify them at"
300
           " %s level: %s" %
301
           (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
302
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
303

    
304

    
305
def _CheckNodeOnline(lu, node, msg=None):
306
  """Ensure that a given node is online.
307

308
  @param lu: the LU on behalf of which we make the check
309
  @param node: the node to check
310
  @param msg: if passed, should be a message to replace the default one
311
  @raise errors.OpPrereqError: if the node is offline
312

313
  """
314
  if msg is None:
315
    msg = "Can't use offline node"
316
  if lu.cfg.GetNodeInfo(node).offline:
317
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
318

    
319

    
320
def _CheckNodeNotDrained(lu, node):
321
  """Ensure that a given node is not drained.
322

323
  @param lu: the LU on behalf of which we make the check
324
  @param node: the node to check
325
  @raise errors.OpPrereqError: if the node is drained
326

327
  """
328
  if lu.cfg.GetNodeInfo(node).drained:
329
    raise errors.OpPrereqError("Can't use drained node %s" % node,
330
                               errors.ECODE_STATE)
331

    
332

    
333
def _CheckNodeVmCapable(lu, node):
334
  """Ensure that a given node is vm capable.
335

336
  @param lu: the LU on behalf of which we make the check
337
  @param node: the node to check
338
  @raise errors.OpPrereqError: if the node is not vm capable
339

340
  """
341
  if not lu.cfg.GetNodeInfo(node).vm_capable:
342
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
343
                               errors.ECODE_STATE)
344

    
345

    
346
def _CheckNodeHasOS(lu, node, os_name, force_variant):
347
  """Ensure that a node supports a given OS.
348

349
  @param lu: the LU on behalf of which we make the check
350
  @param node: the node to check
351
  @param os_name: the OS to query about
352
  @param force_variant: whether to ignore variant errors
353
  @raise errors.OpPrereqError: if the node is not supporting the OS
354

355
  """
356
  result = lu.rpc.call_os_get(node, os_name)
357
  result.Raise("OS '%s' not in supported OS list for node %s" %
358
               (os_name, node),
359
               prereq=True, ecode=errors.ECODE_INVAL)
360
  if not force_variant:
361
    _CheckOSVariant(result.payload, os_name)
362

    
363

    
364
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
365
  """Ensure that a node has the given secondary ip.
366

367
  @type lu: L{LogicalUnit}
368
  @param lu: the LU on behalf of which we make the check
369
  @type node: string
370
  @param node: the node to check
371
  @type secondary_ip: string
372
  @param secondary_ip: the ip to check
373
  @type prereq: boolean
374
  @param prereq: whether to throw a prerequisite or an execute error
375
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
376
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
377

378
  """
379
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
380
  result.Raise("Failure checking secondary ip on node %s" % node,
381
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
382
  if not result.payload:
383
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
384
           " please fix and re-run this command" % secondary_ip)
385
    if prereq:
386
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
387
    else:
388
      raise errors.OpExecError(msg)
389

    
390

    
391
def _GetClusterDomainSecret():
392
  """Reads the cluster domain secret.
393

394
  """
395
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
396
                               strict=True)
397

    
398

    
399
def _CheckInstanceState(lu, instance, req_states, msg=None):
400
  """Ensure that an instance is in one of the required states.
401

402
  @param lu: the LU on behalf of which we make the check
403
  @param instance: the instance to check
404
  @param msg: if passed, should be a message to replace the default one
405
  @raise errors.OpPrereqError: if the instance is not in the required state
406

407
  """
408
  if msg is None:
409
    msg = ("can't use instance from outside %s states" %
410
           utils.CommaJoin(req_states))
411
  if instance.admin_state not in req_states:
412
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
413
                               (instance.name, instance.admin_state, msg),
414
                               errors.ECODE_STATE)
415

    
416
  if constants.ADMINST_UP not in req_states:
417
    pnode = instance.primary_node
418
    if not lu.cfg.GetNodeInfo(pnode).offline:
419
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
420
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
421
                  prereq=True, ecode=errors.ECODE_ENVIRON)
422
      if instance.name in ins_l.payload:
423
        raise errors.OpPrereqError("Instance %s is running, %s" %
424
                                   (instance.name, msg), errors.ECODE_STATE)
425
    else:
426
      lu.LogWarning("Primary node offline, ignoring check that instance"
427
                     " is down")
428

    
429

    
430
def _ComputeIPolicyInstanceSpecViolation(
431
  ipolicy, instance_spec, disk_template,
432
  _compute_fn=_ComputeIPolicySpecViolation):
433
  """Compute if instance specs meets the specs of ipolicy.
434

435
  @type ipolicy: dict
436
  @param ipolicy: The ipolicy to verify against
437
  @param instance_spec: dict
438
  @param instance_spec: The instance spec to verify
439
  @type disk_template: string
440
  @param disk_template: the disk template of the instance
441
  @param _compute_fn: The function to verify ipolicy (unittest only)
442
  @see: L{_ComputeIPolicySpecViolation}
443

444
  """
445
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
446
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
447
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
448
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
449
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
450
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
451

    
452
  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
453
                     disk_sizes, spindle_use, disk_template)
454

    
455

    
456
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
457
                                 target_group, cfg,
458
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
459
  """Compute if instance meets the specs of the new target group.
460

461
  @param ipolicy: The ipolicy to verify
462
  @param instance: The instance object to verify
463
  @param current_group: The current group of the instance
464
  @param target_group: The new group of the instance
465
  @type cfg: L{config.ConfigWriter}
466
  @param cfg: Cluster configuration
467
  @param _compute_fn: The function to verify ipolicy (unittest only)
468
  @see: L{_ComputeIPolicySpecViolation}
469

470
  """
471
  if current_group == target_group:
472
    return []
473
  else:
474
    return _compute_fn(ipolicy, instance, cfg)
475

    
476

    
477
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
478
                            _compute_fn=_ComputeIPolicyNodeViolation):
479
  """Checks that the target node is correct in terms of instance policy.
480

481
  @param ipolicy: The ipolicy to verify
482
  @param instance: The instance object to verify
483
  @param node: The new node to relocate
484
  @type cfg: L{config.ConfigWriter}
485
  @param cfg: Cluster configuration
486
  @param ignore: Ignore violations of the ipolicy
487
  @param _compute_fn: The function to verify ipolicy (unittest only)
488
  @see: L{_ComputeIPolicySpecViolation}
489

490
  """
491
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
492
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)
493

    
494
  if res:
495
    msg = ("Instance does not meet target node group's (%s) instance"
496
           " policy: %s") % (node.group, utils.CommaJoin(res))
497
    if ignore:
498
      lu.LogWarning(msg)
499
    else:
500
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
501

    
502

    
503
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
504
                          minmem, maxmem, vcpus, nics, disk_template, disks,
505
                          bep, hvp, hypervisor_name, tags):
506
  """Builds instance related env variables for hooks
507

508
  This builds the hook environment from individual variables.
509

510
  @type name: string
511
  @param name: the name of the instance
512
  @type primary_node: string
513
  @param primary_node: the name of the instance's primary node
514
  @type secondary_nodes: list
515
  @param secondary_nodes: list of secondary nodes as strings
516
  @type os_type: string
517
  @param os_type: the name of the instance's OS
518
  @type status: string
519
  @param status: the desired status of the instance
520
  @type minmem: string
521
  @param minmem: the minimum memory size of the instance
522
  @type maxmem: string
523
  @param maxmem: the maximum memory size of the instance
524
  @type vcpus: string
525
  @param vcpus: the count of VCPUs the instance has
526
  @type nics: list
527
  @param nics: list of tuples (name, uuid, ip, mac, mode, link, net, netinfo)
528
      representing the NICs the instance has
529
  @type disk_template: string
530
  @param disk_template: the disk template of the instance
531
  @type disks: list
532
  @param disks: list of tuples (name, uuid, size, mode)
533
  @type bep: dict
534
  @param bep: the backend parameters for the instance
535
  @type hvp: dict
536
  @param hvp: the hypervisor parameters for the instance
537
  @type hypervisor_name: string
538
  @param hypervisor_name: the hypervisor for the instance
539
  @type tags: list
540
  @param tags: list of instance tags as strings
541
  @rtype: dict
542
  @return: the hook environment for this instance
543

544
  """
545
  env = {
546
    "OP_TARGET": name,
547
    "INSTANCE_NAME": name,
548
    "INSTANCE_PRIMARY": primary_node,
549
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
550
    "INSTANCE_OS_TYPE": os_type,
551
    "INSTANCE_STATUS": status,
552
    "INSTANCE_MINMEM": minmem,
553
    "INSTANCE_MAXMEM": maxmem,
554
    # TODO(2.9) remove deprecated "memory" value
555
    "INSTANCE_MEMORY": maxmem,
556
    "INSTANCE_VCPUS": vcpus,
557
    "INSTANCE_DISK_TEMPLATE": disk_template,
558
    "INSTANCE_HYPERVISOR": hypervisor_name,
559
  }
560
  if nics:
561
    nic_count = len(nics)
562
    for idx, (name, _, ip, mac, mode, link, net, netinfo) in enumerate(nics):
563
      if ip is None:
564
        ip = ""
565
      env["INSTANCE_NIC%d_NAME" % idx] = name
566
      env["INSTANCE_NIC%d_IP" % idx] = ip
567
      env["INSTANCE_NIC%d_MAC" % idx] = mac
568
      env["INSTANCE_NIC%d_MODE" % idx] = mode
569
      env["INSTANCE_NIC%d_LINK" % idx] = link
570
      if netinfo:
571
        nobj = objects.Network.FromDict(netinfo)
572
        env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
573
      elif network:
574
        # FIXME: broken network reference: the instance NIC specifies a
575
        # network, but the relevant network entry was not in the config. This
576
        # should be made impossible.
577
        env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
578
      if mode == constants.NIC_MODE_BRIDGED:
579
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
580
  else:
581
    nic_count = 0
582

    
583
  env["INSTANCE_NIC_COUNT"] = nic_count
584

    
585
  if disks:
586
    disk_count = len(disks)
587
    for idx, (name, size, mode) in enumerate(disks):
588
      env["INSTANCE_DISK%d_NAME" % idx] = name
589
      env["INSTANCE_DISK%d_SIZE" % idx] = size
590
      env["INSTANCE_DISK%d_MODE" % idx] = mode
591
  else:
592
    disk_count = 0
593

    
594
  env["INSTANCE_DISK_COUNT"] = disk_count
595

    
596
  if not tags:
597
    tags = []
598

    
599
  env["INSTANCE_TAGS"] = " ".join(tags)
600

    
601
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
602
    for key, value in source.items():
603
      env["INSTANCE_%s_%s" % (kind, key)] = value
604

    
605
  return env
606

    
607

    
608
def _NICToTuple(lu, nic):
609
  """Build a tupple of nic information.
610

611
  @type lu:  L{LogicalUnit}
612
  @param lu: the logical unit on whose behalf we execute
613
  @type nic: L{objects.NIC}
614
  @param nic: nic to convert to hooks tuple
615

616
  """
617
  cluster = lu.cfg.GetClusterInfo()
618
  filled_params = cluster.SimpleFillNIC(nic.nicparams)
619
  mode = filled_params[constants.NIC_MODE]
620
  link = filled_params[constants.NIC_LINK]
621
  netinfo = None
622
  if nic.network:
623
    nobj = lu.cfg.GetNetwork(nic.network)
624
    netinfo = objects.Network.ToDict(nobj)
625
  return (nic.name, nic.uuid, nic.ip, nic.mac, mode, link, nic.network, netinfo)
626

    
627

    
628
def _NICListToTuple(lu, nics):
629
  """Build a list of nic information tuples.
630

631
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
632
  value in LUInstanceQueryData.
633

634
  @type lu:  L{LogicalUnit}
635
  @param lu: the logical unit on whose behalf we execute
636
  @type nics: list of L{objects.NIC}
637
  @param nics: list of nics to convert to hooks tuples
638

639
  """
640
  hooks_nics = []
641
  for nic in nics:
642
    hooks_nics.append(_NICToTuple(lu, nic))
643
  return hooks_nics
644

    
645

    
646
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
647
  """Builds instance related env variables for hooks from an object.
648

649
  @type lu: L{LogicalUnit}
650
  @param lu: the logical unit on whose behalf we execute
651
  @type instance: L{objects.Instance}
652
  @param instance: the instance for which we should build the
653
      environment
654
  @type override: dict
655
  @param override: dictionary with key/values that will override
656
      our values
657
  @rtype: dict
658
  @return: the hook environment dictionary
659

660
  """
661
  cluster = lu.cfg.GetClusterInfo()
662
  bep = cluster.FillBE(instance)
663
  hvp = cluster.FillHV(instance)
664
  args = {
665
    "name": instance.name,
666
    "primary_node": instance.primary_node,
667
    "secondary_nodes": instance.secondary_nodes,
668
    "os_type": instance.os,
669
    "status": instance.admin_state,
670
    "maxmem": bep[constants.BE_MAXMEM],
671
    "minmem": bep[constants.BE_MINMEM],
672
    "vcpus": bep[constants.BE_VCPUS],
673
    "nics": _NICListToTuple(lu, instance.nics),
674
    "disk_template": instance.disk_template,
675
    "disks": [(disk.name, disk.size, disk.mode)
676
              for disk in instance.disks],
677
    "bep": bep,
678
    "hvp": hvp,
679
    "hypervisor_name": instance.hypervisor,
680
    "tags": instance.tags,
681
  }
682
  if override:
683
    args.update(override)
684
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
685

    
686

    
687
def _DecideSelfPromotion(lu, exceptions=None):
688
  """Decide whether I should promote myself as a master candidate.
689

690
  """
691
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
692
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
693
  # the new node will increase mc_max with one, so:
694
  mc_should = min(mc_should + 1, cp_size)
695
  return mc_now < mc_should
696

    
697

    
698
def _CheckNicsBridgesExist(lu, target_nics, target_node):
699
  """Check that the brigdes needed by a list of nics exist.
700

701
  """
702
  cluster = lu.cfg.GetClusterInfo()
703
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
704
  brlist = [params[constants.NIC_LINK] for params in paramslist
705
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
706
  if brlist:
707
    result = lu.rpc.call_bridges_exist(target_node, brlist)
708
    result.Raise("Error checking bridges on destination node '%s'" %
709
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
710

    
711

    
712
def _CheckInstanceBridgesExist(lu, instance, node=None):
713
  """Check that the brigdes needed by an instance exist.
714

715
  """
716
  if node is None:
717
    node = instance.primary_node
718
  _CheckNicsBridgesExist(lu, instance.nics, node)
719

    
720

    
721
def _CheckOSVariant(os_obj, name):
722
  """Check whether an OS name conforms to the os variants specification.
723

724
  @type os_obj: L{objects.OS}
725
  @param os_obj: OS object to check
726
  @type name: string
727
  @param name: OS name passed by the user, to check for validity
728

729
  """
730
  variant = objects.OS.GetVariant(name)
731
  if not os_obj.supported_variants:
732
    if variant:
733
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
734
                                 " passed)" % (os_obj.name, variant),
735
                                 errors.ECODE_INVAL)
736
    return
737
  if not variant:
738
    raise errors.OpPrereqError("OS name must include a variant",
739
                               errors.ECODE_INVAL)
740

    
741
  if variant not in os_obj.supported_variants:
742
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
743

    
744

    
745
def _GetNodeInstancesInner(cfg, fn):
746
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
747

    
748

    
749
def _GetNodeInstances(cfg, node_name):
750
  """Returns a list of all primary and secondary instances on a node.
751

752
  """
753

    
754
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
755

    
756

    
757
def _GetNodePrimaryInstances(cfg, node_name):
758
  """Returns primary instances on a node.
759

760
  """
761
  return _GetNodeInstancesInner(cfg,
762
                                lambda inst: node_name == inst.primary_node)
763

    
764

    
765
def _GetNodeSecondaryInstances(cfg, node_name):
766
  """Returns secondary instances on a node.
767

768
  """
769
  return _GetNodeInstancesInner(cfg,
770
                                lambda inst: node_name in inst.secondary_nodes)
771

    
772

    
773
def _GetStorageTypeArgs(cfg, storage_type):
774
  """Returns the arguments for a storage type.
775

776
  """
777
  # Special case for file storage
778
  if storage_type == constants.ST_FILE:
779
    # storage.FileStorage wants a list of storage directories
780
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
781

    
782
  return []
783

    
784

    
785
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
786
  faulty = []
787

    
788
  for dev in instance.disks:
789
    cfg.SetDiskID(dev, node_name)
790

    
791
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
792
                                                                instance))
793
  result.Raise("Failed to get disk status from node %s" % node_name,
794
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
795

    
796
  for idx, bdev_status in enumerate(result.payload):
797
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
798
      faulty.append(idx)
799

    
800
  return faulty
801

    
802

    
803
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
804
  """Check the sanity of iallocator and node arguments and use the
805
  cluster-wide iallocator if appropriate.
806

807
  Check that at most one of (iallocator, node) is specified. If none is
808
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
809
  then the LU's opcode's iallocator slot is filled with the cluster-wide
810
  default iallocator.
811

812
  @type iallocator_slot: string
813
  @param iallocator_slot: the name of the opcode iallocator slot
814
  @type node_slot: string
815
  @param node_slot: the name of the opcode target node slot
816

817
  """
818
  node = getattr(lu.op, node_slot, None)
819
  ialloc = getattr(lu.op, iallocator_slot, None)
820
  if node == []:
821
    node = None
822

    
823
  if node is not None and ialloc is not None:
824
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
825
                               errors.ECODE_INVAL)
826
  elif ((node is None and ialloc is None) or
827
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
828
    default_iallocator = lu.cfg.GetDefaultIAllocator()
829
    if default_iallocator:
830
      setattr(lu.op, iallocator_slot, default_iallocator)
831
    else:
832
      raise errors.OpPrereqError("No iallocator or node given and no"
833
                                 " cluster-wide default iallocator found;"
834
                                 " please specify either an iallocator or a"
835
                                 " node, or set a cluster-wide default"
836
                                 " iallocator", errors.ECODE_INVAL)
837

    
838

    
839
def _GetDefaultIAllocator(cfg, ialloc):
840
  """Decides on which iallocator to use.
841

842
  @type cfg: L{config.ConfigWriter}
843
  @param cfg: Cluster configuration object
844
  @type ialloc: string or None
845
  @param ialloc: Iallocator specified in opcode
846
  @rtype: string
847
  @return: Iallocator name
848

849
  """
850
  if not ialloc:
851
    # Use default iallocator
852
    ialloc = cfg.GetDefaultIAllocator()
853

    
854
  if not ialloc:
855
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
856
                               " opcode nor as a cluster-wide default",
857
                               errors.ECODE_INVAL)
858

    
859
  return ialloc
860

    
861

    
862
def _CheckHostnameSane(lu, name):
863
  """Ensures that a given hostname resolves to a 'sane' name.
864

865
  The given name is required to be a prefix of the resolved hostname,
866
  to prevent accidental mismatches.
867

868
  @param lu: the logical unit on behalf of which we're checking
869
  @param name: the name we should resolve and check
870
  @return: the resolved hostname object
871

872
  """
873
  hostname = netutils.GetHostname(name=name)
874
  if hostname.name != name:
875
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
876
  if not utils.MatchNameComponent(name, [hostname.name]):
877
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
878
                                " same as given hostname '%s'") %
879
                                (hostname.name, name), errors.ECODE_INVAL)
880
  return hostname
881

    
882

    
883
class LUGroupVerifyDisks(NoHooksLU):
884
  """Verifies the status of all disks in a node group.
885

886
  """
887
  REQ_BGL = False
888

    
889
  def ExpandNames(self):
890
    # Raises errors.OpPrereqError on its own if group can't be found
891
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
892

    
893
    self.share_locks = _ShareAll()
894
    self.needed_locks = {
895
      locking.LEVEL_INSTANCE: [],
896
      locking.LEVEL_NODEGROUP: [],
897
      locking.LEVEL_NODE: [],
898

    
899
      # This opcode is acquires all node locks in a group. LUClusterVerifyDisks
900
      # starts one instance of this opcode for every group, which means all
901
      # nodes will be locked for a short amount of time, so it's better to
902
      # acquire the node allocation lock as well.
903
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
904
      }
905

    
906
  def DeclareLocks(self, level):
907
    if level == locking.LEVEL_INSTANCE:
908
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
909

    
910
      # Lock instances optimistically, needs verification once node and group
911
      # locks have been acquired
912
      self.needed_locks[locking.LEVEL_INSTANCE] = \
913
        self.cfg.GetNodeGroupInstances(self.group_uuid)
914

    
915
    elif level == locking.LEVEL_NODEGROUP:
916
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
917

    
918
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
919
        set([self.group_uuid] +
920
            # Lock all groups used by instances optimistically; this requires
921
            # going via the node before it's locked, requiring verification
922
            # later on
923
            [group_uuid
924
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
925
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
926

    
927
    elif level == locking.LEVEL_NODE:
928
      # This will only lock the nodes in the group to be verified which contain
929
      # actual instances
930
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
931
      self._LockInstancesNodes()
932

    
933
      # Lock all nodes in group to be verified
934
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
935
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
936
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
937

    
938
  def CheckPrereq(self):
939
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
940
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
941
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
942

    
943
    assert self.group_uuid in owned_groups
944

    
945
    # Check if locked instances are still correct
946
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
947

    
948
    # Get instance information
949
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
950

    
951
    # Check if node groups for locked instances are still correct
952
    _CheckInstancesNodeGroups(self.cfg, self.instances,
953
                              owned_groups, owned_nodes, self.group_uuid)
954

    
955
  def Exec(self, feedback_fn):
956
    """Verify integrity of cluster disks.
957

958
    @rtype: tuple of three items
959
    @return: a tuple of (dict of node-to-node_error, list of instances
960
        which need activate-disks, dict of instance: (node, volume) for
961
        missing volumes
962

963
    """
964
    res_nodes = {}
965
    res_instances = set()
966
    res_missing = {}
967

    
968
    nv_dict = _MapInstanceDisksToNodes(
969
      [inst for inst in self.instances.values()
970
       if inst.admin_state == constants.ADMINST_UP])
971

    
972
    if nv_dict:
973
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
974
                             set(self.cfg.GetVmCapableNodeList()))
975

    
976
      node_lvs = self.rpc.call_lv_list(nodes, [])
977

    
978
      for (node, node_res) in node_lvs.items():
979
        if node_res.offline:
980
          continue
981

    
982
        msg = node_res.fail_msg
983
        if msg:
984
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
985
          res_nodes[node] = msg
986
          continue
987

    
988
        for lv_name, (_, _, lv_online) in node_res.payload.items():
989
          inst = nv_dict.pop((node, lv_name), None)
990
          if not (lv_online or inst is None):
991
            res_instances.add(inst)
992

    
993
      # any leftover items in nv_dict are missing LVs, let's arrange the data
994
      # better
995
      for key, inst in nv_dict.iteritems():
996
        res_missing.setdefault(inst, []).append(list(key))
997

    
998
    return (res_nodes, list(res_instances), res_missing)
999

    
1000

    
1001
def _WaitForSync(lu, instance, disks=None, oneshot=False):
1002
  """Sleep and poll for an instance's disk to sync.
1003

1004
  """
1005
  if not instance.disks or disks is not None and not disks:
1006
    return True
1007

    
1008
  disks = _ExpandCheckDisks(instance, disks)
1009

    
1010
  if not oneshot:
1011
    lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
1012

    
1013
  node = instance.primary_node
1014

    
1015
  for dev in disks:
1016
    lu.cfg.SetDiskID(dev, node)
1017

    
1018
  # TODO: Convert to utils.Retry
1019

    
1020
  retries = 0
1021
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1022
  while True:
1023
    max_time = 0
1024
    done = True
1025
    cumul_degraded = False
1026
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
1027
    msg = rstats.fail_msg
1028
    if msg:
1029
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1030
      retries += 1
1031
      if retries >= 10:
1032
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1033
                                 " aborting." % node)
1034
      time.sleep(6)
1035
      continue
1036
    rstats = rstats.payload
1037
    retries = 0
1038
    for i, mstat in enumerate(rstats):
1039
      if mstat is None:
1040
        lu.LogWarning("Can't compute data for node %s/%s",
1041
                           node, disks[i].iv_name)
1042
        continue
1043

    
1044
      cumul_degraded = (cumul_degraded or
1045
                        (mstat.is_degraded and mstat.sync_percent is None))
1046
      if mstat.sync_percent is not None:
1047
        done = False
1048
        if mstat.estimated_time is not None:
1049
          rem_time = ("%s remaining (estimated)" %
1050
                      utils.FormatSeconds(mstat.estimated_time))
1051
          max_time = mstat.estimated_time
1052
        else:
1053
          rem_time = "no time estimate"
1054
        lu.LogInfo("- device %s: %5.2f%% done, %s",
1055
                   disks[i].iv_name, mstat.sync_percent, rem_time)
1056

    
1057
    # if we're done but degraded, let's do a few small retries, to
1058
    # make sure we see a stable and not transient situation; therefore
1059
    # we force restart of the loop
1060
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
1061
      logging.info("Degraded disks found, %d retries left", degr_retries)
1062
      degr_retries -= 1
1063
      time.sleep(1)
1064
      continue
1065

    
1066
    if done or oneshot:
1067
      break
1068

    
1069
    time.sleep(min(60, max_time))
1070

    
1071
  if done:
1072
    lu.LogInfo("Instance %s's disks are in sync", instance.name)
1073

    
1074
  return not cumul_degraded
1075

    
1076

    
1077
def _BlockdevFind(lu, node, dev, instance):
1078
  """Wrapper around call_blockdev_find to annotate diskparams.
1079

1080
  @param lu: A reference to the lu object
1081
  @param node: The node to call out
1082
  @param dev: The device to find
1083
  @param instance: The instance object the device belongs to
1084
  @returns The result of the rpc call
1085

1086
  """
1087
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
1088
  return lu.rpc.call_blockdev_find(node, disk)
1089

    
1090

    
1091
def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
1092
  """Wrapper around L{_CheckDiskConsistencyInner}.
1093

1094
  """
1095
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
1096
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
1097
                                    ldisk=ldisk)
1098

    
1099

    
1100
def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
1101
                               ldisk=False):
1102
  """Check that mirrors are not degraded.
1103

1104
  @attention: The device has to be annotated already.
1105

1106
  The ldisk parameter, if True, will change the test from the
1107
  is_degraded attribute (which represents overall non-ok status for
1108
  the device(s)) to the ldisk (representing the local storage status).
1109

1110
  """
1111
  lu.cfg.SetDiskID(dev, node)
1112

    
1113
  result = True
1114

    
1115
  if on_primary or dev.AssembleOnSecondary():
1116
    rstats = lu.rpc.call_blockdev_find(node, dev)
1117
    msg = rstats.fail_msg
1118
    if msg:
1119
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1120
      result = False
1121
    elif not rstats.payload:
1122
      lu.LogWarning("Can't find disk on node %s", node)
1123
      result = False
1124
    else:
1125
      if ldisk:
1126
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
1127
      else:
1128
        result = result and not rstats.payload.is_degraded
1129

    
1130
  if dev.children:
1131
    for child in dev.children:
1132
      result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
1133
                                                     on_primary)
1134

    
1135
  return result
1136

    
1137

    
1138
class LUOobCommand(NoHooksLU):
1139
  """Logical unit for OOB handling.
1140

1141
  """
1142
  REQ_BGL = False
1143
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
1144

    
1145
  def ExpandNames(self):
1146
    """Gather locks we need.
1147

1148
    """
1149
    if self.op.node_names:
1150
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
1151
      lock_names = self.op.node_names
1152
    else:
1153
      lock_names = locking.ALL_SET
1154

    
1155
    self.needed_locks = {
1156
      locking.LEVEL_NODE: lock_names,
1157
      }
1158

    
1159
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
1160

    
1161
    if not self.op.node_names:
1162
      # Acquire node allocation lock only if all nodes are affected
1163
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
1164

    
1165
  def CheckPrereq(self):
1166
    """Check prerequisites.
1167

1168
    This checks:
1169
     - the node exists in the configuration
1170
     - OOB is supported
1171

1172
    Any errors are signaled by raising errors.OpPrereqError.
1173

1174
    """
1175
    self.nodes = []
1176
    self.master_node = self.cfg.GetMasterNode()
1177

    
1178
    assert self.op.power_delay >= 0.0
1179

    
1180
    if self.op.node_names:
1181
      if (self.op.command in self._SKIP_MASTER and
1182
          self.master_node in self.op.node_names):
1183
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
1184
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
1185

    
1186
        if master_oob_handler:
1187
          additional_text = ("run '%s %s %s' if you want to operate on the"
1188
                             " master regardless") % (master_oob_handler,
1189
                                                      self.op.command,
1190
                                                      self.master_node)
1191
        else:
1192
          additional_text = "it does not support out-of-band operations"
1193

    
1194
        raise errors.OpPrereqError(("Operating on the master node %s is not"
1195
                                    " allowed for %s; %s") %
1196
                                   (self.master_node, self.op.command,
1197
                                    additional_text), errors.ECODE_INVAL)
1198
    else:
1199
      self.op.node_names = self.cfg.GetNodeList()
1200
      if self.op.command in self._SKIP_MASTER:
1201
        self.op.node_names.remove(self.master_node)
1202

    
1203
    if self.op.command in self._SKIP_MASTER:
1204
      assert self.master_node not in self.op.node_names
1205

    
1206
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
1207
      if node is None:
1208
        raise errors.OpPrereqError("Node %s not found" % node_name,
1209
                                   errors.ECODE_NOENT)
1210
      else:
1211
        self.nodes.append(node)
1212

    
1213
      if (not self.op.ignore_status and
1214
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
1215
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
1216
                                    " not marked offline") % node_name,
1217
                                   errors.ECODE_STATE)
1218

    
1219
  def Exec(self, feedback_fn):
1220
    """Execute OOB and return result if we expect any.
1221

1222
    """
1223
    master_node = self.master_node
1224
    ret = []
1225

    
1226
    for idx, node in enumerate(utils.NiceSort(self.nodes,
1227
                                              key=lambda node: node.name)):
1228
      node_entry = [(constants.RS_NORMAL, node.name)]
1229
      ret.append(node_entry)
1230

    
1231
      oob_program = _SupportsOob(self.cfg, node)
1232

    
1233
      if not oob_program:
1234
        node_entry.append((constants.RS_UNAVAIL, None))
1235
        continue
1236

    
1237
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
1238
                   self.op.command, oob_program, node.name)
1239
      result = self.rpc.call_run_oob(master_node, oob_program,
1240
                                     self.op.command, node.name,
1241
                                     self.op.timeout)
1242

    
1243
      if result.fail_msg:
1244
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
1245
                        node.name, result.fail_msg)
1246
        node_entry.append((constants.RS_NODATA, None))
1247
      else:
1248
        try:
1249
          self._CheckPayload(result)
1250
        except errors.OpExecError, err:
1251
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
1252
                          node.name, err)
1253
          node_entry.append((constants.RS_NODATA, None))
1254
        else:
1255
          if self.op.command == constants.OOB_HEALTH:
1256
            # For health we should log important events
1257
            for item, status in result.payload:
1258
              if status in [constants.OOB_STATUS_WARNING,
1259
                            constants.OOB_STATUS_CRITICAL]:
1260
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
1261
                                item, node.name, status)
1262

    
1263
          if self.op.command == constants.OOB_POWER_ON:
1264
            node.powered = True
1265
          elif self.op.command == constants.OOB_POWER_OFF:
1266
            node.powered = False
1267
          elif self.op.command == constants.OOB_POWER_STATUS:
1268
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
1269
            if powered != node.powered:
1270
              logging.warning(("Recorded power state (%s) of node '%s' does not"
1271
                               " match actual power state (%s)"), node.powered,
1272
                              node.name, powered)
1273

    
1274
          # For configuration changing commands we should update the node
1275
          if self.op.command in (constants.OOB_POWER_ON,
1276
                                 constants.OOB_POWER_OFF):
1277
            self.cfg.Update(node, feedback_fn)
1278

    
1279
          node_entry.append((constants.RS_NORMAL, result.payload))
1280

    
1281
          if (self.op.command == constants.OOB_POWER_ON and
1282
              idx < len(self.nodes) - 1):
1283
            time.sleep(self.op.power_delay)
1284

    
1285
    return ret
1286

    
1287
  def _CheckPayload(self, result):
1288
    """Checks if the payload is valid.
1289

1290
    @param result: RPC result
1291
    @raises errors.OpExecError: If payload is not valid
1292

1293
    """
1294
    errs = []
1295
    if self.op.command == constants.OOB_HEALTH:
1296
      if not isinstance(result.payload, list):
1297
        errs.append("command 'health' is expected to return a list but got %s" %
1298
                    type(result.payload))
1299
      else:
1300
        for item, status in result.payload:
1301
          if status not in constants.OOB_STATUSES:
1302
            errs.append("health item '%s' has invalid status '%s'" %
1303
                        (item, status))
1304

    
1305
    if self.op.command == constants.OOB_POWER_STATUS:
1306
      if not isinstance(result.payload, dict):
1307
        errs.append("power-status is expected to return a dict but got %s" %
1308
                    type(result.payload))
1309

    
1310
    if self.op.command in [
1311
      constants.OOB_POWER_ON,
1312
      constants.OOB_POWER_OFF,
1313
      constants.OOB_POWER_CYCLE,
1314
      ]:
1315
      if result.payload is not None:
1316
        errs.append("%s is expected to not return payload but got '%s'" %
1317
                    (self.op.command, result.payload))
1318

    
1319
    if errs:
1320
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
1321
                               utils.CommaJoin(errs))
1322

    
1323

    
1324
class _OsQuery(_QueryBase):
1325
  FIELDS = query.OS_FIELDS
1326

    
1327
  def ExpandNames(self, lu):
1328
    # Lock all nodes in shared mode
1329
    # Temporary removal of locks, should be reverted later
1330
    # TODO: reintroduce locks when they are lighter-weight
1331
    lu.needed_locks = {}
1332
    #self.share_locks[locking.LEVEL_NODE] = 1
1333
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1334

    
1335
    # The following variables interact with _QueryBase._GetNames
1336
    if self.names:
1337
      self.wanted = self.names
1338
    else:
1339
      self.wanted = locking.ALL_SET
1340

    
1341
    self.do_locking = self.use_locking
1342

    
1343
  def DeclareLocks(self, lu, level):
1344
    pass
1345

    
1346
  @staticmethod
1347
  def _DiagnoseByOS(rlist):
1348
    """Remaps a per-node return list into an a per-os per-node dictionary
1349

1350
    @param rlist: a map with node names as keys and OS objects as values
1351

1352
    @rtype: dict
1353
    @return: a dictionary with osnames as keys and as value another
1354
        map, with nodes as keys and tuples of (path, status, diagnose,
1355
        variants, parameters, api_versions) as values, eg::
1356

1357
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
1358
                                     (/srv/..., False, "invalid api")],
1359
                           "node2": [(/srv/..., True, "", [], [])]}
1360
          }
1361

1362
    """
1363
    all_os = {}
1364
    # we build here the list of nodes that didn't fail the RPC (at RPC
1365
    # level), so that nodes with a non-responding node daemon don't
1366
    # make all OSes invalid
1367
    good_nodes = [node_name for node_name in rlist
1368
                  if not rlist[node_name].fail_msg]
1369
    for node_name, nr in rlist.items():
1370
      if nr.fail_msg or not nr.payload:
1371
        continue
1372
      for (name, path, status, diagnose, variants,
1373
           params, api_versions) in nr.payload:
1374
        if name not in all_os:
1375
          # build a list of nodes for this os containing empty lists
1376
          # for each node in node_list
1377
          all_os[name] = {}
1378
          for nname in good_nodes:
1379
            all_os[name][nname] = []
1380
        # convert params from [name, help] to (name, help)
1381
        params = [tuple(v) for v in params]
1382
        all_os[name][node_name].append((path, status, diagnose,
1383
                                        variants, params, api_versions))
1384
    return all_os
1385

    
1386
  def _GetQueryData(self, lu):
1387
    """Computes the list of nodes and their attributes.
1388

1389
    """
1390
    # Locking is not used
1391
    assert not (compat.any(lu.glm.is_owned(level)
1392
                           for level in locking.LEVELS
1393
                           if level != locking.LEVEL_CLUSTER) or
1394
                self.do_locking or self.use_locking)
1395

    
1396
    valid_nodes = [node.name
1397
                   for node in lu.cfg.GetAllNodesInfo().values()
1398
                   if not node.offline and node.vm_capable]
1399
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
1400
    cluster = lu.cfg.GetClusterInfo()
1401

    
1402
    data = {}
1403

    
1404
    for (os_name, os_data) in pol.items():
1405
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
1406
                          hidden=(os_name in cluster.hidden_os),
1407
                          blacklisted=(os_name in cluster.blacklisted_os))
1408

    
1409
      variants = set()
1410
      parameters = set()
1411
      api_versions = set()
1412

    
1413
      for idx, osl in enumerate(os_data.values()):
1414
        info.valid = bool(info.valid and osl and osl[0][1])
1415
        if not info.valid:
1416
          break
1417

    
1418
        (node_variants, node_params, node_api) = osl[0][3:6]
1419
        if idx == 0:
1420
          # First entry
1421
          variants.update(node_variants)
1422
          parameters.update(node_params)
1423
          api_versions.update(node_api)
1424
        else:
1425
          # Filter out inconsistent values
1426
          variants.intersection_update(node_variants)
1427
          parameters.intersection_update(node_params)
1428
          api_versions.intersection_update(node_api)
1429

    
1430
      info.variants = list(variants)
1431
      info.parameters = list(parameters)
1432
      info.api_versions = list(api_versions)
1433

    
1434
      data[os_name] = info
1435

    
1436
    # Prepare data in requested order
1437
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
1438
            if name in data]
1439

    
1440

    
1441
class LUOsDiagnose(NoHooksLU):
1442
  """Logical unit for OS diagnose/query.
1443

1444
  """
1445
  REQ_BGL = False
1446

    
1447
  @staticmethod
1448
  def _BuildFilter(fields, names):
1449
    """Builds a filter for querying OSes.
1450

1451
    """
1452
    name_filter = qlang.MakeSimpleFilter("name", names)
1453

    
1454
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
1455
    # respective field is not requested
1456
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
1457
                     for fname in ["hidden", "blacklisted"]
1458
                     if fname not in fields]
1459
    if "valid" not in fields:
1460
      status_filter.append([qlang.OP_TRUE, "valid"])
1461

    
1462
    if status_filter:
1463
      status_filter.insert(0, qlang.OP_AND)
1464
    else:
1465
      status_filter = None
1466

    
1467
    if name_filter and status_filter:
1468
      return [qlang.OP_AND, name_filter, status_filter]
1469
    elif name_filter:
1470
      return name_filter
1471
    else:
1472
      return status_filter
1473

    
1474
  def CheckArguments(self):
1475
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
1476
                       self.op.output_fields, False)
1477

    
1478
  def ExpandNames(self):
1479
    self.oq.ExpandNames(self)
1480

    
1481
  def Exec(self, feedback_fn):
1482
    return self.oq.OldStyleQuery(self)
1483

    
1484

    
1485
class _ExtStorageQuery(_QueryBase):
1486
  FIELDS = query.EXTSTORAGE_FIELDS
1487

    
1488
  def ExpandNames(self, lu):
1489
    # Lock all nodes in shared mode
1490
    # Temporary removal of locks, should be reverted later
1491
    # TODO: reintroduce locks when they are lighter-weight
1492
    lu.needed_locks = {}
1493
    #self.share_locks[locking.LEVEL_NODE] = 1
1494
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1495

    
1496
    # The following variables interact with _QueryBase._GetNames
1497
    if self.names:
1498
      self.wanted = self.names
1499
    else:
1500
      self.wanted = locking.ALL_SET
1501

    
1502
    self.do_locking = self.use_locking
1503

    
1504
  def DeclareLocks(self, lu, level):
1505
    pass
1506

    
1507
  @staticmethod
1508
  def _DiagnoseByProvider(rlist):
1509
    """Remaps a per-node return list into an a per-provider per-node dictionary
1510

1511
    @param rlist: a map with node names as keys and ExtStorage objects as values
1512

1513
    @rtype: dict
1514
    @return: a dictionary with extstorage providers as keys and as
1515
        value another map, with nodes as keys and tuples of
1516
        (path, status, diagnose, parameters) as values, eg::
1517

1518
          {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
1519
                         "node2": [(/srv/..., False, "missing file")]
1520
                         "node3": [(/srv/..., True, "", [])]
1521
          }
1522

1523
    """
1524
    all_es = {}
1525
    # we build here the list of nodes that didn't fail the RPC (at RPC
1526
    # level), so that nodes with a non-responding node daemon don't
1527
    # make all OSes invalid
1528
    good_nodes = [node_name for node_name in rlist
1529
                  if not rlist[node_name].fail_msg]
1530
    for node_name, nr in rlist.items():
1531
      if nr.fail_msg or not nr.payload:
1532
        continue
1533
      for (name, path, status, diagnose, params) in nr.payload:
1534
        if name not in all_es:
1535
          # build a list of nodes for this os containing empty lists
1536
          # for each node in node_list
1537
          all_es[name] = {}
1538
          for nname in good_nodes:
1539
            all_es[name][nname] = []
1540
        # convert params from [name, help] to (name, help)
1541
        params = [tuple(v) for v in params]
1542
        all_es[name][node_name].append((path, status, diagnose, params))
1543
    return all_es
1544

    
1545
  def _GetQueryData(self, lu):
1546
    """Computes the list of nodes and their attributes.
1547

1548
    """
1549
    # Locking is not used
1550
    assert not (compat.any(lu.glm.is_owned(level)
1551
                           for level in locking.LEVELS
1552
                           if level != locking.LEVEL_CLUSTER) or
1553
                self.do_locking or self.use_locking)
1554

    
1555
    valid_nodes = [node.name
1556
                   for node in lu.cfg.GetAllNodesInfo().values()
1557
                   if not node.offline and node.vm_capable]
1558
    pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
1559

    
1560
    data = {}
1561

    
1562
    nodegroup_list = lu.cfg.GetNodeGroupList()
1563

    
1564
    for (es_name, es_data) in pol.items():
1565
      # For every provider compute the nodegroup validity.
1566
      # To do this we need to check the validity of each node in es_data
1567
      # and then construct the corresponding nodegroup dict:
1568
      #      { nodegroup1: status
1569
      #        nodegroup2: status
1570
      #      }
1571
      ndgrp_data = {}
1572
      for nodegroup in nodegroup_list:
1573
        ndgrp = lu.cfg.GetNodeGroup(nodegroup)
1574

    
1575
        nodegroup_nodes = ndgrp.members
1576
        nodegroup_name = ndgrp.name
1577
        node_statuses = []
1578

    
1579
        for node in nodegroup_nodes:
1580
          if node in valid_nodes:
1581
            if es_data[node] != []:
1582
              node_status = es_data[node][0][1]
1583
              node_statuses.append(node_status)
1584
            else:
1585
              node_statuses.append(False)
1586

    
1587
        if False in node_statuses:
1588
          ndgrp_data[nodegroup_name] = False
1589
        else:
1590
          ndgrp_data[nodegroup_name] = True
1591

    
1592
      # Compute the provider's parameters
1593
      parameters = set()
1594
      for idx, esl in enumerate(es_data.values()):
1595
        valid = bool(esl and esl[0][1])
1596
        if not valid:
1597
          break
1598

    
1599
        node_params = esl[0][3]
1600
        if idx == 0:
1601
          # First entry
1602
          parameters.update(node_params)
1603
        else:
1604
          # Filter out inconsistent values
1605
          parameters.intersection_update(node_params)
1606

    
1607
      params = list(parameters)
1608

    
1609
      # Now fill all the info for this provider
1610
      info = query.ExtStorageInfo(name=es_name, node_status=es_data,
1611
                                  nodegroup_status=ndgrp_data,
1612
                                  parameters=params)
1613

    
1614
      data[es_name] = info
1615

    
1616
    # Prepare data in requested order
1617
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
1618
            if name in data]
1619

    
1620

    
1621
class LUExtStorageDiagnose(NoHooksLU):
1622
  """Logical unit for ExtStorage diagnose/query.
1623

1624
  """
1625
  REQ_BGL = False
1626

    
1627
  def CheckArguments(self):
1628
    self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
1629
                               self.op.output_fields, False)
1630

    
1631
  def ExpandNames(self):
1632
    self.eq.ExpandNames(self)
1633

    
1634
  def Exec(self, feedback_fn):
1635
    return self.eq.OldStyleQuery(self)
1636

    
1637

    
1638
class LUNodeRemove(LogicalUnit):
1639
  """Logical unit for removing a node.
1640

1641
  """
1642
  HPATH = "node-remove"
1643
  HTYPE = constants.HTYPE_NODE
1644

    
1645
  def BuildHooksEnv(self):
1646
    """Build hooks env.
1647

1648
    """
1649
    return {
1650
      "OP_TARGET": self.op.node_name,
1651
      "NODE_NAME": self.op.node_name,
1652
      }
1653

    
1654
  def BuildHooksNodes(self):
1655
    """Build hooks nodes.
1656

1657
    This doesn't run on the target node in the pre phase as a failed
1658
    node would then be impossible to remove.
1659

1660
    """
1661
    all_nodes = self.cfg.GetNodeList()
1662
    try:
1663
      all_nodes.remove(self.op.node_name)
1664
    except ValueError:
1665
      pass
1666
    return (all_nodes, all_nodes)
1667

    
1668
  def CheckPrereq(self):
1669
    """Check prerequisites.
1670

1671
    This checks:
1672
     - the node exists in the configuration
1673
     - it does not have primary or secondary instances
1674
     - it's not the master
1675

1676
    Any errors are signaled by raising errors.OpPrereqError.
1677

1678
    """
1679
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
1680
    node = self.cfg.GetNodeInfo(self.op.node_name)
1681
    assert node is not None
1682

    
1683
    masternode = self.cfg.GetMasterNode()
1684
    if node.name == masternode:
1685
      raise errors.OpPrereqError("Node is the master node, failover to another"
1686
                                 " node is required", errors.ECODE_INVAL)
1687

    
1688
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
1689
      if node.name in instance.all_nodes:
1690
        raise errors.OpPrereqError("Instance %s is still running on the node,"
1691
                                   " please remove first" % instance_name,
1692
                                   errors.ECODE_INVAL)
1693
    self.op.node_name = node.name
1694
    self.node = node
1695

    
1696
  def Exec(self, feedback_fn):
1697
    """Removes the node from the cluster.
1698

1699
    """
1700
    node = self.node
1701
    logging.info("Stopping the node daemon and removing configs from node %s",
1702
                 node.name)
1703

    
1704
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1705

    
1706
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
1707
      "Not owning BGL"
1708

    
1709
    # Promote nodes to master candidate as needed
1710
    _AdjustCandidatePool(self, exceptions=[node.name])
1711
    self.context.RemoveNode(node.name)
1712

    
1713
    # Run post hooks on the node before it's removed
1714
    _RunPostHook(self, node.name)
1715

    
1716
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
1717
    msg = result.fail_msg
1718
    if msg:
1719
      self.LogWarning("Errors encountered on the remote node while leaving"
1720
                      " the cluster: %s", msg)
1721

    
1722
    # Remove node from our /etc/hosts
1723
    if self.cfg.GetClusterInfo().modify_etc_hosts:
1724
      master_node = self.cfg.GetMasterNode()
1725
      result = self.rpc.call_etc_hosts_modify(master_node,
1726
                                              constants.ETC_HOSTS_REMOVE,
1727
                                              node.name, None)
1728
      result.Raise("Can't update hosts file with new host data")
1729
      _RedistributeAncillaryFiles(self)
1730

    
1731

    
1732
class _NodeQuery(_QueryBase):
1733
  FIELDS = query.NODE_FIELDS
1734

    
1735
  def ExpandNames(self, lu):
1736
    lu.needed_locks = {}
1737
    lu.share_locks = _ShareAll()
1738

    
1739
    if self.names:
1740
      self.wanted = _GetWantedNodes(lu, self.names)
1741
    else:
1742
      self.wanted = locking.ALL_SET
1743

    
1744
    self.do_locking = (self.use_locking and
1745
                       query.NQ_LIVE in self.requested_data)
1746

    
1747
    if self.do_locking:
1748
      # If any non-static field is requested we need to lock the nodes
1749
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
1750
      lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
1751

    
1752
  def DeclareLocks(self, lu, level):
1753
    pass
1754

    
1755
  def _GetQueryData(self, lu):
1756
    """Computes the list of nodes and their attributes.
1757

1758
    """
1759
    all_info = lu.cfg.GetAllNodesInfo()
1760

    
1761
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
1762

    
1763
    # Gather data as requested
1764
    if query.NQ_LIVE in self.requested_data:
1765
      # filter out non-vm_capable nodes
1766
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
1767

    
1768
      es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
1769
      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
1770
                                        [lu.cfg.GetHypervisorType()], es_flags)
1771
      live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
1772
                       for (name, nresult) in node_data.items()
1773
                       if not nresult.fail_msg and nresult.payload)
1774
    else:
1775
      live_data = None
1776

    
1777
    if query.NQ_INST in self.requested_data:
1778
      node_to_primary = dict([(name, set()) for name in nodenames])
1779
      node_to_secondary = dict([(name, set()) for name in nodenames])
1780

    
1781
      inst_data = lu.cfg.GetAllInstancesInfo()
1782

    
1783
      for inst in inst_data.values():
1784
        if inst.primary_node in node_to_primary:
1785
          node_to_primary[inst.primary_node].add(inst.name)
1786
        for secnode in inst.secondary_nodes:
1787
          if secnode in node_to_secondary:
1788
            node_to_secondary[secnode].add(inst.name)
1789
    else:
1790
      node_to_primary = None
1791
      node_to_secondary = None
1792

    
1793
    if query.NQ_OOB in self.requested_data:
1794
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
1795
                         for name, node in all_info.iteritems())
1796
    else:
1797
      oob_support = None
1798

    
1799
    if query.NQ_GROUP in self.requested_data:
1800
      groups = lu.cfg.GetAllNodeGroupsInfo()
1801
    else:
1802
      groups = {}
1803

    
1804
    return query.NodeQueryData([all_info[name] for name in nodenames],
1805
                               live_data, lu.cfg.GetMasterNode(),
1806
                               node_to_primary, node_to_secondary, groups,
1807
                               oob_support, lu.cfg.GetClusterInfo())
1808

    
1809

    
1810
class LUNodeQuery(NoHooksLU):
1811
  """Logical unit for querying nodes.
1812

1813
  """
1814
  # pylint: disable=W0142
1815
  REQ_BGL = False
1816

    
1817
  def CheckArguments(self):
1818
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
1819
                         self.op.output_fields, self.op.use_locking)
1820

    
1821
  def ExpandNames(self):
1822
    self.nq.ExpandNames(self)
1823

    
1824
  def DeclareLocks(self, level):
1825
    self.nq.DeclareLocks(self, level)
1826

    
1827
  def Exec(self, feedback_fn):
1828
    return self.nq.OldStyleQuery(self)
1829

    
1830

    
1831
class LUNodeQueryvols(NoHooksLU):
1832
  """Logical unit for getting volumes on node(s).
1833

1834
  """
1835
  REQ_BGL = False
1836
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
1837
  _FIELDS_STATIC = utils.FieldSet("node")
1838

    
1839
  def CheckArguments(self):
1840
    _CheckOutputFields(static=self._FIELDS_STATIC,
1841
                       dynamic=self._FIELDS_DYNAMIC,
1842
                       selected=self.op.output_fields)
1843

    
1844
  def ExpandNames(self):
1845
    self.share_locks = _ShareAll()
1846

    
1847
    if self.op.nodes:
1848
      self.needed_locks = {
1849
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
1850
        }
1851
    else:
1852
      self.needed_locks = {
1853
        locking.LEVEL_NODE: locking.ALL_SET,
1854
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
1855
        }
1856

    
1857
  def Exec(self, feedback_fn):
1858
    """Computes the list of nodes and their attributes.
1859

1860
    """
1861
    nodenames = self.owned_locks(locking.LEVEL_NODE)
1862
    volumes = self.rpc.call_node_volumes(nodenames)
1863

    
1864
    ilist = self.cfg.GetAllInstancesInfo()
1865
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
1866

    
1867
    output = []
1868
    for node in nodenames:
1869
      nresult = volumes[node]
1870
      if nresult.offline:
1871
        continue
1872
      msg = nresult.fail_msg
1873
      if msg:
1874
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
1875
        continue
1876

    
1877
      node_vols = sorted(nresult.payload,
1878
                         key=operator.itemgetter("dev"))
1879

    
1880
      for vol in node_vols:
1881
        node_output = []
1882
        for field in self.op.output_fields:
1883
          if field == "node":
1884
            val = node
1885
          elif field == "phys":
1886
            val = vol["dev"]
1887
          elif field == "vg":
1888
            val = vol["vg"]
1889
          elif field == "name":
1890
            val = vol["name"]
1891
          elif field == "size":
1892
            val = int(float(vol["size"]))
1893
          elif field == "instance":
1894
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
1895
          else:
1896
            raise errors.ParameterError(field)
1897
          node_output.append(str(val))
1898

    
1899
        output.append(node_output)
1900

    
1901
    return output
1902

    
1903

    
1904
class LUNodeQueryStorage(NoHooksLU):
1905
  """Logical unit for getting information on storage units on node(s).
1906

1907
  """
1908
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
1909
  REQ_BGL = False
1910

    
1911
  def CheckArguments(self):
1912
    _CheckOutputFields(static=self._FIELDS_STATIC,
1913
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
1914
                       selected=self.op.output_fields)
1915

    
1916
  def ExpandNames(self):
1917
    self.share_locks = _ShareAll()
1918

    
1919
    if self.op.nodes:
1920
      self.needed_locks = {
1921
        locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
1922
        }
1923
    else:
1924
      self.needed_locks = {
1925
        locking.LEVEL_NODE: locking.ALL_SET,
1926
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
1927
        }
1928

    
1929
  def Exec(self, feedback_fn):
1930
    """Computes the list of nodes and their attributes.
1931

1932
    """
1933
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
1934

    
1935
    # Always get name to sort by
1936
    if constants.SF_NAME in self.op.output_fields:
1937
      fields = self.op.output_fields[:]
1938
    else:
1939
      fields = [constants.SF_NAME] + self.op.output_fields
1940

    
1941
    # Never ask for node or type as it's only known to the LU
1942
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
1943
      while extra in fields:
1944
        fields.remove(extra)
1945

    
1946
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
1947
    name_idx = field_idx[constants.SF_NAME]
1948

    
1949
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
1950
    data = self.rpc.call_storage_list(self.nodes,
1951
                                      self.op.storage_type, st_args,
1952
                                      self.op.name, fields)
1953

    
1954
    result = []
1955

    
1956
    for node in utils.NiceSort(self.nodes):
1957
      nresult = data[node]
1958
      if nresult.offline:
1959
        continue
1960

    
1961
      msg = nresult.fail_msg
1962
      if msg:
1963
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
1964
        continue
1965

    
1966
      rows = dict([(row[name_idx], row) for row in nresult.payload])
1967

    
1968
      for name in utils.NiceSort(rows.keys()):
1969
        row = rows[name]
1970

    
1971
        out = []
1972

    
1973
        for field in self.op.output_fields:
1974
          if field == constants.SF_NODE:
1975
            val = node
1976
          elif field == constants.SF_TYPE:
1977
            val = self.op.storage_type
1978
          elif field in field_idx:
1979
            val = row[field_idx[field]]
1980
          else:
1981
            raise errors.ParameterError(field)
1982

    
1983
          out.append(val)
1984

    
1985
        result.append(out)
1986

    
1987
    return result
1988

    
1989

    
1990
class _InstanceQuery(_QueryBase):
1991
  FIELDS = query.INSTANCE_FIELDS
1992

    
1993
  def ExpandNames(self, lu):
1994
    lu.needed_locks = {}
1995
    lu.share_locks = _ShareAll()
1996

    
1997
    if self.names:
1998
      self.wanted = _GetWantedInstances(lu, self.names)
1999
    else:
2000
      self.wanted = locking.ALL_SET
2001

    
2002
    self.do_locking = (self.use_locking and
2003
                       query.IQ_LIVE in self.requested_data)
2004
    if self.do_locking:
2005
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
2006
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
2007
      lu.needed_locks[locking.LEVEL_NODE] = []
2008
      lu.needed_locks[locking.LEVEL_NETWORK] = []
2009
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2010

    
2011
    self.do_grouplocks = (self.do_locking and
2012
                          query.IQ_NODES in self.requested_data)
2013

    
2014
  def DeclareLocks(self, lu, level):
2015
    if self.do_locking:
2016
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
2017
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
2018

    
2019
        # Lock all groups used by instances optimistically; this requires going
2020
        # via the node before it's locked, requiring verification later on
2021
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
2022
          set(group_uuid
2023
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
2024
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
2025
      elif level == locking.LEVEL_NODE:
2026
        lu._LockInstancesNodes() # pylint: disable=W0212
2027

    
2028
      elif level == locking.LEVEL_NETWORK:
2029
        lu.needed_locks[locking.LEVEL_NETWORK] = \
2030
          frozenset(net_uuid
2031
                    for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
2032
                    for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
2033

    
2034
  @staticmethod
2035
  def _CheckGroupLocks(lu):
2036
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
2037
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
2038

    
2039
    # Check if node groups for locked instances are still correct
2040
    for instance_name in owned_instances:
2041
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
2042

    
2043
  def _GetQueryData(self, lu):
2044
    """Computes the list of instances and their attributes.
2045

2046
    """
2047
    if self.do_grouplocks:
2048
      self._CheckGroupLocks(lu)
2049

    
2050
    cluster = lu.cfg.GetClusterInfo()
2051
    all_info = lu.cfg.GetAllInstancesInfo()
2052

    
2053
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
2054

    
2055
    instance_list = [all_info[name] for name in instance_names]
2056
    nodes = frozenset(itertools.chain(*(inst.all_nodes
2057
                                        for inst in instance_list)))
2058
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
2059
    bad_nodes = []
2060
    offline_nodes = []
2061
    wrongnode_inst = set()
2062

    
2063
    # Gather data as requested
2064
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
2065
      live_data = {}
2066
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
2067
      for name in nodes:
2068
        result = node_data[name]
2069
        if result.offline:
2070
          # offline nodes will be in both lists
2071
          assert result.fail_msg
2072
          offline_nodes.append(name)
2073
        if result.fail_msg:
2074
          bad_nodes.append(name)
2075
        elif result.payload:
2076
          for inst in result.payload:
2077
            if inst in all_info:
2078
              if all_info[inst].primary_node == name:
2079
                live_data.update(result.payload)
2080
              else:
2081
                wrongnode_inst.add(inst)
2082
            else:
2083
              # orphan instance; we don't list it here as we don't
2084
              # handle this case yet in the output of instance listing
2085
              logging.warning("Orphan instance '%s' found on node %s",
2086
                              inst, name)
2087
        # else no instance is alive
2088
    else:
2089
      live_data = {}
2090

    
2091
    if query.IQ_DISKUSAGE in self.requested_data:
2092
      gmi = ganeti.masterd.instance
2093
      disk_usage = dict((inst.name,
2094
                         gmi.ComputeDiskSize(inst.disk_template,
2095
                                             [{constants.IDISK_SIZE: disk.size}
2096
                                              for disk in inst.disks]))
2097
                        for inst in instance_list)
2098
    else:
2099
      disk_usage = None
2100

    
2101
    if query.IQ_CONSOLE in self.requested_data:
2102
      consinfo = {}
2103
      for inst in instance_list:
2104
        if inst.name in live_data:
2105
          # Instance is running
2106
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
2107
        else:
2108
          consinfo[inst.name] = None
2109
      assert set(consinfo.keys()) == set(instance_names)
2110
    else:
2111
      consinfo = None
2112

    
2113
    if query.IQ_NODES in self.requested_data:
2114
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
2115
                                            instance_list)))
2116
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
2117
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
2118
                    for uuid in set(map(operator.attrgetter("group"),
2119
                                        nodes.values())))
2120
    else:
2121
      nodes = None
2122
      groups = None
2123

    
2124
    if query.IQ_NETWORKS in self.requested_data:
2125
      net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
2126
                                    for i in instance_list))
2127
      networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
2128
    else:
2129
      networks = None
2130

    
2131
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
2132
                                   disk_usage, offline_nodes, bad_nodes,
2133
                                   live_data, wrongnode_inst, consinfo,
2134
                                   nodes, groups, networks)
2135

    
2136

    
2137
class LUQuery(NoHooksLU):
2138
  """Query for resources/items of a certain kind.
2139

2140
  """
2141
  # pylint: disable=W0142
2142
  REQ_BGL = False
2143

    
2144
  def CheckArguments(self):
2145
    qcls = _GetQueryImplementation(self.op.what)
2146

    
2147
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
2148

    
2149
  def ExpandNames(self):
2150
    self.impl.ExpandNames(self)
2151

    
2152
  def DeclareLocks(self, level):
2153
    self.impl.DeclareLocks(self, level)
2154

    
2155
  def Exec(self, feedback_fn):
2156
    return self.impl.NewStyleQuery(self)
2157

    
2158

    
2159
class LUQueryFields(NoHooksLU):
2160
  """Query for resources/items of a certain kind.
2161

2162
  """
2163
  # pylint: disable=W0142
2164
  REQ_BGL = False
2165

    
2166
  def CheckArguments(self):
2167
    self.qcls = _GetQueryImplementation(self.op.what)
2168

    
2169
  def ExpandNames(self):
2170
    self.needed_locks = {}
2171

    
2172
  def Exec(self, feedback_fn):
2173
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
2174

    
2175

    
2176
class LUNodeModifyStorage(NoHooksLU):
2177
  """Logical unit for modifying a storage volume on a node.
2178

2179
  """
2180
  REQ_BGL = False
2181

    
2182
  def CheckArguments(self):
2183
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2184

    
2185
    storage_type = self.op.storage_type
2186

    
2187
    try:
2188
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2189
    except KeyError:
2190
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
2191
                                 " modified" % storage_type,
2192
                                 errors.ECODE_INVAL)
2193

    
2194
    diff = set(self.op.changes.keys()) - modifiable
2195
    if diff:
2196
      raise errors.OpPrereqError("The following fields can not be modified for"
2197
                                 " storage units of type '%s': %r" %
2198
                                 (storage_type, list(diff)),
2199
                                 errors.ECODE_INVAL)
2200

    
2201
  def ExpandNames(self):
2202
    self.needed_locks = {
2203
      locking.LEVEL_NODE: self.op.node_name,
2204
      }
2205

    
2206
  def Exec(self, feedback_fn):
2207
    """Computes the list of nodes and their attributes.
2208

2209
    """
2210
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2211
    result = self.rpc.call_storage_modify(self.op.node_name,
2212
                                          self.op.storage_type, st_args,
2213
                                          self.op.name, self.op.changes)
2214
    result.Raise("Failed to modify storage unit '%s' on %s" %
2215
                 (self.op.name, self.op.node_name))
2216

    
2217

    
2218
class LUNodeAdd(LogicalUnit):
2219
  """Logical unit for adding node to the cluster.
2220

2221
  """
2222
  HPATH = "node-add"
2223
  HTYPE = constants.HTYPE_NODE
2224
  _NFLAGS = ["master_capable", "vm_capable"]
2225

    
2226
  def CheckArguments(self):
2227
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
2228
    # validate/normalize the node name
2229
    self.hostname = netutils.GetHostname(name=self.op.node_name,
2230
                                         family=self.primary_ip_family)
2231
    self.op.node_name = self.hostname.name
2232

    
2233
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
2234
      raise errors.OpPrereqError("Cannot readd the master node",
2235
                                 errors.ECODE_STATE)
2236

    
2237
    if self.op.readd and self.op.group:
2238
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
2239
                                 " being readded", errors.ECODE_INVAL)
2240

    
2241
  def BuildHooksEnv(self):
2242
    """Build hooks env.
2243

2244
    This will run on all nodes before, and on all nodes + the new node after.
2245

2246
    """
2247
    return {
2248
      "OP_TARGET": self.op.node_name,
2249
      "NODE_NAME": self.op.node_name,
2250
      "NODE_PIP": self.op.primary_ip,
2251
      "NODE_SIP": self.op.secondary_ip,
2252
      "MASTER_CAPABLE": str(self.op.master_capable),
2253
      "VM_CAPABLE": str(self.op.vm_capable),
2254
      }
2255

    
2256
  def BuildHooksNodes(self):
2257
    """Build hooks nodes.
2258

2259
    """
2260
    # Exclude added node
2261
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
2262
    post_nodes = pre_nodes + [self.op.node_name, ]
2263

    
2264
    return (pre_nodes, post_nodes)
2265

    
2266
  def CheckPrereq(self):
2267
    """Check prerequisites.
2268

2269
    This checks:
2270
     - the new node is not already in the config
2271
     - it is resolvable
2272
     - its parameters (single/dual homed) matches the cluster
2273

2274
    Any errors are signaled by raising errors.OpPrereqError.
2275

2276
    """
2277
    cfg = self.cfg
2278
    hostname = self.hostname
2279
    node = hostname.name
2280
    primary_ip = self.op.primary_ip = hostname.ip
2281
    if self.op.secondary_ip is None:
2282
      if self.primary_ip_family == netutils.IP6Address.family:
2283
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
2284
                                   " IPv4 address must be given as secondary",
2285
                                   errors.ECODE_INVAL)
2286
      self.op.secondary_ip = primary_ip
2287

    
2288
    secondary_ip = self.op.secondary_ip
2289
    if not netutils.IP4Address.IsValid(secondary_ip):
2290
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
2291
                                 " address" % secondary_ip, errors.ECODE_INVAL)
2292

    
2293
    node_list = cfg.GetNodeList()
2294
    if not self.op.readd and node in node_list:
2295
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2296
                                 node, errors.ECODE_EXISTS)
2297
    elif self.op.readd and node not in node_list:
2298
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
2299
                                 errors.ECODE_NOENT)
2300

    
2301
    self.changed_primary_ip = False
2302

    
2303
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
2304
      if self.op.readd and node == existing_node_name:
2305
        if existing_node.secondary_ip != secondary_ip:
2306
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2307
                                     " address configuration as before",
2308
                                     errors.ECODE_INVAL)
2309
        if existing_node.primary_ip != primary_ip:
2310
          self.changed_primary_ip = True
2311

    
2312
        continue
2313

    
2314
      if (existing_node.primary_ip == primary_ip or
2315
          existing_node.secondary_ip == primary_ip or
2316
          existing_node.primary_ip == secondary_ip or
2317
          existing_node.secondary_ip == secondary_ip):
2318
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2319
                                   " existing node %s" % existing_node.name,
2320
                                   errors.ECODE_NOTUNIQUE)
2321

    
2322
    # After this 'if' block, None is no longer a valid value for the
2323
    # _capable op attributes
2324
    if self.op.readd:
2325
      old_node = self.cfg.GetNodeInfo(node)
2326
      assert old_node is not None, "Can't retrieve locked node %s" % node
2327
      for attr in self._NFLAGS:
2328
        if getattr(self.op, attr) is None:
2329
          setattr(self.op, attr, getattr(old_node, attr))
2330
    else:
2331
      for attr in self._NFLAGS:
2332
        if getattr(self.op, attr) is None:
2333
          setattr(self.op, attr, True)
2334

    
2335
    if self.op.readd and not self.op.vm_capable:
2336
      pri, sec = cfg.GetNodeInstances(node)
2337
      if pri or sec:
2338
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
2339
                                   " flag set to false, but it already holds"
2340
                                   " instances" % node,
2341
                                   errors.ECODE_STATE)
2342

    
2343
    # check that the type of the node (single versus dual homed) is the
2344
    # same as for the master
2345
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2346
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2347
    newbie_singlehomed = secondary_ip == primary_ip
2348
    if master_singlehomed != newbie_singlehomed:
2349
      if master_singlehomed:
2350
        raise errors.OpPrereqError("The master has no secondary ip but the"
2351
                                   " new node has one",
2352
                                   errors.ECODE_INVAL)
2353
      else:
2354
        raise errors.OpPrereqError("The master has a secondary ip but the"
2355
                                   " new node doesn't have one",
2356
                                   errors.ECODE_INVAL)
2357

    
2358
    # checks reachability
2359
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2360
      raise errors.OpPrereqError("Node not reachable by ping",
2361
                                 errors.ECODE_ENVIRON)
2362

    
2363
    if not newbie_singlehomed:
2364
      # check reachability from my secondary ip to newbie's secondary ip
2365
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2366
                              source=myself.secondary_ip):
2367
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2368
                                   " based ping to node daemon port",
2369
                                   errors.ECODE_ENVIRON)
2370

    
2371
    if self.op.readd:
2372
      exceptions = [node]
2373
    else:
2374
      exceptions = []
2375

    
2376
    if self.op.master_capable:
2377
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
2378
    else:
2379
      self.master_candidate = False
2380

    
2381
    if self.op.readd:
2382
      self.new_node = old_node
2383
    else:
2384
      node_group = cfg.LookupNodeGroup(self.op.group)
2385
      self.new_node = objects.Node(name=node,
2386
                                   primary_ip=primary_ip,
2387
                                   secondary_ip=secondary_ip,
2388
                                   master_candidate=self.master_candidate,
2389
                                   offline=False, drained=False,
2390
                                   group=node_group, ndparams={})
2391

    
2392
    if self.op.ndparams:
2393
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2394
      _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
2395
                            "node", "cluster or group")
2396

    
2397
    if self.op.hv_state:
2398
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
2399

    
2400
    if self.op.disk_state:
2401
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
2402

    
2403
    # TODO: If we need to have multiple DnsOnlyRunner we probably should make
2404
    #       it a property on the base class.
2405
    rpcrunner = rpc.DnsOnlyRunner()
2406
    result = rpcrunner.call_version([node])[node]
2407
    result.Raise("Can't get version information from node %s" % node)
2408
    if constants.PROTOCOL_VERSION == result.payload:
2409
      logging.info("Communication to node %s fine, sw version %s match",
2410
                   node, result.payload)
2411
    else:
2412
      raise errors.OpPrereqError("Version mismatch master version %s,"
2413
                                 " node version %s" %
2414
                                 (constants.PROTOCOL_VERSION, result.payload),
2415
                                 errors.ECODE_ENVIRON)
2416

    
2417
    vg_name = cfg.GetVGName()
2418
    if vg_name is not None:
2419
      vparams = {constants.NV_PVLIST: [vg_name]}
2420
      excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
2421
      cname = self.cfg.GetClusterName()
2422
      result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
2423
      (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
2424
      if errmsgs:
2425
        raise errors.OpPrereqError("Checks on node PVs failed: %s" %
2426
                                   "; ".join(errmsgs), errors.ECODE_ENVIRON)
2427

    
2428
  def Exec(self, feedback_fn):
2429
    """Adds the new node to the cluster.
2430

2431
    """
2432
    new_node = self.new_node
2433
    node = new_node.name
2434

    
2435
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
2436
      "Not owning BGL"
2437

    
2438
    # We adding a new node so we assume it's powered
2439
    new_node.powered = True
2440

    
2441
    # for re-adds, reset the offline/drained/master-candidate flags;
2442
    # we need to reset here, otherwise offline would prevent RPC calls
2443
    # later in the procedure; this also means that if the re-add
2444
    # fails, we are left with a non-offlined, broken node
2445
    if self.op.readd:
2446
      new_node.drained = new_node.offline = False # pylint: disable=W0201
2447
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2448
      # if we demote the node, we do cleanup later in the procedure
2449
      new_node.master_candidate = self.master_candidate
2450
      if self.changed_primary_ip:
2451
        new_node.primary_ip = self.op.primary_ip
2452

    
2453
    # copy the master/vm_capable flags
2454
    for attr in self._NFLAGS:
2455
      setattr(new_node, attr, getattr(self.op, attr))
2456

    
2457
    # notify the user about any possible mc promotion
2458
    if new_node.master_candidate:
2459
      self.LogInfo("Node will be a master candidate")
2460

    
2461
    if self.op.ndparams:
2462
      new_node.ndparams = self.op.ndparams
2463
    else:
2464
      new_node.ndparams = {}
2465

    
2466
    if self.op.hv_state:
2467
      new_node.hv_state_static = self.new_hv_state
2468

    
2469
    if self.op.disk_state:
2470
      new_node.disk_state_static = self.new_disk_state
2471

    
2472
    # Add node to our /etc/hosts, and add key to known_hosts
2473
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2474
      master_node = self.cfg.GetMasterNode()
2475
      result = self.rpc.call_etc_hosts_modify(master_node,
2476
                                              constants.ETC_HOSTS_ADD,
2477
                                              self.hostname.name,
2478
                                              self.hostname.ip)
2479
      result.Raise("Can't update hosts file with new host data")
2480

    
2481
    if new_node.secondary_ip != new_node.primary_ip:
2482
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
2483
                               False)
2484

    
2485
    node_verify_list = [self.cfg.GetMasterNode()]
2486
    node_verify_param = {
2487
      constants.NV_NODELIST: ([node], {}),
2488
      # TODO: do a node-net-test as well?
2489
    }
2490

    
2491
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2492
                                       self.cfg.GetClusterName())
2493
    for verifier in node_verify_list:
2494
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2495
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
2496
      if nl_payload:
2497
        for failed in nl_payload:
2498
          feedback_fn("ssh/hostname verification failed"
2499
                      " (checking from %s): %s" %
2500
                      (verifier, nl_payload[failed]))
2501
        raise errors.OpExecError("ssh/hostname verification failed")
2502

    
2503
    if self.op.readd:
2504
      _RedistributeAncillaryFiles(self)
2505
      self.context.ReaddNode(new_node)
2506
      # make sure we redistribute the config
2507
      self.cfg.Update(new_node, feedback_fn)
2508
      # and make sure the new node will not have old files around
2509
      if not new_node.master_candidate:
2510
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2511
        msg = result.fail_msg
2512
        if msg:
2513
          self.LogWarning("Node failed to demote itself from master"
2514
                          " candidate status: %s" % msg)
2515
    else:
2516
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
2517
                                  additional_vm=self.op.vm_capable)
2518
      self.context.AddNode(new_node, self.proc.GetECId())
2519

    
2520

    
2521
class LUNodeSetParams(LogicalUnit):
2522
  """Modifies the parameters of a node.
2523

2524
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
2525
      to the node role (as _ROLE_*)
2526
  @cvar _R2F: a dictionary from node role to tuples of flags
2527
  @cvar _FLAGS: a list of attribute names corresponding to the flags
2528

2529
  """
2530
  HPATH = "node-modify"
2531
  HTYPE = constants.HTYPE_NODE
2532
  REQ_BGL = False
2533
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
2534
  _F2R = {
2535
    (True, False, False): _ROLE_CANDIDATE,
2536
    (False, True, False): _ROLE_DRAINED,
2537
    (False, False, True): _ROLE_OFFLINE,
2538
    (False, False, False): _ROLE_REGULAR,
2539
    }
2540
  _R2F = dict((v, k) for k, v in _F2R.items())
2541
  _FLAGS = ["master_candidate", "drained", "offline"]
2542

    
2543
  def CheckArguments(self):
2544
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2545
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
2546
                self.op.master_capable, self.op.vm_capable,
2547
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
2548
                self.op.disk_state]
2549
    if all_mods.count(None) == len(all_mods):
2550
      raise errors.OpPrereqError("Please pass at least one modification",
2551
                                 errors.ECODE_INVAL)
2552
    if all_mods.count(True) > 1:
2553
      raise errors.OpPrereqError("Can't set the node into more than one"
2554
                                 " state at the same time",
2555
                                 errors.ECODE_INVAL)
2556

    
2557
    # Boolean value that tells us whether we might be demoting from MC
2558
    self.might_demote = (self.op.master_candidate is False or
2559
                         self.op.offline is True or
2560
                         self.op.drained is True or
2561
                         self.op.master_capable is False)
2562

    
2563
    if self.op.secondary_ip:
2564
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
2565
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
2566
                                   " address" % self.op.secondary_ip,
2567
                                   errors.ECODE_INVAL)
2568

    
2569
    self.lock_all = self.op.auto_promote and self.might_demote
2570
    self.lock_instances = self.op.secondary_ip is not None
2571

    
2572
  def _InstanceFilter(self, instance):
2573
    """Filter for getting affected instances.
2574

2575
    """
2576
    return (instance.disk_template in constants.DTS_INT_MIRROR and
2577
            self.op.node_name in instance.all_nodes)
2578

    
2579
  def ExpandNames(self):
2580
    if self.lock_all:
2581
      self.needed_locks = {
2582
        locking.LEVEL_NODE: locking.ALL_SET,
2583

    
2584
        # Block allocations when all nodes are locked
2585
        locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2586
        }
2587
    else:
2588
      self.needed_locks = {
2589
        locking.LEVEL_NODE: self.op.node_name,
2590
        }
2591

    
2592
    # Since modifying a node can have severe effects on currently running
2593
    # operations the resource lock is at least acquired in shared mode
2594
    self.needed_locks[locking.LEVEL_NODE_RES] = \
2595
      self.needed_locks[locking.LEVEL_NODE]
2596

    
2597
    # Get all locks except nodes in shared mode; they are not used for anything
2598
    # but read-only access
2599
    self.share_locks = _ShareAll()
2600
    self.share_locks[locking.LEVEL_NODE] = 0
2601
    self.share_locks[locking.LEVEL_NODE_RES] = 0
2602
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
2603

    
2604
    if self.lock_instances:
2605
      self.needed_locks[locking.LEVEL_INSTANCE] = \
2606
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
2607

    
2608
  def BuildHooksEnv(self):
2609
    """Build hooks env.
2610

2611
    This runs on the master node.
2612

2613
    """
2614
    return {
2615
      "OP_TARGET": self.op.node_name,
2616
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2617
      "OFFLINE": str(self.op.offline),
2618
      "DRAINED": str(self.op.drained),
2619
      "MASTER_CAPABLE": str(self.op.master_capable),
2620
      "VM_CAPABLE": str(self.op.vm_capable),
2621
      }
2622

    
2623
  def BuildHooksNodes(self):
2624
    """Build hooks nodes.
2625

2626
    """
2627
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
2628
    return (nl, nl)
2629

    
2630
  def CheckPrereq(self):
2631
    """Check prerequisites.
2632

2633
    This only checks the instance list against the existing names.
2634

2635
    """
2636
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2637

    
2638
    if self.lock_instances:
2639
      affected_instances = \
2640
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
2641

    
2642
      # Verify instance locks
2643
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
2644
      wanted_instances = frozenset(affected_instances.keys())
2645
      if wanted_instances - owned_instances:
2646
        raise errors.OpPrereqError("Instances affected by changing node %s's"
2647
                                   " secondary IP address have changed since"
2648
                                   " locks were acquired, wanted '%s', have"
2649
                                   " '%s'; retry the operation" %
2650
                                   (self.op.node_name,
2651
                                    utils.CommaJoin(wanted_instances),
2652
                                    utils.CommaJoin(owned_instances)),
2653
                                   errors.ECODE_STATE)
2654
    else:
2655
      affected_instances = None
2656

    
2657
    if (self.op.master_candidate is not None or
2658
        self.op.drained is not None or
2659
        self.op.offline is not None):
2660
      # we can't change the master's node flags
2661
      if self.op.node_name == self.cfg.GetMasterNode():
2662
        raise errors.OpPrereqError("The master role can be changed"
2663
                                   " only via master-failover",
2664
                                   errors.ECODE_INVAL)
2665

    
2666
    if self.op.master_candidate and not node.master_capable:
2667
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
2668
                                 " it a master candidate" % node.name,
2669
                                 errors.ECODE_STATE)
2670

    
2671
    if self.op.vm_capable is False:
2672
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
2673
      if ipri or isec:
2674
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
2675
                                   " the vm_capable flag" % node.name,
2676
                                   errors.ECODE_STATE)
2677

    
2678
    if node.master_candidate and self.might_demote and not self.lock_all:
2679
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
2680
      # check if after removing the current node, we're missing master
2681
      # candidates
2682
      (mc_remaining, mc_should, _) = \
2683
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
2684
      if mc_remaining < mc_should:
2685
        raise errors.OpPrereqError("Not enough master candidates, please"
2686
                                   " pass auto promote option to allow"
2687
                                   " promotion (--auto-promote or RAPI"
2688
                                   " auto_promote=True)", errors.ECODE_STATE)
2689

    
2690
    self.old_flags = old_flags = (node.master_candidate,
2691
                                  node.drained, node.offline)
2692
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
2693
    self.old_role = old_role = self._F2R[old_flags]
2694

    
2695
    # Check for ineffective changes
2696
    for attr in self._FLAGS:
2697
      if (getattr(self.op, attr) is False and getattr(node, attr) is False):
2698
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
2699
        setattr(self.op, attr, None)
2700

    
2701
    # Past this point, any flag change to False means a transition
2702
    # away from the respective state, as only real changes are kept
2703

    
2704
    # TODO: We might query the real power state if it supports OOB
2705
    if _SupportsOob(self.cfg, node):
2706
      if self.op.offline is False and not (node.powered or
2707
                                           self.op.powered is True):
2708
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
2709
                                    " offline status can be reset") %
2710
                                   self.op.node_name, errors.ECODE_STATE)
2711
    elif self.op.powered is not None:
2712
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
2713
                                  " as it does not support out-of-band"
2714
                                  " handling") % self.op.node_name,
2715
                                 errors.ECODE_STATE)
2716

    
2717
    # If we're being deofflined/drained, we'll MC ourself if needed
2718
    if (self.op.drained is False or self.op.offline is False or
2719
        (self.op.master_capable and not node.master_capable)):
2720
      if _DecideSelfPromotion(self):
2721
        self.op.master_candidate = True
2722
        self.LogInfo("Auto-promoting node to master candidate")
2723

    
2724
    # If we're no longer master capable, we'll demote ourselves from MC
2725
    if self.op.master_capable is False and node.master_candidate:
2726
      self.LogInfo("Demoting from master candidate")
2727
      self.op.master_candidate = False
2728

    
2729
    # Compute new role
2730
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
2731
    if self.op.master_candidate:
2732
      new_role = self._ROLE_CANDIDATE
2733
    elif self.op.drained:
2734
      new_role = self._ROLE_DRAINED
2735
    elif self.op.offline:
2736
      new_role = self._ROLE_OFFLINE
2737
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
2738
      # False is still in new flags, which means we're un-setting (the
2739
      # only) True flag
2740
      new_role = self._ROLE_REGULAR
2741
    else: # no new flags, nothing, keep old role
2742
      new_role = old_role
2743

    
2744
    self.new_role = new_role
2745

    
2746
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
2747
      # Trying to transition out of offline status
2748
      result = self.rpc.call_version([node.name])[node.name]
2749
      if result.fail_msg:
2750
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
2751
                                   " to report its version: %s" %
2752
                                   (node.name, result.fail_msg),
2753
                                   errors.ECODE_STATE)
2754
      else:
2755
        self.LogWarning("Transitioning node from offline to online state"
2756
                        " without using re-add. Please make sure the node"
2757
                        " is healthy!")
2758

    
2759
    # When changing the secondary ip, verify if this is a single-homed to
2760
    # multi-homed transition or vice versa, and apply the relevant
2761
    # restrictions.
2762
    if self.op.secondary_ip:
2763
      # Ok even without locking, because this can't be changed by any LU
2764
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
2765
      master_singlehomed = master.secondary_ip == master.primary_ip
2766
      if master_singlehomed and self.op.secondary_ip != node.primary_ip:
2767
        if self.op.force and node.name == master.name:
2768
          self.LogWarning("Transitioning from single-homed to multi-homed"
2769
                          " cluster; all nodes will require a secondary IP"
2770
                          " address")
2771
        else:
2772
          raise errors.OpPrereqError("Changing the secondary ip on a"
2773
                                     " single-homed cluster requires the"
2774
                                     " --force option to be passed, and the"
2775
                                     " target node to be the master",
2776
                                     errors.ECODE_INVAL)
2777
      elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
2778
        if self.op.force and node.name == master.name:
2779
          self.LogWarning("Transitioning from multi-homed to single-homed"
2780
                          " cluster; secondary IP addresses will have to be"
2781
                          " removed")
2782
        else:
2783
          raise errors.OpPrereqError("Cannot set the secondary IP to be the"
2784
                                     " same as the primary IP on a multi-homed"
2785
                                     " cluster, unless the --force option is"
2786
                                     " passed, and the target node is the"
2787
                                     " master", errors.ECODE_INVAL)
2788

    
2789
      assert not (frozenset(affected_instances) -
2790
                  self.owned_locks(locking.LEVEL_INSTANCE))
2791

    
2792
      if node.offline:
2793
        if affected_instances:
2794
          msg = ("Cannot change secondary IP address: offline node has"
2795
                 " instances (%s) configured to use it" %
2796
                 utils.CommaJoin(affected_instances.keys()))
2797
          raise errors.OpPrereqError(msg, errors.ECODE_STATE)
2798
      else:
2799
        # On online nodes, check that no instances are running, and that
2800
        # the node has the new ip and we can reach it.
2801
        for instance in affected_instances.values():
2802
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
2803
                              msg="cannot change secondary ip")
2804

    
2805
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
2806
        if master.name != node.name:
2807
          # check reachability from master secondary ip to new secondary ip
2808
          if not netutils.TcpPing(self.op.secondary_ip,
2809
                                  constants.DEFAULT_NODED_PORT,
2810
                                  source=master.secondary_ip):
2811
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2812
                                       " based ping to node daemon port",
2813
                                       errors.ECODE_ENVIRON)
2814

    
2815
    if self.op.ndparams:
2816
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
2817
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
2818
      _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
2819
                            "node", "cluster or group")
2820
      self.new_ndparams = new_ndparams
2821

    
2822
    if self.op.hv_state:
2823
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
2824
                                                 self.node.hv_state_static)
2825

    
2826
    if self.op.disk_state:
2827
      self.new_disk_state = \
2828
        _MergeAndVerifyDiskState(self.op.disk_state,
2829
                                 self.node.disk_state_static)
2830

    
2831
  def Exec(self, feedback_fn):
2832
    """Modifies a node.
2833

2834
    """
2835
    node = self.node
2836
    old_role = self.old_role
2837
    new_role = self.new_role
2838

    
2839
    result = []
2840

    
2841
    if self.op.ndparams:
2842
      node.ndparams = self.new_ndparams
2843

    
2844
    if self.op.powered is not None:
2845
      node.powered = self.op.powered
2846

    
2847
    if self.op.hv_state:
2848
      node.hv_state_static = self.new_hv_state
2849

    
2850
    if self.op.disk_state:
2851
      node.disk_state_static = self.new_disk_state
2852

    
2853
    for attr in ["master_capable", "vm_capable"]:
2854
      val = getattr(self.op, attr)
2855
      if val is not None:
2856
        setattr(node, attr, val)
2857
        result.append((attr, str(val)))
2858

    
2859
    if new_role != old_role:
2860
      # Tell the node to demote itself, if no longer MC and not offline
2861
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
2862
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
2863
        if msg:
2864
          self.LogWarning("Node failed to demote itself: %s", msg)
2865

    
2866
      new_flags = self._R2F[new_role]
2867
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
2868
        if of != nf:
2869
          result.append((desc, str(nf)))
2870
      (node.master_candidate, node.drained, node.offline) = new_flags
2871

    
2872
      # we locked all nodes, we adjust the CP before updating this node
2873
      if self.lock_all:
2874
        _AdjustCandidatePool(self, [node.name])
2875

    
2876
    if self.op.secondary_ip:
2877
      node.secondary_ip = self.op.secondary_ip
2878
      result.append(("secondary_ip", self.op.secondary_ip))
2879

    
2880
    # this will trigger configuration file update, if needed
2881
    self.cfg.Update(node, feedback_fn)
2882

    
2883
    # this will trigger job queue propagation or cleanup if the mc
2884
    # flag changed
2885
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
2886
      self.context.ReaddNode(node)
2887

    
2888
    return result
2889

    
2890

    
2891
class LUNodePowercycle(NoHooksLU):
2892
  """Powercycles a node.
2893

2894
  """
2895
  REQ_BGL = False
2896

    
2897
  def CheckArguments(self):
2898
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2899
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
2900
      raise errors.OpPrereqError("The node is the master and the force"
2901
                                 " parameter was not set",
2902
                                 errors.ECODE_INVAL)
2903

    
2904
  def ExpandNames(self):
2905
    """Locking for PowercycleNode.
2906

2907
    This is a last-resort option and shouldn't block on other
2908
    jobs. Therefore, we grab no locks.
2909

2910
    """
2911
    self.needed_locks = {}
2912

    
2913
  def Exec(self, feedback_fn):
2914
    """Reboots a node.
2915

2916
    """
2917
    result = self.rpc.call_node_powercycle(self.op.node_name,
2918
                                           self.cfg.GetHypervisorType())
2919
    result.Raise("Failed to schedule the reboot")
2920
    return result.payload
2921

    
2922

    
2923
class LUInstanceActivateDisks(NoHooksLU):
2924
  """Bring up an instance's disks.
2925

2926
  """
2927
  REQ_BGL = False
2928

    
2929
  def ExpandNames(self):
2930
    self._ExpandAndLockInstance()
2931
    self.needed_locks[locking.LEVEL_NODE] = []
2932
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2933

    
2934
  def DeclareLocks(self, level):
2935
    if level == locking.LEVEL_NODE:
2936
      self._LockInstancesNodes()
2937

    
2938
  def CheckPrereq(self):
2939
    """Check prerequisites.
2940

2941
    This checks that the instance is in the cluster.
2942

2943
    """
2944
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2945
    assert self.instance is not None, \
2946
      "Cannot retrieve locked instance %s" % self.op.instance_name
2947
    _CheckNodeOnline(self, self.instance.primary_node)
2948

    
2949
  def Exec(self, feedback_fn):
2950
    """Activate the disks.
2951

2952
    """
2953
    disks_ok, disks_info = \
2954
              _AssembleInstanceDisks(self, self.instance,
2955
                                     ignore_size=self.op.ignore_size)
2956
    if not disks_ok:
2957
      raise errors.OpExecError("Cannot activate block devices")
2958

    
2959
    if self.op.wait_for_sync:
2960
      if not _WaitForSync(self, self.instance):
2961
        raise errors.OpExecError("Some disks of the instance are degraded!")
2962

    
2963
    return disks_info
2964

    
2965

    
2966
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
2967
                           ignore_size=False):
2968
  """Prepare the block devices for an instance.
2969

2970
  This sets up the block devices on all nodes.
2971

2972
  @type lu: L{LogicalUnit}
2973
  @param lu: the logical unit on whose behalf we execute
2974
  @type instance: L{objects.Instance}
2975
  @param instance: the instance for whose disks we assemble
2976
  @type disks: list of L{objects.Disk} or None
2977
  @param disks: which disks to assemble (or all, if None)
2978
  @type ignore_secondaries: boolean
2979
  @param ignore_secondaries: if true, errors on secondary nodes
2980
      won't result in an error return from the function
2981
  @type ignore_size: boolean
2982
  @param ignore_size: if true, the current known size of the disk
2983
      will not be used during the disk activation, useful for cases
2984
      when the size is wrong
2985
  @return: False if the operation failed, otherwise a list of
2986
      (host, instance_visible_name, node_visible_name)
2987
      with the mapping from node devices to instance devices
2988

2989
  """
2990
  device_info = []
2991
  disks_ok = True
2992
  iname = instance.name
2993
  disks = _ExpandCheckDisks(instance, disks)
2994

    
2995
  # With the two passes mechanism we try to reduce the window of
2996
  # opportunity for the race condition of switching DRBD to primary
2997
  # before handshaking occured, but we do not eliminate it
2998

    
2999
  # The proper fix would be to wait (with some limits) until the
3000
  # connection has been made and drbd transitions from WFConnection
3001
  # into any other network-connected state (Connected, SyncTarget,
3002
  # SyncSource, etc.)
3003

    
3004
  # 1st pass, assemble on all nodes in secondary mode
3005
  for idx, inst_disk in enumerate(disks):
3006
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3007
      if ignore_size:
3008
        node_disk = node_disk.Copy()
3009
        node_disk.UnsetSize()
3010
      lu.cfg.SetDiskID(node_disk, node)
3011
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
3012
                                             False, idx)
3013
      msg = result.fail_msg
3014
      if msg:
3015
        is_offline_secondary = (node in instance.secondary_nodes and
3016
                                result.offline)
3017
        lu.LogWarning("Could not prepare block device %s on node %s"
3018
                      " (is_primary=False, pass=1): %s",
3019
                      inst_disk.iv_name, node, msg)
3020
        if not (ignore_secondaries or is_offline_secondary):
3021
          disks_ok = False
3022

    
3023
  # FIXME: race condition on drbd migration to primary
3024

    
3025
  # 2nd pass, do only the primary node
3026
  for idx, inst_disk in enumerate(disks):
3027
    dev_path = None
3028

    
3029
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3030
      if node != instance.primary_node:
3031
        continue
3032
      if ignore_size:
3033
        node_disk = node_disk.Copy()
3034
        node_disk.UnsetSize()
3035
      lu.cfg.SetDiskID(node_disk, node)
3036
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
3037
                                             True, idx)
3038
      msg = result.fail_msg
3039
      if msg:
3040
        lu.LogWarning("Could not prepare block device %s on node %s"
3041
                      " (is_primary=True, pass=2): %s",
3042
                      inst_disk.iv_name, node, msg)
3043
        disks_ok = False
3044
      else:
3045
        dev_path = result.payload
3046

    
3047
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3048

    
3049
  # leave the disks configured for the primary node
3050
  # this is a workaround that would be fixed better by
3051
  # improving the logical/physical id handling
3052
  for disk in disks:
3053
    lu.cfg.SetDiskID(disk, instance.primary_node)
3054

    
3055
  return disks_ok, device_info
3056

    
3057

    
3058
def _StartInstanceDisks(lu, instance, force):
3059
  """Start the disks of an instance.
3060

3061
  """
3062
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3063
                                           ignore_secondaries=force)
3064
  if not disks_ok:
3065
    _ShutdownInstanceDisks(lu, instance)
3066
    if force is not None and not force:
3067
      lu.LogWarning("",
3068
                    hint=("If the message above refers to a secondary node,"
3069
                          " you can retry the operation using '--force'"))
3070
    raise errors.OpExecError("Disk consistency error")
3071

    
3072

    
3073
class LUInstanceDeactivateDisks(NoHooksLU):
3074
  """Shutdown an instance's disks.
3075

3076
  """
3077
  REQ_BGL = False
3078

    
3079
  def ExpandNames(self):
3080
    self._ExpandAndLockInstance()
3081
    self.needed_locks[locking.LEVEL_NODE] = []
3082
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3083

    
3084
  def DeclareLocks(self, level):
3085
    if level == locking.LEVEL_NODE:
3086
      self._LockInstancesNodes()
3087

    
3088
  def CheckPrereq(self):
3089
    """Check prerequisites.
3090

3091
    This checks that the instance is in the cluster.
3092

3093
    """
3094
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3095
    assert self.instance is not None, \
3096
      "Cannot retrieve locked instance %s" % self.op.instance_name
3097

    
3098
  def Exec(self, feedback_fn):
3099
    """Deactivate the disks
3100

3101
    """
3102
    instance = self.instance
3103
    if self.op.force:
3104
      _ShutdownInstanceDisks(self, instance)
3105
    else:
3106
      _SafeShutdownInstanceDisks(self, instance)
3107

    
3108

    
3109
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
3110
  """Shutdown block devices of an instance.
3111

3112
  This function checks if an instance is running, before calling
3113
  _ShutdownInstanceDisks.
3114

3115
  """
3116
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
3117
  _ShutdownInstanceDisks(lu, instance, disks=disks)
3118

    
3119

    
3120
def _ExpandCheckDisks(instance, disks):
3121
  """Return the instance disks selected by the disks list
3122

3123
  @type disks: list of L{objects.Disk} or None
3124
  @param disks: selected disks
3125
  @rtype: list of L{objects.Disk}
3126
  @return: selected instance disks to act on
3127

3128
  """
3129
  if disks is None:
3130
    return instance.disks
3131
  else:
3132
    if not set(disks).issubset(instance.disks):
3133
      raise errors.ProgrammerError("Can only act on disks belonging to the"
3134
                                   " target instance")
3135
    return disks
3136

    
3137

    
3138
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
3139
  """Shutdown block devices of an instance.
3140

3141
  This does the shutdown on all nodes of the instance.
3142

3143
  If the ignore_primary is false, errors on the primary node are
3144
  ignored.
3145

3146
  """
3147
  all_result = True
3148
  disks = _ExpandCheckDisks(instance, disks)
3149

    
3150
  for disk in disks:
3151
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3152
      lu.cfg.SetDiskID(top_disk, node)
3153
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
3154
      msg = result.fail_msg
3155
      if msg:
3156
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3157
                      disk.iv_name, node, msg)
3158
        if ((node == instance.primary_node and not ignore_primary) or
3159
            (node != instance.primary_node and not result.offline)):
3160
          all_result = False
3161
  return all_result
3162

    
3163

    
3164
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3165
  """Checks if a node has enough free memory.
3166

3167
  This function checks if a given node has the needed amount of free
3168
  memory. In case the node has less memory or we cannot get the
3169
  information from the node, this function raises an OpPrereqError
3170
  exception.
3171

3172
  @type lu: C{LogicalUnit}
3173
  @param lu: a logical unit from which we get configuration data
3174
  @type node: C{str}
3175
  @param node: the node to check
3176
  @type reason: C{str}
3177
  @param reason: string to use in the error message
3178
  @type requested: C{int}
3179
  @param requested: the amount of memory in MiB to check for
3180
  @type hypervisor_name: C{str}
3181
  @param hypervisor_name: the hypervisor to ask for memory stats
3182
  @rtype: integer
3183
  @return: node current free memory
3184
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3185
      we cannot check the node
3186

3187
  """
3188
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
3189
  nodeinfo[node].Raise("Can't get data from node %s" % node,
3190
                       prereq=True, ecode=errors.ECODE_ENVIRON)
3191
  (_, _, (hv_info, )) = nodeinfo[node].payload
3192

    
3193
  free_mem = hv_info.get("memory_free", None)
3194
  if not isinstance(free_mem, int):
3195
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3196
                               " was '%s'" % (node, free_mem),
3197
                               errors.ECODE_ENVIRON)
3198
  if requested > free_mem:
3199
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3200
                               " needed %s MiB, available %s MiB" %
3201
                               (node, reason, requested, free_mem),
3202
                               errors.ECODE_NORES)
3203
  return free_mem
3204

    
3205

    
3206
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
3207
  """Checks if nodes have enough free disk space in all the VGs.
3208

3209
  This function checks if all given nodes have the needed amount of
3210
  free disk. In case any node has less disk or we cannot get the
3211
  information from the node, this function raises an OpPrereqError
3212
  exception.
3213

3214
  @type lu: C{LogicalUnit}
3215
  @param lu: a logical unit from which we get configuration data
3216
  @type nodenames: C{list}
3217
  @param nodenames: the list of node names to check
3218
  @type req_sizes: C{dict}
3219
  @param req_sizes: the hash of vg and corresponding amount of disk in
3220
      MiB to check for
3221
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
3222
      or we cannot check the node
3223

3224
  """
3225
  for vg, req_size in req_sizes.items():
3226
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
3227

    
3228

    
3229
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
3230
  """Checks if nodes have enough free disk space in the specified VG.
3231

3232
  This function checks if all given nodes have the needed amount of
3233
  free disk. In case any node has less disk or we cannot get the
3234
  information from the node, this function raises an OpPrereqError
3235
  exception.
3236

3237
  @type lu: C{LogicalUnit}
3238
  @param lu: a logical unit from which we get configuration data
3239
  @type nodenames: C{list}
3240
  @param nodenames: the list of node names to check
3241
  @type vg: C{str}
3242
  @param vg: the volume group to check
3243
  @type requested: C{int}
3244
  @param requested: the amount of disk in MiB to check for
3245
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
3246
      or we cannot check the node
3247

3248
  """
3249
  es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
3250
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
3251
  for node in nodenames:
3252
    info = nodeinfo[node]
3253
    info.Raise("Cannot get current information from node %s" % node,
3254
               prereq=True, ecode=errors.ECODE_ENVIRON)
3255
    (_, (vg_info, ), _) = info.payload
3256
    vg_free = vg_info.get("vg_free", None)
3257
    if not isinstance(vg_free, int):
3258
      raise errors.OpPrereqError("Can't compute free disk space on node"
3259
                                 " %s for vg %s, result was '%s'" %
3260
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
3261
    if requested > vg_free:
3262
      raise errors.OpPrereqError("Not enough disk space on target node %s"
3263
                                 " vg %s: required %d MiB, available %d MiB" %
3264
                                 (node, vg, requested, vg_free),
3265
                                 errors.ECODE_NORES)
3266

    
3267

    
3268
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
3269
  """Checks if nodes have enough physical CPUs
3270

3271
  This function checks if all given nodes have the needed number of
3272
  physical CPUs. In case any node has less CPUs or we cannot get the
3273
  information from the node, this function raises an OpPrereqError
3274
  exception.
3275

3276
  @type lu: C{LogicalUnit}
3277
  @param lu: a logical unit from which we get configuration data
3278
  @type nodenames: C{list}
3279
  @param nodenames: the list of node names to check
3280
  @type requested: C{int}
3281
  @param requested: the minimum acceptable number of physical CPUs
3282
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
3283
      or we cannot check the node
3284

3285
  """
3286
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
3287
  for node in nodenames:
3288
    info = nodeinfo[node]
3289
    info.Raise("Cannot get current information from node %s" % node,
3290
               prereq=True, ecode=errors.ECODE_ENVIRON)
3291
    (_, _, (hv_info, )) = info.payload
3292
    num_cpus = hv_info.get("cpu_total", None)
3293
    if not isinstance(num_cpus, int):
3294
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
3295
                                 " on node %s, result was '%s'" %
3296
                                 (node, num_cpus), errors.ECODE_ENVIRON)
3297
    if requested > num_cpus:
3298
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
3299
                                 "required" % (node, num_cpus, requested),
3300
                                 errors.ECODE_NORES)
3301

    
3302

    
3303
class LUInstanceStartup(LogicalUnit):
3304
  """Starts an instance.
3305

3306
  """
3307
  HPATH = "instance-start"
3308
  HTYPE = constants.HTYPE_INSTANCE
3309
  REQ_BGL = False
3310

    
3311
  def CheckArguments(self):
3312
    # extra beparams
3313
    if self.op.beparams:
3314
      # fill the beparams dict
3315
      objects.UpgradeBeParams(self.op.beparams)
3316
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3317

    
3318
  def ExpandNames(self):
3319
    self._ExpandAndLockInstance()
3320
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3321

    
3322
  def DeclareLocks(self, level):
3323
    if level == locking.LEVEL_NODE_RES:
3324
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
3325

    
3326
  def BuildHooksEnv(self):
3327
    """Build hooks env.
3328

3329
    This runs on master, primary and secondary nodes of the instance.
3330

3331
    """
3332
    env = {
3333
      "FORCE": self.op.force,
3334
      }
3335

    
3336
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3337

    
3338
    return env
3339

    
3340
  def BuildHooksNodes(self):
3341
    """Build hooks nodes.
3342

3343
    """
3344
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3345
    return (nl, nl)
3346

    
3347
  def CheckPrereq(self):
3348
    """Check prerequisites.
3349

3350
    This checks that the instance is in the cluster.
3351

3352
    """
3353
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3354
    assert self.instance is not None, \
3355
      "Cannot retrieve locked instance %s" % self.op.instance_name
3356

    
3357
    # extra hvparams
3358
    if self.op.hvparams:
3359
      # check hypervisor parameter syntax (locally)
3360
      cluster = self.cfg.GetClusterInfo()
3361
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
3362
      filled_hvp = cluster.FillHV(instance)
3363
      filled_hvp.update(self.op.hvparams)
3364
      hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
3365
      hv_type.CheckParameterSyntax(filled_hvp)
3366
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3367

    
3368
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
3369

    
3370
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
3371

    
3372
    if self.primary_offline and self.op.ignore_offline_nodes:
3373
      self.LogWarning("Ignoring offline primary node")
3374

    
3375
      if self.op.hvparams or self.op.beparams:
3376
        self.LogWarning("Overridden parameters are ignored")
3377
    else:
3378
      _CheckNodeOnline(self, instance.primary_node)
3379

    
3380
      bep = self.cfg.GetClusterInfo().FillBE(instance)
3381
      bep.update(self.op.beparams)
3382

    
3383
      # check bridges existence
3384
      _CheckInstanceBridgesExist(self, instance)
3385

    
3386
      remote_info = self.rpc.call_instance_info(instance.primary_node,
3387
                                                instance.name,
3388
                                                instance.hypervisor)
3389
      remote_info.Raise("Error checking node %s" % instance.primary_node,
3390
                        prereq=True, ecode=errors.ECODE_ENVIRON)
3391
      if not remote_info.payload: # not running already
3392
        _CheckNodeFreeMemory(self, instance.primary_node,
3393
                             "starting instance %s" % instance.name,
3394
                             bep[constants.BE_MINMEM], instance.hypervisor)
3395

    
3396
  def Exec(self, feedback_fn):
3397
    """Start the instance.
3398

3399
    """
3400
    instance = self.instance
3401
    force = self.op.force
3402
    reason = self.op.reason
3403

    
3404
    if not self.op.no_remember:
3405
      self.cfg.MarkInstanceUp(instance.name)
3406

    
3407
    if self.primary_offline:
3408
      assert self.op.ignore_offline_nodes
3409
      self.LogInfo("Primary node offline, marked instance as started")
3410
    else:
3411
      node_current = instance.primary_node
3412

    
3413
      _StartInstanceDisks(self, instance, force)
3414

    
3415
      result = \
3416
        self.rpc.call_instance_start(node_current,
3417
                                     (instance, self.op.hvparams,
3418
                                      self.op.beparams),
3419
                                     self.op.startup_paused, reason)
3420
      msg = result.fail_msg
3421
      if msg:
3422
        _ShutdownInstanceDisks(self, instance)
3423
        raise errors.OpExecError("Could not start instance: %s" % msg)
3424

    
3425

    
3426
class LUInstanceReboot(LogicalUnit):
3427
  """Reboot an instance.
3428

3429
  """
3430
  HPATH = "instance-reboot"
3431
  HTYPE = constants.HTYPE_INSTANCE
3432
  REQ_BGL = False
3433

    
3434
  def ExpandNames(self):
3435
    self._ExpandAndLockInstance()
3436

    
3437
  def BuildHooksEnv(self):
3438
    """Build hooks env.
3439

3440
    This runs on master, primary and secondary nodes of the instance.
3441

3442
    """
3443
    env = {
3444
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3445
      "REBOOT_TYPE": self.op.reboot_type,
3446
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
3447
      }
3448

    
3449
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3450

    
3451
    return env
3452

    
3453
  def BuildHooksNodes(self):
3454
    """Build hooks nodes.
3455

3456
    """
3457
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3458
    return (nl, nl)
3459

    
3460
  def CheckPrereq(self):
3461
    """Check prerequisites.
3462

3463
    This checks that the instance is in the cluster.
3464

3465
    """
3466
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3467
    assert self.instance is not None, \
3468
      "Cannot retrieve locked instance %s" % self.op.instance_name
3469
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
3470
    _CheckNodeOnline(self, instance.primary_node)
3471

    
3472
    # check bridges existence
3473
    _CheckInstanceBridgesExist(self, instance)
3474

    
3475
  def Exec(self, feedback_fn):
3476
    """Reboot the instance.
3477

3478
    """
3479
    instance = self.instance
3480
    ignore_secondaries = self.op.ignore_secondaries
3481
    reboot_type = self.op.reboot_type
3482
    reason = self.op.reason
3483

    
3484
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3485
                                              instance.name,
3486
                                              instance.hypervisor)
3487
    remote_info.Raise("Error checking node %s" % instance.primary_node)
3488
    instance_running = bool(remote_info.payload)
3489

    
3490
    node_current = instance.primary_node
3491

    
3492
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3493
                                            constants.INSTANCE_REBOOT_HARD]:
3494
      for disk in instance.disks:
3495
        self.cfg.SetDiskID(disk, node_current)
3496
      result = self.rpc.call_instance_reboot(node_current, instance,
3497
                                             reboot_type,
3498
                                             self.op.shutdown_timeout, reason)
3499
      result.Raise("Could not reboot instance")
3500
    else:
3501
      if instance_running:
3502
        result = self.rpc.call_instance_shutdown(node_current, instance,
3503
                                                 self.op.shutdown_timeout,
3504
                                                 reason)
3505
        result.Raise("Could not shutdown instance for full reboot")
3506
        _ShutdownInstanceDisks(self, instance)
3507
      else:
3508
        self.LogInfo("Instance %s was already stopped, starting now",
3509
                     instance.name)
3510
      _StartInstanceDisks(self, instance, ignore_secondaries)
3511
      result = self.rpc.call_instance_start(node_current,
3512
                                            (instance, None, None), False,
3513
                                             reason)
3514
      msg = result.fail_msg
3515
      if msg:
3516
        _ShutdownInstanceDisks(self, instance)
3517
        raise errors.OpExecError("Could not start instance for"
3518
                                 " full reboot: %s" % msg)
3519

    
3520
    self.cfg.MarkInstanceUp(instance.name)
3521

    
3522

    
3523
class LUInstanceShutdown(LogicalUnit):
3524
  """Shutdown an instance.
3525

3526
  """
3527
  HPATH = "instance-stop"
3528
  HTYPE = constants.HTYPE_INSTANCE
3529
  REQ_BGL = False
3530

    
3531
  def ExpandNames(self):
3532
    self._ExpandAndLockInstance()
3533

    
3534
  def BuildHooksEnv(self):
3535
    """Build hooks env.
3536

3537
    This runs on master, primary and secondary nodes of the instance.
3538

3539
    """
3540
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3541
    env["TIMEOUT"] = self.op.timeout
3542
    return env
3543

    
3544
  def BuildHooksNodes(self):
3545
    """Build hooks nodes.
3546

3547
    """
3548
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3549
    return (nl, nl)
3550

    
3551
  def CheckPrereq(self):
3552
    """Check prerequisites.
3553

3554
    This checks that the instance is in the cluster.
3555

3556
    """
3557
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3558
    assert self.instance is not None, \
3559
      "Cannot retrieve locked instance %s" % self.op.instance_name
3560

    
3561
    if not self.op.force:
3562
      _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
3563
    else:
3564
      self.LogWarning("Ignoring offline instance check")
3565

    
3566
    self.primary_offline = \
3567
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
3568

    
3569
    if self.primary_offline and self.op.ignore_offline_nodes:
3570
      self.LogWarning("Ignoring offline primary node")
3571
    else:
3572
      _CheckNodeOnline(self, self.instance.primary_node)
3573

    
3574
  def Exec(self, feedback_fn):
3575
    """Shutdown the instance.
3576

3577
    """
3578
    instance = self.instance
3579
    node_current = instance.primary_node
3580
    timeout = self.op.timeout
3581
    reason = self.op.reason
3582

    
3583
    # If the instance is offline we shouldn't mark it as down, as that
3584
    # resets the offline flag.
3585
    if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
3586
      self.cfg.MarkInstanceDown(instance.name)
3587

    
3588
    if self.primary_offline:
3589
      assert self.op.ignore_offline_nodes
3590
      self.LogInfo("Primary node offline, marked instance as stopped")
3591
    else:
3592
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout,
3593
                                               reason)
3594
      msg = result.fail_msg
3595
      if msg:
3596
        self.LogWarning("Could not shutdown instance: %s", msg)
3597

    
3598
      _ShutdownInstanceDisks(self, instance)
3599

    
3600

    
3601
class LUInstanceReinstall(LogicalUnit):
3602
  """Reinstall an instance.
3603

3604
  """
3605
  HPATH = "instance-reinstall"
3606
  HTYPE = constants.HTYPE_INSTANCE
3607
  REQ_BGL = False
3608

    
3609
  def ExpandNames(self):
3610
    self._ExpandAndLockInstance()
3611

    
3612
  def BuildHooksEnv(self):
3613
    """Build hooks env.
3614

3615
    This runs on master, primary and secondary nodes of the instance.
3616

3617
    """
3618
    return _BuildInstanceHookEnvByObject(self, self.instance)
3619

    
3620
  def BuildHooksNodes(self):
3621
    """Build hooks nodes.
3622

3623
    """
3624
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3625
    return (nl, nl)
3626

    
3627
  def CheckPrereq(self):
3628
    """Check prerequisites.
3629

3630
    This checks that the instance is in the cluster and is not running.
3631

3632
    """
3633
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3634
    assert instance is not None, \
3635
      "Cannot retrieve locked instance %s" % self.op.instance_name
3636
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
3637
                     " offline, cannot reinstall")
3638

    
3639
    if instance.disk_template == constants.DT_DISKLESS:
3640
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3641
                                 self.op.instance_name,
3642
                                 errors.ECODE_INVAL)
3643
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
3644

    
3645
    if self.op.os_type is not None:
3646
      # OS verification
3647
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
3648
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
3649
      instance_os = self.op.os_type
3650
    else:
3651
      instance_os = instance.os
3652

    
3653
    nodelist = list(instance.all_nodes)
3654

    
3655
    if self.op.osparams:
3656
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
3657
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
3658
      self.os_inst = i_osdict # the new dict (without defaults)
3659
    else:
3660
      self.os_inst = None
3661

    
3662
    self.instance = instance
3663

    
3664
  def Exec(self, feedback_fn):
3665
    """Reinstall the instance.
3666

3667
    """
3668
    inst = self.instance
3669

    
3670
    if self.op.os_type is not None:
3671
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3672
      inst.os = self.op.os_type
3673
      # Write to configuration
3674
      self.cfg.Update(inst, feedback_fn)
3675

    
3676
    _StartInstanceDisks(self, inst, None)
3677
    try:
3678
      feedback_fn("Running the instance OS create scripts...")
3679
      # FIXME: pass debug option from opcode to backend
3680
      result = self.rpc.call_instance_os_add(inst.primary_node,
3681
                                             (inst, self.os_inst), True,
3682
                                             self.op.debug_level)
3683
      result.Raise("Could not install OS for instance %s on node %s" %
3684
                   (inst.name, inst.primary_node))
3685
    finally:
3686
      _ShutdownInstanceDisks(self, inst)
3687

    
3688

    
3689
class LUInstanceRecreateDisks(LogicalUnit):
3690
  """Recreate an instance's missing disks.
3691

3692
  """
3693
  HPATH = "instance-recreate-disks"
3694
  HTYPE = constants.HTYPE_INSTANCE
3695
  REQ_BGL = False
3696

    
3697
  _MODIFYABLE = compat.UniqueFrozenset([
3698
    constants.IDISK_SIZE,
3699
    constants.IDISK_MODE,
3700
    ])
3701

    
3702
  # New or changed disk parameters may have different semantics
3703
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
3704
    constants.IDISK_ADOPT,
3705

    
3706
    # TODO: Implement support changing VG while recreating
3707
    constants.IDISK_VG,
3708
    constants.IDISK_METAVG,
3709
    constants.IDISK_PROVIDER,
3710
    constants.IDISK_NAME,
3711
    ]))
3712

    
3713
  def _RunAllocator(self):
3714
    """Run the allocator based on input opcode.
3715

3716
    """
3717
    be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
3718

    
3719
    # FIXME
3720
    # The allocator should actually run in "relocate" mode, but current
3721
    # allocators don't support relocating all the nodes of an instance at
3722
    # the same time. As a workaround we use "allocate" mode, but this is
3723
    # suboptimal for two reasons:
3724
    # - The instance name passed to the allocator is present in the list of
3725
    #   existing instances, so there could be a conflict within the
3726
    #   internal structures of the allocator. This doesn't happen with the
3727
    #   current allocators, but it's a liability.
3728
    # - The allocator counts the resources used by the instance twice: once
3729
    #   because the instance exists already, and once because it tries to
3730
    #   allocate a new instance.
3731
    # The allocator could choose some of the nodes on which the instance is
3732
    # running, but that's not a problem. If the instance nodes are broken,
3733
    # they should be already be marked as drained or offline, and hence
3734
    # skipped by the allocator. If instance disks have been lost for other
3735
    # reasons, then recreating the disks on the same nodes should be fine.
3736
    disk_template = self.instance.disk_template
3737
    spindle_use = be_full[constants.BE_SPINDLE_USE]
3738
    req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
3739
                                        disk_template=disk_template,
3740
                                        tags=list(self.instance.GetTags()),
3741
                                        os=self.instance.os,
3742
                                        nics=[{}],
3743
                                        vcpus=be_full[constants.BE_VCPUS],
3744
                                        memory=be_full[constants.BE_MAXMEM],
3745
                                        spindle_use=spindle_use,
3746
                                        disks=[{constants.IDISK_SIZE: d.size,
3747
                                                constants.IDISK_MODE: d.mode}
3748
                                                for d in self.instance.disks],
3749
                                        hypervisor=self.instance.hypervisor,
3750
                                        node_whitelist=None)
3751
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
3752

    
3753
    ial.Run(self.op.iallocator)
3754

    
3755
    assert req.RequiredNodes() == len(self.instance.all_nodes)
3756

    
3757
    if not ial.success:
3758
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
3759
                                 " %s" % (self.op.iallocator, ial.info),
3760
                                 errors.ECODE_NORES)
3761

    
3762
    self.op.nodes = ial.result
3763
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
3764
                 self.op.instance_name, self.op.iallocator,
3765
                 utils.CommaJoin(ial.result))
3766

    
3767
  def CheckArguments(self):
3768
    if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
3769
      # Normalize and convert deprecated list of disk indices
3770
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
3771

    
3772
    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
3773
    if duplicates:
3774
      raise errors.OpPrereqError("Some disks have been specified more than"
3775
                                 " once: %s" % utils.CommaJoin(duplicates),
3776
                                 errors.ECODE_INVAL)
3777

    
3778
    # We don't want _CheckIAllocatorOrNode selecting the default iallocator
3779
    # when neither iallocator nor nodes are specified
3780
    if self.op.iallocator or self.op.nodes:
3781
      _CheckIAllocatorOrNode(self, "iallocator", "nodes")
3782

    
3783
    for (idx, params) in self.op.disks:
3784
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
3785
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
3786
      if unsupported:
3787
        raise errors.OpPrereqError("Parameters for disk %s try to change"
3788
                                   " unmodifyable parameter(s): %s" %
3789
                                   (idx, utils.CommaJoin(unsupported)),
3790
                                   errors.ECODE_INVAL)
3791

    
3792
  def ExpandNames(self):
3793
    self._ExpandAndLockInstance()
3794
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3795

    
3796
    if self.op.nodes:
3797
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
3798
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
3799
    else:
3800
      self.needed_locks[locking.LEVEL_NODE] = []
3801
      if self.op.iallocator:
3802
        # iallocator will select a new node in the same group
3803
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
3804
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
3805

    
3806
    self.needed_locks[locking.LEVEL_NODE_RES] = []
3807

    
3808
  def DeclareLocks(self, level):
3809
    if level == locking.LEVEL_NODEGROUP:
3810
      assert self.op.iallocator is not None
3811
      assert not self.op.nodes
3812
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3813
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
3814
      # Lock the primary group used by the instance optimistically; this
3815
      # requires going via the node before it's locked, requiring
3816
      # verification later on
3817
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3818
        self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
3819

    
3820
    elif level == locking.LEVEL_NODE:
3821
      # If an allocator is used, then we lock all the nodes in the current
3822
      # instance group, as we don't know yet which ones will be selected;
3823
      # if we replace the nodes without using an allocator, locks are
3824
      # already declared in ExpandNames; otherwise, we need to lock all the
3825
      # instance nodes for disk re-creation
3826
      if self.op.iallocator:
3827
        assert not self.op.nodes
3828
        assert not self.needed_locks[locking.LEVEL_NODE]
3829
        assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
3830

    
3831
        # Lock member nodes of the group of the primary node
3832
        for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
3833
          self.needed_locks[locking.LEVEL_NODE].extend(
3834
            self.cfg.GetNodeGroup(group_uuid).members)
3835

    
3836
        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
3837
      elif not self.op.nodes:
3838
        self._LockInstancesNodes(primary_only=False)
3839
    elif level == locking.LEVEL_NODE_RES:
3840
      # Copy node locks
3841
      self.needed_locks[locking.LEVEL_NODE_RES] = \
3842
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
3843

    
3844
  def BuildHooksEnv(self):
3845
    """Build hooks env.
3846

3847
    This runs on master, primary and secondary nodes of the instance.
3848

3849
    """
3850
    return _BuildInstanceHookEnvByObject(self, self.instance)
3851

    
3852
  def BuildHooksNodes(self):
3853
    """Build hooks nodes.
3854

3855
    """
3856
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3857
    return (nl, nl)
3858

    
3859
  def CheckPrereq(self):
3860
    """Check prerequisites.
3861

3862
    This checks that the instance is in the cluster and is not running.
3863

3864
    """
3865
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3866
    assert instance is not None, \
3867
      "Cannot retrieve locked instance %s" % self.op.instance_name
3868
    if self.op.nodes:
3869
      if len(self.op.nodes) != len(instance.all_nodes):
3870
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
3871
                                   " %d replacement nodes were specified" %
3872
                                   (instance.name, len(instance.all_nodes),
3873
                                    len(self.op.nodes)),
3874
                                   errors.ECODE_INVAL)
3875
      assert instance.disk_template != constants.DT_DRBD8 or \
3876
          len(self.op.nodes) == 2
3877
      assert instance.disk_template != constants.DT_PLAIN or \
3878
          len(self.op.nodes) == 1
3879
      primary_node = self.op.nodes[0]
3880
    else:
3881
      primary_node = instance.primary_node
3882
    if not self.op.iallocator:
3883
      _CheckNodeOnline(self, primary_node)
3884

    
3885
    if instance.disk_template == constants.DT_DISKLESS:
3886
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3887
                                 self.op.instance_name, errors.ECODE_INVAL)
3888

    
3889
    # Verify if node group locks are still correct
3890
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
3891
    if owned_groups:
3892
      # Node group locks are acquired only for the primary node (and only
3893
      # when the allocator is used)
3894
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
3895
                               primary_only=True)
3896

    
3897
    # if we replace nodes *and* the old primary is offline, we don't
3898
    # check the instance state
3899
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
3900
    if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
3901
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
3902
                          msg="cannot recreate disks")
3903

    
3904
    if self.op.disks:
3905
      self.disks = dict(self.op.disks)
3906
    else:
3907
      self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
3908

    
3909
    maxidx = max(self.disks.keys())
3910
    if maxidx >= len(instance.disks):
3911
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
3912
                                 errors.ECODE_INVAL)
3913

    
3914
    if ((self.op.nodes or self.op.iallocator) and
3915
        sorted(self.disks.keys()) != range(len(instance.disks))):
3916
      raise errors.OpPrereqError("Can't recreate disks partially and"
3917
                                 " change the nodes at the same time",
3918
                                 errors.ECODE_INVAL)
3919

    
3920
    self.instance = instance
3921

    
3922
    if self.op.iallocator:
3923
      self._RunAllocator()
3924
      # Release unneeded node and node resource locks
3925
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
3926
      _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
3927
      _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
3928

    
3929
    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
3930

    
3931
  def Exec(self, feedback_fn):
3932
    """Recreate the disks.
3933

3934
    """
3935
    instance = self.instance
3936

    
3937
    assert (self.owned_locks(locking.LEVEL_NODE) ==
3938
            self.owned_locks(locking.LEVEL_NODE_RES))
3939

    
3940
    to_skip = []
3941
    mods = [] # keeps track of needed changes
3942

    
3943
    for idx, disk in enumerate(instance.disks):
3944
      try:
3945
        changes = self.disks[idx]
3946
      except KeyError:
3947
        # Disk should not be recreated
3948
        to_skip.append(idx)
3949
        continue
3950

    
3951
      # update secondaries for disks, if needed
3952
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
3953
        # need to update the nodes and minors
3954
        assert len(self.op.nodes) == 2
3955
        assert len(disk.logical_id) == 6 # otherwise disk internals
3956
                                         # have changed
3957
        (_, _, old_port, _, _, old_secret) = disk.logical_id
3958
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
3959
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
3960
                  new_minors[0], new_minors[1], old_secret)
3961
        assert len(disk.logical_id) == len(new_id)
3962
      else:
3963
        new_id = None
3964

    
3965
      mods.append((idx, new_id, changes))
3966

    
3967
    # now that we have passed all asserts above, we can apply the mods
3968
    # in a single run (to avoid partial changes)
3969
    for idx, new_id, changes in mods:
3970
      disk = instance.disks[idx]
3971
      if new_id is not None:
3972
        assert disk.dev_type == constants.LD_DRBD8
3973
        disk.logical_id = new_id
3974
      if changes:
3975
        disk.Update(size=changes.get(constants.IDISK_SIZE, None),
3976
                    mode=changes.get(constants.IDISK_MODE, None))
3977

    
3978
    # change primary node, if needed
3979
    if self.op.nodes:
3980
      instance.primary_node = self.op.nodes[0]
3981
      self.LogWarning("Changing the instance's nodes, you will have to"
3982
                      " remove any disks left on the older nodes manually")
3983

    
3984
    if self.op.nodes:
3985
      self.cfg.Update(instance, feedback_fn)
3986

    
3987
    # All touched nodes must be locked
3988
    mylocks = self.owned_locks(locking.LEVEL_NODE)
3989
    assert mylocks.issuperset(frozenset(instance.all_nodes))
3990
    _CreateDisks(self, instance, to_skip=to_skip)
3991

    
3992

    
3993
class LUInstanceRename(LogicalUnit):
3994
  """Rename an instance.
3995

3996
  """
3997
  HPATH = "instance-rename"
3998
  HTYPE = constants.HTYPE_INSTANCE
3999

    
4000
  def CheckArguments(self):
4001
    """Check arguments.
4002

4003
    """
4004
    if self.op.ip_check and not self.op.name_check:
4005
      # TODO: make the ip check more flexible and not depend on the name check
4006
      raise errors.OpPrereqError("IP address check requires a name check",
4007
                                 errors.ECODE_INVAL)
4008

    
4009
  def BuildHooksEnv(self):
4010
    """Build hooks env.
4011

4012
    This runs on master, primary and secondary nodes of the instance.
4013

4014
    """
4015
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4016
    env["INSTANCE_NEW_NAME"] = self.op.new_name
4017
    return env
4018

    
4019
  def BuildHooksNodes(self):
4020
    """Build hooks nodes.
4021

4022
    """
4023
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4024
    return (nl, nl)
4025

    
4026
  def CheckPrereq(self):
4027
    """Check prerequisites.
4028

4029
    This checks that the instance is in the cluster and is not running.
4030

4031
    """
4032
    self.op.instance_name = _ExpandInstanceName(self.cfg,
4033
                                                self.op.instance_name)
4034
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4035
    assert instance is not None
4036
    _CheckNodeOnline(self, instance.primary_node)
4037
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
4038
                        msg="cannot rename")
4039
    self.instance = instance
4040

    
4041
    new_name = self.op.new_name
4042
    if self.op.name_check:
4043
      hostname = _CheckHostnameSane(self, new_name)
4044
      new_name = self.op.new_name = hostname.name
4045
      if (self.op.ip_check and
4046
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
4047
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4048
                                   (hostname.ip, new_name),
4049
                                   errors.ECODE_NOTUNIQUE)
4050

    
4051
    instance_list = self.cfg.GetInstanceList()
4052
    if new_name in instance_list and new_name != instance.name:
4053
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4054
                                 new_name, errors.ECODE_EXISTS)
4055

    
4056
  def Exec(self, feedback_fn):
4057
    """Rename the instance.
4058

4059
    """
4060
    inst = self.instance
4061
    old_name = inst.name
4062

    
4063
    rename_file_storage = False
4064
    if (inst.disk_template in constants.DTS_FILEBASED and
4065
        self.op.new_name != inst.name):
4066
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4067
      rename_file_storage = True
4068

    
4069
    self.cfg.RenameInstance(inst.name, self.op.new_name)
4070
    # Change the instance lock. This is definitely safe while we hold the BGL.
4071
    # Otherwise the new lock would have to be added in acquired mode.
4072
    assert self.REQ_BGL
4073
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
4074
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
4075
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4076

    
4077
    # re-read the instance from the configuration after rename
4078
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
4079

    
4080
    if rename_file_storage:
4081
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4082
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4083
                                                     old_file_storage_dir,
4084
                                                     new_file_storage_dir)
4085
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
4086
                   " (but the instance has been renamed in Ganeti)" %
4087
                   (inst.primary_node, old_file_storage_dir,
4088
                    new_file_storage_dir))
4089

    
4090
    _StartInstanceDisks(self, inst, None)
4091
    # update info on disks
4092
    info = _GetInstanceInfoText(inst)
4093
    for (idx, disk) in enumerate(inst.disks):
4094
      for node in inst.all_nodes:
4095
        self.cfg.SetDiskID(disk, node)
4096
        result = self.rpc.call_blockdev_setinfo(node, disk, info)
4097
        if result.fail_msg:
4098
          self.LogWarning("Error setting info on node %s for disk %s: %s",
4099
                          node, idx, result.fail_msg)
4100
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout,
                                             self.op.reason)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


def _ExpandNamesForMigration(lu):
  """Expands names for use with L{TLMigrateInstance}.

  @type lu: L{LogicalUnit}

  """
  if lu.op.target_node is not None:
    lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)

  lu.needed_locks[locking.LEVEL_NODE] = []
  lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  lu.needed_locks[locking.LEVEL_NODE_RES] = []
  lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  # The node allocation lock is actually only needed for externally replicated
  # instances (e.g. sharedfile or RBD) and if an iallocator is used.
  lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []


def _DeclareLocksForMigration(lu, level):
  """Declares locks for L{TLMigrateInstance}.

  @type lu: L{LogicalUnit}
  @param level: Lock level

  """
  if level == locking.LEVEL_NODE_ALLOC:
    assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)

    instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)

    # Node locks are already declared here rather than at LEVEL_NODE as we need
    # the instance object anyway to declare the node allocation lock.
    if instance.disk_template in constants.DTS_EXT_MIRROR:
      if lu.op.target_node is None:
        lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
      else:
        lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                               lu.op.target_node]
      del lu.recalculate_locks[locking.LEVEL_NODE]
    else:
      lu._LockInstancesNodes() # pylint: disable=W0212

  elif level == locking.LEVEL_NODE:
    # Node locks are declared together with the node allocation lock
    assert (lu.needed_locks[locking.LEVEL_NODE] or
            lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)

  elif level == locking.LEVEL_NODE_RES:
    # Copy node locks
    lu.needed_locks[locking.LEVEL_NODE_RES] = \
      _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

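    # TLMigrateInstance positional arguments: cleanup=False, failover=True,
    # fallback=False, ignore_consistency, allow_runtime_changes=True,
    # shutdown_timeout, ignore_ipolicy (see TLMigrateInstance.__init__)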
    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name, False, True, False,
                        self.op.ignore_consistency, True,
                        self.op.shutdown_timeout, self.op.ignore_ipolicy)

    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    _ExpandNamesForMigration(self)

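    # TLMigrateInstance positional arguments: cleanup, failover=False,
    # fallback=allow_failover, ignore_consistency=False,
    # allow_runtime_changes, shutdown_timeout, ignore_ipolicy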
    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
                        False, self.op.allow_failover, False,
                        self.op.allow_runtime_changes,
                        constants.DEFAULT_SHUTDOWN_TIMEOUT,
                        self.op.ignore_ipolicy)

    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    _DeclareLocksForMigration(self, level)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    snodes = list(instance.secondary_nodes)
    nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
    return (nl, nl)


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    if instance.disk_template not in constants.DTS_COPYABLE:
      raise errors.OpPrereqError("Disk template %s not suitable for copying" %
                                 instance.disk_template, errors.ECODE_STATE)

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    cluster = self.cfg.GetClusterInfo()
    group_info = self.cfg.GetNodeGroup(node.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout,
                                             self.op.reason)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.LogWarning("Could not shutdown instance %s on node %s."
                        " Proceeding anyway. Please make sure node"
                        " %s is down. Error details: %s",
                        instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed")
      self.cfg.ReleaseDRBDMinors(instance.name)
      raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, (disk, instance),
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

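    # If any disk failed, remove whatever was already created on the target
    # node and abort; the instance stays on its current (source) node.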
    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False,
                                            self.op.reason)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migrating instances
    allow_runtime_changes = self.op.allow_runtime_changes
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node,
                                 allow_runtime_changes=allow_runtime_changes,
                                 ignore_ipolicy=self.op.ignore_ipolicy)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration is not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between
                            source and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, timeout of the shutdown
  @type ignore_ipolicy: bool
  @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating

  """

  # Constants
  _MIGRATION_POLL_INTERVAL = 1      # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds

  def __init__(self, lu, instance_name, cleanup, failover, fallback,
               ignore_consistency, allow_runtime_changes, shutdown_timeout,
               ignore_ipolicy):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout
    self.ignore_ipolicy = ignore_ipolicy
    self.allow_runtime_changes = allow_runtime_changes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance
    cluster = self.cfg.GetClusterInfo()

    if (not self.cleanup and
        not instance.admin_state == constants.ADMINST_UP and
        not self.failover and self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

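    # Externally mirrored disk templates can be moved to an arbitrary node,
    # chosen via an iallocator or given explicitly; internally mirrored
    # templates (e.g. DRBD) can only go to their current secondary node.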
    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # Check that the target node is correct in terms of instance policy
      nodeinfo = self.cfg.GetNodeInfo(self.target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
                              ignore=self.ignore_ipolicy)

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])
        _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    else:
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)

      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)
      nodeinfo = self.cfg.GetNodeInfo(target_node)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
                              ignore=self.ignore_ipolicy)

    i_be = cluster.FillBE(instance)

    # check memory requirements on the secondary node
    if (not self.cleanup and
         (not self.failover or instance.admin_state == constants.ADMINST_UP)):
      self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
                                               "migrating instance %s" %
                                               instance.name,
                                               i_be[constants.BE_MINMEM],
                                               instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check if failover must be forced instead of migration
    if (not self.cleanup and not self.failover and
        i_be[constants.BE_ALWAYS_FAILOVER]):
      self.lu.LogInfo("Instance configured to always failover; fallback"
                      " to failover")
      self.failover = True

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = cluster.FillHV(self.instance, skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

    if not (self.failover or self.cleanup):
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking instance on node %s" %
                        instance.primary_node)
      instance_running = bool(remote_info.payload)
      if instance_running:
        self.current_mem = int(remote_info.payload["memory"])

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)

    # FIXME: add a self.ignore_ipolicy option
    req = iallocator.IAReqRelocate(name=self.instance_name,
                                   relocate_from=[self.instance.primary_node])
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            (self.instance.disks,
                                             self.instance))
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           (self.instance.disks, self.instance),
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to clean up after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                                 instance,
                                                                 migration_info,
                                                                 False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

    abort_result = self.rpc.call_instance_finalize_migration_src(
      source_node, instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    source_node, abort_msg)

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, [self.instance.hypervisor], False)
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    (_, _, (src_info, )) = nodeinfo[source_node].payload
    (_, _, (dst_info, )) = nodeinfo[target_node].payload

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    if self.current_mem > self.tgt_free_mem:
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (instance.name, target_node,
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

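    # Poll the hypervisor for the migration status every
    # _MIGRATION_POLL_INTERVAL seconds and report RAM transfer progress at
    # most every _MIGRATION_FEEDBACK_INTERVAL seconds.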
    self.feedback_fn("* starting memory transfer")
5199
    last_feedback = time.time()
5200
    while True:
5201
      result = self.rpc.call_instance_get_migration_status(source_node,
5202
                                                           instance)
5203
      msg = result.fail_msg
5204
      ms = result.payload   # MigrationStatus instance
5205
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
5206
        logging.error("Instance migration failed, trying to revert"
5207
                      " disk status: %s", msg)
5208
        self.feedback_fn("Migration failed, aborting")
5209
        self._AbortMigration()
5210
        self._RevertDiskStatus()
5211
        if not msg:
5212
          msg = "hypervisor returned failure"
5213
        raise errors.OpExecError("Could not migrate instance %s: %s" %
5214
                                 (instance.name, msg))
5215

    
5216
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
5217
        self.feedback_fn("* memory transfer complete")
5218
        break
5219

    
5220
      if (utils.TimeoutExpired(last_feedback,
5221
                               self._MIGRATION_FEEDBACK_INTERVAL) and
5222
          ms.transferred_ram is not None):
5223
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
5224
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
5225
        last_feedback = time.time()
5226

    
5227
      time.sleep(self._MIGRATION_POLL_INTERVAL)
5228

    
5229
    result = self.rpc.call_instance_finalize_migration_src(source_node,
5230
                                                           instance,
5231
                                                           True,
5232
                                                           self.live)
5233
    msg = result.fail_msg
5234
    if msg:
5235
      logging.error("Instance migration succeeded, but finalization failed"
5236
                    " on the source node: %s", msg)
5237
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5238
                               msg)
5239

    
5240
    instance.primary_node = target_node
5241

    
5242
    # distribute new instance config to the other nodes
5243
    self.cfg.Update(instance, self.feedback_fn)
5244

    
5245
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
5246
                                                           instance,
5247
                                                           migration_info,
5248
                                                           True)
5249
    msg = result.fail_msg
5250
    if msg:
5251
      logging.error("Instance migration succeeded, but finalization failed"
5252
                    " on the target node: %s", msg)
5253
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5254
                               msg)
5255

    
5256
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
5257
      self._EnsureSecondary(source_node)
5258
      self._WaitUntilSync()
5259
      self._GoStandalone()
5260
      self._GoReconnect(False)
5261
      self._WaitUntilSync()
5262

    
5263
    # If the instance's disk template is `rbd' or `ext' and there was a
5264
    # successful migration, unmap the device from the source node.
5265
    if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
5266
      disks = _ExpandCheckDisks(instance, instance.disks)
5267
      self.feedback_fn("* unmapping instance's disks from %s" % source_node)
5268
      for disk in disks:
5269
        result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
5270
        msg = result.fail_msg
5271
        if msg:
5272
          logging.error("Migration was successful, but couldn't unmap the"
5273
                        " block device %s on source node %s: %s",
5274
                        disk.iv_name, source_node, msg)
5275
          logging.error("You need to unmap the device %s manually on %s",
5276
                        disk.iv_name, source_node)
5277

    
5278
    self.feedback_fn("* done")
5279

    
5280
  def _ExecFailover(self):
5281
    """Failover an instance.
5282

5283
    The failover is done by shutting it down on its present node and
5284
    starting it on the secondary.
5285

5286
    """
5287
    instance = self.instance
5288
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5289

    
5290
    source_node = instance.primary_node
5291
    target_node = self.target_node
5292

    
5293
    if instance.admin_state == constants.ADMINST_UP:
5294
      self.feedback_fn("* checking disk consistency between source and target")
5295
      for (idx, dev) in enumerate(instance.disks):
5296
        # for drbd, these are drbd over lvm
5297
        if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
5298
                                     False):
5299
          if primary_node.offline:
5300
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
5301
                             " target node %s" %
5302
                             (primary_node.name, idx, target_node))
5303
          elif not self.ignore_consistency:
5304
            raise errors.OpExecError("Disk %s is degraded on target node,"
5305
                                     " aborting failover" % idx)
5306
    else:
5307
      self.feedback_fn("* not checking disk consistency as instance is not"
5308
                       " running")
5309

    
5310
    self.feedback_fn("* shutting down instance on source node")
5311
    logging.info("Shutting down instance %s on node %s",
5312
                 instance.name, source_node)
5313

    
5314
    result = self.rpc.call_instance_shutdown(source_node, instance,
5315
                                             self.shutdown_timeout,
5316
                                             self.lu.op.reason)
5317
    msg = result.fail_msg
5318
    if msg:
5319
      if self.ignore_consistency or primary_node.offline:
5320
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
5321
                           " proceeding anyway; please make sure node"
5322
                           " %s is down; error details: %s",
5323
                           instance.name, source_node, source_node, msg)
5324
      else:
5325
        raise errors.OpExecError("Could not shutdown instance %s on"
5326
                                 " node %s: %s" %
5327
                                 (instance.name, source_node, msg))
5328

    
5329
    self.feedback_fn("* deactivating the instance's disks on source node")
5330
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
5331
      raise errors.OpExecError("Can't shut down the instance's disks")
5332

    
5333
    instance.primary_node = target_node
5334
    # distribute new instance config to the other nodes
5335
    self.cfg.Update(instance, self.feedback_fn)
5336

    
5337
    # Only start the instance if it's marked as up
5338
    if instance.admin_state == constants.ADMINST_UP:
5339
      self.feedback_fn("* activating the instance's disks on target node %s" %
5340
                       target_node)
5341
      logging.info("Starting instance %s on node %s",
5342
                   instance.name, target_node)
5343

    
5344
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
5345
                                           ignore_secondaries=True)
5346
      if not disks_ok:
5347
        _ShutdownInstanceDisks(self.lu, instance)
5348
        raise errors.OpExecError("Can't activate the instance's disks")
5349

    
5350
      self.feedback_fn("* starting the instance on the target node %s" %
5351
                       target_node)
5352
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
5353
                                            False, self.lu.op.reason)
5354
      msg = result.fail_msg
5355
      if msg:
5356
        _ShutdownInstanceDisks(self.lu, instance)
5357
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5358
                                 (instance.name, target_node, msg))
5359

    
5360
  def Exec(self, feedback_fn):
5361
    """Perform the migration.
5362

5363
    """
5364
    self.feedback_fn = feedback_fn
5365
    self.source_node = self.instance.primary_node
5366

    
5367
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
5368
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
5369
      self.target_node = self.instance.secondary_nodes[0]
5370
      # Otherwise self.target_node has been populated either
5371
      # directly, or through an iallocator.
5372

    
5373
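    # Map node names to their secondary IPs; these are the addresses used by
    # the DRBD and migration RPC calls below.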
    self.all_nodes = [self.source_node, self.target_node]
5374
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
5375
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
5376

    
5377
    if self.failover:
5378
      feedback_fn("Failover instance %s" % self.instance.name)
5379
      self._ExecFailover()
5380
    else:
5381
      feedback_fn("Migrating instance %s" % self.instance.name)
5382

    
5383
      if self.cleanup:
5384
        return self._ExecCleanup()
5385
      else:
5386
        return self._ExecMigration()
5387

    
5388

    
5389
def _CreateBlockDev(lu, node, instance, device, force_create, info,
5390
                    force_open):
5391
  """Wrapper around L{_CreateBlockDevInner}.
5392

5393
  This method annotates the root device first.
5394

5395
  """
5396
  (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
5397
  excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
5398
  return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
5399
                              force_open, excl_stor)
5400

    
5401

    
5402
def _CreateBlockDevInner(lu, node, instance, device, force_create,
5403
                         info, force_open, excl_stor):
5404
  """Create a tree of block devices on a given node.
5405

5406
  If this device type has to be created on secondaries, create it and
5407
  all its children.
5408

5409
  If not, just recurse to children keeping the same 'force' value.
5410

5411
  @attention: The device has to be annotated already.
5412

5413
  @param lu: the lu on whose behalf we execute
5414
  @param node: the node on which to create the device
5415
  @type instance: L{objects.Instance}
5416
  @param instance: the instance which owns the device
5417
  @type device: L{objects.Disk}
5418
  @param device: the device to create
5419
  @type force_create: boolean
5420
  @param force_create: whether to force creation of this device; this
5421
      will be change to True whenever we find a device which has
5422
      CreateOnSecondary() attribute
5423
  @param info: the extra 'metadata' we should attach to the device
5424
      (this will be represented as a LVM tag)
5425
  @type force_open: boolean
5426
  @param force_open: this parameter will be passes to the
5427
      L{backend.BlockdevCreate} function where it specifies
5428
      whether we run on primary or not, and it affects both
5429
      the child assembly and the device own Open() execution
5430
  @type excl_stor: boolean
5431
  @param excl_stor: Whether exclusive_storage is active for the node
5432

5433
  @return: list of created devices
5434
  """
5435
  created_devices = []
5436
  try:
5437
    if device.CreateOnSecondary():
5438
      force_create = True
5439

    
5440
    if device.children:
5441
      for child in device.children:
5442
        devs = _CreateBlockDevInner(lu, node, instance, child, force_create,
5443
                                    info, force_open, excl_stor)
5444
        created_devices.extend(devs)
5445

    
5446
    if not force_create:
5447
      return created_devices
5448

    
5449
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
5450
                          excl_stor)
5451
    # The device has been completely created, so there is no point in keeping
5452
    # its subdevices in the list. We just add the device itself instead.
5453
    created_devices = [(node, device)]
5454
    return created_devices
5455

    
5456
  except errors.DeviceCreationError, e:
5457
    e.created_devices.extend(created_devices)
5458
    raise e
5459
  except errors.OpExecError, e:
5460
    raise errors.DeviceCreationError(str(e), created_devices)
5461

    
5462

    
5463
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
5464
                          excl_stor):
5465
  """Create a single block device on a given node.
5466

5467
  This will not recurse over children of the device, so they must be
5468
  created in advance.
5469

5470
  @param lu: the lu on whose behalf we execute
5471
  @param node: the node on which to create the device
5472
  @type instance: L{objects.Instance}
5473
  @param instance: the instance which owns the device
5474
  @type device: L{objects.Disk}
5475
  @param device: the device to create
5476
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
5483
  @type excl_stor: boolean
5484
  @param excl_stor: Whether exclusive_storage is active for the node
5485

5486
  """
5487
  lu.cfg.SetDiskID(device, node)
5488
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5489
                                       instance.name, force_open, info,
5490
                                       excl_stor)
5491
  result.Raise("Can't create block device %s on"
5492
               " node %s for instance %s" % (device, node, instance.name))
5493
  if device.physical_id is None:
5494
    device.physical_id = result.payload
5495

    
5496

    
5497
def _GenerateUniqueNames(lu, exts):
5498
  """Generate a suitable LV name.
5499

5500
  This will generate a logical volume name for the given instance.
5501

5502
  """
5503
  results = []
5504
  for val in exts:
5505
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5506
    results.append("%s%s" % (new_id, val))
5507
  return results
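
# Hedged illustration of the helper above (the UUIDs are made up): a fresh
# unique ID is generated for every extension, so each resulting name carries
# its own prefix.
#
#   _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
#   -> ["2f1c90aa-... .disk0_data", "7be4d2c3-... .disk0_meta"]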
5508

    
5509

    
5510
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
5511
                         iv_name, p_minor, s_minor):
5512
  """Generate a drbd8 device complete with its children.
5513

5514
  """
5515
  assert len(vgnames) == len(names) == 2
5516
  port = lu.cfg.AllocatePort()
5517
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5518

    
5519
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5520
                          logical_id=(vgnames[0], names[0]),
5521
                          params={})
5522
  dev_data.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5523
  dev_meta = objects.Disk(dev_type=constants.LD_LV,
5524
                          size=constants.DRBD_META_SIZE,
5525
                          logical_id=(vgnames[1], names[1]),
5526
                          params={})
5527
  dev_meta.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5528
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5529
                          logical_id=(primary, secondary, port,
5530
                                      p_minor, s_minor,
5531
                                      shared_secret),
5532
                          children=[dev_data, dev_meta],
5533
                          iv_name=iv_name, params={})
5534
  drbd_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5535
  return drbd_dev
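
# A minimal, self-contained sketch (not used by the module) of the disk tree
# _GenerateDRBD8Branch assembles: a DRBD8 device whose children are the data
# and meta LVs.  All values below (VG name, node names, port, minors, secret,
# LV names) are hypothetical stand-ins for what is normally taken from the
# cluster configuration, and the UUID assignment is omitted.
def _ExampleDrbd8Tree():
  data = objects.Disk(dev_type=constants.LD_LV, size=10240,
                      logical_id=("xenvg", "2f1c90aa.disk0_data"), params={})
  meta = objects.Disk(dev_type=constants.LD_LV, size=constants.DRBD_META_SIZE,
                      logical_id=("xenvg", "7be4d2c3.disk0_meta"), params={})
  return objects.Disk(dev_type=constants.LD_DRBD8, size=10240,
                      logical_id=("node1.example.com", "node2.example.com",
                                  11000, 0, 1, "hypothetical-secret"),
                      children=[data, meta], iv_name="disk/0", params={})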
5536

    
5537

    
5538
_DISK_TEMPLATE_NAME_PREFIX = {
5539
  constants.DT_PLAIN: "",
5540
  constants.DT_RBD: ".rbd",
5541
  constants.DT_EXT: ".ext",
5542
  }
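
# With the prefixes above, the generated volume names used further down end
# up looking like "<uuid>.disk0" for plain and "<uuid>.rbd.disk0" for rbd
# (UUIDs hypothetical); templates without an entry get no generated names.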
5543

    
5544

    
5545
_DISK_TEMPLATE_DEVICE_TYPE = {
5546
  constants.DT_PLAIN: constants.LD_LV,
5547
  constants.DT_FILE: constants.LD_FILE,
5548
  constants.DT_SHARED_FILE: constants.LD_FILE,
5549
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
5550
  constants.DT_RBD: constants.LD_RBD,
5551
  constants.DT_EXT: constants.LD_EXT,
5552
  }
5553

    
5554

    
5555
def _GenerateDiskTemplate(
5556
  lu, template_name, instance_name, primary_node, secondary_nodes,
5557
  disk_info, file_storage_dir, file_driver, base_index,
5558
  feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
5559
  _req_shr_file_storage=opcodes.RequireSharedFileStorage):
5560
  """Generate the entire disk layout for a given template type.
5561

5562
  """
5563
  vgname = lu.cfg.GetVGName()
5564
  disk_count = len(disk_info)
5565
  disks = []
5566

    
5567
  if template_name == constants.DT_DISKLESS:
5568
    pass
5569
  elif template_name == constants.DT_DRBD8:
5570
    if len(secondary_nodes) != 1:
5571
      raise errors.ProgrammerError("Wrong template configuration")
5572
    remote_node = secondary_nodes[0]
5573
    minors = lu.cfg.AllocateDRBDMinor(
5574
      [primary_node, remote_node] * len(disk_info), instance_name)
5575

    
5576
    (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
5577
                                                       full_disk_params)
5578
    drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
5579

    
5580
    names = []
5581
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5582
                                               for i in range(disk_count)]):
5583
      names.append(lv_prefix + "_data")
5584
      names.append(lv_prefix + "_meta")
5585
    for idx, disk in enumerate(disk_info):
5586
      disk_index = idx + base_index
5587
      data_vg = disk.get(constants.IDISK_VG, vgname)
5588
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
5589
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5590
                                      disk[constants.IDISK_SIZE],
5591
                                      [data_vg, meta_vg],
5592
                                      names[idx * 2:idx * 2 + 2],
5593
                                      "disk/%d" % disk_index,
5594
                                      minors[idx * 2], minors[idx * 2 + 1])
5595
      disk_dev.mode = disk[constants.IDISK_MODE]
5596
      disk_dev.name = disk.get(constants.IDISK_NAME, None)
5597
      disks.append(disk_dev)
5598
  else:
5599
    if secondary_nodes:
5600
      raise errors.ProgrammerError("Wrong template configuration")
5601

    
5602
    if template_name == constants.DT_FILE:
5603
      _req_file_storage()
5604
    elif template_name == constants.DT_SHARED_FILE:
5605
      _req_shr_file_storage()
5606

    
5607
    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
5608
    if name_prefix is None:
5609
      names = None
5610
    else:
5611
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
5612
                                        (name_prefix, base_index + i)
5613
                                        for i in range(disk_count)])
5614

    
5615
    if template_name == constants.DT_PLAIN:
5616

    
5617
      def logical_id_fn(idx, _, disk):
5618
        vg = disk.get(constants.IDISK_VG, vgname)
5619
        return (vg, names[idx])
5620

    
5621
    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
5622
      logical_id_fn = \
5623
        lambda _, disk_index, disk: (file_driver,
5624
                                     "%s/disk%d" % (file_storage_dir,
5625
                                                    disk_index))
5626
    elif template_name == constants.DT_BLOCK:
5627
      logical_id_fn = \
5628
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
5629
                                       disk[constants.IDISK_ADOPT])
5630
    elif template_name == constants.DT_RBD:
5631
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
5632
    elif template_name == constants.DT_EXT:
5633
      def logical_id_fn(idx, _, disk):
5634
        provider = disk.get(constants.IDISK_PROVIDER, None)
5635
        if provider is None:
5636
          raise errors.ProgrammerError("Disk template is %s, but '%s' is"
5637
                                       " not found", constants.DT_EXT,
5638
                                       constants.IDISK_PROVIDER)
5639
        return (provider, names[idx])
5640
    else:
5641
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
5642

    
5643
    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
5644

    
5645
    for idx, disk in enumerate(disk_info):
5646
      params = {}
5647
      # Only for the Ext template add disk_info to params
5648
      if template_name == constants.DT_EXT:
5649
        params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
5650
        for key in disk:
5651
          if key not in constants.IDISK_PARAMS:
5652
            params[key] = disk[key]
5653
      disk_index = idx + base_index
5654
      size = disk[constants.IDISK_SIZE]
5655
      feedback_fn("* disk %s, size %s" %
5656
                  (disk_index, utils.FormatUnit(size, "h")))
5657
      disk_dev = objects.Disk(dev_type=dev_type, size=size,
5658
                              logical_id=logical_id_fn(idx, disk_index, disk),
5659
                              iv_name="disk/%d" % disk_index,
5660
                              mode=disk[constants.IDISK_MODE],
5661
                              params=params)
5662
      disk_dev.name = disk.get(constants.IDISK_NAME, None)
5663
      disk_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5664
      disks.append(disk_dev)
5665

    
5666
  return disks
5667

    
5668

    
5669
def _GetInstanceInfoText(instance):
5670
  """Compute that text that should be added to the disk's metadata.
5671

5672
  """
5673
  return "originstname+%s" % instance.name
5674

    
5675

    
5676
def _CalcEta(time_taken, written, total_size):
5677
  """Calculates the ETA based on size written and total size.
5678

5679
  @param time_taken: The time taken so far
5680
  @param written: amount written so far
5681
  @param total_size: The total size of data to be written
5682
  @return: The remaining time in seconds
5683

5684
  """
5685
  avg_time = time_taken / float(written)
5686
  return (total_size - written) * avg_time
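
# Hedged usage sketch for _CalcEta (all figures hypothetical): 4096 MiB
# written out of 16384 MiB in 120 seconds extrapolates linearly to
# (16384 - 4096) * (120 / 4096.0) = 360 seconds remaining.
def _ExampleCalcEta():
  return _CalcEta(120.0, 4096, 16384)  # -> 360.0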
5687

    
5688

    
5689
def _WipeDisks(lu, instance, disks=None):
5690
  """Wipes instance disks.
5691

5692
  @type lu: L{LogicalUnit}
5693
  @param lu: the logical unit on whose behalf we execute
5694
  @type instance: L{objects.Instance}
5695
  @param instance: the instance whose disks we should create
5696
  @type disks: None or list of tuple of (number, L{objects.Disk}, number)
5697
  @param disks: Disk details; tuple contains disk index, disk object and the
5698
    start offset
5699

5700
  """
5701
  node = instance.primary_node
5702

    
5703
  if disks is None:
5704
    disks = [(idx, disk, 0)
5705
             for (idx, disk) in enumerate(instance.disks)]
5706

    
5707
  for (_, device, _) in disks:
5708
    lu.cfg.SetDiskID(device, node)
5709

    
5710
  logging.info("Pausing synchronization of disks of instance '%s'",
5711
               instance.name)
5712
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
5713
                                                  (map(compat.snd, disks),
5714
                                                   instance),
5715
                                                  True)
5716
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)
5717

    
5718
  for idx, success in enumerate(result.payload):
5719
    if not success:
5720
      logging.warn("Pausing synchronization of disk %s of instance '%s'"
5721
                   " failed", idx, instance.name)
5722

    
5723
  try:
5724
    for (idx, device, offset) in disks:
5725
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
5726
      # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
5727
      wipe_chunk_size = \
5728
        int(min(constants.MAX_WIPE_CHUNK,
5729
                device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
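      # Worked example for the value just computed (disk size hypothetical):
      # for a very large disk the percentage-based value exceeds
      # MAX_WIPE_CHUNK, so the chunk size is simply capped there; only small
      # disks actually use the MIN_WIPE_CHUNK_PERCENT fraction of their size.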
5730

    
5731
      size = device.size
5732
      last_output = 0
5733
      start_time = time.time()
5734

    
5735
      if offset == 0:
5736
        info_text = ""
5737
      else:
5738
        info_text = (" (from %s to %s)" %
5739
                     (utils.FormatUnit(offset, "h"),
5740
                      utils.FormatUnit(size, "h")))
5741

    
5742
      lu.LogInfo("* Wiping disk %s%s", idx, info_text)
5743

    
5744
      logging.info("Wiping disk %d for instance %s on node %s using"
5745
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
5746

    
5747
      while offset < size:
5748
        wipe_size = min(wipe_chunk_size, size - offset)
5749

    
5750
        logging.debug("Wiping disk %d, offset %s, chunk %s",
5751
                      idx, offset, wipe_size)
5752

    
5753
        result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
5754
                                           wipe_size)
5755
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
5756
                     (idx, offset, wipe_size))
5757

    
5758
        now = time.time()
5759
        offset += wipe_size
5760
        if now - last_output >= 60:
5761
          eta = _CalcEta(now - start_time, offset, size)
5762
          lu.LogInfo(" - done: %.1f%% ETA: %s",
5763
                     offset / float(size) * 100, utils.FormatSeconds(eta))
5764
          last_output = now
5765
  finally:
5766
    logging.info("Resuming synchronization of disks for instance '%s'",
5767
                 instance.name)
5768

    
5769
    result = lu.rpc.call_blockdev_pause_resume_sync(node,
5770
                                                    (map(compat.snd, disks),
5771
                                                     instance),
5772
                                                    False)
5773

    
5774
    if result.fail_msg:
5775
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
5776
                    node, result.fail_msg)
5777
    else:
5778
      for idx, success in enumerate(result.payload):
5779
        if not success:
5780
          lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
5781
                        " failed", idx, instance.name)
5782

    
5783

    
5784
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5785
  """Create all disks for an instance.
5786

5787
  This abstracts away some work from AddInstance.
5788

5789
  @type lu: L{LogicalUnit}
5790
  @param lu: the logical unit on whose behalf we execute
5791
  @type instance: L{objects.Instance}
5792
  @param instance: the instance whose disks we should create
5793
  @type to_skip: list
5794
  @param to_skip: list of indices to skip
5795
  @type target_node: string
5796
  @param target_node: if passed, overrides the target node for creation
5797
  @rtype: boolean
5798
  @return: the success of the creation
5799

5800
  """
5801
  info = _GetInstanceInfoText(instance)
5802
  if target_node is None:
5803
    pnode = instance.primary_node
5804
    all_nodes = instance.all_nodes
5805
  else:
5806
    pnode = target_node
5807
    all_nodes = [pnode]
5808

    
5809
  if instance.disk_template in constants.DTS_FILEBASED:
5810
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5811
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5812

    
5813
    result.Raise("Failed to create directory '%s' on"
5814
                 " node %s" % (file_storage_dir, pnode))
5815

    
5816
  disks_created = []
5817
  # Note: this needs to be kept in sync with adding of disks in
5818
  # LUInstanceSetParams
5819
  for idx, device in enumerate(instance.disks):
5820
    if to_skip and idx in to_skip:
5821
      continue
5822
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
5823
    #HARDCODE
5824
    for node in all_nodes:
5825
      f_create = node == pnode
5826
      try:
5827
        _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5828
        disks_created.append((node, device))
5829
      except errors.OpExecError:
5830
        logging.warning("Creating disk %s for instance '%s' failed",
5831
                        idx, instance.name)
5832
      except errors.DeviceCreationError, e:
5833
        logging.warning("Creating disk %s for instance '%s' failed",
5834
                        idx, instance.name)
5835
        disks_created.extend(e.created_devices)
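        # Best-effort rollback: remove every device created so far, including
        # those reported by the exception itself, before re-raising as a
        # plain OpExecError.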
5836
        for (node, disk) in disks_created:
5837
          lu.cfg.SetDiskID(disk, node)
5838
          result = lu.rpc.call_blockdev_remove(node, disk)
5839
          if result.fail_msg:
5840
            logging.warning("Failed to remove newly-created disk %s on node %s:"
5841
                            " %s", device, node, result.fail_msg)
5842
        raise errors.OpExecError(e.message)
5843

    
5844

    
5845
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
5846
  """Remove all disks for an instance.
5847

5848
  This abstracts away some work from `AddInstance()` and
5849
  `RemoveInstance()`. Note that in case some of the devices couldn't
5850
  be removed, the removal will continue with the other ones.
5851

5852
  @type lu: L{LogicalUnit}
5853
  @param lu: the logical unit on whose behalf we execute
5854
  @type instance: L{objects.Instance}
5855
  @param instance: the instance whose disks we should remove
5856
  @type target_node: string
5857
  @param target_node: used to override the node on which to remove the disks
5858
  @rtype: boolean
5859
  @return: the success of the removal
5860

5861
  """
5862
  logging.info("Removing block devices for instance %s", instance.name)
5863

    
5864
  all_result = True
5865
  ports_to_release = set()
5866
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
5867
  for (idx, device) in enumerate(anno_disks):
5868
    if target_node:
5869
      edata = [(target_node, device)]
5870
    else:
5871
      edata = device.ComputeNodeTree(instance.primary_node)
5872
    for node, disk in edata:
5873
      lu.cfg.SetDiskID(disk, node)
5874
      result = lu.rpc.call_blockdev_remove(node, disk)
5875
      if result.fail_msg:
5876
        lu.LogWarning("Could not remove disk %s on node %s,"
5877
                      " continuing anyway: %s", idx, node, result.fail_msg)
5878
        if not (result.offline and node != instance.primary_node):
5879
          all_result = False
5880

    
5881
    # if this is a DRBD disk, return its port to the pool
5882
    if device.dev_type in constants.LDS_DRBD:
5883
      ports_to_release.add(device.logical_id[2])
5884

    
5885
  if all_result or ignore_failures:
5886
    for port in ports_to_release:
5887
      lu.cfg.AddTcpUdpPort(port)
5888

    
5889
  if instance.disk_template in constants.DTS_FILEBASED:
5890
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5891
    if target_node:
5892
      tgt = target_node
5893
    else:
5894
      tgt = instance.primary_node
5895
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5896
    if result.fail_msg:
5897
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5898
                    file_storage_dir, instance.primary_node, result.fail_msg)
5899
      all_result = False
5900

    
5901
  return all_result
5902

    
5903

    
5904
def _ComputeDiskSizePerVG(disk_template, disks):
5905
  """Compute disk size requirements in the volume group
5906

5907
  """
5908
  def _compute(disks, payload):
5909
    """Universal algorithm.
5910

5911
    """
5912
    vgs = {}
5913
    for disk in disks:
5914
      vgs[disk[constants.IDISK_VG]] = \
5915
        vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
5916

    
5917
    return vgs
5918

    
5919
  # Required free disk space as a function of disk and swap space
5920
  req_size_dict = {
5921
    constants.DT_DISKLESS: {},
5922
    constants.DT_PLAIN: _compute(disks, 0),
5923
    # 128 MB are added for drbd metadata for each disk
5924
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
5925
    constants.DT_FILE: {},
5926
    constants.DT_SHARED_FILE: {},
5927
  }
5928

    
5929
  if disk_template not in req_size_dict:
5930
    raise errors.ProgrammerError("Disk template '%s' size requirement"
5931
                                 " is unknown" % disk_template)
5932

    
5933
  return req_size_dict[disk_template]
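
# Hedged usage sketch (not called anywhere): two disks in two different
# volume groups.  Under DT_PLAIN the requested sizes are returned as-is per
# VG; under DT_DRBD8 each disk additionally accounts for DRBD_META_SIZE.
def _ExampleDiskSizePerVG():
  disks = [
    {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
    {constants.IDISK_VG: "fastvg", constants.IDISK_SIZE: 2048},
    ]
  return (_ComputeDiskSizePerVG(constants.DT_PLAIN, disks),
          _ComputeDiskSizePerVG(constants.DT_DRBD8, disks))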
5934

    
5935

    
5936
def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
5937
  """Wrapper around IAReqInstanceAlloc.
5938

5939
  @param op: The instance opcode
5940
  @param disks: The computed disks
5941
  @param nics: The computed nics
5942
  @param beparams: The fully filled beparams
5943
  @param node_whitelist: List of nodes which should appear as online to the
5944
    allocator (unless the node is already marked offline)
5945

5946
  @returns: A filled L{iallocator.IAReqInstanceAlloc}
5947

5948
  """
5949
  spindle_use = beparams[constants.BE_SPINDLE_USE]
5950
  return iallocator.IAReqInstanceAlloc(name=op.instance_name,
5951
                                       disk_template=op.disk_template,
5952
                                       tags=op.tags,
5953
                                       os=op.os_type,
5954
                                       vcpus=beparams[constants.BE_VCPUS],
5955
                                       memory=beparams[constants.BE_MAXMEM],
5956
                                       spindle_use=spindle_use,
5957
                                       disks=disks,
5958
                                       nics=[n.ToDict() for n in nics],
5959
                                       hypervisor=op.hypervisor,
5960
                                       node_whitelist=node_whitelist)
5961

    
5962

    
5963
def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
5964
  """Computes the nics.
5965

5966
  @param op: The instance opcode
5967
  @param cluster: Cluster configuration object
5968
  @param default_ip: The default ip to assign
5969
  @param cfg: An instance of the configuration object
5970
  @param ec_id: Execution context ID
5971

5972
  @returns: The built NIC objects
5973

5974
  """
5975
  nics = []
5976
  for nic in op.nics:
5977
    nic_mode_req = nic.get(constants.INIC_MODE, None)
5978
    nic_mode = nic_mode_req
5979
    if nic_mode is None or nic_mode == constants.VALUE_AUTO:
5980
      nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5981

    
5982
    net = nic.get(constants.INIC_NETWORK, None)
5983
    link = nic.get(constants.NIC_LINK, None)
5984
    ip = nic.get(constants.INIC_IP, None)
5985

    
5986
    if net is None or net.lower() == constants.VALUE_NONE:
5987
      net = None
5988
    else:
5989
      if nic_mode_req is not None or link is not None:
5990
        raise errors.OpPrereqError("If network is given, no mode or link"
5991
                                   " is allowed to be passed",
5992
                                   errors.ECODE_INVAL)
5993

    
5994
    # ip validity checks
5995
    if ip is None or ip.lower() == constants.VALUE_NONE:
5996
      nic_ip = None
5997
    elif ip.lower() == constants.VALUE_AUTO:
5998
      if not op.name_check:
5999
        raise errors.OpPrereqError("IP address set to auto but name checks"
6000
                                   " have been skipped",
6001
                                   errors.ECODE_INVAL)
6002
      nic_ip = default_ip
6003
    else:
6004
      # We defer pool operations until later, so that the iallocator has
6005
      # filled in the instance's node(s)
6006
      if ip.lower() == constants.NIC_IP_POOL:
6007
        if net is None:
6008
          raise errors.OpPrereqError("if ip=pool, parameter network"
6009
                                     " must be passed too",
6010
                                     errors.ECODE_INVAL)
6011

    
6012
      elif not netutils.IPAddress.IsValid(ip):
6013
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
6014
                                   errors.ECODE_INVAL)
6015

    
6016
      nic_ip = ip
6017

    
6018
    # TODO: check the ip address for uniqueness
6019
    if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6020
      raise errors.OpPrereqError("Routed nic mode requires an ip address",
6021
                                 errors.ECODE_INVAL)
6022

    
6023
    # MAC address verification
6024
    mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
6025
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6026
      mac = utils.NormalizeAndValidateMac(mac)
6027

    
6028
      try:
6029
        # TODO: We need to factor this out
6030
        cfg.ReserveMAC(mac, ec_id)
6031
      except errors.ReservationError:
6032
        raise errors.OpPrereqError("MAC address %s already in use"
6033
                                   " in cluster" % mac,
6034
                                   errors.ECODE_NOTUNIQUE)
6035

    
6036
    #  Build nic parameters
6037
    nicparams = {}
6038
    if nic_mode_req:
6039
      nicparams[constants.NIC_MODE] = nic_mode
6040
    if link:
6041
      nicparams[constants.NIC_LINK] = link
6042

    
6043
    check_params = cluster.SimpleFillNIC(nicparams)
6044
    objects.NIC.CheckParameterSyntax(check_params)
6045
    net_uuid = cfg.LookupNetwork(net)
6046
    name = nic.get(constants.INIC_NAME, None)
6047
    if name is not None and name.lower() == constants.VALUE_NONE:
6048
      name = None
6049
    nic_obj = objects.NIC(mac=mac, ip=nic_ip, name=name,
6050
                          network=net_uuid, nicparams=nicparams)
6051
    nic_obj.uuid = cfg.GenerateUniqueID(ec_id)
6052
    nics.append(nic_obj)
6053

    
6054
  return nics
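
# Hedged input sketch for _ComputeNics: each entry of op.nics is a plain
# dict.  Two typical (hypothetical) forms -- an explicitly configured bridged
# NIC and one drawing its IP from a network's address pool:
#
#   {constants.INIC_MODE: constants.NIC_MODE_BRIDGED,
#    constants.NIC_LINK: "br0", constants.INIC_MAC: constants.VALUE_AUTO}
#
#   {constants.INIC_NETWORK: "net-frontend", constants.INIC_IP: "pool"}
#
# As enforced above, an entry with "network" may not also carry mode/link,
# and ip=pool requires a network; the actual pool allocation is deferred
# until the instance's node is known.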
6055

    
6056

    
6057
def _ComputeDisks(op, default_vg):
6058
  """Computes the instance disks.
6059

6060
  @param op: The instance opcode
6061
  @param default_vg: The default volume group to assume
6062

6063
  @return: The computed disks
6064

6065
  """
6066
  disks = []
6067
  for disk in op.disks:
6068
    mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
6069
    if mode not in constants.DISK_ACCESS_SET:
6070
      raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6071
                                 mode, errors.ECODE_INVAL)
6072
    size = disk.get(constants.IDISK_SIZE, None)
6073
    if size is None:
6074
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6075
    try:
6076
      size = int(size)
6077
    except (TypeError, ValueError):
6078
      raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6079
                                 errors.ECODE_INVAL)
6080

    
6081
    ext_provider = disk.get(constants.IDISK_PROVIDER, None)
6082
    if ext_provider and op.disk_template != constants.DT_EXT:
6083
      raise errors.OpPrereqError("The '%s' option is only valid for the %s"
6084
                                 " disk template, not %s" %
6085
                                 (constants.IDISK_PROVIDER, constants.DT_EXT,
6086
                                 op.disk_template), errors.ECODE_INVAL)
6087

    
6088
    data_vg = disk.get(constants.IDISK_VG, default_vg)
6089
    name = disk.get(constants.IDISK_NAME, None)
6090
    if name is not None and name.lower() == constants.VALUE_NONE:
6091
      name = None
6092
    new_disk = {
6093
      constants.IDISK_SIZE: size,
6094
      constants.IDISK_MODE: mode,
6095
      constants.IDISK_VG: data_vg,
6096
      constants.IDISK_NAME: name,
6097
      }
6098

    
6099
    if constants.IDISK_METAVG in disk:
6100
      new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
6101
    if constants.IDISK_ADOPT in disk:
6102
      new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
6103

    
6104
    # For extstorage, demand the `provider' option and add any
6105
    # additional parameters (ext-params) to the dict
6106
    if op.disk_template == constants.DT_EXT:
6107
      if ext_provider:
6108
        new_disk[constants.IDISK_PROVIDER] = ext_provider
6109
        for key in disk:
6110
          if key not in constants.IDISK_PARAMS:
6111
            new_disk[key] = disk[key]
6112
      else:
6113
        raise errors.OpPrereqError("Missing provider for template '%s'" %
6114
                                   constants.DT_EXT, errors.ECODE_INVAL)
6115

    
6116
    disks.append(new_disk)
6117

    
6118
  return disks
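
# Hedged usage sketch: the dict shape _ComputeDisks produces for a single
# 10 GiB read-write disk under the plain template.  "_FakeOp" is a stand-in
# for the real opcode object and only carries the two attributes the helper
# reads.
def _ExampleComputeDisks():
  class _FakeOp(object):
    disk_template = constants.DT_PLAIN
    disks = [{constants.IDISK_SIZE: 10240}]
  return _ComputeDisks(_FakeOp(), "xenvg")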
6119

    
6120

    
6121
def _ComputeFullBeParams(op, cluster):
6122
  """Computes the full beparams.
6123

6124
  @param op: The instance opcode
6125
  @param cluster: The cluster config object
6126

6127
  @return: The fully filled beparams
6128

6129
  """
6130
  default_beparams = cluster.beparams[constants.PP_DEFAULT]
6131
  for param, value in op.beparams.iteritems():
6132
    if value == constants.VALUE_AUTO:
6133
      op.beparams[param] = default_beparams[param]
6134
  objects.UpgradeBeParams(op.beparams)
6135
  utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
6136
  return cluster.SimpleFillBE(op.beparams)
6137

    
6138

    
6139
def _CheckOpportunisticLocking(op):
6140
  """Generate error if opportunistic locking is not possible.
6141

6142
  """
6143
  if op.opportunistic_locking and not op.iallocator:
6144
    raise errors.OpPrereqError("Opportunistic locking is only available in"
6145
                               " combination with an instance allocator",
6146
                               errors.ECODE_INVAL)
6147

    
6148

    
6149
class LUInstanceCreate(LogicalUnit):
6150
  """Create an instance.
6151

6152
  """
6153
  HPATH = "instance-add"
6154
  HTYPE = constants.HTYPE_INSTANCE
6155
  REQ_BGL = False
6156

    
6157
  def CheckArguments(self):
6158
    """Check arguments.
6159

6160
    """
6161
    # do not require name_check to ease forward/backward compatibility
6162
    # for tools
6163
    if self.op.no_install and self.op.start:
6164
      self.LogInfo("No-installation mode selected, disabling startup")
6165
      self.op.start = False
6166
    # validate/normalize the instance name
6167
    self.op.instance_name = \
6168
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
6169

    
6170
    if self.op.ip_check and not self.op.name_check:
6171
      # TODO: make the ip check more flexible and not depend on the name check
6172
      raise errors.OpPrereqError("Cannot do IP address check without a name"
6173
                                 " check", errors.ECODE_INVAL)
6174

    
6175
    # check nics' parameter names
6176
    for nic in self.op.nics:
6177
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6178
    # check that NIC's parameters names are unique and valid
6179
    utils.ValidateDeviceNames("NIC", self.op.nics)
6180

    
6181
    # check that disk's names are unique and valid
6182
    utils.ValidateDeviceNames("disk", self.op.disks)
6183

    
6184
    cluster = self.cfg.GetClusterInfo()
6185
    if not self.op.disk_template in cluster.enabled_disk_templates:
6186
      raise errors.OpPrereqError("Cannot create an instance with disk template"
6187
                                 " '%s', because it is not enabled in the"
6188
                                 " cluster. Enabled disk templates are: %s." %
6189
                                 (self.op.disk_template,
6190
                                  ",".join(cluster.enabled_disk_templates)))
6191

    
6192
    # check disks. parameter names and consistent adopt/no-adopt strategy
6193
    has_adopt = has_no_adopt = False
6194
    for disk in self.op.disks:
6195
      if self.op.disk_template != constants.DT_EXT:
6196
        utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6197
      if constants.IDISK_ADOPT in disk:
6198
        has_adopt = True
6199
      else:
6200
        has_no_adopt = True
6201
    if has_adopt and has_no_adopt:
6202
      raise errors.OpPrereqError("Either all disks are adopted or none is",
6203
                                 errors.ECODE_INVAL)
6204
    if has_adopt:
6205
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6206
        raise errors.OpPrereqError("Disk adoption is not supported for the"
6207
                                   " '%s' disk template" %
6208
                                   self.op.disk_template,
6209
                                   errors.ECODE_INVAL)
6210
      if self.op.iallocator is not None:
6211
        raise errors.OpPrereqError("Disk adoption not allowed with an"
6212
                                   " iallocator script", errors.ECODE_INVAL)
6213
      if self.op.mode == constants.INSTANCE_IMPORT:
6214
        raise errors.OpPrereqError("Disk adoption not allowed for"
6215
                                   " instance import", errors.ECODE_INVAL)
6216
    else:
6217
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
6218
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
6219
                                   " but no 'adopt' parameter given" %
6220
                                   self.op.disk_template,
6221
                                   errors.ECODE_INVAL)
6222

    
6223
    self.adopt_disks = has_adopt
6224

    
6225
    # instance name verification
6226
    if self.op.name_check:
6227
      self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
6228
      self.op.instance_name = self.hostname1.name
6229
      # used in CheckPrereq for ip ping check
6230
      self.check_ip = self.hostname1.ip
6231
    else:
6232
      self.check_ip = None
6233

    
6234
    # file storage checks
6235
    if (self.op.file_driver and
6236
        not self.op.file_driver in constants.FILE_DRIVER):
6237
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
6238
                                 self.op.file_driver, errors.ECODE_INVAL)
6239

    
6240
    if self.op.disk_template == constants.DT_FILE:
6241
      opcodes.RequireFileStorage()
6242
    elif self.op.disk_template == constants.DT_SHARED_FILE:
6243
      opcodes.RequireSharedFileStorage()
6244

    
6245
    ### Node/iallocator related checks
6246
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6247

    
6248
    if self.op.pnode is not None:
6249
      if self.op.disk_template in constants.DTS_INT_MIRROR:
6250
        if self.op.snode is None:
6251
          raise errors.OpPrereqError("The networked disk templates need"
6252
                                     " a mirror node", errors.ECODE_INVAL)
6253
      elif self.op.snode:
6254
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6255
                        " template")
6256
        self.op.snode = None
6257

    
6258
    _CheckOpportunisticLocking(self.op)
6259

    
6260
    self._cds = _GetClusterDomainSecret()
6261

    
6262
    if self.op.mode == constants.INSTANCE_IMPORT:
6263
      # On import force_variant must be True, because if we forced it at
6264
      # initial install, our only chance when importing it back is that it
6265
      # works again!
6266
      self.op.force_variant = True
6267

    
6268
      if self.op.no_install:
6269
        self.LogInfo("No-installation mode has no effect during import")
6270

    
6271
    elif self.op.mode == constants.INSTANCE_CREATE:
6272
      if self.op.os_type is None:
6273
        raise errors.OpPrereqError("No guest OS specified",
6274
                                   errors.ECODE_INVAL)
6275
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6276
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6277
                                   " installation" % self.op.os_type,
6278
                                   errors.ECODE_STATE)
6279
      if self.op.disk_template is None:
6280
        raise errors.OpPrereqError("No disk template specified",
6281
                                   errors.ECODE_INVAL)
6282

    
6283
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6284
      # Check handshake to ensure both clusters have the same domain secret
6285
      src_handshake = self.op.source_handshake
6286
      if not src_handshake:
6287
        raise errors.OpPrereqError("Missing source handshake",
6288
                                   errors.ECODE_INVAL)
6289

    
6290
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6291
                                                           src_handshake)
6292
      if errmsg:
6293
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6294
                                   errors.ECODE_INVAL)
6295

    
6296
      # Load and check source CA
6297
      self.source_x509_ca_pem = self.op.source_x509_ca
6298
      if not self.source_x509_ca_pem:
6299
        raise errors.OpPrereqError("Missing source X509 CA",
6300
                                   errors.ECODE_INVAL)
6301

    
6302
      try:
6303
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6304
                                                    self._cds)
6305
      except OpenSSL.crypto.Error, err:
6306
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6307
                                   (err, ), errors.ECODE_INVAL)
6308

    
6309
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6310
      if errcode is not None:
6311
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6312
                                   errors.ECODE_INVAL)
6313

    
6314
      self.source_x509_ca = cert
6315

    
6316
      src_instance_name = self.op.source_instance_name
6317
      if not src_instance_name:
6318
        raise errors.OpPrereqError("Missing source instance name",
6319
                                   errors.ECODE_INVAL)
6320

    
6321
      self.source_instance_name = \
6322
          netutils.GetHostname(name=src_instance_name).name
6323

    
6324
    else:
6325
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
6326
                                 self.op.mode, errors.ECODE_INVAL)
6327

    
6328
  def ExpandNames(self):
6329
    """ExpandNames for CreateInstance.
6330

6331
    Figure out the right locks for instance creation.
6332

6333
    """
6334
    self.needed_locks = {}
6335

    
6336
    instance_name = self.op.instance_name
6337
    # this is just a preventive check, but someone might still add this
6338
    # instance in the meantime, and creation will fail at lock-add time
6339
    if instance_name in self.cfg.GetInstanceList():
6340
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6341
                                 instance_name, errors.ECODE_EXISTS)
6342

    
6343
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6344

    
6345
    if self.op.iallocator:
6346
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
6347
      # specifying a group on instance creation and then selecting nodes from
6348
      # that group
6349
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6350
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
6351

    
6352
      if self.op.opportunistic_locking:
6353
        self.opportunistic_locks[locking.LEVEL_NODE] = True
6354
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
6355
    else:
6356
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6357
      nodelist = [self.op.pnode]
6358
      if self.op.snode is not None:
6359
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6360
        nodelist.append(self.op.snode)
6361
      self.needed_locks[locking.LEVEL_NODE] = nodelist
6362

    
6363
    # in case of import lock the source node too
6364
    if self.op.mode == constants.INSTANCE_IMPORT:
6365
      src_node = self.op.src_node
6366
      src_path = self.op.src_path
6367

    
6368
      if src_path is None:
6369
        self.op.src_path = src_path = self.op.instance_name
6370

    
6371
      if src_node is None:
6372
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6373
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
6374
        self.op.src_node = None
6375
        if os.path.isabs(src_path):
6376
          raise errors.OpPrereqError("Importing an instance from a path"
6377
                                     " requires a source node option",
6378
                                     errors.ECODE_INVAL)
6379
      else:
6380
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6381
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6382
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
6383
        if not os.path.isabs(src_path):
6384
          self.op.src_path = src_path = \
6385
            utils.PathJoin(pathutils.EXPORT_DIR, src_path)
6386

    
6387
    self.needed_locks[locking.LEVEL_NODE_RES] = \
6388
      _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
6389

    
6390
  def _RunAllocator(self):
6391
    """Run the allocator based on input opcode.
6392

6393
    """
6394
    if self.op.opportunistic_locking:
6395
      # Only consider nodes for which a lock is held
6396
      node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
6397
    else:
6398
      node_whitelist = None
6399

    
6400
    #TODO Export network to iallocator so that it chooses a pnode
6401
    #     in a nodegroup that has the desired network connected to
6402
    req = _CreateInstanceAllocRequest(self.op, self.disks,
6403
                                      self.nics, self.be_full,
6404
                                      node_whitelist)
6405
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
6406

    
6407
    ial.Run(self.op.iallocator)
6408

    
6409
    if not ial.success:
6410
      # When opportunistic locks are used only a temporary failure is generated
6411
      if self.op.opportunistic_locking:
6412
        ecode = errors.ECODE_TEMP_NORES
6413
      else:
6414
        ecode = errors.ECODE_NORES
6415

    
6416
      raise errors.OpPrereqError("Can't compute nodes using"
6417
                                 " iallocator '%s': %s" %
6418
                                 (self.op.iallocator, ial.info),
6419
                                 ecode)
6420

    
6421
    self.op.pnode = ial.result[0]
6422
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6423
                 self.op.instance_name, self.op.iallocator,
6424
                 utils.CommaJoin(ial.result))
6425

    
6426
    assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
6427

    
6428
    if req.RequiredNodes() == 2:
6429
      self.op.snode = ial.result[1]
6430

    
6431
  def BuildHooksEnv(self):
6432
    """Build hooks env.
6433

6434
    This runs on master, primary and secondary nodes of the instance.
6435

6436
    """
6437
    env = {
6438
      "ADD_MODE": self.op.mode,
6439
      }
6440
    if self.op.mode == constants.INSTANCE_IMPORT:
6441
      env["SRC_NODE"] = self.op.src_node
6442
      env["SRC_PATH"] = self.op.src_path
6443
      env["SRC_IMAGES"] = self.src_images
6444

    
6445
    env.update(_BuildInstanceHookEnv(
6446
      name=self.op.instance_name,
6447
      primary_node=self.op.pnode,
6448
      secondary_nodes=self.secondaries,
6449
      status=self.op.start,
6450
      os_type=self.op.os_type,
6451
      minmem=self.be_full[constants.BE_MINMEM],
6452
      maxmem=self.be_full[constants.BE_MAXMEM],
6453
      vcpus=self.be_full[constants.BE_VCPUS],
6454
      nics=_NICListToTuple(self, self.nics),
6455
      disk_template=self.op.disk_template,
6456
      disks=[(d[constants.IDISK_NAME], d[constants.IDISK_SIZE],
6457
             d[constants.IDISK_MODE]) for d in self.disks],
6458
      bep=self.be_full,
6459
      hvp=self.hv_full,
6460
      hypervisor_name=self.op.hypervisor,
6461
      tags=self.op.tags,
6462
    ))
6463

    
6464
    return env
6465

    
6466
  def BuildHooksNodes(self):
6467
    """Build hooks nodes.
6468

6469
    """
6470
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
6471
    return nl, nl
6472

    
6473
  def _ReadExportInfo(self):
6474
    """Reads the export information from disk.
6475

6476
    It will override the opcode source node and path with the actual
6477
    information, if these two were not specified before.
6478

6479
    @return: the export information
6480

6481
    """
6482
    assert self.op.mode == constants.INSTANCE_IMPORT
6483

    
6484
    src_node = self.op.src_node
6485
    src_path = self.op.src_path
6486

    
6487
    if src_node is None:
6488
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
6489
      exp_list = self.rpc.call_export_list(locked_nodes)
6490
      found = False
6491
      for node in exp_list:
6492
        if exp_list[node].fail_msg:
6493
          continue
6494
        if src_path in exp_list[node].payload:
6495
          found = True
6496
          self.op.src_node = src_node = node
6497
          self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
6498
                                                       src_path)
6499
          break
6500
      if not found:
6501
        raise errors.OpPrereqError("No export found for relative path %s" %
6502
                                    src_path, errors.ECODE_INVAL)
6503

    
6504
    _CheckNodeOnline(self, src_node)
6505
    result = self.rpc.call_export_info(src_node, src_path)
6506
    result.Raise("No export or invalid export found in dir %s" % src_path)
6507

    
6508
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6509
    if not export_info.has_section(constants.INISECT_EXP):
6510
      raise errors.ProgrammerError("Corrupted export config",
6511
                                   errors.ECODE_ENVIRON)
6512

    
6513
    ei_version = export_info.get(constants.INISECT_EXP, "version")
6514
    if (int(ei_version) != constants.EXPORT_VERSION):
6515
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6516
                                 (ei_version, constants.EXPORT_VERSION),
6517
                                 errors.ECODE_ENVIRON)
6518
    return export_info
6519

    
6520
  def _ReadExportParams(self, einfo):
6521
    """Use export parameters as defaults.
6522

6523
    In case the opcode doesn't specify (as in override) some instance
6524
    parameters, then try to use them from the export information, if
6525
    that declares them.
6526

6527
    """
6528
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6529

    
6530
    if self.op.disk_template is None:
6531
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
6532
        self.op.disk_template = einfo.get(constants.INISECT_INS,
6533
                                          "disk_template")
6534
        if self.op.disk_template not in constants.DISK_TEMPLATES:
6535
          raise errors.OpPrereqError("Disk template specified in configuration"
6536
                                     " file is not one of the allowed values:"
6537
                                     " %s" %
6538
                                     " ".join(constants.DISK_TEMPLATES),
6539
                                     errors.ECODE_INVAL)
6540
      else:
6541
        raise errors.OpPrereqError("No disk template specified and the export"
6542
                                   " is missing the disk_template information",
6543
                                   errors.ECODE_INVAL)
6544

    
6545
    if not self.op.disks:
6546
      disks = []
6547
      # TODO: import the disk iv_name too
6548
      for idx in range(constants.MAX_DISKS):
6549
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
6550
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6551
          disks.append({constants.IDISK_SIZE: disk_sz})
6552
      self.op.disks = disks
6553
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
6554
        raise errors.OpPrereqError("No disk info specified and the export"
6555
                                   " is missing the disk information",
6556
                                   errors.ECODE_INVAL)
6557

    
6558
    if not self.op.nics:
6559
      nics = []
6560
      for idx in range(constants.MAX_NICS):
6561
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
6562
          ndict = {}
6563
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6564
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6565
            ndict[name] = v
6566
          nics.append(ndict)
6567
        else:
6568
          break
6569
      self.op.nics = nics
6570

    
6571
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
6572
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
6573

    
6574
    if (self.op.hypervisor is None and
6575
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
6576
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6577

    
6578
    if einfo.has_section(constants.INISECT_HYP):
6579
      # use the export parameters but do not override the ones
6580
      # specified by the user
6581
      for name, value in einfo.items(constants.INISECT_HYP):
6582
        if name not in self.op.hvparams:
6583
          self.op.hvparams[name] = value
6584

    
6585
    if einfo.has_section(constants.INISECT_BEP):
6586
      # use the parameters, without overriding
6587
      for name, value in einfo.items(constants.INISECT_BEP):
6588
        if name not in self.op.beparams:
6589
          self.op.beparams[name] = value
6590
        # Compatibility for the old "memory" be param
6591
        if name == constants.BE_MEMORY:
6592
          if constants.BE_MAXMEM not in self.op.beparams:
6593
            self.op.beparams[constants.BE_MAXMEM] = value
6594
          if constants.BE_MINMEM not in self.op.beparams:
6595
            self.op.beparams[constants.BE_MINMEM] = value
6596
    else:
6597
      # try to read the parameters old style, from the main section
6598
      for name in constants.BES_PARAMETERS:
6599
        if (name not in self.op.beparams and
6600
            einfo.has_option(constants.INISECT_INS, name)):
6601
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6602

    
6603
    if einfo.has_section(constants.INISECT_OSP):
6604
      # use the parameters, without overriding
6605
      for name, value in einfo.items(constants.INISECT_OSP):
6606
        if name not in self.op.osparams:
6607
          self.op.osparams[name] = value
6608

    
6609
  def _RevertToDefaults(self, cluster):
6610
    """Revert the instance parameters to the default values.
6611

6612
    """
6613
    # hvparams
6614
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6615
    for name in self.op.hvparams.keys():
6616
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6617
        del self.op.hvparams[name]
6618
    # beparams
6619
    be_defs = cluster.SimpleFillBE({})
6620
    for name in self.op.beparams.keys():
6621
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
6622
        del self.op.beparams[name]
6623
    # nic params
6624
    nic_defs = cluster.SimpleFillNIC({})
6625
    for nic in self.op.nics:
6626
      for name in constants.NICS_PARAMETERS:
6627
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6628
          del nic[name]
6629
    # osparams
6630
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6631
    for name in self.op.osparams.keys():
6632
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
6633
        del self.op.osparams[name]
6634

    
6635
  def _CalculateFileStorageDir(self):
6636
    """Calculate final instance file storage dir.
6637

6638
    """
6639
    # file storage dir calculation/check
6640
    self.instance_file_storage_dir = None
6641
    if self.op.disk_template in constants.DTS_FILEBASED:
6642
      # build the full file storage dir path
6643
      joinargs = []
6644

    
6645
      if self.op.disk_template == constants.DT_SHARED_FILE:
6646
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
6647
      else:
6648
        get_fsd_fn = self.cfg.GetFileStorageDir
6649

    
6650
      cfg_storagedir = get_fsd_fn()
6651
      if not cfg_storagedir:
6652
        raise errors.OpPrereqError("Cluster file storage dir not defined",
6653
                                   errors.ECODE_STATE)
6654
      joinargs.append(cfg_storagedir)
6655

    
6656
      if self.op.file_storage_dir is not None:
6657
        joinargs.append(self.op.file_storage_dir)
6658

    
6659
      joinargs.append(self.op.instance_name)
6660

    
6661
      # pylint: disable=W0142
6662
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
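      # The path computed above has the form
      # "<cluster storage dir>[/<op.file_storage_dir>]/<instance name>", e.g.
      # (hypothetical) /srv/ganeti/file-storage/web/inst1.example.com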
6663

    
6664
  def CheckPrereq(self): # pylint: disable=R0914
6665
    """Check prerequisites.
6666

6667
    """
6668
    self._CalculateFileStorageDir()
6669

    
6670
    if self.op.mode == constants.INSTANCE_IMPORT:
6671
      export_info = self._ReadExportInfo()
6672
      self._ReadExportParams(export_info)
6673
      self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
6674
    else:
6675
      self._old_instance_name = None
6676

    
6677
    if (not self.cfg.GetVGName() and
6678
        self.op.disk_template not in constants.DTS_NOT_LVM):
6679
      raise errors.OpPrereqError("Cluster does not support lvm-based"
6680
                                 " instances", errors.ECODE_STATE)
6681

    
6682
    if (self.op.hypervisor is None or
6683
        self.op.hypervisor == constants.VALUE_AUTO):
6684
      self.op.hypervisor = self.cfg.GetHypervisorType()
6685

    
6686
    cluster = self.cfg.GetClusterInfo()
6687
    enabled_hvs = cluster.enabled_hypervisors
6688
    if self.op.hypervisor not in enabled_hvs:
6689
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6690
                                 " cluster (%s)" %
6691
                                 (self.op.hypervisor, ",".join(enabled_hvs)),
6692
                                 errors.ECODE_STATE)
6693

    
6694
    # Check tag validity
6695
    for tag in self.op.tags:
6696
      objects.TaggableObject.ValidateTag(tag)
6697

    
6698
    # check hypervisor parameter syntax (locally)
6699
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6700
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6701
                                      self.op.hvparams)
6702
    hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
6703
    hv_type.CheckParameterSyntax(filled_hvp)
6704
    self.hv_full = filled_hvp
6705
    # check that we don't specify global parameters on an instance
6706
    _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
6707
                          "instance", "cluster")
6708

    
6709
    # fill and remember the beparams dict
6710
    self.be_full = _ComputeFullBeParams(self.op, cluster)
6711

    
6712
    # build os parameters
6713
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6714

    
6715
    # now that hvp/bep are in final format, let's reset to defaults,
6716
    # if told to do so
6717
    if self.op.identify_defaults:
6718
      self._RevertToDefaults(cluster)
6719

    
6720
    # NIC buildup
6721
    self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
6722
                             self.proc.GetECId())
6723

    
6724
    # disk checks/pre-build
6725
    default_vg = self.cfg.GetVGName()
6726
    self.disks = _ComputeDisks(self.op, default_vg)
6727

    
6728
    if self.op.mode == constants.INSTANCE_IMPORT:
6729
      disk_images = []
6730
      for idx in range(len(self.disks)):
6731
        option = "disk%d_dump" % idx
6732
        if export_info.has_option(constants.INISECT_INS, option):
6733
          # FIXME: are the old os-es, disk sizes, etc. useful?
6734
          export_name = export_info.get(constants.INISECT_INS, option)
6735
          image = utils.PathJoin(self.op.src_path, export_name)
6736
          disk_images.append(image)
6737
        else:
6738
          disk_images.append(False)
6739

    
6740
      self.src_images = disk_images
6741

    
6742
      if self.op.instance_name == self._old_instance_name:
6743
        for idx, nic in enumerate(self.nics):
6744
          if nic.mac == constants.VALUE_AUTO:
6745
            nic_mac_ini = "nic%d_mac" % idx
6746
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6747

    
6748
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6749

    
6750
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
6751
    if self.op.ip_check:
6752
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6753
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6754
                                   (self.check_ip, self.op.instance_name),
6755
                                   errors.ECODE_NOTUNIQUE)
6756

    
6757
    #### mac address generation
6758
    # By generating here the mac address both the allocator and the hooks get
6759
    # the real final mac address rather than the 'auto' or 'generate' value.
6760
    # There is a race condition between the generation and the instance object
6761
    # creation, which means that we know the mac is valid now, but we're not
6762
    # sure it will be when we actually add the instance. If things go bad
6763
    # adding the instance will abort because of a duplicate mac, and the
6764
    # creation job will fail.
6765
    for nic in self.nics:
6766
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6767
        nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
6768

    
6769
    #### allocator run
6770

    
6771
    if self.op.iallocator is not None:
6772
      self._RunAllocator()
6773

    
6774
    # Release all unneeded node locks
6775
    keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
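    # filter(None, ...) drops the unset (None) entries, so only the nodes
    # actually referenced by this operation (primary, secondary and import
    # source node, when set) keep their locks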
6776
    _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
6777
    _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
6778
    _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
6779

    
6780
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6781
            self.owned_locks(locking.LEVEL_NODE_RES)), \
6782
      "Node locks differ from node resource locks"
6783

    
6784
    #### node related checks
6785

    
6786
    # check primary node
6787
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6788
    assert self.pnode is not None, \
6789
      "Cannot retrieve locked node %s" % self.op.pnode
6790
    if pnode.offline:
6791
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6792
                                 pnode.name, errors.ECODE_STATE)
6793
    if pnode.drained:
6794
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6795
                                 pnode.name, errors.ECODE_STATE)
6796
    if not pnode.vm_capable:
6797
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
6798
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
6799

    
6800
    self.secondaries = []
6801

    
6802
    # Fill in any IPs from IP pools. This must happen here, because we need to
6803
    # know the nic's primary node, as specified by the iallocator
6804
    for idx, nic in enumerate(self.nics):
6805
      net_uuid = nic.network
6806
      if net_uuid is not None:
6807
        nobj = self.cfg.GetNetwork(net_uuid)
6808
        netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
6809
        if netparams is None:
6810
          raise errors.OpPrereqError("No netparams found for network"
6811
                                     " %s. Propably not connected to"
6812
                                     " node's %s nodegroup" %
6813
                                     (nobj.name, self.pnode.name),
6814
                                     errors.ECODE_INVAL)
6815
        self.LogInfo("NIC/%d inherits netparams %s" %
6816
                     (idx, netparams.values()))
6817
        nic.nicparams = dict(netparams)
6818
        if nic.ip is not None:
6819
          if nic.ip.lower() == constants.NIC_IP_POOL:
6820
            try:
6821
              nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
6822
            except errors.ReservationError:
6823
              raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
6824
                                         " from the address pool" % idx,
6825
                                         errors.ECODE_STATE)
6826
            self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
6827
          else:
6828
            try:
6829
              self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
6830
            except errors.ReservationError:
6831
              raise errors.OpPrereqError("IP address %s already in use"
6832
                                         " or does not belong to network %s" %
6833
                                         (nic.ip, nobj.name),
6834
                                         errors.ECODE_NOTUNIQUE)
6835

    
6836
      # no network for this NIC: the IP, if any, was given explicitly
6837
      elif self.op.conflicts_check:
6838
        _CheckForConflictingIp(self, nic.ip, self.pnode.name)
6839

    
6840
    # mirror node verification
6841
    if self.op.disk_template in constants.DTS_INT_MIRROR:
6842
      if self.op.snode == pnode.name:
6843
        raise errors.OpPrereqError("The secondary node cannot be the"
6844
                                   " primary node", errors.ECODE_INVAL)
6845
      _CheckNodeOnline(self, self.op.snode)
6846
      _CheckNodeNotDrained(self, self.op.snode)
6847
      _CheckNodeVmCapable(self, self.op.snode)
6848
      self.secondaries.append(self.op.snode)
6849

    
6850
      snode = self.cfg.GetNodeInfo(self.op.snode)
6851
      if pnode.group != snode.group:
6852
        self.LogWarning("The primary and secondary nodes are in two"
6853
                        " different node groups; the disk parameters"
6854
                        " from the first disk's node group will be"
6855
                        " used")
6856

    
6857
    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
6858
      nodes = [pnode]
6859
      if self.op.disk_template in constants.DTS_INT_MIRROR:
6860
        nodes.append(snode)
6861
      has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
6862
      if compat.any(map(has_es, nodes)):
6863
        raise errors.OpPrereqError("Disk template %s not supported with"
6864
                                   " exclusive storage" % self.op.disk_template,
6865
                                   errors.ECODE_STATE)
6866

    
6867
    nodenames = [pnode.name] + self.secondaries
6868

    
6869
    if not self.adopt_disks:
6870
      if self.op.disk_template == constants.DT_RBD:
6871
        # _CheckRADOSFreeSpace() is just a placeholder.
6872
        # Any function that checks prerequisites can be placed here.
6873
        # Check if there is enough space on the RADOS cluster.
6874
        _CheckRADOSFreeSpace()
6875
      elif self.op.disk_template == constants.DT_EXT:
6876
        # FIXME: Function that checks prereqs if needed
6877
        pass
6878
      else:
6879
        # Check lv size requirements, if not adopting
6880
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
6881
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
6882

    
6883
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
6884
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
6885
                                disk[constants.IDISK_ADOPT])
6886
                     for disk in self.disks])
6887
      if len(all_lvs) != len(self.disks):
6888
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
6889
                                   errors.ECODE_INVAL)
6890
      for lv_name in all_lvs:
6891
        try:
6892
          # FIXME: lv_name here is "vg/lv"; we need to ensure that other
          # calls to ReserveLV use the same syntax
6894
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6895
        except errors.ReservationError:
6896
          raise errors.OpPrereqError("LV named %s used by another instance" %
6897
                                     lv_name, errors.ECODE_NOTUNIQUE)
6898

    
6899
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
6900
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
6901

    
6902
      node_lvs = self.rpc.call_lv_list([pnode.name],
6903
                                       vg_names.payload.keys())[pnode.name]
6904
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6905
      node_lvs = node_lvs.payload
6906

    
6907
      delta = all_lvs.difference(node_lvs.keys())
6908
      if delta:
6909
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
6910
                                   utils.CommaJoin(delta),
6911
                                   errors.ECODE_INVAL)
6912
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6913
      if online_lvs:
6914
        raise errors.OpPrereqError("Online logical volumes found, cannot"
6915
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
6916
                                   errors.ECODE_STATE)
6917
      # update the size of each disk based on what was found
6918
      for dsk in self.disks:
6919
        dsk[constants.IDISK_SIZE] = \
6920
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
6921
                                        dsk[constants.IDISK_ADOPT])][0]))
6922

    
6923
    elif self.op.disk_template == constants.DT_BLOCK:
6924
      # Normalize and de-duplicate device paths
6925
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
6926
                       for disk in self.disks])
6927
      if len(all_disks) != len(self.disks):
6928
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
6929
                                   errors.ECODE_INVAL)
6930
      baddisks = [d for d in all_disks
6931
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
6932
      if baddisks:
6933
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
6934
                                   " cannot be adopted" %
6935
                                   (utils.CommaJoin(baddisks),
6936
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
6937
                                   errors.ECODE_INVAL)
6938

    
6939
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
6940
                                            list(all_disks))[pnode.name]
6941
      node_disks.Raise("Cannot get block device information from node %s" %
6942
                       pnode.name)
6943
      node_disks = node_disks.payload
6944
      delta = all_disks.difference(node_disks.keys())
6945
      if delta:
6946
        raise errors.OpPrereqError("Missing block device(s): %s" %
6947
                                   utils.CommaJoin(delta),
6948
                                   errors.ECODE_INVAL)
6949
      for dsk in self.disks:
6950
        dsk[constants.IDISK_SIZE] = \
6951
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
6952

    
6953
    # Verify instance specs
6954
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
6955
    ispec = {
6956
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
6957
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
6958
      constants.ISPEC_DISK_COUNT: len(self.disks),
6959
      constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
6960
                                  for disk in self.disks],
6961
      constants.ISPEC_NIC_COUNT: len(self.nics),
6962
      constants.ISPEC_SPINDLE_USE: spindle_use,
6963
      }
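    # The ispec dict mirrors the instance policy spec keys (memory, vcpus,
    # disk/NIC counts, disk sizes, spindle use) so that it can be matched
    # against the owning node group's ipolicy right below.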
6964

    
6965
    group_info = self.cfg.GetNodeGroup(pnode.group)
6966
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
6967
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec,
6968
                                               self.op.disk_template)
6969
    if not self.op.ignore_ipolicy and res:
6970
      msg = ("Instance allocation to group %s (%s) violates policy: %s" %
6971
             (pnode.group, group_info.name, utils.CommaJoin(res)))
6972
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
6973

    
6974
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6975

    
6976
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6977
    # check OS parameters (remotely)
6978
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
6979

    
6980
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6981

    
6982
    #TODO: _CheckExtParams (remotely)
6983
    # Check parameters for extstorage
6984

    
6985
    # memory check on primary node
6986
    #TODO(dynmem): use MINMEM for checking
6987
    if self.op.start:
6988
      _CheckNodeFreeMemory(self, self.pnode.name,
6989
                           "creating instance %s" % self.op.instance_name,
6990
                           self.be_full[constants.BE_MAXMEM],
6991
                           self.op.hypervisor)
6992

    
6993
    self.dry_run_result = list(nodenames)
6994

    
6995
  def Exec(self, feedback_fn):
6996
    """Create and add the instance to the cluster.
6997

6998
    """
6999
    instance = self.op.instance_name
7000
    pnode_name = self.pnode.name
7001

    
7002
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
7003
                self.owned_locks(locking.LEVEL_NODE)), \
7004
      "Node locks differ from node resource locks"
7005
    assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7006

    
7007
    ht_kind = self.op.hypervisor
7008
    if ht_kind in constants.HTS_REQ_PORT:
7009
      network_port = self.cfg.AllocatePort()
7010
    else:
7011
      network_port = None
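    # Hypervisors in HTS_REQ_PORT (typically the ones exposing a VNC-style
    # console) need a cluster-wide unique TCP port reserved for the
    # instance; the others run without one.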
7012

    
7013
    # This is ugly, but we have a chicken-and-egg problem here: we can only
    # take the group disk parameters, as the instance has no disks yet (we
    # are generating them right here).
7016
    node = self.cfg.GetNodeInfo(pnode_name)
7017
    nodegroup = self.cfg.GetNodeGroup(node.group)
7018
    disks = _GenerateDiskTemplate(self,
7019
                                  self.op.disk_template,
7020
                                  instance, pnode_name,
7021
                                  self.secondaries,
7022
                                  self.disks,
7023
                                  self.instance_file_storage_dir,
7024
                                  self.op.file_driver,
7025
                                  0,
7026
                                  feedback_fn,
7027
                                  self.cfg.GetGroupDiskParams(nodegroup))
7028

    
7029
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7030
                            primary_node=pnode_name,
7031
                            nics=self.nics, disks=disks,
7032
                            disk_template=self.op.disk_template,
7033
                            admin_state=constants.ADMINST_DOWN,
7034
                            network_port=network_port,
7035
                            beparams=self.op.beparams,
7036
                            hvparams=self.op.hvparams,
7037
                            hypervisor=self.op.hypervisor,
7038
                            osparams=self.op.osparams,
7039
                            )
7040

    
7041
    if self.op.tags:
7042
      for tag in self.op.tags:
7043
        iobj.AddTag(tag)
7044

    
7045
    if self.adopt_disks:
7046
      if self.op.disk_template == constants.DT_PLAIN:
7047
        # rename LVs to the newly-generated names; we need to construct
7048
        # 'fake' LV disks with the old data, plus the new unique_id
7049
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7050
        rename_to = []
7051
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7052
          rename_to.append(t_dsk.logical_id)
7053
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
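          # t_dsk now points at the existing (adopted) LV, while rename_to
          # records the newly generated name it will be renamed to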
7054
          self.cfg.SetDiskID(t_dsk, pnode_name)
7055
        result = self.rpc.call_blockdev_rename(pnode_name,
7056
                                               zip(tmp_disks, rename_to))
7057
        result.Raise("Failed to rename adoped LVs")
7058
    else:
7059
      feedback_fn("* creating instance disks...")
7060
      try:
7061
        _CreateDisks(self, iobj)
7062
      except errors.OpExecError:
7063
        self.LogWarning("Device creation failed")
7064
        self.cfg.ReleaseDRBDMinors(instance)
7065
        raise
7066

    
7067
    feedback_fn("adding instance %s to cluster config" % instance)
7068

    
7069
    self.cfg.AddInstance(iobj, self.proc.GetECId())
7070

    
7071
    # Declare that we don't want to remove the instance lock anymore, as we've
7072
    # added the instance to the config
7073
    del self.remove_locks[locking.LEVEL_INSTANCE]
7074

    
7075
    if self.op.mode == constants.INSTANCE_IMPORT:
7076
      # Release unused nodes
7077
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
7078
    else:
7079
      # Release all nodes
7080
      _ReleaseLocks(self, locking.LEVEL_NODE)
7081

    
7082
    disk_abort = False
7083
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
7084
      feedback_fn("* wiping instance disks...")
7085
      try:
7086
        _WipeDisks(self, iobj)
7087
      except errors.OpExecError, err:
7088
        logging.exception("Wiping disks failed")
7089
        self.LogWarning("Wiping instance disks failed (%s)", err)
7090
        disk_abort = True
7091

    
7092
    if disk_abort:
7093
      # Something is already wrong with the disks, don't do anything else
7094
      pass
7095
    elif self.op.wait_for_sync:
7096
      disk_abort = not _WaitForSync(self, iobj)
7097
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
7098
      # make sure the disks are not degraded (still sync-ing is ok)
7099
      feedback_fn("* checking mirrors status")
7100
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7101
    else:
7102
      disk_abort = False
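    # If wiping failed or the sync check reported degraded disks, the
    # partially created instance is removed again below and the LU aborts.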
7103

    
7104
    if disk_abort:
7105
      _RemoveDisks(self, iobj)
7106
      self.cfg.RemoveInstance(iobj.name)
7107
      # Make sure the instance lock gets removed
7108
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7109
      raise errors.OpExecError("There are some degraded disks for"
7110
                               " this instance")
7111

    
7112
    # Release all node resource locks
7113
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
7114

    
7115
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7116
      # we need to set the disk IDs to the primary node, since the
      # preceding code might or might not have done it, depending on
      # disk template and other options
7119
      for disk in iobj.disks:
7120
        self.cfg.SetDiskID(disk, pnode_name)
7121
      if self.op.mode == constants.INSTANCE_CREATE:
7122
        if not self.op.no_install:
7123
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
7124
                        not self.op.wait_for_sync)
7125
          if pause_sync:
7126
            feedback_fn("* pausing disk sync to install instance OS")
7127
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
7128
                                                              (iobj.disks,
7129
                                                               iobj), True)
7130
            for idx, success in enumerate(result.payload):
7131
              if not success:
7132
                logging.warn("pause-sync of instance %s for disk %d failed",
7133
                             instance, idx)
7134

    
7135
          feedback_fn("* running the instance OS create scripts...")
7136
          # FIXME: pass debug option from opcode to backend
7137
          os_add_result = \
7138
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
7139
                                          self.op.debug_level)
7140
          if pause_sync:
7141
            feedback_fn("* resuming disk sync")
7142
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
7143
                                                              (iobj.disks,
7144
                                                               iobj), False)
7145
            for idx, success in enumerate(result.payload):
7146
              if not success:
7147
                logging.warn("resume-sync of instance %s for disk %d failed",
7148
                             instance, idx)
7149

    
7150
          os_add_result.Raise("Could not add os for instance %s"
7151
                              " on node %s" % (instance, pnode_name))
7152

    
7153
      else:
7154
        if self.op.mode == constants.INSTANCE_IMPORT:
7155
          feedback_fn("* running the instance OS import scripts...")
7156

    
7157
          transfers = []
7158

    
7159
          for idx, image in enumerate(self.src_images):
7160
            if not image:
7161
              continue
7162

    
7163
            # FIXME: pass debug option from opcode to backend
7164
            dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7165
                                               constants.IEIO_FILE, (image, ),
7166
                                               constants.IEIO_SCRIPT,
7167
                                               (iobj.disks[idx], idx),
7168
                                               None)
7169
            transfers.append(dt)
7170

    
7171
          import_result = \
7172
            masterd.instance.TransferInstanceData(self, feedback_fn,
7173
                                                  self.op.src_node, pnode_name,
7174
                                                  self.pnode.secondary_ip,
7175
                                                  iobj, transfers)
7176
          if not compat.all(import_result):
7177
            self.LogWarning("Some disks for instance %s on node %s were not"
7178
                            " imported successfully" % (instance, pnode_name))
7179

    
7180
          rename_from = self._old_instance_name
7181

    
7182
        elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7183
          feedback_fn("* preparing remote import...")
7184
          # The source cluster will stop the instance before attempting to make
7185
          # a connection. In some cases stopping an instance can take a long
7186
          # time, hence the shutdown timeout is added to the connection
7187
          # timeout.
7188
          connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7189
                             self.op.source_shutdown_timeout)
7190
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7191

    
7192
          assert iobj.primary_node == self.pnode.name
7193
          disk_results = \
7194
            masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7195
                                          self.source_x509_ca,
7196
                                          self._cds, timeouts)
7197
          if not compat.all(disk_results):
7198
            # TODO: Should the instance still be started, even if some disks
7199
            # failed to import (valid for local imports, too)?
7200
            self.LogWarning("Some disks for instance %s on node %s were not"
7201
                            " imported successfully" % (instance, pnode_name))
7202

    
7203
          rename_from = self.source_instance_name
7204

    
7205
        else:
7206
          # also checked in the prereq part
7207
          raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7208
                                       % self.op.mode)
7209

    
7210
        # Run rename script on newly imported instance
7211
        assert iobj.name == instance
7212
        feedback_fn("Running rename script for %s" % instance)
7213
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7214
                                                   rename_from,
7215
                                                   self.op.debug_level)
7216
        if result.fail_msg:
7217
          self.LogWarning("Failed to run rename script for %s on node"
7218
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
7219

    
7220
    assert not self.owned_locks(locking.LEVEL_NODE_RES)
7221

    
7222
    if self.op.start:
7223
      iobj.admin_state = constants.ADMINST_UP
7224
      self.cfg.Update(iobj, feedback_fn)
7225
      logging.info("Starting instance %s on node %s", instance, pnode_name)
7226
      feedback_fn("* starting instance...")
7227
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
7228
                                            False, self.op.reason)
7229
      result.Raise("Could not start instance")
7230

    
7231
    return list(iobj.all_nodes)


class LUInstanceMultiAlloc(NoHooksLU):
7235
  """Allocates multiple instances at the same time.
7236

7237
  """
7238
  REQ_BGL = False
7239

    
7240
  def CheckArguments(self):
7241
    """Check arguments.
7242

7243
    """
7244
    nodes = []
7245
    for inst in self.op.instances:
7246
      if inst.iallocator is not None:
7247
        raise errors.OpPrereqError("iallocator are not allowed to be set on"
7248
                                   " instance objects", errors.ECODE_INVAL)
7249
      nodes.append(bool(inst.pnode))
7250
      if inst.disk_template in constants.DTS_INT_MIRROR:
7251
        nodes.append(bool(inst.snode))
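    # "nodes" now holds one boolean per required node specification; the
    # XOR check below rejects the mixed case where only some of the
    # instances specify their nodes explicitly.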
7252

    
7253
    has_nodes = compat.any(nodes)
7254
    if compat.all(nodes) ^ has_nodes:
7255
      raise errors.OpPrereqError("There are instance objects providing"
7256
                                 " pnode/snode while others do not",
7257
                                 errors.ECODE_INVAL)
7258

    
7259
    if self.op.iallocator is None:
7260
      default_iallocator = self.cfg.GetDefaultIAllocator()
7261
      if default_iallocator and has_nodes:
7262
        self.op.iallocator = default_iallocator
7263
      else:
7264
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
7265
                                   " given and no cluster-wide default"
7266
                                   " iallocator found; please specify either"
7267
                                   " an iallocator or nodes on the instances"
7268
                                   " or set a cluster-wide default iallocator",
7269
                                   errors.ECODE_INVAL)
7270

    
7271
    _CheckOpportunisticLocking(self.op)
7272

    
7273
    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
7274
    if dups:
7275
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
7276
                                 utils.CommaJoin(dups), errors.ECODE_INVAL)
7277

    
7278
  def ExpandNames(self):
7279
    """Calculate the locks.
7280

7281
    """
7282
    self.share_locks = _ShareAll()
7283
    self.needed_locks = {
7284
      # iallocator will select nodes and even if no iallocator is used,
7285
      # collisions with LUInstanceCreate should be avoided
7286
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
7287
      }
7288

    
7289
    if self.op.iallocator:
7290
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7291
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
7292

    
7293
      if self.op.opportunistic_locking:
7294
        self.opportunistic_locks[locking.LEVEL_NODE] = True
7295
        self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
7296
    else:
7297
      nodeslist = []
7298
      for inst in self.op.instances:
7299
        inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
7300
        nodeslist.append(inst.pnode)
7301
        if inst.snode is not None:
7302
          inst.snode = _ExpandNodeName(self.cfg, inst.snode)
7303
          nodeslist.append(inst.snode)
7304

    
7305
      self.needed_locks[locking.LEVEL_NODE] = nodeslist
7306
      # Lock resources of the instance's primary and secondary nodes (copy
      # to prevent accidental modification)
7308
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
7309

    
7310
  def CheckPrereq(self):
7311
    """Check prerequisite.
7312

7313
    """
7314
    cluster = self.cfg.GetClusterInfo()
7315
    default_vg = self.cfg.GetVGName()
7316
    ec_id = self.proc.GetECId()
7317

    
7318
    if self.op.opportunistic_locking:
7319
      # Only consider nodes for which a lock is held
7320
      node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
7321
    else:
7322
      node_whitelist = None
7323

    
7324
    insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
7325
                                         _ComputeNics(op, cluster, None,
7326
                                                      self.cfg, ec_id),
7327
                                         _ComputeFullBeParams(op, cluster),
7328
                                         node_whitelist)
7329
             for op in self.op.instances]
7330

    
7331
    req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
7332
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7333

    
7334
    ial.Run(self.op.iallocator)
7335

    
7336
    if not ial.success:
7337
      raise errors.OpPrereqError("Can't compute nodes using"
7338
                                 " iallocator '%s': %s" %
7339
                                 (self.op.iallocator, ial.info),
7340
                                 errors.ECODE_NORES)
7341

    
7342
    self.ia_result = ial.result
7343

    
7344
    if self.op.dry_run:
7345
      self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
7346
        constants.JOB_IDS_KEY: [],
7347
        })
7348

    
7349
  def _ConstructPartialResult(self):
7350
    """Contructs the partial result.
7351

7352
    """
7353
    (allocatable, failed) = self.ia_result
7354
    return {
7355
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
7356
        map(compat.fst, allocatable),
7357
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
7358
      }
7359

    
7360
  def Exec(self, feedback_fn):
7361
    """Executes the opcode.
7362

7363
    """
7364
    op2inst = dict((op.instance_name, op) for op in self.op.instances)
7365
    (allocatable, failed) = self.ia_result
7366

    
7367
    jobs = []
7368
    for (name, nodes) in allocatable:
7369
      op = op2inst.pop(name)
7370

    
7371
      if len(nodes) > 1:
7372
        (op.pnode, op.snode) = nodes
7373
      else:
7374
        (op.pnode,) = nodes
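      # A single node means a non-mirrored template (primary only); two
      # nodes mean the iallocator also picked a secondary for a mirrored
      # template.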
7375

    
7376
      jobs.append([op])
7377

    
7378
    missing = set(op2inst.keys()) - set(failed)
7379
    assert not missing, \
7380
      "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
7381

    
7382
    return ResultWithJobs(jobs, **self._ConstructPartialResult())


def _CheckRADOSFreeSpace():
  """Compute disk size requirements inside the RADOS cluster.

  """
  # For the RADOS cluster we assume there is always enough space.
  pass
7391

    
7392

    
7393
class LUInstanceConsole(NoHooksLU):
7394
  """Connect to an instance's console.
7395

7396
  This is somewhat special in that it returns the command line that
7397
  you need to run on the master node in order to connect to the
7398
  console.
7399

7400
  """
7401
  REQ_BGL = False
7402

    
7403
  def ExpandNames(self):
7404
    self.share_locks = _ShareAll()
7405
    self._ExpandAndLockInstance()
7406

    
7407
  def CheckPrereq(self):
7408
    """Check prerequisites.
7409

7410
    This checks that the instance is in the cluster.
7411

7412
    """
7413
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7414
    assert self.instance is not None, \
7415
      "Cannot retrieve locked instance %s" % self.op.instance_name
7416
    _CheckNodeOnline(self, self.instance.primary_node)
7417

    
7418
  def Exec(self, feedback_fn):
7419
    """Connect to the console of an instance
7420

7421
    """
7422
    instance = self.instance
7423
    node = instance.primary_node
7424

    
7425
    node_insts = self.rpc.call_instance_list([node],
7426
                                             [instance.hypervisor])[node]
7427
    node_insts.Raise("Can't get node information from %s" % node)
7428

    
7429
    if instance.name not in node_insts.payload:
7430
      if instance.admin_state == constants.ADMINST_UP:
7431
        state = constants.INSTST_ERRORDOWN
7432
      elif instance.admin_state == constants.ADMINST_DOWN:
7433
        state = constants.INSTST_ADMINDOWN
7434
      else:
7435
        state = constants.INSTST_ADMINOFFLINE
7436
      raise errors.OpExecError("Instance %s is not running (state %s)" %
7437
                               (instance.name, state))
7438

    
7439
    logging.debug("Connecting to console of %s on %s", instance.name, node)
7440

    
7441
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
7442

    
7443

    
7444
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
7452
  hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
7453
  # beparams and hvparams are passed separately, to avoid editing the
7454
  # instance and then saving the defaults in the instance itself.
7455
  hvparams = cluster.FillHV(instance)
7456
  beparams = cluster.FillBE(instance)
7457
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7458

    
7459
  assert console.instance == instance.name
7460
  assert console.Validate()
7461

    
7462
  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
7466
  """Replace the disks of an instance.
7467

7468
  """
7469
  HPATH = "mirrors-replace"
7470
  HTYPE = constants.HTYPE_INSTANCE
7471
  REQ_BGL = False
7472

    
7473
  def CheckArguments(self):
7474
    """Check arguments.
7475

7476
    """
7477
    remote_node = self.op.remote_node
7478
    ialloc = self.op.iallocator
7479
    if self.op.mode == constants.REPLACE_DISK_CHG:
7480
      if remote_node is None and ialloc is None:
7481
        raise errors.OpPrereqError("When changing the secondary either an"
7482
                                   " iallocator script must be used or the"
7483
                                   " new node given", errors.ECODE_INVAL)
7484
      else:
7485
        _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
7486

    
7487
    elif remote_node is not None or ialloc is not None:
7488
      # Not replacing the secondary
7489
      raise errors.OpPrereqError("The iallocator and new node options can"
7490
                                 " only be used when changing the"
7491
                                 " secondary node", errors.ECODE_INVAL)
7492

    
7493
  def ExpandNames(self):
7494
    self._ExpandAndLockInstance()
7495

    
7496
    assert locking.LEVEL_NODE not in self.needed_locks
7497
    assert locking.LEVEL_NODE_RES not in self.needed_locks
7498
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
7499

    
7500
    assert self.op.iallocator is None or self.op.remote_node is None, \
7501
      "Conflicting options"
7502

    
7503
    if self.op.remote_node is not None:
7504
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7505

    
7506
      # Warning: do not remove the locking of the new secondary here
7507
      # unless DRBD8.AddChildren is changed to work in parallel;
7508
      # currently it doesn't since parallel invocations of
7509
      # FindUnusedMinor will conflict
7510
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7511
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7512
    else:
7513
      self.needed_locks[locking.LEVEL_NODE] = []
7514
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7515

    
7516
      if self.op.iallocator is not None:
7517
        # iallocator will select a new node in the same group
7518
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
7519
        self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7520

    
7521
    self.needed_locks[locking.LEVEL_NODE_RES] = []
7522

    
7523
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7524
                                   self.op.iallocator, self.op.remote_node,
7525
                                   self.op.disks, self.op.early_release,
7526
                                   self.op.ignore_ipolicy)
7527

    
7528
    self.tasklets = [self.replacer]
7529

    
7530
  def DeclareLocks(self, level):
7531
    if level == locking.LEVEL_NODEGROUP:
7532
      assert self.op.remote_node is None
7533
      assert self.op.iallocator is not None
7534
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7535

    
7536
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
7537
      # Lock all groups used by instance optimistically; this requires going
7538
      # via the node before it's locked, requiring verification later on
7539
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
7540
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
7541

    
7542
    elif level == locking.LEVEL_NODE:
7543
      if self.op.iallocator is not None:
7544
        assert self.op.remote_node is None
7545
        assert not self.needed_locks[locking.LEVEL_NODE]
7546
        assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7547

    
7548
        # Lock member nodes of all locked groups
7549
        self.needed_locks[locking.LEVEL_NODE] = \
7550
            [node_name
7551
             for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
7552
             for node_name in self.cfg.GetNodeGroup(group_uuid).members]
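        # Locking every member of the optimistically locked groups ensures
        # that whichever node the iallocator eventually picks is already
        # locked when it is used.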
7553
      else:
7554
        assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7555

    
7556
        self._LockInstancesNodes()
7557

    
7558
    elif level == locking.LEVEL_NODE_RES:
7559
      # Reuse node locks
7560
      self.needed_locks[locking.LEVEL_NODE_RES] = \
7561
        self.needed_locks[locking.LEVEL_NODE]
7562

    
7563
  def BuildHooksEnv(self):
7564
    """Build hooks env.
7565

7566
    This runs on the master, the primary and all the secondaries.
7567

7568
    """
7569
    instance = self.replacer.instance
7570
    env = {
7571
      "MODE": self.op.mode,
7572
      "NEW_SECONDARY": self.op.remote_node,
7573
      "OLD_SECONDARY": instance.secondary_nodes[0],
7574
      }
7575
    env.update(_BuildInstanceHookEnvByObject(self, instance))
7576
    return env
7577

    
7578
  def BuildHooksNodes(self):
7579
    """Build hooks nodes.
7580

7581
    """
7582
    instance = self.replacer.instance
7583
    nl = [
7584
      self.cfg.GetMasterNode(),
7585
      instance.primary_node,
7586
      ]
7587
    if self.op.remote_node is not None:
7588
      nl.append(self.op.remote_node)
7589
    return nl, nl
7590

    
7591
  def CheckPrereq(self):
7592
    """Check prerequisites.
7593

7594
    """
7595
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
7596
            self.op.iallocator is None)
7597

    
7598
    # Verify if node group locks are still correct
7599
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7600
    if owned_groups:
7601
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
7602

    
7603
    return LogicalUnit.CheckPrereq(self)
7604

    
7605

    
7606
class TLReplaceDisks(Tasklet):
7607
  """Replaces disks for an instance.
7608

7609
  Note: Locking is not within the scope of this class.
7610

7611
  """
7612
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7613
               disks, early_release, ignore_ipolicy):
7614
    """Initializes this class.
7615

7616
    """
7617
    Tasklet.__init__(self, lu)
7618

    
7619
    # Parameters
7620
    self.instance_name = instance_name
7621
    self.mode = mode
7622
    self.iallocator_name = iallocator_name
7623
    self.remote_node = remote_node
7624
    self.disks = disks
7625
    self.early_release = early_release
7626
    self.ignore_ipolicy = ignore_ipolicy
7627

    
7628
    # Runtime data
7629
    self.instance = None
7630
    self.new_node = None
7631
    self.target_node = None
7632
    self.other_node = None
7633
    self.remote_node_info = None
7634
    self.node_secondary_ip = None
7635

    
7636
  @staticmethod
7637
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7638
    """Compute a new secondary node using an IAllocator.
7639

7640
    """
7641
    req = iallocator.IAReqRelocate(name=instance_name,
7642
                                   relocate_from=list(relocate_from))
7643
    ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
7644

    
7645
    ial.Run(iallocator_name)
7646

    
7647
    if not ial.success:
7648
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7649
                                 " %s" % (iallocator_name, ial.info),
7650
                                 errors.ECODE_NORES)
7651

    
7652
    remote_node_name = ial.result[0]
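    # For a relocation request the iallocator returns the list of chosen
    # nodes; for DRBD8 a single new secondary is expected, so only the
    # first entry is used.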
7653

    
7654
    lu.LogInfo("Selected new secondary for instance '%s': %s",
7655
               instance_name, remote_node_name)
7656

    
7657
    return remote_node_name
7658

    
7659
  def _FindFaultyDisks(self, node_name):
7660
    """Wrapper for L{_FindFaultyInstanceDisks}.
7661

7662
    """
7663
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7664
                                    node_name, True)
7665

    
7666
  def _CheckDisksActivated(self, instance):
7667
    """Checks if the instance disks are activated.
7668

7669
    @param instance: The instance to check disks
7670
    @return: True if they are activated, False otherwise
7671

7672
    """
7673
    nodes = instance.all_nodes
7674

    
7675
    for idx, dev in enumerate(instance.disks):
7676
      for node in nodes:
7677
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
7678
        self.cfg.SetDiskID(dev, node)
7679

    
7680
        result = _BlockdevFind(self, node, dev, instance)
7681

    
7682
        if result.offline:
7683
          continue
7684
        elif result.fail_msg or not result.payload:
7685
          return False
7686

    
7687
    return True
7688

    
7689
  def CheckPrereq(self):
7690
    """Check prerequisites.
7691

7692
    This checks that the instance is in the cluster.
7693

7694
    """
7695
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7696
    assert instance is not None, \
7697
      "Cannot retrieve locked instance %s" % self.instance_name
7698

    
7699
    if instance.disk_template != constants.DT_DRBD8:
7700
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7701
                                 " instances", errors.ECODE_INVAL)
7702

    
7703
    if len(instance.secondary_nodes) != 1:
7704
      raise errors.OpPrereqError("The instance has a strange layout,"
7705
                                 " expected one secondary but found %d" %
7706
                                 len(instance.secondary_nodes),
7707
                                 errors.ECODE_FAULT)
7708

    
7709
    instance = self.instance
7710
    secondary_node = instance.secondary_nodes[0]
7711

    
7712
    if self.iallocator_name is None:
7713
      remote_node = self.remote_node
7714
    else:
7715
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7716
                                       instance.name, instance.secondary_nodes)
7717

    
7718
    if remote_node is None:
7719
      self.remote_node_info = None
7720
    else:
7721
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
7722
             "Remote node '%s' is not locked" % remote_node
7723

    
7724
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7725
      assert self.remote_node_info is not None, \
7726
        "Cannot retrieve locked node %s" % remote_node
7727

    
7728
    if remote_node == self.instance.primary_node:
7729
      raise errors.OpPrereqError("The specified node is the primary node of"
7730
                                 " the instance", errors.ECODE_INVAL)
7731

    
7732
    if remote_node == secondary_node:
7733
      raise errors.OpPrereqError("The specified node is already the"
7734
                                 " secondary node of the instance",
7735
                                 errors.ECODE_INVAL)
7736

    
7737
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7738
                                    constants.REPLACE_DISK_CHG):
7739
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
7740
                                 errors.ECODE_INVAL)
7741

    
7742
    if self.mode == constants.REPLACE_DISK_AUTO:
7743
      if not self._CheckDisksActivated(instance):
7744
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
7745
                                   " first" % self.instance_name,
7746
                                   errors.ECODE_STATE)
7747
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
7748
      faulty_secondary = self._FindFaultyDisks(secondary_node)
7749

    
7750
      if faulty_primary and faulty_secondary:
7751
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7752
                                   " one node and can not be repaired"
7753
                                   " automatically" % self.instance_name,
7754
                                   errors.ECODE_STATE)
7755

    
7756
      if faulty_primary:
7757
        self.disks = faulty_primary
7758
        self.target_node = instance.primary_node
7759
        self.other_node = secondary_node
7760
        check_nodes = [self.target_node, self.other_node]
7761
      elif faulty_secondary:
7762
        self.disks = faulty_secondary
7763
        self.target_node = secondary_node
7764
        self.other_node = instance.primary_node
7765
        check_nodes = [self.target_node, self.other_node]
7766
      else:
7767
        self.disks = []
7768
        check_nodes = []
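        # Nothing is faulty on either node: leave self.disks empty so that
        # Exec() reports that no replacement is needed and returns early.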
7769

    
7770
    else:
7771
      # Non-automatic modes
7772
      if self.mode == constants.REPLACE_DISK_PRI:
7773
        self.target_node = instance.primary_node
7774
        self.other_node = secondary_node
7775
        check_nodes = [self.target_node, self.other_node]
7776

    
7777
      elif self.mode == constants.REPLACE_DISK_SEC:
7778
        self.target_node = secondary_node
7779
        self.other_node = instance.primary_node
7780
        check_nodes = [self.target_node, self.other_node]
7781

    
7782
      elif self.mode == constants.REPLACE_DISK_CHG:
7783
        self.new_node = remote_node
7784
        self.other_node = instance.primary_node
7785
        self.target_node = secondary_node
7786
        check_nodes = [self.new_node, self.other_node]
7787

    
7788
        _CheckNodeNotDrained(self.lu, remote_node)
7789
        _CheckNodeVmCapable(self.lu, remote_node)
7790

    
7791
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
7792
        assert old_node_info is not None
7793
        if old_node_info.offline and not self.early_release:
7794
          # doesn't make sense to delay the release
7795
          self.early_release = True
7796
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7797
                          " early-release mode", secondary_node)
7798

    
7799
      else:
7800
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7801
                                     self.mode)
7802

    
7803
      # If not specified, all disks should be replaced
7804
      if not self.disks:
7805
        self.disks = range(len(self.instance.disks))
7806

    
7807
    # TODO: This is ugly, but right now we can't distinguish between an
    # internally submitted opcode and an external one. We should fix that.
7809
    if self.remote_node_info:
7810
      # We change the node; let's verify it still meets the instance policy
7811
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
7812
      cluster = self.cfg.GetClusterInfo()
7813
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
7814
                                                              new_group_info)
7815
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
7816
                              self.cfg, ignore=self.ignore_ipolicy)
7817

    
7818
    for node in check_nodes:
7819
      _CheckNodeOnline(self.lu, node)
7820

    
7821
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
7822
                                                          self.other_node,
7823
                                                          self.target_node]
7824
                              if node_name is not None)
7825

    
7826
    # Release unneeded node and node resource locks
7827
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
7828
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
7829
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
7830

    
7831
    # Release any owned node group
7832
    _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
7833

    
7834
    # Check whether disks are valid
7835
    for disk_idx in self.disks:
7836
      instance.FindDisk(disk_idx)
7837

    
7838
    # Get secondary node IP addresses
7839
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
7840
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
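    # This name-to-secondary-IP mapping is used by the DRBD reconfiguration
    # steps later in this tasklet to address the peers over the replication
    # network.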
7841

    
7842
  def Exec(self, feedback_fn):
7843
    """Execute disk replacement.
7844

7845
    This dispatches the disk replacement to the appropriate handler.
7846

7847
    """
7848
    if __debug__:
7849
      # Verify owned locks before starting operation
7850
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
7851
      assert set(owned_nodes) == set(self.node_secondary_ip), \
7852
          ("Incorrect node locks, owning %s, expected %s" %
7853
           (owned_nodes, self.node_secondary_ip.keys()))
7854
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
7855
              self.lu.owned_locks(locking.LEVEL_NODE_RES))
7856
      assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7857

    
7858
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
7859
      assert list(owned_instances) == [self.instance_name], \
7860
          "Instance '%s' not locked" % self.instance_name
7861

    
7862
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
7863
          "Should not own any node group lock at this point"
7864

    
7865
    if not self.disks:
7866
      feedback_fn("No disks need replacement for instance '%s'" %
7867
                  self.instance.name)
7868
      return
7869

    
7870
    feedback_fn("Replacing disk(s) %s for instance '%s'" %
7871
                (utils.CommaJoin(self.disks), self.instance.name))
7872
    feedback_fn("Current primary node: %s" % self.instance.primary_node)
7873
    feedback_fn("Current seconary node: %s" %
7874
                utils.CommaJoin(self.instance.secondary_nodes))
7875

    
7876
    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
7877

    
7878
    # Activate the instance disks if we're replacing them on a down instance
7879
    if activate_disks:
7880
      _StartInstanceDisks(self.lu, self.instance, True)
7881

    
7882
    try:
7883
      # Should we replace the secondary node?
7884
      if self.new_node is not None:
7885
        fn = self._ExecDrbd8Secondary
7886
      else:
7887
        fn = self._ExecDrbd8DiskOnly
7888

    
7889
      result = fn(feedback_fn)
7890
    finally:
7891
      # Deactivate the instance disks if we're replacing them on a
7892
      # down instance
7893
      if activate_disks:
7894
        _SafeShutdownInstanceDisks(self.lu, self.instance)
7895

    
7896
    assert not self.lu.owned_locks(locking.LEVEL_NODE)
7897

    
7898
    if __debug__:
7899
      # Verify owned locks
7900
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
7901
      nodes = frozenset(self.node_secondary_ip)
7902
      assert ((self.early_release and not owned_nodes) or
7903
              (not self.early_release and not (set(owned_nodes) - nodes))), \
7904
        ("Not owning the correct locks, early_release=%s, owned=%r,"
7905
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
7906

    
7907
    return result
7908

    
7909
  def _CheckVolumeGroup(self, nodes):
7910
    self.lu.LogInfo("Checking volume groups")
7911

    
7912
    vgname = self.cfg.GetVGName()
7913

    
7914
    # Make sure volume group exists on all involved nodes
7915
    results = self.rpc.call_vg_list(nodes)
7916
    if not results:
7917
      raise errors.OpExecError("Can't list volume groups on the nodes")
7918

    
7919
    for node in nodes:
7920
      res = results[node]
7921
      res.Raise("Error checking node %s" % node)
7922
      if vgname not in res.payload:
7923
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
7924
                                 (vgname, node))
7925

    
7926
  def _CheckDisksExistence(self, nodes):
7927
    # Check disk existence
7928
    for idx, dev in enumerate(self.instance.disks):
7929
      if idx not in self.disks:
7930
        continue
7931

    
7932
      for node in nodes:
7933
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
7934
        self.cfg.SetDiskID(dev, node)
7935

    
7936
        result = _BlockdevFind(self, node, dev, self.instance)
7937

    
7938
        msg = result.fail_msg
7939
        if msg or not result.payload:
7940
          if not msg:
7941
            msg = "disk not found"
7942
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7943
                                   (idx, node, msg))
7944

    
7945
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7946
    for idx, dev in enumerate(self.instance.disks):
7947
      if idx not in self.disks:
7948
        continue
7949

    
7950
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7951
                      (idx, node_name))
7952

    
7953
      if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
7954
                                   on_primary, ldisk=ldisk):
7955
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7956
                                 " replace disks for instance %s" %
7957
                                 (node_name, self.instance.name))
7958

    
7959
  def _CreateNewStorage(self, node_name):
7960
    """Create new storage on the primary or secondary node.
7961

7962
    This is only used for same-node replaces, not for changing the
7963
    secondary node, hence we don't want to modify the existing disk.
7964

7965
    """
7966
    iv_names = {}
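    # Maps each DRBD device's iv_name to a (device, old LVs, new LVs)
    # tuple; the caller uses it to detach, rename and re-attach the LVs and
    # finally to remove the old storage.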
7967

    
7968
    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
7969
    for idx, dev in enumerate(disks):
7970
      if idx not in self.disks:
7971
        continue
7972

    
7973
      self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
7974

    
7975
      self.cfg.SetDiskID(dev, node_name)
7976

    
7977
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7978
      names = _GenerateUniqueNames(self.lu, lv_names)
7979

    
7980
      (data_disk, meta_disk) = dev.children
7981
      vg_data = data_disk.logical_id[0]
7982
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7983
                             logical_id=(vg_data, names[0]),
7984
                             params=data_disk.params)
7985
      vg_meta = meta_disk.logical_id[0]
7986
      lv_meta = objects.Disk(dev_type=constants.LD_LV,
7987
                             size=constants.DRBD_META_SIZE,
7988
                             logical_id=(vg_meta, names[1]),
7989
                             params=meta_disk.params)
7990

    
7991
      new_lvs = [lv_data, lv_meta]
7992
      old_lvs = [child.Copy() for child in dev.children]
7993
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7994
      excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
7995

    
7996
      # we pass force_create=True to force the LVM creation
7997
      for new_lv in new_lvs:
7998
        _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
7999
                             _GetInstanceInfoText(self.instance), False,
8000
                             excl_stor)
8001

    
8002
    return iv_names
8003

    
8004
  def _CheckDevices(self, node_name, iv_names):
8005
    for name, (dev, _, _) in iv_names.iteritems():
8006
      self.cfg.SetDiskID(dev, node_name)
8007

    
8008
      result = _BlockdevFind(self, node_name, dev, self.instance)
8009

    
8010
      msg = result.fail_msg
8011
      if msg or not result.payload:
8012
        if not msg:
8013
          msg = "disk not found"
8014
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8015
                                 (name, msg))
8016

    
8017
      if result.payload.is_degraded:
8018
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8019

    
8020
  def _RemoveOldStorage(self, node_name, iv_names):
8021
    for name, (_, old_lvs, _) in iv_names.iteritems():
8022
      self.lu.LogInfo("Remove logical volumes for %s", name)
8023

    
8024
      for lv in old_lvs:
8025
        self.cfg.SetDiskID(lv, node_name)
8026

    
8027
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8028
        if msg:
8029
          self.lu.LogWarning("Can't remove old LV: %s", msg,
8030
                             hint="remove unused LVs manually")
8031

    
8032
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
8033
    """Replace a disk on the primary or secondary for DRBD 8.
8034

8035
    The algorithm for replace is quite complicated:
8036

8037
      1. for each disk to be replaced:
8038

8039
        1. create new LVs on the target node with unique names
8040
        1. detach old LVs from the drbd device
8041
        1. rename old LVs to name_replaced.<time_t>
8042
        1. rename new LVs to old LVs
8043
        1. attach the new LVs (with the old names now) to the drbd device
8044

8045
      1. wait for sync across all devices
8046

8047
      1. for each modified disk:
8048

8049
        1. remove old LVs (which have the name name_replaces.<time_t>)
8050

8051
    Failures are not very well handled.
8052

8053
    """
8054
    steps_total = 6
8055

    
8056
    # Step: check device activation
8057
    self.lu.LogStep(1, steps_total, "Check device existence")
8058
    self._CheckDisksExistence([self.other_node, self.target_node])
8059
    self._CheckVolumeGroup([self.target_node, self.other_node])
8060

    
8061
    # Step: check other node consistency
8062
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8063
    self._CheckDisksConsistency(self.other_node,
8064
                                self.other_node == self.instance.primary_node,
8065
                                False)
8066

    
8067
    # Step: create new storage
8068
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8069
    iv_names = self._CreateNewStorage(self.target_node)
8070

    
8071
    # Step: for each lv, detach+rename*2+attach
8072
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8073
    for dev, old_lvs, new_lvs in iv_names.itervalues():
8074
      self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
8075

    
8076
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8077
                                                     old_lvs)
8078
      result.Raise("Can't detach drbd from local storage on node"
8079
                   " %s for device %s" % (self.target_node, dev.iv_name))
8080
      #dev.children = []
8081
      #cfg.Update(instance)
8082

    
8083
      # ok, we created the new LVs, so now we know we have the needed
8084
      # storage; as such, we proceed on the target node to rename
8085
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8086
      # using the assumption that logical_id == physical_id (which in
8087
      # turn is the unique_id on that node)
8088

    
8089
      # FIXME(iustin): use a better name for the replaced LVs
8090
      temp_suffix = int(time.time())
8091
      ren_fn = lambda d, suff: (d.physical_id[0],
8092
                                d.physical_id[1] + "_replaced-%s" % suff)
8093

    
8094
      # Build the rename list based on what LVs exist on the node
8095
      rename_old_to_new = []
8096
      for to_ren in old_lvs:
8097
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8098
        if not result.fail_msg and result.payload:
8099
          # device exists
8100
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8101

    
8102
      self.lu.LogInfo("Renaming the old LVs on the target node")
8103
      result = self.rpc.call_blockdev_rename(self.target_node,
8104
                                             rename_old_to_new)
8105
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
8106

    
8107
      # Now we rename the new LVs to the old LVs
8108
      self.lu.LogInfo("Renaming the new LVs on the target node")
8109
      rename_new_to_old = [(new, old.physical_id)
8110
                           for old, new in zip(old_lvs, new_lvs)]
8111
      result = self.rpc.call_blockdev_rename(self.target_node,
8112
                                             rename_new_to_old)
8113
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
8114

    
8115
      # Intermediate steps of in memory modifications
8116
      for old, new in zip(old_lvs, new_lvs):
8117
        new.logical_id = old.logical_id
8118
        self.cfg.SetDiskID(new, self.target_node)
8119

    
8120
      # We need to modify old_lvs so that removal later removes the
8121
      # right LVs, not the newly added ones; note that old_lvs is a
8122
      # copy here
8123
      for disk in old_lvs:
8124
        disk.logical_id = ren_fn(disk, temp_suffix)
8125
        self.cfg.SetDiskID(disk, self.target_node)
8126

    
8127
      # Now that the new lvs have the old name, we can add them to the device
8128
      self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
8129
      result = self.rpc.call_blockdev_addchildren(self.target_node,
8130
                                                  (dev, self.instance), new_lvs)
8131
      msg = result.fail_msg
8132
      if msg:
8133
        for new_lv in new_lvs:
8134
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
8135
                                               new_lv).fail_msg
8136
          if msg2:
8137
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8138
                               hint=("cleanup manually the unused logical"
8139
                                     "volumes"))
8140
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8141

    
8142
    cstep = itertools.count(5)
8143

    
8144
    if self.early_release:
8145
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8146
      self._RemoveOldStorage(self.target_node, iv_names)
8147
      # TODO: Check if releasing locks early still makes sense
8148
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
8149
    else:
8150
      # Release all resource locks except those used by the instance
8151
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
8152
                    keep=self.node_secondary_ip.keys())
8153

    
8154
    # Release all node locks while waiting for sync
8155
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
8156

    
8157
    # TODO: Can the instance lock be downgraded here? Take the optional disk
8158
    # shutdown in the caller into consideration.
8159

    
8160
    # Wait for sync
8161
    # This can fail as the old devices are degraded and _WaitForSync
8162
    # does a combined result over all disks, so we don't check its return value
8163
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
8164
    _WaitForSync(self.lu, self.instance)
8165

    
8166
    # Check all devices manually
8167
    self._CheckDevices(self.instance.primary_node, iv_names)
8168

    
8169
    # Step: remove old storage
8170
    if not self.early_release:
8171
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8172
      self._RemoveOldStorage(self.target_node, iv_names)
8173

    
8174
  def _ExecDrbd8Secondary(self, feedback_fn):
8175
    """Replace the secondary node for DRBD 8.
8176

8177
    The algorithm for replace is quite complicated:
8178
      - for all disks of the instance:
8179
        - create new LVs on the new node with same names
8180
        - shutdown the drbd device on the old secondary
8181
        - disconnect the drbd network on the primary
8182
        - create the drbd device on the new secondary
8183
        - network attach the drbd on the primary, using an artifice:
8184
          the drbd code for Attach() will connect to the network if it
8185
          finds a device which is connected to the good local disks but
8186
          not network enabled
8187
      - wait for sync across all devices
8188
      - remove all disks from the old secondary
8189

8190
    Failures are not very well handled.
8191

8192
    """
8193
    steps_total = 6
8194

    
8195
    pnode = self.instance.primary_node
8196

    
8197
    # Step: check device activation
8198
    self.lu.LogStep(1, steps_total, "Check device existence")
8199
    self._CheckDisksExistence([self.instance.primary_node])
8200
    self._CheckVolumeGroup([self.instance.primary_node])
8201

    
8202
    # Step: check other node consistency
8203
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8204
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8205

    
8206
    # Step: create new storage
8207
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8208
    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
8209
    excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
8210
    for idx, dev in enumerate(disks):
8211
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8212
                      (self.new_node, idx))
8213
      # we pass force_create=True to force LVM creation
8214
      for new_lv in dev.children:
8215
        _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
8216
                             True, _GetInstanceInfoText(self.instance), False,
8217
                             excl_stor)
8218

    
8219
    # Step 4: drbd minors and drbd setup changes
8220
    # after this, we must manually remove the drbd minors on both the
8221
    # error and the success paths
8222
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8223
    minors = self.cfg.AllocateDRBDMinor([self.new_node
8224
                                         for dev in self.instance.disks],
8225
                                        self.instance.name)
8226
    logging.debug("Allocated minors %r", minors)
8227

    
8228
    iv_names = {}
8229
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8230
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8231
                      (self.new_node, idx))
8232
      # create new devices on new_node; note that we create two IDs:
8233
      # one without port, so the drbd will be activated without
8234
      # networking information on the new node at this stage, and one
8235
      # with network, for the latter activation in step 4
8236
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8237
      if self.instance.primary_node == o_node1:
8238
        p_minor = o_minor1
8239
      else:
8240
        assert self.instance.primary_node == o_node2, "Three-node instance?"
8241
        p_minor = o_minor2
8242

    
8243
      new_alone_id = (self.instance.primary_node, self.new_node, None,
8244
                      p_minor, new_minor, o_secret)
8245
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
8246
                    p_minor, new_minor, o_secret)
8247

    
8248
      iv_names[idx] = (dev, dev.children, new_net_id)
8249
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8250
                    new_net_id)
8251
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8252
                              logical_id=new_alone_id,
8253
                              children=dev.children,
8254
                              size=dev.size,
8255
                              params={})
8256
      (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
8257
                                             self.cfg)
8258
      try:
8259
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
8260
                              anno_new_drbd,
8261
                              _GetInstanceInfoText(self.instance), False,
8262
                              excl_stor)
8263
      except errors.GenericError:
8264
        self.cfg.ReleaseDRBDMinors(self.instance.name)
8265
        raise
8266

    
8267
    # We have new devices, shutdown the drbd on the old secondary
8268
    for idx, dev in enumerate(self.instance.disks):
8269
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
8270
      self.cfg.SetDiskID(dev, self.target_node)
8271
      msg = self.rpc.call_blockdev_shutdown(self.target_node,
8272
                                            (dev, self.instance)).fail_msg
8273
      if msg:
8274
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8275
                           "node: %s" % (idx, msg),
8276
                           hint=("Please cleanup this device manually as"
8277
                                 " soon as possible"))
8278

    
8279
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8280
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
8281
                                               self.instance.disks)[pnode]
8282

    
8283
    msg = result.fail_msg
8284
    if msg:
8285
      # detaches didn't succeed (unlikely)
8286
      self.cfg.ReleaseDRBDMinors(self.instance.name)
8287
      raise errors.OpExecError("Can't detach the disks from the network on"
8288
                               " old node: %s" % (msg,))
8289

    
8290
    # if we managed to detach at least one, we update all the disks of
8291
    # the instance to point to the new secondary
8292
    self.lu.LogInfo("Updating instance configuration")
8293
    for dev, _, new_logical_id in iv_names.itervalues():
8294
      dev.logical_id = new_logical_id
8295
      self.cfg.SetDiskID(dev, self.instance.primary_node)
8296

    
8297
    self.cfg.Update(self.instance, feedback_fn)
8298

    
8299
    # Release all node locks (the configuration has been updated)
8300
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
8301

    
8302
    # and now perform the drbd attach
8303
    self.lu.LogInfo("Attaching primary drbds to new secondary"
8304
                    " (standalone => connected)")
8305
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8306
                                            self.new_node],
8307
                                           self.node_secondary_ip,
8308
                                           (self.instance.disks, self.instance),
8309
                                           self.instance.name,
8310
                                           False)
8311
    for to_node, to_result in result.items():
8312
      msg = to_result.fail_msg
8313
      if msg:
8314
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8315
                           to_node, msg,
8316
                           hint=("please do a gnt-instance info to see the"
8317
                                 " status of disks"))
8318

    
8319
    cstep = itertools.count(5)
8320

    
8321
    if self.early_release:
8322
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8323
      self._RemoveOldStorage(self.target_node, iv_names)
8324
      # TODO: Check if releasing locks early still makes sense
8325
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
8326
    else:
8327
      # Release all resource locks except those used by the instance
8328
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
8329
                    keep=self.node_secondary_ip.keys())
8330

    
8331
    # TODO: Can the instance lock be downgraded here? Take the optional disk
8332
    # shutdown in the caller into consideration.
8333

    
8334
    # Wait for sync
8335
    # This can fail as the old devices are degraded and _WaitForSync
8336
    # does a combined result over all disks, so we don't check its return value
8337
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
8338
    _WaitForSync(self.lu, self.instance)
8339

    
8340
    # Check all devices manually
8341
    self._CheckDevices(self.instance.primary_node, iv_names)
8342

    
8343
    # Step: remove old storage
8344
    if not self.early_release:
8345
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8346
      self._RemoveOldStorage(self.target_node, iv_names)
8347

    
8348

    
8349
class LURepairNodeStorage(NoHooksLU):
8350
  """Repairs the volume group on a node.
8351

8352
  """
8353
  REQ_BGL = False
8354

    
8355
  def CheckArguments(self):
8356
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8357

    
8358
    storage_type = self.op.storage_type
8359

    
8360
    if (constants.SO_FIX_CONSISTENCY not in
8361
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8362
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
8363
                                 " repaired" % storage_type,
8364
                                 errors.ECODE_INVAL)
8365

    
8366
  def ExpandNames(self):
8367
    self.needed_locks = {
8368
      locking.LEVEL_NODE: [self.op.node_name],
8369
      }
8370

    
8371
  def _CheckFaultyDisks(self, instance, node_name):
8372
    """Ensure faulty disks abort the opcode or at least warn."""
8373
    try:
8374
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8375
                                  node_name, True):
8376
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8377
                                   " node '%s'" % (instance.name, node_name),
8378
                                   errors.ECODE_STATE)
8379
    except errors.OpPrereqError, err:
8380
      if self.op.ignore_consistency:
8381
        self.LogWarning(str(err.args[0]))
8382
      else:
8383
        raise
8384

    
8385
  def CheckPrereq(self):
8386
    """Check prerequisites.
8387

8388
    """
8389
    # Check whether any instance on this node has faulty disks
8390
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8391
      if inst.admin_state != constants.ADMINST_UP:
8392
        continue
8393
      check_nodes = set(inst.all_nodes)
8394
      check_nodes.discard(self.op.node_name)
8395
      for inst_node_name in check_nodes:
8396
        self._CheckFaultyDisks(inst, inst_node_name)
8397

    
8398
  def Exec(self, feedback_fn):
8399
    feedback_fn("Repairing storage unit '%s' on %s ..." %
8400
                (self.op.name, self.op.node_name))
8401

    
8402
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8403
    result = self.rpc.call_storage_execute(self.op.node_name,
8404
                                           self.op.storage_type, st_args,
8405
                                           self.op.name,
8406
                                           constants.SO_FIX_CONSISTENCY)
8407
    result.Raise("Failed to repair storage unit '%s' on %s" %
8408
                 (self.op.name, self.op.node_name))
8409

    
8410

    
8411
class LUNodeEvacuate(NoHooksLU):
8412
  """Evacuates instances off a list of nodes.
8413

8414
  """
8415
  REQ_BGL = False
8416

    
8417
  _MODE2IALLOCATOR = {
8418
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
8419
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
8420
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
8421
    }
8422
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
8423
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
8424
          constants.IALLOCATOR_NEVAC_MODES)
8425

    
8426
  def CheckArguments(self):
8427
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8428

    
8429
  def ExpandNames(self):
8430
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8431

    
8432
    if self.op.remote_node is not None:
8433
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8434
      assert self.op.remote_node
8435

    
8436
      if self.op.remote_node == self.op.node_name:
8437
        raise errors.OpPrereqError("Can not use evacuated node as a new"
8438
                                   " secondary node", errors.ECODE_INVAL)
8439

    
8440
      if self.op.mode != constants.NODE_EVAC_SEC:
8441
        raise errors.OpPrereqError("Without the use of an iallocator only"
8442
                                   " secondary instances can be evacuated",
8443
                                   errors.ECODE_INVAL)
8444

    
8445
    # Declare locks
8446
    self.share_locks = _ShareAll()
8447
    self.needed_locks = {
8448
      locking.LEVEL_INSTANCE: [],
8449
      locking.LEVEL_NODEGROUP: [],
8450
      locking.LEVEL_NODE: [],
8451
      }
8452

    
8453
    # Determine nodes (via group) optimistically, needs verification once locks
8454
    # have been acquired
8455
    self.lock_nodes = self._DetermineNodes()
8456

    
8457
  def _DetermineNodes(self):
8458
    """Gets the list of nodes to operate on.
8459

8460
    """
8461
    if self.op.remote_node is None:
8462
      # Iallocator will choose any node(s) in the same group
8463
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
8464
    else:
8465
      group_nodes = frozenset([self.op.remote_node])
8466

    
8467
    # Determine nodes to be locked
8468
    return set([self.op.node_name]) | group_nodes
8469

    
8470
  def _DetermineInstances(self):
8471
    """Builds list of instances to operate on.
8472

8473
    """
8474
    assert self.op.mode in constants.NODE_EVAC_MODES
8475

    
8476
    if self.op.mode == constants.NODE_EVAC_PRI:
8477
      # Primary instances only
8478
      inst_fn = _GetNodePrimaryInstances
8479
      assert self.op.remote_node is None, \
8480
        "Evacuating primary instances requires iallocator"
8481
    elif self.op.mode == constants.NODE_EVAC_SEC:
8482
      # Secondary instances only
8483
      inst_fn = _GetNodeSecondaryInstances
8484
    else:
8485
      # All instances
8486
      assert self.op.mode == constants.NODE_EVAC_ALL
8487
      inst_fn = _GetNodeInstances
8488
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
8489
      # per instance
8490
      raise errors.OpPrereqError("Due to an issue with the iallocator"
8491
                                 " interface it is not possible to evacuate"
8492
                                 " all instances at once; specify explicitly"
8493
                                 " whether to evacuate primary or secondary"
8494
                                 " instances",
8495
                                 errors.ECODE_INVAL)
8496

    
8497
    return inst_fn(self.cfg, self.op.node_name)
8498

    
8499
  def DeclareLocks(self, level):
8500
    if level == locking.LEVEL_INSTANCE:
8501
      # Lock instances optimistically, needs verification once node and group
8502
      # locks have been acquired
8503
      self.needed_locks[locking.LEVEL_INSTANCE] = \
8504
        set(i.name for i in self._DetermineInstances())
8505

    
8506
    elif level == locking.LEVEL_NODEGROUP:
8507
      # Lock node groups for all potential target nodes optimistically, needs
8508
      # verification once nodes have been acquired
8509
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
8510
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
8511

    
8512
    elif level == locking.LEVEL_NODE:
8513
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
8514

    
8515
  def CheckPrereq(self):
8516
    # Verify locks
8517
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
8518
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
8519
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
8520

    
8521
    need_nodes = self._DetermineNodes()
8522

    
8523
    if not owned_nodes.issuperset(need_nodes):
8524
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
8525
                                 " locks were acquired, current nodes are"
8526
                                 " are '%s', used to be '%s'; retry the"
8527
                                 " operation" %
8528
                                 (self.op.node_name,
8529
                                  utils.CommaJoin(need_nodes),
8530
                                  utils.CommaJoin(owned_nodes)),
8531
                                 errors.ECODE_STATE)
8532

    
8533
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
8534
    if owned_groups != wanted_groups:
8535
      raise errors.OpExecError("Node groups changed since locks were acquired,"
8536
                               " current groups are '%s', used to be '%s';"
8537
                               " retry the operation" %
8538
                               (utils.CommaJoin(wanted_groups),
8539
                                utils.CommaJoin(owned_groups)))
8540

    
8541
    # Determine affected instances
8542
    self.instances = self._DetermineInstances()
8543
    self.instance_names = [i.name for i in self.instances]
8544

    
8545
    if set(self.instance_names) != owned_instances:
8546
      raise errors.OpExecError("Instances on node '%s' changed since locks"
8547
                               " were acquired, current instances are '%s',"
8548
                               " used to be '%s'; retry the operation" %
8549
                               (self.op.node_name,
8550
                                utils.CommaJoin(self.instance_names),
8551
                                utils.CommaJoin(owned_instances)))
8552

    
8553
    if self.instance_names:
8554
      self.LogInfo("Evacuating instances from node '%s': %s",
8555
                   self.op.node_name,
8556
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
8557
    else:
8558
      self.LogInfo("No instances to evacuate from node '%s'",
8559
                   self.op.node_name)
8560

    
8561
    if self.op.remote_node is not None:
8562
      for i in self.instances:
8563
        if i.primary_node == self.op.remote_node:
8564
          raise errors.OpPrereqError("Node %s is the primary node of"
8565
                                     " instance %s, cannot use it as"
8566
                                     " secondary" %
8567
                                     (self.op.remote_node, i.name),
8568
                                     errors.ECODE_INVAL)
8569

    
8570
  def Exec(self, feedback_fn):
8571
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
8572

    
8573
    if not self.instance_names:
8574
      # No instances to evacuate
8575
      jobs = []
8576

    
8577
    elif self.op.iallocator is not None:
8578
      # TODO: Implement relocation to other group
8579
      evac_mode = self._MODE2IALLOCATOR[self.op.mode]
8580
      req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
8581
                                     instances=list(self.instance_names))
8582
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8583

    
8584
      ial.Run(self.op.iallocator)
8585

    
8586
      if not ial.success:
8587
        raise errors.OpPrereqError("Can't compute node evacuation using"
8588
                                   " iallocator '%s': %s" %
8589
                                   (self.op.iallocator, ial.info),
8590
                                   errors.ECODE_NORES)
8591

    
8592
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
8593

    
8594
    elif self.op.remote_node is not None:
8595
      assert self.op.mode == constants.NODE_EVAC_SEC
8596
      jobs = [
8597
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
8598
                                        remote_node=self.op.remote_node,
8599
                                        disks=[],
8600
                                        mode=constants.REPLACE_DISK_CHG,
8601
                                        early_release=self.op.early_release)]
8602
        for instance_name in self.instance_names]
8603

    
8604
    else:
8605
      raise errors.ProgrammerError("No iallocator or remote node")
8606

    
8607
    return ResultWithJobs(jobs)
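# Sketch of the job set built above when a remote node is given (instance and
# node names invented, some fields omitted): one single-opcode job per
# evacuated instance, e.g.
#
#   jobs = [
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst1", disks=[],
#                                     mode=constants.REPLACE_DISK_CHG,
#                                     remote_node="node3")],
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst2", ...)],
#   ]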
8608

    
8609

    
8610
def _SetOpEarlyRelease(early_release, op):
8611
  """Sets C{early_release} flag on opcodes if available.
8612

8613
  """
8614
  try:
8615
    op.early_release = early_release
8616
  except AttributeError:
8617
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
8618

    
8619
  return op
8620

    
8621

    
8622
def _NodeEvacDest(use_nodes, group, nodes):
8623
  """Returns group or nodes depending on caller's choice.
8624

8625
  """
8626
  if use_nodes:
8627
    return utils.CommaJoin(nodes)
8628
  else:
8629
    return group
8630

    
8631

    
8632
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
8633
  """Unpacks the result of change-group and node-evacuate iallocator requests.
8634

8635
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
8636
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
8637

8638
  @type lu: L{LogicalUnit}
8639
  @param lu: Logical unit instance
8640
  @type alloc_result: tuple/list
8641
  @param alloc_result: Result from iallocator
8642
  @type early_release: bool
8643
  @param early_release: Whether to release locks early if possible
8644
  @type use_nodes: bool
8645
  @param use_nodes: Whether to display node names instead of groups
8646

8647
  """
8648
  (moved, failed, jobs) = alloc_result
8649

    
8650
  if failed:
8651
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
8652
                                 for (name, reason) in failed)
8653
    lu.LogWarning("Unable to evacuate instances %s", failreason)
8654
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
8655

    
8656
  if moved:
8657
    lu.LogInfo("Instances to be moved: %s",
8658
               utils.CommaJoin("%s (to %s)" %
8659
                               (name, _NodeEvacDest(use_nodes, group, nodes))
8660
                               for (name, group, nodes) in moved))
8661

    
8662
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
8663
              map(opcodes.OpCode.LoadOpCode, ops))
8664
          for ops in jobs]
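# Shape sketch of the iallocator result consumed above (values invented for
# illustration):
#
#   alloc_result = (
#     [("inst1", "group-uuid", ["node2"])],   # moved: (name, group, nodes)
#     [],                                     # failed: (name, reason) pairs
#     [[<serialized opcode dict>, ...]],      # jobs: one opcode list per job
#   )
#
# Any entry in "failed" aborts the operation; each serialized opcode is
# re-created via opcodes.OpCode.LoadOpCode and, where supported, gets its
# early_release flag set.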
8665

    
8666

    
8667
def _DiskSizeInBytesToMebibytes(lu, size):
8668
  """Converts a disk size in bytes to mebibytes.
8669

8670
  Warns and rounds up if the size isn't an even multiple of 1 MiB.
8671

8672
  """
8673
  (mib, remainder) = divmod(size, 1024 * 1024)
8674

    
8675
  if remainder != 0:
8676
    lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
8677
                  " to not overwrite existing data (%s bytes will not be"
8678
                  " wiped)", (1024 * 1024) - remainder)
8679
    mib += 1
8680

    
8681
  return mib
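# Worked example for the rounding above (hypothetical request): a size of
# 1073742336 bytes is 1024 MiB plus 512 bytes, so divmod() leaves a non-zero
# remainder and the caller gets 1025 MiB back, together with a warning that
# the trailing 1048064 bytes will not be wiped.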
8682

    
8683

    
8684
class LUInstanceGrowDisk(LogicalUnit):
8685
  """Grow a disk of an instance.
8686

8687
  """
8688
  HPATH = "disk-grow"
8689
  HTYPE = constants.HTYPE_INSTANCE
8690
  REQ_BGL = False
8691

    
8692
  def ExpandNames(self):
8693
    self._ExpandAndLockInstance()
8694
    self.needed_locks[locking.LEVEL_NODE] = []
8695
    self.needed_locks[locking.LEVEL_NODE_RES] = []
8696
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8697
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8698

    
8699
  def DeclareLocks(self, level):
8700
    if level == locking.LEVEL_NODE:
8701
      self._LockInstancesNodes()
8702
    elif level == locking.LEVEL_NODE_RES:
8703
      # Copy node locks
8704
      self.needed_locks[locking.LEVEL_NODE_RES] = \
8705
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8706

    
8707
  def BuildHooksEnv(self):
8708
    """Build hooks env.
8709

8710
    This runs on the master, the primary and all the secondaries.
8711

8712
    """
8713
    env = {
8714
      "DISK": self.op.disk,
8715
      "AMOUNT": self.op.amount,
8716
      "ABSOLUTE": self.op.absolute,
8717
      }
8718
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8719
    return env
8720

    
8721
  def BuildHooksNodes(self):
8722
    """Build hooks nodes.
8723

8724
    """
8725
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8726
    return (nl, nl)
8727

    
8728
  def CheckPrereq(self):
8729
    """Check prerequisites.
8730

8731
    This checks that the instance is in the cluster.
8732

8733
    """
8734
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8735
    assert instance is not None, \
8736
      "Cannot retrieve locked instance %s" % self.op.instance_name
8737
    nodenames = list(instance.all_nodes)
8738
    for node in nodenames:
8739
      _CheckNodeOnline(self, node)
8740

    
8741
    self.instance = instance
8742

    
8743
    if instance.disk_template not in constants.DTS_GROWABLE:
8744
      raise errors.OpPrereqError("Instance's disk layout does not support"
8745
                                 " growing", errors.ECODE_INVAL)
8746

    
8747
    self.disk = instance.FindDisk(self.op.disk)
8748

    
8749
    if self.op.absolute:
8750
      self.target = self.op.amount
8751
      self.delta = self.target - self.disk.size
8752
      if self.delta < 0:
8753
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
8754
                                   "current disk size (%s)" %
8755
                                   (utils.FormatUnit(self.target, "h"),
8756
                                    utils.FormatUnit(self.disk.size, "h")),
8757
                                   errors.ECODE_STATE)
8758
    else:
8759
      self.delta = self.op.amount
8760
      self.target = self.disk.size + self.delta
8761
      if self.delta < 0:
8762
        raise errors.OpPrereqError("Requested increment (%s) is negative" %
8763
                                   utils.FormatUnit(self.delta, "h"),
8764
                                   errors.ECODE_INVAL)
8765

    
8766
    self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
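  # Worked example for the two modes above (hypothetical sizes, in MiB as
  # elsewhere): for a 10240 MiB disk, absolute mode with amount=15360 gives
  # delta=5120 and target=15360, while relative mode with amount=5120
  # produces exactly the same target.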
8767

    
8768
  def _CheckDiskSpace(self, nodenames, req_vgspace):
8769
    template = self.instance.disk_template
8770
    if template not in constants.DTS_NO_FREE_SPACE_CHECK:
8771
      # TODO: check the free disk space for file, when that feature will be
8772
      # supported
8773
      nodes = map(self.cfg.GetNodeInfo, nodenames)
8774
      es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
8775
                        nodes)
8776
      if es_nodes:
8777
        # With exclusive storage we need to do something smarter than just
        # looking at free space; for now, let's simply abort the operation.
8779
        raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
8780
                                   " is enabled", errors.ECODE_STATE)
8781
      _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
8782

    
8783
  def Exec(self, feedback_fn):
8784
    """Execute disk grow.
8785

8786
    """
8787
    instance = self.instance
8788
    disk = self.disk
8789

    
8790
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
8791
    assert (self.owned_locks(locking.LEVEL_NODE) ==
8792
            self.owned_locks(locking.LEVEL_NODE_RES))
8793

    
8794
    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
8795

    
8796
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8797
    if not disks_ok:
8798
      raise errors.OpExecError("Cannot activate block device to grow")
8799

    
8800
    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
8801
                (self.op.disk, instance.name,
8802
                 utils.FormatUnit(self.delta, "h"),
8803
                 utils.FormatUnit(self.target, "h")))
8804

    
8805
    # First run all grow ops in dry-run mode
8806
    for node in instance.all_nodes:
8807
      self.cfg.SetDiskID(disk, node)
8808
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
8809
                                           True, True)
8810
      result.Raise("Dry-run grow request failed to node %s" % node)
8811

    
8812
    if wipe_disks:
8813
      # Get disk size from primary node for wiping
8814
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
8815
      result.Raise("Failed to retrieve disk size from node '%s'" %
8816
                   instance.primary_node)
8817

    
8818
      (disk_size_in_bytes, ) = result.payload
8819

    
8820
      if disk_size_in_bytes is None:
8821
        raise errors.OpExecError("Failed to retrieve disk size from primary"
8822
                                 " node '%s'" % instance.primary_node)
8823

    
8824
      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
8825

    
8826
      assert old_disk_size >= disk.size, \
8827
        ("Retrieved disk size too small (got %s, should be at least %s)" %
8828
         (old_disk_size, disk.size))
8829
    else:
8830
      old_disk_size = None
8831

    
8832
    # We know that (as far as we can test) operations across different
8833
    # nodes will succeed, time to run it for real on the backing storage
8834
    for node in instance.all_nodes:
8835
      self.cfg.SetDiskID(disk, node)
8836
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
8837
                                           False, True)
8838
      result.Raise("Grow request failed to node %s" % node)
8839

    
8840
    # And now execute it for logical storage, on the primary node
8841
    node = instance.primary_node
8842
    self.cfg.SetDiskID(disk, node)
8843
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
8844
                                         False, False)
8845
    result.Raise("Grow request failed to node %s" % node)
8846

    
8847
    disk.RecordGrow(self.delta)
8848
    self.cfg.Update(instance, feedback_fn)
8849

    
8850
    # Changes have been recorded, release node lock
8851
    _ReleaseLocks(self, locking.LEVEL_NODE)
8852

    
8853
    # Downgrade lock while waiting for sync
8854
    self.glm.downgrade(locking.LEVEL_INSTANCE)
8855

    
8856
    assert wipe_disks ^ (old_disk_size is None)
8857

    
8858
    if wipe_disks:
8859
      assert instance.disks[self.op.disk] == disk
8860

    
8861
      # Wipe newly added disk space
8862
      _WipeDisks(self, instance,
8863
                 disks=[(self.op.disk, disk, old_disk_size)])
8864

    
8865
    if self.op.wait_for_sync:
8866
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
8867
      if disk_abort:
8868
        self.LogWarning("Disk syncing has not returned a good status; check"
8869
                        " the instance")
8870
      if instance.admin_state != constants.ADMINST_UP:
8871
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8872
    elif instance.admin_state != constants.ADMINST_UP:
8873
      self.LogWarning("Not shutting down the disk even if the instance is"
8874
                      " not supposed to be running because no wait for"
8875
                      " sync mode was requested")
8876

    
8877
    assert self.owned_locks(locking.LEVEL_NODE_RES)
8878
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
8879

    
8880

    
8881
class LUInstanceQueryData(NoHooksLU):
8882
  """Query runtime instance data.
8883

8884
  """
8885
  REQ_BGL = False
8886

    
8887
  def ExpandNames(self):
8888
    self.needed_locks = {}
8889

    
8890
    # Use locking if requested or when non-static information is wanted
8891
    if not (self.op.static or self.op.use_locking):
8892
      self.LogWarning("Non-static data requested, locks need to be acquired")
8893
      self.op.use_locking = True
8894

    
8895
    if self.op.instances or not self.op.use_locking:
8896
      # Expand instance names right here
8897
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
8898
    else:
8899
      # Will use acquired locks
8900
      self.wanted_names = None
8901

    
8902
    if self.op.use_locking:
8903
      self.share_locks = _ShareAll()
8904

    
8905
      if self.wanted_names is None:
8906
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8907
      else:
8908
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8909

    
8910
      self.needed_locks[locking.LEVEL_NODEGROUP] = []
8911
      self.needed_locks[locking.LEVEL_NODE] = []
8912
      self.needed_locks[locking.LEVEL_NETWORK] = []
8913
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8914

    
8915
  def DeclareLocks(self, level):
8916
    if self.op.use_locking:
8917
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
8918
      if level == locking.LEVEL_NODEGROUP:
8919

    
8920
        # Lock all groups used by instances optimistically; this requires going
8921
        # via the node before it's locked, requiring verification later on
8922
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
8923
          frozenset(group_uuid
8924
                    for instance_name in owned_instances
8925
                    for group_uuid in
8926
                      self.cfg.GetInstanceNodeGroups(instance_name))
8927

    
8928
      elif level == locking.LEVEL_NODE:
8929
        self._LockInstancesNodes()
8930

    
8931
      elif level == locking.LEVEL_NETWORK:
8932
        self.needed_locks[locking.LEVEL_NETWORK] = \
8933
          frozenset(net_uuid
8934
                    for instance_name in owned_instances
8935
                    for net_uuid in
8936
                       self.cfg.GetInstanceNetworks(instance_name))
8937

    
8938
  def CheckPrereq(self):
8939
    """Check prerequisites.
8940

8941
    This only checks the optional instance list against the existing names.
8942

8943
    """
8944
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
8945
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
8946
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
8947
    owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
8948

    
8949
    if self.wanted_names is None:
8950
      assert self.op.use_locking, "Locking was not used"
8951
      self.wanted_names = owned_instances
8952

    
8953
    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
8954

    
8955
    if self.op.use_locking:
8956
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
8957
                                None)
8958
    else:
8959
      assert not (owned_instances or owned_groups or
8960
                  owned_nodes or owned_networks)
8961

    
8962
    self.wanted_instances = instances.values()
8963

    
8964
  def _ComputeBlockdevStatus(self, node, instance, dev):
8965
    """Returns the status of a block device
8966

8967
    """
8968
    if self.op.static or not node:
8969
      return None
8970

    
8971
    self.cfg.SetDiskID(dev, node)
8972

    
8973
    result = self.rpc.call_blockdev_find(node, dev)
8974
    if result.offline:
8975
      return None
8976

    
8977
    result.Raise("Can't compute disk status for %s" % instance.name)
8978

    
8979
    status = result.payload
8980
    if status is None:
8981
      return None
8982

    
8983
    return (status.dev_path, status.major, status.minor,
8984
            status.sync_percent, status.estimated_time,
8985
            status.is_degraded, status.ldisk_status)
8986

    
8987
  def _ComputeDiskStatus(self, instance, snode, dev):
8988
    """Compute block device status.
8989

8990
    """
8991
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
8992

    
8993
    return self._ComputeDiskStatusInner(instance, snode, anno_dev)
8994

    
8995
  def _ComputeDiskStatusInner(self, instance, snode, dev):
8996
    """Compute block device status.
8997

8998
    @attention: The device has to be annotated already.
8999

9000
    """
9001
    if dev.dev_type in constants.LDS_DRBD:
9002
      # we change the snode then (otherwise we use the one passed in)
9003
      if dev.logical_id[0] == instance.primary_node:
9004
        snode = dev.logical_id[1]
9005
      else:
9006
        snode = dev.logical_id[0]
9007

    
9008
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9009
                                              instance, dev)
9010
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
9011

    
9012
    if dev.children:
9013
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
9014
                                        instance, snode),
9015
                         dev.children)
9016
    else:
9017
      dev_children = []
9018

    
9019
    return {
9020
      "iv_name": dev.iv_name,
9021
      "dev_type": dev.dev_type,
9022
      "logical_id": dev.logical_id,
9023
      "physical_id": dev.physical_id,
9024
      "pstatus": dev_pstatus,
9025
      "sstatus": dev_sstatus,
9026
      "children": dev_children,
9027
      "mode": dev.mode,
9028
      "size": dev.size,
9029
      "name": dev.name,
9030
      "uuid": dev.uuid,
9031
      }
9032

    
9033
  def Exec(self, feedback_fn):
9034
    """Gather and return data"""
9035
    result = {}
9036

    
9037
    cluster = self.cfg.GetClusterInfo()
9038

    
9039
    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
9040
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
9041

    
9042
    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
9043
                                                 for node in nodes.values()))
9044

    
9045
    group2name_fn = lambda uuid: groups[uuid].name
9046
    for instance in self.wanted_instances:
9047
      pnode = nodes[instance.primary_node]
9048

    
9049
      if self.op.static or pnode.offline:
9050
        remote_state = None
9051
        if pnode.offline:
9052
          self.LogWarning("Primary node %s is marked offline, returning static"
9053
                          " information only for instance %s" %
9054
                          (pnode.name, instance.name))
9055
      else:
9056
        remote_info = self.rpc.call_instance_info(instance.primary_node,
9057
                                                  instance.name,
9058
                                                  instance.hypervisor)
9059
        remote_info.Raise("Error checking node %s" % instance.primary_node)
9060
        remote_info = remote_info.payload
9061
        if remote_info and "state" in remote_info:
9062
          remote_state = "up"
9063
        else:
9064
          if instance.admin_state == constants.ADMINST_UP:
9065
            remote_state = "down"
9066
          else:
9067
            remote_state = instance.admin_state
9068

    
9069
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
9070
                  instance.disks)
9071

    
9072
      snodes_group_uuids = [nodes[snode_name].group
9073
                            for snode_name in instance.secondary_nodes]
9074

    
9075
      result[instance.name] = {
9076
        "name": instance.name,
9077
        "config_state": instance.admin_state,
9078
        "run_state": remote_state,
9079
        "pnode": instance.primary_node,
9080
        "pnode_group_uuid": pnode.group,
9081
        "pnode_group_name": group2name_fn(pnode.group),
9082
        "snodes": instance.secondary_nodes,
9083
        "snodes_group_uuids": snodes_group_uuids,
9084
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
9085
        "os": instance.os,
9086
        # this happens to be the same format used for hooks
9087
        "nics": _NICListToTuple(self, instance.nics),
9088
        "disk_template": instance.disk_template,
9089
        "disks": disks,
9090
        "hypervisor": instance.hypervisor,
9091
        "network_port": instance.network_port,
9092
        "hv_instance": instance.hvparams,
9093
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
9094
        "be_instance": instance.beparams,
9095
        "be_actual": cluster.FillBE(instance),
9096
        "os_instance": instance.osparams,
9097
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9098
        "serial_no": instance.serial_no,
9099
        "mtime": instance.mtime,
9100
        "ctime": instance.ctime,
9101
        "uuid": instance.uuid,
9102
        }
9103

    
9104
    return result
9105

    
9106

    
9107
def PrepareContainerMods(mods, private_fn):
9108
  """Prepares a list of container modifications by adding a private data field.
9109

9110
  @type mods: list of tuples; (operation, index, parameters)
9111
  @param mods: List of modifications
9112
  @type private_fn: callable or None
9113
  @param private_fn: Callable for constructing a private data field for a
9114
    modification
9115
  @rtype: list
9116

9117
  """
9118
  if private_fn is None:
9119
    fn = lambda: None
9120
  else:
9121
    fn = private_fn
9122

    
9123
  return [(op, idx, params, fn()) for (op, idx, params) in mods]
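# Sketch: with private_fn=None every modification simply gains a None private
# slot, e.g.
#
#   PrepareContainerMods([(constants.DDM_ADD, -1, {"size": 128})], None)
#   -> [(constants.DDM_ADD, -1, {"size": 128}, None)]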
9124

    
9125

    
9126
def GetItemFromContainer(identifier, kind, container):
9127
  """Return the item refered by the identifier.
9128

9129
  @type identifier: string
9130
  @param identifier: Item index or name or UUID
9131
  @type kind: string
9132
  @param kind: One-word item description
9133
  @type container: list
9134
  @param container: Container to get the item from
9135

9136
  """
9137
  # Index
9138
  try:
9139
    idx = int(identifier)
9140
    if idx == -1:
9141
      # Append
9142
      absidx = len(container) - 1
9143
    elif idx < 0:
9144
      raise IndexError("Not accepting negative indices other than -1")
9145
    elif idx > len(container):
9146
      raise IndexError("Got %s index %s, but there are only %s" %
9147
                       (kind, idx, len(container)))
9148
    else:
9149
      absidx = idx
9150
    return (absidx, container[idx])
9151
  except ValueError:
9152
    pass
9153

    
9154
  for idx, item in enumerate(container):
9155
    if item.uuid == identifier or item.name == identifier:
9156
      return (idx, item)
9157

    
9158
  raise errors.OpPrereqError("Cannot find %s with identifier %s" %
9159
                             (kind, identifier), errors.ECODE_NOENT)
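# Usage sketch (container contents invented): the identifier may be a numeric
# index, an item name or an item UUID, and the result is always an
# (absolute_index, item) pair:
#
#   GetItemFromContainer("0", "nic", nics)    -> (0, nics[0])
#   GetItemFromContainer("-1", "nic", nics)   -> (len(nics) - 1, nics[-1])
#   GetItemFromContainer("eth0", "nic", nics) -> index and NIC named "eth0"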
9160

    
9161

    
9162
#: Type description for changes as returned by L{ApplyContainerMods}'s
9163
#: callbacks
9164
_TApplyContModsCbChanges = \
9165
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
9166
    ht.TNonEmptyString,
9167
    ht.TAny,
9168
    ])))
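# A value accepted by the check above would be, purely as an illustration,
#   [("nic.ip/0", "192.0.2.10"), ("nic.mode/0", "routed")]
# while None means "no changes reported".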
9169

    
9170

    
9171
def ApplyContainerMods(kind, container, chgdesc, mods,
9172
                       create_fn, modify_fn, remove_fn):
9173
  """Applies descriptions in C{mods} to C{container}.
9174

9175
  @type kind: string
9176
  @param kind: One-word item description
9177
  @type container: list
9178
  @param container: Container to modify
9179
  @type chgdesc: None or list
9180
  @param chgdesc: List of applied changes
9181
  @type mods: list
9182
  @param mods: Modifications as returned by L{PrepareContainerMods}
9183
  @type create_fn: callable
9184
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
9185
    receives absolute item index, parameters and private data object as added
9186
    by L{PrepareContainerMods}, returns tuple containing new item and changes
9187
    as list
9188
  @type modify_fn: callable
9189
  @param modify_fn: Callback for modifying an existing item
9190
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
9191
    and private data object as added by L{PrepareContainerMods}, returns
9192
    changes as list
9193
  @type remove_fn: callable
9194
  @param remove_fn: Callback on removing item; receives absolute item index,
9195
    item and private data object as added by L{PrepareContainerMods}
9196

9197
  """
9198
  for (op, identifier, params, private) in mods:
9199
    changes = None
9200

    
9201
    if op == constants.DDM_ADD:
9202
      # Calculate where item will be added
9203
      # When adding an item, identifier can only be an index
9204
      try:
9205
        idx = int(identifier)
9206
      except ValueError:
9207
        raise errors.OpPrereqError("Only possitive integer or -1 is accepted as"
9208
                                   " identifier for %s" % constants.DDM_ADD,
9209
                                   errors.ECODE_INVAL)
9210
      if idx == -1:
9211
        addidx = len(container)
9212
      else:
9213
        if idx < 0:
9214
          raise IndexError("Not accepting negative indices other than -1")
9215
        elif idx > len(container):
9216
          raise IndexError("Got %s index %s, but there are only %s" %
9217
                           (kind, idx, len(container)))
9218
        addidx = idx
9219

    
9220
      if create_fn is None:
9221
        item = params
9222
      else:
9223
        (item, changes) = create_fn(addidx, params, private)
9224

    
9225
      if idx == -1:
9226
        container.append(item)
9227
      else:
9228
        assert idx >= 0
9229
        assert idx <= len(container)
9230
        # list.insert does so before the specified index
9231
        container.insert(idx, item)
9232
    else:
9233
      # Retrieve existing item
9234
      (absidx, item) = GetItemFromContainer(identifier, kind, container)
9235

    
9236
      if op == constants.DDM_REMOVE:
9237
        assert not params
9238

    
9239
        if remove_fn is not None:
9240
          remove_fn(absidx, item, private)
9241

    
9242
        changes = [("%s/%s" % (kind, absidx), "remove")]
9243

    
9244
        assert container[absidx] == item
9245
        del container[absidx]
9246
      elif op == constants.DDM_MODIFY:
9247
        if modify_fn is not None:
9248
          changes = modify_fn(absidx, item, params, private)
9249
      else:
9250
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)
9251

    
9252
    assert _TApplyContModsCbChanges(changes)
9253

    
9254
    if not (chgdesc is None or changes is None):
9255
      chgdesc.extend(changes)
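# The following helper is an illustrative sketch only (it is not called by
# any LU): it shows the calling convention of PrepareContainerMods and
# ApplyContainerMods on a plain list of dicts; the item data and the
# callback are made up for the example.
def _ExampleApplyContainerMods():
  """Sketch of a container modification run; see the comment above.

  """
  container = [{"name": "disk0"}]
  chgdesc = []
  mods = PrepareContainerMods([(constants.DDM_ADD, -1, {"name": "disk1"})],
                              None)

  def _Create(idx, params, _private):
    # Return the new item plus a list of (field, value) change pairs
    return (params, [("disk/%s" % idx, "add")])

  ApplyContainerMods("disk", container, chgdesc, mods, _Create, None, None)
  # container is now [{"name": "disk0"}, {"name": "disk1"}] and chgdesc
  # contains [("disk/1", "add")]
  return (container, chgdesc)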
9256

    
9257

    
9258
def _UpdateIvNames(base_index, disks):
9259
  """Updates the C{iv_name} attribute of disks.
9260

9261
  @type disks: list of L{objects.Disk}
9262

9263
  """
9264
  for (idx, disk) in enumerate(disks):
9265
    disk.iv_name = "disk/%s" % (base_index + idx, )
9266

    
9267

    
9268
class _InstNicModPrivate:
9269
  """Data structure for network interface modifications.
9270

9271
  Used by L{LUInstanceSetParams}.
9272

9273
  """
9274
  def __init__(self):
9275
    self.params = None
9276
    self.filled = None
9277

    
9278

    
9279
class LUInstanceSetParams(LogicalUnit):
9280
  """Modifies an instances's parameters.
9281

9282
  """
9283
  HPATH = "instance-modify"
9284
  HTYPE = constants.HTYPE_INSTANCE
9285
  REQ_BGL = False
9286

    
9287
  @staticmethod
9288
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
9289
    assert ht.TList(mods)
9290
    assert not mods or len(mods[0]) in (2, 3)
9291

    
9292
    if mods and len(mods[0]) == 2:
9293
      result = []
9294

    
9295
      addremove = 0
9296
      for op, params in mods:
9297
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
9298
          result.append((op, -1, params))
9299
          addremove += 1
9300

    
9301
          if addremove > 1:
9302
            raise errors.OpPrereqError("Only one %s add or remove operation is"
9303
                                       " supported at a time" % kind,
9304
                                       errors.ECODE_INVAL)
9305
        else:
9306
          result.append((constants.DDM_MODIFY, op, params))
9307

    
9308
      assert verify_fn(result)
9309
    else:
9310
      result = mods
9311

    
9312
    return result
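  # Sketch of the upgrade done above (hypothetical input): the legacy
  # two-element form [("add", {...})] becomes [(constants.DDM_ADD, -1, {...})]
  # and [("0", {"mode": "ro"})] turns into
  # [(constants.DDM_MODIFY, "0", {"mode": "ro"})].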
9313

    
9314
  @staticmethod
9315
  def _CheckMods(kind, mods, key_types, item_fn):
9316
    """Ensures requested disk/NIC modifications are valid.
9317

9318
    """
9319
    for (op, _, params) in mods:
9320
      assert ht.TDict(params)
9321

    
9322
      # If 'key_types' is an empty dict, we assume we have an
9323
      # 'ext' template and thus do not ForceDictType
9324
      if key_types:
9325
        utils.ForceDictType(params, key_types)
9326

    
9327
      if op == constants.DDM_REMOVE:
9328
        if params:
9329
          raise errors.OpPrereqError("No settings should be passed when"
9330
                                     " removing a %s" % kind,
9331
                                     errors.ECODE_INVAL)
9332
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
9333
        item_fn(op, params)
9334
      else:
9335
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)
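
  # A minimal, illustrative sketch (new helper, not part of the original
  # class): a remove operation must not carry any settings, so _CheckMods
  # rejects it.  Empty key_types are passed to skip the ForceDictType step,
  # as is done for the 'ext' disk template.
  @staticmethod
  def _ExampleCheckModsRejectsRemoveWithParams():
    mods = [(constants.DDM_REMOVE, -1, {constants.IDISK_SIZE: 1024})]
    try:
      LUInstanceSetParams._CheckMods(
        "disk", mods, {}, lambda op, params: None)
    except errors.OpPrereqError:
      return True
    return False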
9336

    
9337
  @staticmethod
9338
  def _VerifyDiskModification(op, params):
9339
    """Verifies a disk modification.
9340

9341
    """
9342
    if op == constants.DDM_ADD:
9343
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9344
      if mode not in constants.DISK_ACCESS_SET:
9345
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9346
                                   errors.ECODE_INVAL)
9347

    
9348
      size = params.get(constants.IDISK_SIZE, None)
9349
      if size is None:
9350
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
9351
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)
9352

    
9353
      try:
9354
        size = int(size)
9355
      except (TypeError, ValueError), err:
9356
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
9357
                                   errors.ECODE_INVAL)
9358

    
9359
      params[constants.IDISK_SIZE] = size
9360
      name = params.get(constants.IDISK_NAME, None)
9361
      if name is not None and name.lower() == constants.VALUE_NONE:
9362
        params[constants.IDISK_NAME] = None
9363

    
9364
    elif op == constants.DDM_MODIFY:
9365
      if constants.IDISK_SIZE in params:
9366
        raise errors.OpPrereqError("Disk size change not possible, use"
9367
                                   " grow-disk", errors.ECODE_INVAL)
9368
      if len(params) > 2:
9369
        raise errors.OpPrereqError("Disk modification doesn't support"
9370
                                   " additional arbitrary parameters",
9371
                                   errors.ECODE_INVAL)
9372
      name = params.get(constants.IDISK_NAME, None)
9373
      if name is not None and name.lower() == constants.VALUE_NONE:
9374
        params[constants.IDISK_NAME] = None
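
  # A minimal, illustrative sketch (new helper, not part of the original
  # class): a disk "add" gets its access mode defaulted to
  # constants.DISK_RDWR and its size coerced to an integer.
  @staticmethod
  def _ExampleVerifyDiskAdd():
    params = {constants.IDISK_SIZE: "1024"}
    LUInstanceSetParams._VerifyDiskModification(constants.DDM_ADD, params)
    assert params[constants.IDISK_MODE] == constants.DISK_RDWR
    assert params[constants.IDISK_SIZE] == 1024
    return params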
9375

    
9376
  @staticmethod
9377
  def _VerifyNicModification(op, params):
9378
    """Verifies a network interface modification.
9379

9380
    """
9381
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
9382
      ip = params.get(constants.INIC_IP, None)
9383
      name = params.get(constants.INIC_NAME, None)
9384
      req_net = params.get(constants.INIC_NETWORK, None)
9385
      link = params.get(constants.NIC_LINK, None)
9386
      mode = params.get(constants.NIC_MODE, None)
9387
      if name is not None and name.lower() == constants.VALUE_NONE:
9388
        params[constants.INIC_NAME] = None
9389
      if req_net is not None:
9390
        if req_net.lower() == constants.VALUE_NONE:
9391
          params[constants.INIC_NETWORK] = None
9392
          req_net = None
9393
        elif link is not None or mode is not None:
9394
          raise errors.OpPrereqError("If network is given"
9395
                                     " mode or link should not",
9396
                                     errors.ECODE_INVAL)
9397

    
9398
      if op == constants.DDM_ADD:
9399
        macaddr = params.get(constants.INIC_MAC, None)
9400
        if macaddr is None:
9401
          params[constants.INIC_MAC] = constants.VALUE_AUTO
9402

    
9403
      if ip is not None:
9404
        if ip.lower() == constants.VALUE_NONE:
9405
          params[constants.INIC_IP] = None
9406
        else:
9407
          if ip.lower() == constants.NIC_IP_POOL:
9408
            if op == constants.DDM_ADD and req_net is None:
9409
              raise errors.OpPrereqError("If ip=pool, parameter network"
9410
                                         " cannot be none",
9411
                                         errors.ECODE_INVAL)
9412
          else:
9413
            if not netutils.IPAddress.IsValid(ip):
9414
              raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9415
                                         errors.ECODE_INVAL)
9416

    
9417
      if constants.INIC_MAC in params:
9418
        macaddr = params[constants.INIC_MAC]
9419
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9420
          macaddr = utils.NormalizeAndValidateMac(macaddr)
9421

    
9422
        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
9423
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9424
                                     " modifying an existing NIC",
9425
                                     errors.ECODE_INVAL)
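
  # A minimal, illustrative sketch (new helper, not part of the original
  # class): adding a NIC without an explicit MAC gets constants.VALUE_AUTO
  # filled in, while requesting ip=pool without a network is rejected.
  @staticmethod
  def _ExampleVerifyNicAdd():
    params = {}
    LUInstanceSetParams._VerifyNicModification(constants.DDM_ADD, params)
    assert params[constants.INIC_MAC] == constants.VALUE_AUTO

    try:
      LUInstanceSetParams._VerifyNicModification(
        constants.DDM_ADD, {constants.INIC_IP: constants.NIC_IP_POOL})
    except errors.OpPrereqError:
      return True
    return False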
9426

    
9427
  def CheckArguments(self):
9428
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9429
            self.op.hvparams or self.op.beparams or self.op.os_name or
9430
            self.op.offline is not None or self.op.runtime_mem or
9431
            self.op.pnode):
9432
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9433

    
9434
    if self.op.hvparams:
9435
      _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
9436
                            "hypervisor", "instance", "cluster")
9437

    
9438
    self.op.disks = self._UpgradeDiskNicMods(
9439
      "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
9440
    self.op.nics = self._UpgradeDiskNicMods(
9441
      "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
9442

    
9443
    if self.op.disks and self.op.disk_template is not None:
9444
      raise errors.OpPrereqError("Disk template conversion and other disk"
9445
                                 " changes not supported at the same time",
9446
                                 errors.ECODE_INVAL)
9447

    
9448
    if (self.op.disk_template and
9449
        self.op.disk_template in constants.DTS_INT_MIRROR and
9450
        self.op.remote_node is None):
9451
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
9452
                                 " one requires specifying a secondary node",
9453
                                 errors.ECODE_INVAL)
9454

    
9455
    # Check NIC modifications
9456
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
9457
                    self._VerifyNicModification)
9458

    
9459
    if self.op.pnode:
9460
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9461

    
9462
  def ExpandNames(self):
9463
    self._ExpandAndLockInstance()
9464
    self.needed_locks[locking.LEVEL_NODEGROUP] = []
9465
    # Can't even acquire node locks in shared mode as upcoming changes in
9466
    # Ganeti 2.6 will start to modify the node object on disk conversion
9467
    self.needed_locks[locking.LEVEL_NODE] = []
9468
    self.needed_locks[locking.LEVEL_NODE_RES] = []
9469
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9470
    # Lock the node group to look up the ipolicy
9471
    self.share_locks[locking.LEVEL_NODEGROUP] = 1
9472

    
9473
  def DeclareLocks(self, level):
9474
    if level == locking.LEVEL_NODEGROUP:
9475
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9476
      # Acquire locks for the instance's nodegroups optimistically. Needs
9477
      # to be verified in CheckPrereq
9478
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9479
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9480
    elif level == locking.LEVEL_NODE:
9481
      self._LockInstancesNodes()
9482
      if self.op.disk_template and self.op.remote_node:
9483
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9484
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9485
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
9486
      # Copy node locks
9487
      self.needed_locks[locking.LEVEL_NODE_RES] = \
9488
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
9489

    
9490
  def BuildHooksEnv(self):
9491
    """Build hooks env.
9492

9493
    This runs on the master, primary and secondaries.
9494

9495
    """
9496
    args = {}
9497
    if constants.BE_MINMEM in self.be_new:
9498
      args["minmem"] = self.be_new[constants.BE_MINMEM]
9499
    if constants.BE_MAXMEM in self.be_new:
9500
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
9501
    if constants.BE_VCPUS in self.be_new:
9502
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
9503
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9504
    # information at all.
9505

    
9506
    if self._new_nics is not None:
9507
      nics = []
9508

    
9509
      for nic in self._new_nics:
9510
        n = copy.deepcopy(nic)
9511
        nicparams = self.cluster.SimpleFillNIC(n.nicparams)
9512
        n.nicparams = nicparams
9513
        nics.append(_NICToTuple(self, n))
9514

    
9515
      args["nics"] = nics
9516

    
9517
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9518
    if self.op.disk_template:
9519
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9520
    if self.op.runtime_mem:
9521
      env["RUNTIME_MEMORY"] = self.op.runtime_mem
9522

    
9523
    return env
9524

    
9525
  def BuildHooksNodes(self):
9526
    """Build hooks nodes.
9527

9528
    """
9529
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9530
    return (nl, nl)
9531

    
9532
  def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
9533
                              old_params, cluster, pnode):
9534

    
9535
    update_params_dict = dict([(key, params[key])
9536
                               for key in constants.NICS_PARAMETERS
9537
                               if key in params])
9538

    
9539
    req_link = update_params_dict.get(constants.NIC_LINK, None)
9540
    req_mode = update_params_dict.get(constants.NIC_MODE, None)
9541

    
9542
    new_net_uuid = None
9543
    new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
9544
    if new_net_uuid_or_name:
9545
      new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
9546
      new_net_obj = self.cfg.GetNetwork(new_net_uuid)
9547

    
9548
    if old_net_uuid:
9549
      old_net_obj = self.cfg.GetNetwork(old_net_uuid)
9550

    
9551
    if new_net_uuid:
9552
      netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
9553
      if not netparams:
9554
        raise errors.OpPrereqError("No netparams found for the network"
9555
                                   " %s, probably not connected" %
9556
                                   new_net_obj.name, errors.ECODE_INVAL)
9557
      new_params = dict(netparams)
9558
    else:
9559
      new_params = _GetUpdatedParams(old_params, update_params_dict)
9560

    
9561
    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
9562

    
9563
    new_filled_params = cluster.SimpleFillNIC(new_params)
9564
    objects.NIC.CheckParameterSyntax(new_filled_params)
9565

    
9566
    new_mode = new_filled_params[constants.NIC_MODE]
9567
    if new_mode == constants.NIC_MODE_BRIDGED:
9568
      bridge = new_filled_params[constants.NIC_LINK]
9569
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
9570
      if msg:
9571
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
9572
        if self.op.force:
9573
          self.warn.append(msg)
9574
        else:
9575
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9576

    
9577
    elif new_mode == constants.NIC_MODE_ROUTED:
9578
      ip = params.get(constants.INIC_IP, old_ip)
9579
      if ip is None:
9580
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
9581
                                   " on a routed NIC", errors.ECODE_INVAL)
9582

    
9583
    elif new_mode == constants.NIC_MODE_OVS:
9584
      # TODO: check OVS link
9585
      self.LogInfo("OVS links are currently not checked for correctness")
9586

    
9587
    if constants.INIC_MAC in params:
9588
      mac = params[constants.INIC_MAC]
9589
      if mac is None:
9590
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
9591
                                   errors.ECODE_INVAL)
9592
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9593
        # otherwise generate the MAC address
9594
        params[constants.INIC_MAC] = \
9595
          self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
9596
      else:
9597
        # or validate/reserve the current one
9598
        try:
9599
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
9600
        except errors.ReservationError:
9601
          raise errors.OpPrereqError("MAC address '%s' already in use"
9602
                                     " in cluster" % mac,
9603
                                     errors.ECODE_NOTUNIQUE)
9604
    elif new_net_uuid != old_net_uuid:
9605

    
9606
      def get_net_prefix(net_uuid):
9607
        mac_prefix = None
9608
        if net_uuid:
9609
          nobj = self.cfg.GetNetwork(net_uuid)
9610
          mac_prefix = nobj.mac_prefix
9611

    
9612
        return mac_prefix
9613

    
9614
      new_prefix = get_net_prefix(new_net_uuid)
9615
      old_prefix = get_net_prefix(old_net_uuid)
9616
      if old_prefix != new_prefix:
9617
        params[constants.INIC_MAC] = \
9618
          self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
9619

    
9620
    # if there is a change in (ip, network) tuple
9621
    new_ip = params.get(constants.INIC_IP, old_ip)
9622
    if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
9623
      if new_ip:
9624
        # if IP is pool then require a network and generate one IP
9625
        if new_ip.lower() == constants.NIC_IP_POOL:
9626
          if new_net_uuid:
9627
            try:
9628
              new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
9629
            except errors.ReservationError:
9630
              raise errors.OpPrereqError("Unable to get a free IP"
9631
                                         " from the address pool",
9632
                                         errors.ECODE_STATE)
9633
            self.LogInfo("Chose IP %s from network %s",
9634
                         new_ip,
9635
                         new_net_obj.name)
9636
            params[constants.INIC_IP] = new_ip
9637
          else:
9638
            raise errors.OpPrereqError("ip=pool, but no network found",
9639
                                       errors.ECODE_INVAL)
9640
        # Reserve the new IP in the new network, if any
9641
        elif new_net_uuid:
9642
          try:
9643
            self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
9644
            self.LogInfo("Reserving IP %s in network %s",
9645
                         new_ip, new_net_obj.name)
9646
          except errors.ReservationError:
9647
            raise errors.OpPrereqError("IP %s not available in network %s" %
9648
                                       (new_ip, new_net_obj.name),
9649
                                       errors.ECODE_NOTUNIQUE)
9650
        # new network is None, so check whether the new IP is already in use
9651
        elif self.op.conflicts_check:
9652
          _CheckForConflictingIp(self, new_ip, pnode)
9653

    
9654
      # release old IP if old network is not None
9655
      if old_ip and old_net_uuid:
9656
        try:
9657
          self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
9658
        except errors.AddressPoolError:
9659
          logging.warning("Release IP %s not contained in network %s",
9660
                          old_ip, old_net_obj.name)
9661

    
9662
    # there are no changes in (ip, network) tuple and old network is not None
9663
    elif (old_net_uuid is not None and
9664
          (req_link is not None or req_mode is not None)):
9665
      raise errors.OpPrereqError("Not allowed to change link or mode of"
9666
                                 " a NIC that is connected to a network",
9667
                                 errors.ECODE_INVAL)
9668

    
9669
    private.params = new_params
9670
    private.filled = new_filled_params
9671

    
9672
  def _PreCheckDiskTemplate(self, pnode_info):
9673
    """CheckPrereq checks related to a new disk template."""
9674
    # Arguments are passed to avoid configuration lookups
9675
    instance = self.instance
9676
    pnode = instance.primary_node
9677
    cluster = self.cluster
9678
    if instance.disk_template == self.op.disk_template:
9679
      raise errors.OpPrereqError("Instance already has disk template %s" %
9680
                                 instance.disk_template, errors.ECODE_INVAL)
9681

    
9682
    if (instance.disk_template,
9683
        self.op.disk_template) not in self._DISK_CONVERSIONS:
9684
      raise errors.OpPrereqError("Unsupported disk template conversion from"
9685
                                 " %s to %s" % (instance.disk_template,
9686
                                                self.op.disk_template),
9687
                                 errors.ECODE_INVAL)
9688
    _CheckInstanceState(self, instance, INSTANCE_DOWN,
9689
                        msg="cannot change disk template")
9690
    if self.op.disk_template in constants.DTS_INT_MIRROR:
9691
      if self.op.remote_node == pnode:
9692
        raise errors.OpPrereqError("Given new secondary node %s is the same"
9693
                                   " as the primary node of the instance" %
9694
                                   self.op.remote_node, errors.ECODE_STATE)
9695
      _CheckNodeOnline(self, self.op.remote_node)
9696
      _CheckNodeNotDrained(self, self.op.remote_node)
9697
      # FIXME: here we assume that the old disk template is DT_PLAIN
9698
      assert instance.disk_template == constants.DT_PLAIN
9699
      disks = [{constants.IDISK_SIZE: d.size,
9700
                constants.IDISK_VG: d.logical_id[0]}
9701
               for d in instance.disks]
9702
      required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9703
      _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9704

    
9705
      snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
9706
      snode_group = self.cfg.GetNodeGroup(snode_info.group)
9707
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
9708
                                                              snode_group)
9709
      _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg,
9710
                              ignore=self.op.ignore_ipolicy)
9711
      if pnode_info.group != snode_info.group:
9712
        self.LogWarning("The primary and secondary nodes are in two"
9713
                        " different node groups; the disk parameters"
9714
                        " from the first disk's node group will be"
9715
                        " used")
9716

    
9717
    if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
9718
      # Make sure none of the nodes require exclusive storage
9719
      nodes = [pnode_info]
9720
      if self.op.disk_template in constants.DTS_INT_MIRROR:
9721
        assert snode_info
9722
        nodes.append(snode_info)
9723
      has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
9724
      if compat.any(map(has_es, nodes)):
9725
        errmsg = ("Cannot convert disk template from %s to %s when exclusive"
9726
                  " storage is enabled" % (instance.disk_template,
9727
                                           self.op.disk_template))
9728
        raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
9729

    
9730
  def CheckPrereq(self):
9731
    """Check prerequisites.
9732

9733
    This checks that the requested changes are valid for this instance.
9734

9735
    """
9736
    assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
9737
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9738

    
9739
    cluster = self.cluster = self.cfg.GetClusterInfo()
9740
    assert self.instance is not None, \
9741
      "Cannot retrieve locked instance %s" % self.op.instance_name
9742

    
9743
    pnode = instance.primary_node
9744

    
9745
    self.warn = []
9746

    
9747
    if (self.op.pnode is not None and self.op.pnode != pnode and
9748
        not self.op.force):
9749
      # verify that the instance is not up
9750
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9751
                                                  instance.hypervisor)
9752
      if instance_info.fail_msg:
9753
        self.warn.append("Can't get instance runtime information: %s" %
9754
                         instance_info.fail_msg)
9755
      elif instance_info.payload:
9756
        raise errors.OpPrereqError("Instance is still running on %s" % pnode,
9757
                                   errors.ECODE_STATE)
9758

    
9759
    assert pnode in self.owned_locks(locking.LEVEL_NODE)
9760
    nodelist = list(instance.all_nodes)
9761
    pnode_info = self.cfg.GetNodeInfo(pnode)
9762
    self.diskparams = self.cfg.GetInstanceDiskParams(instance)
9763

    
9764
    #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9765
    assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
9766
    group_info = self.cfg.GetNodeGroup(pnode_info.group)
9767

    
9768
    # dictionary with instance information after the modification
9769
    ispec = {}
9770

    
9771
    # Check disk modifications. This is done here and not in CheckArguments
9772
    # (as with NICs), because we need to know the instance's disk template
9773
    if instance.disk_template == constants.DT_EXT:
9774
      self._CheckMods("disk", self.op.disks, {},
9775
                      self._VerifyDiskModification)
9776
    else:
9777
      self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
9778
                      self._VerifyDiskModification)
9779

    
9780
    # Prepare disk/NIC modifications
9781
    self.diskmod = PrepareContainerMods(self.op.disks, None)
9782
    self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
9783

    
9784
    # Check the validity of the `provider' parameter
9785
    if instance.disk_template == constants.DT_EXT:
9786
      for mod in self.diskmod:
9787
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
9788
        if mod[0] == constants.DDM_ADD:
9789
          if ext_provider is None:
9790
            raise errors.OpPrereqError("Instance template is '%s' and parameter"
9791
                                       " '%s' missing, during disk add" %
9792
                                       (constants.DT_EXT,
9793
                                        constants.IDISK_PROVIDER),
9794
                                       errors.ECODE_NOENT)
9795
        elif mod[0] == constants.DDM_MODIFY:
9796
          if ext_provider:
9797
            raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
9798
                                       " modification" %
9799
                                       constants.IDISK_PROVIDER,
9800
                                       errors.ECODE_INVAL)
9801
    else:
9802
      for mod in self.diskmod:
9803
        ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
9804
        if ext_provider is not None:
9805
          raise errors.OpPrereqError("Parameter '%s' is only valid for"
9806
                                     " instances of type '%s'" %
9807
                                     (constants.IDISK_PROVIDER,
9808
                                      constants.DT_EXT),
9809
                                     errors.ECODE_INVAL)
9810

    
9811
    # OS change
9812
    if self.op.os_name and not self.op.force:
9813
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9814
                      self.op.force_variant)
9815
      instance_os = self.op.os_name
9816
    else:
9817
      instance_os = instance.os
9818

    
9819
    assert not (self.op.disk_template and self.op.disks), \
9820
      "Can't modify disk template and apply disk changes at the same time"
9821

    
9822
    if self.op.disk_template:
9823
      self._PreCheckDiskTemplate(pnode_info)
9824

    
9825
    # hvparams processing
9826
    if self.op.hvparams:
9827
      hv_type = instance.hypervisor
9828
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9829
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9830
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9831

    
9832
      # local check
9833
      hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
9834
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9835
      self.hv_proposed = self.hv_new = hv_new # the new actual values
9836
      self.hv_inst = i_hvdict # the new dict (without defaults)
9837
    else:
9838
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
9839
                                              instance.hvparams)
9840
      self.hv_new = self.hv_inst = {}
9841

    
9842
    # beparams processing
9843
    if self.op.beparams:
9844
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9845
                                   use_none=True)
9846
      objects.UpgradeBeParams(i_bedict)
9847
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9848
      be_new = cluster.SimpleFillBE(i_bedict)
9849
      self.be_proposed = self.be_new = be_new # the new actual values
9850
      self.be_inst = i_bedict # the new dict (without defaults)
9851
    else:
9852
      self.be_new = self.be_inst = {}
9853
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
9854
    be_old = cluster.FillBE(instance)
9855

    
9856
    # CPU param validation -- checking every time a parameter is
9857
    # changed to cover all cases where either CPU mask or vcpus have
9858
    # changed
9859
    if (constants.BE_VCPUS in self.be_proposed and
9860
        constants.HV_CPU_MASK in self.hv_proposed):
9861
      cpu_list = \
9862
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
9863
      # Verify mask is consistent with number of vCPUs. Can skip this
9864
      # test if only 1 entry in the CPU mask, which means same mask
9865
      # is applied to all vCPUs.
9866
      if (len(cpu_list) > 1 and
9867
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
9868
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
9869
                                   " CPU mask [%s]" %
9870
                                   (self.be_proposed[constants.BE_VCPUS],
9871
                                    self.hv_proposed[constants.HV_CPU_MASK]),
9872
                                   errors.ECODE_INVAL)
9873

    
9874
      # Only perform this test if a new CPU mask is given
9875
      if constants.HV_CPU_MASK in self.hv_new:
9876
        # Calculate the largest CPU number requested
9877
        max_requested_cpu = max(map(max, cpu_list))
9878
        # Check that all of the instance's nodes have enough physical CPUs to
9879
        # satisfy the requested CPU mask
9880
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
9881
                                max_requested_cpu + 1, instance.hypervisor)
9882

    
9883
    # osparams processing
9884
    if self.op.osparams:
9885
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9886
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9887
      self.os_inst = i_osdict # the new dict (without defaults)
9888
    else:
9889
      self.os_inst = {}
9890

    
9891
    #TODO(dynmem): do the appropriate check involving MINMEM
9892
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
9893
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
9894
      mem_check_list = [pnode]
9895
      if be_new[constants.BE_AUTO_BALANCE]:
9896
        # either we changed auto_balance to yes or it was from before
9897
        mem_check_list.extend(instance.secondary_nodes)
9898
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9899
                                                  instance.hypervisor)
9900
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9901
                                         [instance.hypervisor], False)
9902
      pninfo = nodeinfo[pnode]
9903
      msg = pninfo.fail_msg
9904
      if msg:
9905
        # Assume the primary node is unreachable and go ahead
9906
        self.warn.append("Can't get info from primary node %s: %s" %
9907
                         (pnode, msg))
9908
      else:
9909
        (_, _, (pnhvinfo, )) = pninfo.payload
9910
        if not isinstance(pnhvinfo.get("memory_free", None), int):
9911
          self.warn.append("Node data from primary node %s doesn't contain"
9912
                           " free memory information" % pnode)
9913
        elif instance_info.fail_msg:
9914
          self.warn.append("Can't get instance runtime information: %s" %
9915
                           instance_info.fail_msg)
9916
        else:
9917
          if instance_info.payload:
9918
            current_mem = int(instance_info.payload["memory"])
9919
          else:
9920
            # Assume instance not running
9921
            # (there is a slight race condition here, but it's not very
9922
            # probable, and we have no other way to check)
9923
            # TODO: Describe race condition
9924
            current_mem = 0
9925
          #TODO(dynmem): do the appropriate check involving MINMEM
9926
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
9927
                      pnhvinfo["memory_free"])
9928
          if miss_mem > 0:
9929
            raise errors.OpPrereqError("This change will prevent the instance"
9930
                                       " from starting, due to %d MB of memory"
9931
                                       " missing on its primary node" %
9932
                                       miss_mem, errors.ECODE_NORES)
9933

    
9934
      if be_new[constants.BE_AUTO_BALANCE]:
9935
        for node, nres in nodeinfo.items():
9936
          if node not in instance.secondary_nodes:
9937
            continue
9938
          nres.Raise("Can't get info from secondary node %s" % node,
9939
                     prereq=True, ecode=errors.ECODE_STATE)
9940
          (_, _, (nhvinfo, )) = nres.payload
9941
          if not isinstance(nhvinfo.get("memory_free", None), int):
9942
            raise errors.OpPrereqError("Secondary node %s didn't return free"
9943
                                       " memory information" % node,
9944
                                       errors.ECODE_STATE)
9945
          #TODO(dynmem): do the appropriate check involving MINMEM
9946
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
9947
            raise errors.OpPrereqError("This change will prevent the instance"
9948
                                       " from failover to its secondary node"
9949
                                       " %s, due to not enough memory" % node,
9950
                                       errors.ECODE_STATE)
9951

    
9952
    if self.op.runtime_mem:
9953
      remote_info = self.rpc.call_instance_info(instance.primary_node,
9954
                                                instance.name,
9955
                                                instance.hypervisor)
9956
      remote_info.Raise("Error checking node %s" % instance.primary_node)
9957
      if not remote_info.payload: # not running already
9958
        raise errors.OpPrereqError("Instance %s is not running" %
9959
                                   instance.name, errors.ECODE_STATE)
9960

    
9961
      current_memory = remote_info.payload["memory"]
9962
      if (not self.op.force and
9963
           (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
9964
            self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
9965
        raise errors.OpPrereqError("Instance %s must have memory between %d"
9966
                                   " and %d MB of memory unless --force is"
9967
                                   " given" %
9968
                                   (instance.name,
9969
                                    self.be_proposed[constants.BE_MINMEM],
9970
                                    self.be_proposed[constants.BE_MAXMEM]),
9971
                                   errors.ECODE_INVAL)
9972

    
9973
      delta = self.op.runtime_mem - current_memory
9974
      if delta > 0:
9975
        _CheckNodeFreeMemory(self, instance.primary_node,
9976
                             "ballooning memory for instance %s" %
9977
                             instance.name, delta, instance.hypervisor)
9978

    
9979
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9980
      raise errors.OpPrereqError("Disk operations not supported for"
9981
                                 " diskless instances", errors.ECODE_INVAL)
9982

    
9983
    def _PrepareNicCreate(_, params, private):
9984
      self._PrepareNicModification(params, private, None, None,
9985
                                   {}, cluster, pnode)
9986
      return (None, None)
9987

    
9988
    def _PrepareNicMod(_, nic, params, private):
9989
      self._PrepareNicModification(params, private, nic.ip, nic.network,
9990
                                   nic.nicparams, cluster, pnode)
9991
      return None
9992

    
9993
    def _PrepareNicRemove(_, params, __):
9994
      ip = params.ip
9995
      net = params.network
9996
      if net is not None and ip is not None:
9997
        self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
9998

    
9999
    # Verify NIC changes (operating on copy)
10000
    nics = instance.nics[:]
10001
    ApplyContainerMods("NIC", nics, None, self.nicmod,
10002
                       _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
10003
    if len(nics) > constants.MAX_NICS:
10004
      raise errors.OpPrereqError("Instance has too many network interfaces"
10005
                                 " (%d), cannot add more" % constants.MAX_NICS,
10006
                                 errors.ECODE_STATE)
10007

    
10008
    def _PrepareDiskMod(_, disk, params, __):
10009
      disk.name = params.get(constants.IDISK_NAME, None)
10010

    
10011
    # Verify disk changes (operating on a copy)
10012
    disks = copy.deepcopy(instance.disks)
10013
    ApplyContainerMods("disk", disks, None, self.diskmod, None, _PrepareDiskMod,
10014
                       None)
10015
    utils.ValidateDeviceNames("disk", disks)
10016
    if len(disks) > constants.MAX_DISKS:
10017
      raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
10018
                                 " more" % constants.MAX_DISKS,
10019
                                 errors.ECODE_STATE)
10020
    disk_sizes = [disk.size for disk in instance.disks]
10021
    disk_sizes.extend(params["size"] for (op, idx, params, private) in
10022
                      self.diskmod if op == constants.DDM_ADD)
10023
    ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
10024
    ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
10025

    
10026
    if self.op.offline is not None and self.op.offline:
10027
      _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
10028
                          msg="can't change to offline")
10029

    
10030
    # Pre-compute NIC changes (necessary to use result in hooks)
10031
    self._nic_chgdesc = []
10032
    if self.nicmod:
10033
      # Operate on copies as this is still in prereq
10034
      nics = [nic.Copy() for nic in instance.nics]
10035
      ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
10036
                         self._CreateNewNic, self._ApplyNicMods, None)
10037
      # Verify that NIC names are unique and valid
10038
      utils.ValidateDeviceNames("NIC", nics)
10039
      self._new_nics = nics
10040
      ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
10041
    else:
10042
      self._new_nics = None
10043
      ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
10044

    
10045
    if not self.op.ignore_ipolicy:
10046
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
10047
                                                              group_info)
10048

    
10049
      # Fill ispec with backend parameters
10050
      ispec[constants.ISPEC_SPINDLE_USE] = \
10051
        self.be_new.get(constants.BE_SPINDLE_USE, None)
10052
      ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
10053
                                                         None)
10054

    
10055
      # Copy ispec to verify parameters with min/max values separately
10056
      if self.op.disk_template:
10057
        new_disk_template = self.op.disk_template
10058
      else:
10059
        new_disk_template = instance.disk_template
10060
      ispec_max = ispec.copy()
10061
      ispec_max[constants.ISPEC_MEM_SIZE] = \
10062
        self.be_new.get(constants.BE_MAXMEM, None)
10063
      res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max,
10064
                                                     new_disk_template)
10065
      ispec_min = ispec.copy()
10066
      ispec_min[constants.ISPEC_MEM_SIZE] = \
10067
        self.be_new.get(constants.BE_MINMEM, None)
10068
      res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min,
10069
                                                     new_disk_template)
10070

    
10071
      if res_max or res_min:
10072
        # FIXME: Improve error message by including information about whether
10073
        # the upper or lower limit of the parameter fails the ipolicy.
10074
        msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10075
               (group_info, group_info.name,
10076
                utils.CommaJoin(set(res_max + res_min))))
10077
        raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10078

    
10079
  def _ConvertPlainToDrbd(self, feedback_fn):
10080
    """Converts an instance from plain to drbd.
10081

10082
    """
10083
    feedback_fn("Converting template to drbd")
10084
    instance = self.instance
10085
    pnode = instance.primary_node
10086
    snode = self.op.remote_node
10087

    
10088
    assert instance.disk_template == constants.DT_PLAIN
10089

    
10090
    # create a fake disk info for _GenerateDiskTemplate
10091
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10092
                  constants.IDISK_VG: d.logical_id[0],
10093
                  constants.IDISK_NAME: d.name}
10094
                 for d in instance.disks]
10095
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10096
                                      instance.name, pnode, [snode],
10097
                                      disk_info, None, None, 0, feedback_fn,
10098
                                      self.diskparams)
10099
    anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
10100
                                        self.diskparams)
10101
    p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
10102
    s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
10103
    info = _GetInstanceInfoText(instance)
10104
    feedback_fn("Creating additional volumes...")
10105
    # first, create the missing data and meta devices
10106
    for disk in anno_disks:
10107
      # unfortunately this is... not too nice
10108
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10109
                            info, True, p_excl_stor)
10110
      for child in disk.children:
10111
        _CreateSingleBlockDev(self, snode, instance, child, info, True,
10112
                              s_excl_stor)
10113
    # at this stage, all new LVs have been created, we can rename the
10114
    # old ones
10115
    feedback_fn("Renaming original volumes...")
10116
    rename_list = [(o, n.children[0].logical_id)
10117
                   for (o, n) in zip(instance.disks, new_disks)]
10118
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10119
    result.Raise("Failed to rename original LVs")
10120

    
10121
    feedback_fn("Initializing DRBD devices...")
10122
    # all child devices are in place, we can now create the DRBD devices
10123
    try:
10124
      for disk in anno_disks:
10125
        for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
10126
          f_create = node == pnode
10127
          _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
10128
                                excl_stor)
10129
    except errors.GenericError, e:
10130
      feedback_fn("Initializing of DRBD devices failed;"
10131
                  " renaming back original volumes...")
10132
      for disk in new_disks:
10133
        self.cfg.SetDiskID(disk, pnode)
10134
      rename_back_list = [(n.children[0], o.logical_id)
10135
                          for (n, o) in zip(new_disks, instance.disks)]
10136
      result = self.rpc.call_blockdev_rename(pnode, rename_back_list)
10137
      result.Raise("Failed to rename LVs back after error %s" % str(e))
10138
      raise
10139

    
10140
    # at this point, the instance has been modified
10141
    instance.disk_template = constants.DT_DRBD8
10142
    instance.disks = new_disks
10143
    self.cfg.Update(instance, feedback_fn)
10144

    
10145
    # Release node locks while waiting for sync
10146
    _ReleaseLocks(self, locking.LEVEL_NODE)
10147

    
10148
    # disks are created, waiting for sync
10149
    disk_abort = not _WaitForSync(self, instance,
10150
                                  oneshot=not self.op.wait_for_sync)
10151
    if disk_abort:
10152
      raise errors.OpExecError("There are some degraded disks for"
10153
                               " this instance, please cleanup manually")
10154

    
10155
    # Node resource locks will be released by caller
10156

    
10157
  def _ConvertDrbdToPlain(self, feedback_fn):
10158
    """Converts an instance from drbd to plain.
10159

10160
    """
10161
    instance = self.instance
10162

    
10163
    assert len(instance.secondary_nodes) == 1
10164
    assert instance.disk_template == constants.DT_DRBD8
10165

    
10166
    pnode = instance.primary_node
10167
    snode = instance.secondary_nodes[0]
10168
    feedback_fn("Converting template to plain")
10169

    
10170
    old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
10171
    new_disks = [d.children[0] for d in instance.disks]
10172

    
10173
    # copy over size, mode and name
10174
    for parent, child in zip(old_disks, new_disks):
10175
      child.size = parent.size
10176
      child.mode = parent.mode
10177
      child.name = parent.name
10178

    
10179
    # this is a DRBD disk, return its port to the pool
10180
    # NOTE: this must be done right before the call to cfg.Update!
10181
    for disk in old_disks:
10182
      tcp_port = disk.logical_id[2]
10183
      self.cfg.AddTcpUdpPort(tcp_port)
10184

    
10185
    # update instance structure
10186
    instance.disks = new_disks
10187
    instance.disk_template = constants.DT_PLAIN
10188
    _UpdateIvNames(0, instance.disks)
10189
    self.cfg.Update(instance, feedback_fn)
10190

    
10191
    # Release locks in case removing disks takes a while
10192
    _ReleaseLocks(self, locking.LEVEL_NODE)
10193

    
10194
    feedback_fn("Removing volumes on the secondary node...")
10195
    for disk in old_disks:
10196
      self.cfg.SetDiskID(disk, snode)
10197
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10198
      if msg:
10199
        self.LogWarning("Could not remove block device %s on node %s,"
10200
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10201

    
10202
    feedback_fn("Removing unneeded volumes on the primary node...")
10203
    for idx, disk in enumerate(old_disks):
10204
      meta = disk.children[1]
10205
      self.cfg.SetDiskID(meta, pnode)
10206
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10207
      if msg:
10208
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10209
                        " continuing anyway: %s", idx, pnode, msg)
10210

    
10211
  def _CreateNewDisk(self, idx, params, _):
10212
    """Creates a new disk.
10213

10214
    """
10215
    instance = self.instance
10216

    
10217
    # add a new disk
10218
    if instance.disk_template in constants.DTS_FILEBASED:
10219
      (file_driver, file_path) = instance.disks[0].logical_id
10220
      file_path = os.path.dirname(file_path)
10221
    else:
10222
      file_driver = file_path = None
10223

    
10224
    disk = \
10225
      _GenerateDiskTemplate(self, instance.disk_template, instance.name,
10226
                            instance.primary_node, instance.secondary_nodes,
10227
                            [params], file_path, file_driver, idx,
10228
                            self.Log, self.diskparams)[0]
10229

    
10230
    info = _GetInstanceInfoText(instance)
10231

    
10232
    logging.info("Creating volume %s for instance %s",
10233
                 disk.iv_name, instance.name)
10234
    # Note: this needs to be kept in sync with _CreateDisks
10235
    #HARDCODE
10236
    for node in instance.all_nodes:
10237
      f_create = (node == instance.primary_node)
10238
      try:
10239
        _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
10240
      except errors.OpExecError, err:
10241
        self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
10242
                        disk.iv_name, disk, node, err)
10243

    
10244
    if self.cluster.prealloc_wipe_disks:
10245
      # Wipe new disk
10246
      _WipeDisks(self, instance,
10247
                 disks=[(idx, disk, 0)])
10248

    
10249
    return (disk, [
10250
      ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
10251
      ])
10252

    
10253
  @staticmethod
10254
  def _ModifyDisk(idx, disk, params, _):
10255
    """Modifies a disk.
10256

10257
    """
10258
    changes = []
10259
    mode = params.get(constants.IDISK_MODE, None)
10260
    if mode:
10261
      disk.mode = mode
10262
      changes.append(("disk.mode/%d" % idx, disk.mode))
10263

    
10264
    name = params.get(constants.IDISK_NAME, None)
10265
    disk.name = name
10266
    changes.append(("disk.name/%d" % idx, disk.name))
10267

    
10268
    return changes
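
  # A minimal, illustrative sketch (new helper, not part of the original
  # class): _ModifyDisk mutates the disk object in place and reports the
  # changes as (field, value) pairs, which end up in the opcode result.
  @staticmethod
  def _ExampleModifyDisk():
    disk = objects.Disk(size=1024, mode=constants.DISK_RDONLY)
    mods = {constants.IDISK_MODE: constants.DISK_RDWR,
            constants.IDISK_NAME: "data"}
    changes = LUInstanceSetParams._ModifyDisk(0, disk, mods, None)
    expected = [("disk.mode/0", constants.DISK_RDWR),
                ("disk.name/0", "data")]
    assert changes == expected
    return disk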
10269

    
10270
  def _RemoveDisk(self, idx, root, _):
10271
    """Removes a disk.
10272

10273
    """
10274
    (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
10275
    for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
10276
      self.cfg.SetDiskID(disk, node)
10277
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10278
      if msg:
10279
        self.LogWarning("Could not remove disk/%d on node '%s': %s,"
10280
                        " continuing anyway", idx, node, msg)
10281

    
10282
    # if this is a DRBD disk, return its port to the pool
10283
    if root.dev_type in constants.LDS_DRBD:
10284
      self.cfg.AddTcpUdpPort(root.logical_id[2])
10285

    
10286
  def _CreateNewNic(self, idx, params, private):
10287
    """Creates data structure for a new network interface.
10288

10289
    """
10290
    mac = params[constants.INIC_MAC]
10291
    ip = params.get(constants.INIC_IP, None)
10292
    net = params.get(constants.INIC_NETWORK, None)
10293
    name = params.get(constants.INIC_NAME, None)
10294
    net_uuid = self.cfg.LookupNetwork(net)
10295
    #TODO: not private.filled?? can a nic have no nicparams??
10296
    nicparams = private.filled
10297
    nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, name=name,
10298
                       nicparams=nicparams)
10299
    nobj.uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10300

    
10301
    return (nobj, [
10302
      ("nic.%d" % idx,
10303
       "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
10304
       (mac, ip, private.filled[constants.NIC_MODE],
10305
       private.filled[constants.NIC_LINK],
10306
       net)),
10307
      ])
10308

    
10309
  def _ApplyNicMods(self, idx, nic, params, private):
10310
    """Modifies a network interface.
10311

10312
    """
10313
    changes = []
10314

    
10315
    for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NAME]:
10316
      if key in params:
10317
        changes.append(("nic.%s/%d" % (key, idx), params[key]))
10318
        setattr(nic, key, params[key])
10319

    
10320
    new_net = params.get(constants.INIC_NETWORK, nic.network)
10321
    new_net_uuid = self.cfg.LookupNetwork(new_net)
10322
    if new_net_uuid != nic.network:
10323
      changes.append(("nic.network/%d" % idx, new_net))
10324
      nic.network = new_net_uuid
10325

    
10326
    if private.filled:
10327
      nic.nicparams = private.filled
10328

    
10329
      for (key, val) in nic.nicparams.items():
10330
        changes.append(("nic.%s/%d" % (key, idx), val))
10331

    
10332
    return changes
10333

    
10334
  def Exec(self, feedback_fn):
10335
    """Modifies an instance.
10336

10337
    All parameters take effect only at the next restart of the instance.
10338

10339
    """
10340
    # Process here the warnings from CheckPrereq, as we don't have a
10341
    # feedback_fn there.
10342
    # TODO: Replace with self.LogWarning
10343
    for warn in self.warn:
10344
      feedback_fn("WARNING: %s" % warn)
10345

    
10346
    assert ((self.op.disk_template is None) ^
10347
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
10348
      "Not owning any node resource locks"
10349

    
10350
    result = []
10351
    instance = self.instance
10352

    
10353
    # New primary node
10354
    if self.op.pnode:
10355
      instance.primary_node = self.op.pnode
10356

    
10357
    # runtime memory
10358
    if self.op.runtime_mem:
10359
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
10360
                                                     instance,
10361
                                                     self.op.runtime_mem)
10362
      rpcres.Raise("Cannot modify instance runtime memory")
10363
      result.append(("runtime_memory", self.op.runtime_mem))
10364

    
10365
    # Apply disk changes
10366
    ApplyContainerMods("disk", instance.disks, result, self.diskmod,
10367
                       self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
10368
    _UpdateIvNames(0, instance.disks)
10369

    
10370
    if self.op.disk_template:
10371
      if __debug__:
10372
        check_nodes = set(instance.all_nodes)
10373
        if self.op.remote_node:
10374
          check_nodes.add(self.op.remote_node)
10375
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
10376
          owned = self.owned_locks(level)
10377
          assert not (check_nodes - owned), \
10378
            ("Not owning the correct locks, owning %r, expected at least %r" %
10379
             (owned, check_nodes))
10380

    
10381
      r_shut = _ShutdownInstanceDisks(self, instance)
10382
      if not r_shut:
10383
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10384
                                 " proceed with disk template conversion")
10385
      mode = (instance.disk_template, self.op.disk_template)
10386
      try:
10387
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10388
      except:
10389
        self.cfg.ReleaseDRBDMinors(instance.name)
10390
        raise
10391
      result.append(("disk_template", self.op.disk_template))
10392

    
10393
      assert instance.disk_template == self.op.disk_template, \
10394
        ("Expected disk template '%s', found '%s'" %
10395
         (self.op.disk_template, instance.disk_template))
10396

    
10397
    # Release node and resource locks if there are any (they might already have
10398
    # been released during disk conversion)
10399
    _ReleaseLocks(self, locking.LEVEL_NODE)
10400
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10401

    
10402
    # Apply NIC changes
10403
    if self._new_nics is not None:
10404
      instance.nics = self._new_nics
10405
      result.extend(self._nic_chgdesc)
10406

    
10407
    # hvparams changes
10408
    if self.op.hvparams:
10409
      instance.hvparams = self.hv_inst
10410
      for key, val in self.op.hvparams.iteritems():
10411
        result.append(("hv/%s" % key, val))
10412

    
10413
    # beparams changes
10414
    if self.op.beparams:
10415
      instance.beparams = self.be_inst
10416
      for key, val in self.op.beparams.iteritems():
10417
        result.append(("be/%s" % key, val))
10418

    
10419
    # OS change
10420
    if self.op.os_name:
10421
      instance.os = self.op.os_name
10422

    
10423
    # osparams changes
10424
    if self.op.osparams:
10425
      instance.osparams = self.os_inst
10426
      for key, val in self.op.osparams.iteritems():
10427
        result.append(("os/%s" % key, val))
10428

    
10429
    if self.op.offline is None:
10430
      # Ignore
10431
      pass
10432
    elif self.op.offline:
10433
      # Mark instance as offline
10434
      self.cfg.MarkInstanceOffline(instance.name)
10435
      result.append(("admin_state", constants.ADMINST_OFFLINE))
10436
    else:
10437
      # Mark instance as online, but stopped
10438
      self.cfg.MarkInstanceDown(instance.name)
10439
      result.append(("admin_state", constants.ADMINST_DOWN))
10440

    
10441
    self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
10442

    
10443
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
10444
                self.owned_locks(locking.LEVEL_NODE)), \
10445
      "All node locks should have been released by now"
10446

    
10447
    return result
10448

    
10449
  _DISK_CONVERSIONS = {
10450
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10451
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10452
    }
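
  # A minimal, illustrative sketch (new helper, not part of the original
  # class): Exec dispatches on the (old template, new template) pair, so
  # only the conversions registered above are supported.
  @staticmethod
  def _ExampleSupportedConversions():
    assert (constants.DT_PLAIN,
            constants.DT_DRBD8) in LUInstanceSetParams._DISK_CONVERSIONS
    assert (constants.DT_PLAIN,
            constants.DT_FILE) not in LUInstanceSetParams._DISK_CONVERSIONS
    return sorted(LUInstanceSetParams._DISK_CONVERSIONS)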
10453

    
10454

    
10455
class LUInstanceChangeGroup(LogicalUnit):
10456
  HPATH = "instance-change-group"
10457
  HTYPE = constants.HTYPE_INSTANCE
10458
  REQ_BGL = False
10459

    
10460
  def ExpandNames(self):
10461
    self.share_locks = _ShareAll()
10462

    
10463
    self.needed_locks = {
10464
      locking.LEVEL_NODEGROUP: [],
10465
      locking.LEVEL_NODE: [],
10466
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
10467
      }
10468

    
10469
    self._ExpandAndLockInstance()
10470

    
10471
    if self.op.target_groups:
10472
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
10473
                                  self.op.target_groups)
10474
    else:
10475
      self.req_target_uuids = None
10476

    
10477
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
10478

    
10479
  def DeclareLocks(self, level):
10480
    if level == locking.LEVEL_NODEGROUP:
10481
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10482

    
10483
      if self.req_target_uuids:
10484
        lock_groups = set(self.req_target_uuids)
10485

    
10486
        # Lock all groups used by instance optimistically; this requires going
10487
        # via the node before it's locked, requiring verification later on
10488
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10489
        lock_groups.update(instance_groups)
10490
      else:
10491
        # No target groups, need to lock all of them
10492
        lock_groups = locking.ALL_SET
10493

    
10494
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
10495

    
10496
    elif level == locking.LEVEL_NODE:
10497
      if self.req_target_uuids:
10498
        # Lock all nodes used by instances
10499
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10500
        self._LockInstancesNodes()
10501

    
10502
        # Lock all nodes in all potential target groups
10503
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
10504
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
10505
        member_nodes = [node_name
10506
                        for group in lock_groups
10507
                        for node_name in self.cfg.GetNodeGroup(group).members]
10508
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
10509
      else:
10510
        # Lock all nodes as all groups are potential targets
10511
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10512

    
10513
  def CheckPrereq(self):
10514
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
10515
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
10516
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
10517

    
10518
    assert (self.req_target_uuids is None or
10519
            owned_groups.issuperset(self.req_target_uuids))
10520
    assert owned_instances == set([self.op.instance_name])
10521

    
10522
    # Get instance information
10523
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10524

    
10525
    # Check if node groups for locked instance are still correct
10526
    assert owned_nodes.issuperset(self.instance.all_nodes), \
10527
      ("Instance %s's nodes changed while we kept the lock" %
10528
       self.op.instance_name)
10529

    
10530
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
10531
                                           owned_groups)
10532

    
10533
    if self.req_target_uuids:
10534
      # User requested specific target groups
10535
      self.target_uuids = frozenset(self.req_target_uuids)
10536
    else:
10537
      # All groups except those used by the instance are potential targets
10538
      self.target_uuids = owned_groups - inst_groups
10539

    
10540
    conflicting_groups = self.target_uuids & inst_groups
10541
    if conflicting_groups:
10542
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
10543
                                 " used by the instance '%s'" %
10544
                                 (utils.CommaJoin(conflicting_groups),
10545
                                  self.op.instance_name),
10546
                                 errors.ECODE_INVAL)
10547

    
10548
    if not self.target_uuids:
10549
      raise errors.OpPrereqError("There are no possible target groups",
10550
                                 errors.ECODE_INVAL)
10551

    
10552
  def BuildHooksEnv(self):
10553
    """Build hooks env.
10554

10555
    """
10556
    assert self.target_uuids
10557

    
10558
    env = {
10559
      "TARGET_GROUPS": " ".join(self.target_uuids),
10560
      }
10561

    
10562
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10563

    
10564
    return env
10565

    
10566
  def BuildHooksNodes(self):
10567
    """Build hooks nodes.
10568

10569
    """
10570
    mn = self.cfg.GetMasterNode()
10571
    return ([mn], [mn])
10572

    
10573
  def Exec(self, feedback_fn):
10574
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
10575

    
10576
    assert instances == [self.op.instance_name], "Instance not locked"
10577

    
10578
    req = iallocator.IAReqGroupChange(instances=instances,
10579
                                      target_groups=list(self.target_uuids))
10580
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10581

    
10582
    ial.Run(self.op.iallocator)
10583

    
10584
    if not ial.success:
10585
      raise errors.OpPrereqError("Can't compute solution for changing group of"
10586
                                 " instance '%s' using iallocator '%s': %s" %
10587
                                 (self.op.instance_name, self.op.iallocator,
10588
                                  ial.info), errors.ECODE_NORES)
10589

    
10590
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
10591

    
10592
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
10593
                 " instance '%s'", len(jobs), self.op.instance_name)
10594

    
10595
    return ResultWithJobs(jobs)
10596

    
10597

    
10598
class LUBackupQuery(NoHooksLU):
10599
  """Query the exports list
10600

10601
  """
10602
  REQ_BGL = False
10603

    
10604
  def CheckArguments(self):
10605
    self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
10606
                             ["node", "export"], self.op.use_locking)
10607

    
10608
  def ExpandNames(self):
10609
    self.expq.ExpandNames(self)
10610

    
10611
  def DeclareLocks(self, level):
10612
    self.expq.DeclareLocks(self, level)
10613

    
10614
  def Exec(self, feedback_fn):
10615
    result = {}
10616

    
10617
    for (node, expname) in self.expq.OldStyleQuery(self):
10618
      if expname is None:
10619
        result[node] = False
10620
      else:
10621
        result.setdefault(node, []).append(expname)
10622

    
10623
    return result
10624

    
10625

    
10626
class _ExportQuery(_QueryBase):
10627
  FIELDS = query.EXPORT_FIELDS
10628

    
10629
  #: The node name is not a unique key for this query
10630
  SORT_FIELD = "node"
10631

    
10632
  def ExpandNames(self, lu):
10633
    lu.needed_locks = {}
10634

    
10635
    # The following variables interact with _QueryBase._GetNames
10636
    if self.names:
10637
      self.wanted = _GetWantedNodes(lu, self.names)
10638
    else:
10639
      self.wanted = locking.ALL_SET
10640

    
10641
    self.do_locking = self.use_locking
10642

    
10643
    if self.do_locking:
10644
      lu.share_locks = _ShareAll()
10645
      lu.needed_locks = {
10646
        locking.LEVEL_NODE: self.wanted,
10647
        }
10648

    
10649
      if not self.names:
10650
        lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10651

    
10652
  def DeclareLocks(self, lu, level):
10653
    pass
10654

    
10655
  def _GetQueryData(self, lu):
10656
    """Computes the list of nodes and their attributes.
10657

10658
    """
10659
    # Locking is not used
10660
    # TODO
10661
    assert not (compat.any(lu.glm.is_owned(level)
10662
                           for level in locking.LEVELS
10663
                           if level != locking.LEVEL_CLUSTER) or
10664
                self.do_locking or self.use_locking)
10665

    
10666
    nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
10667

    
10668
    result = []
10669

    
10670
    for (node, nres) in lu.rpc.call_export_list(nodes).items():
10671
      if nres.fail_msg:
10672
        result.append((node, None))
10673
      else:
10674
        result.extend((node, expname) for expname in nres.payload)
10675

    
10676
    return result
10677

    
10678

    
10679
class LUBackupPrepare(NoHooksLU):
10680
  """Prepares an instance for an export and returns useful information.
10681

10682
  """
10683
  REQ_BGL = False
10684

    
10685
  def ExpandNames(self):
10686
    self._ExpandAndLockInstance()
10687

    
10688
  def CheckPrereq(self):
10689
    """Check prerequisites.
10690

10691
    """
10692
    instance_name = self.op.instance_name
10693

    
10694
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10695
    assert self.instance is not None, \
10696
          "Cannot retrieve locked instance %s" % self.op.instance_name
10697
    _CheckNodeOnline(self, self.instance.primary_node)
10698

    
10699
    self._cds = _GetClusterDomainSecret()
10700

    
10701
  def Exec(self, feedback_fn):
10702
    """Prepares an instance for an export.
10703

10704
    """
10705
    instance = self.instance
10706

    
10707
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10708
      salt = utils.GenerateSecret(8)
10709

    
10710
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10711
      result = self.rpc.call_x509_cert_create(instance.primary_node,
10712
                                              constants.RIE_CERT_VALIDITY)
10713
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
10714

    
10715
      (name, cert_pem) = result.payload
10716

    
10717
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10718
                                             cert_pem)
10719

    
10720
      return {
10721
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10722
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10723
                          salt),
10724
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10725
        }
10726

    
10727
    return None
10728

    
10729

    
10730
class LUBackupExport(LogicalUnit):
10731
  """Export an instance to an image in the cluster.
10732

10733
  """
10734
  HPATH = "instance-export"
10735
  HTYPE = constants.HTYPE_INSTANCE
10736
  REQ_BGL = False
10737

    
10738
  def CheckArguments(self):
10739
    """Check the arguments.
10740

10741
    """
10742
    self.x509_key_name = self.op.x509_key_name
10743
    self.dest_x509_ca_pem = self.op.destination_x509_ca
10744

    
10745
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10746
      if not self.x509_key_name:
10747
        raise errors.OpPrereqError("Missing X509 key name for encryption",
10748
                                   errors.ECODE_INVAL)
10749

    
10750
      if not self.dest_x509_ca_pem:
10751
        raise errors.OpPrereqError("Missing destination X509 CA",
10752
                                   errors.ECODE_INVAL)
10753

    
10754
  def ExpandNames(self):
10755
    self._ExpandAndLockInstance()
10756

    
10757
    # Lock all nodes for local exports
10758
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10759
      # FIXME: lock only instance primary and destination node
10760
      #
10761
      # Sad but true, for now we have do lock all nodes, as we don't know where
10762
      # the previous export might be, and in this LU we search for it and
10763
      # remove it from its current node. In the future we could fix this by:
10764
      #  - making a tasklet to search (share-lock all), then create the
10765
      #    new one, then one to remove, after
10766
      #  - removing the removal operation altogether
10767
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10768

    
10769
      # Allocations should be stopped while this LU runs with node locks, but
10770
      # it doesn't have to be exclusive
10771
      self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
10772
      self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10773

    
10774
  def DeclareLocks(self, level):
10775
    """Last minute lock declaration."""
10776
    # All nodes are locked anyway, so nothing to do here.
10777

    
10778
  def BuildHooksEnv(self):
10779
    """Build hooks env.
10780

10781
    This will run on the master, primary node and target node.
10782

10783
    """
10784
    env = {
10785
      "EXPORT_MODE": self.op.mode,
10786
      "EXPORT_NODE": self.op.target_node,
10787
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10788
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10789
      # TODO: Generic function for boolean env variables
10790
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10791
      }
10792

    
10793
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10794

    
10795
    return env
10796

    
10797
  def BuildHooksNodes(self):
10798
    """Build hooks nodes.
10799

10800
    """
10801
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10802

    
10803
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10804
      nl.append(self.op.target_node)
10805

    
10806
    return (nl, nl)
10807

    
10808
  def CheckPrereq(self):
10809
    """Check prerequisites.
10810

10811
    This checks that the instance and node names are valid.
10812

10813
    """
10814
    instance_name = self.op.instance_name
10815

    
10816
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10817
    assert self.instance is not None, \
10818
          "Cannot retrieve locked instance %s" % self.op.instance_name
10819
    _CheckNodeOnline(self, self.instance.primary_node)
10820

    
10821
    if (self.op.remove_instance and
10822
        self.instance.admin_state == constants.ADMINST_UP and
10823
        not self.op.shutdown):
10824
      raise errors.OpPrereqError("Can not remove instance without shutting it"
10825
                                 " down before", errors.ECODE_STATE)
10826

    
10827
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10828
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10829
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10830
      assert self.dst_node is not None
10831

    
10832
      _CheckNodeOnline(self, self.dst_node.name)
10833
      _CheckNodeNotDrained(self, self.dst_node.name)
10834

    
10835
      self._cds = None
10836
      self.dest_disk_info = None
10837
      self.dest_x509_ca = None
10838

    
10839
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10840
      self.dst_node = None
10841

    
10842
      if len(self.op.target_node) != len(self.instance.disks):
10843
        raise errors.OpPrereqError(("Received destination information for %s"
10844
                                    " disks, but instance %s has %s disks") %
10845
                                   (len(self.op.target_node), instance_name,
10846
                                    len(self.instance.disks)),
10847
                                   errors.ECODE_INVAL)
10848

    
10849
      cds = _GetClusterDomainSecret()
10850

    
10851
      # Check X509 key name
10852
      try:
10853
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10854
      except (TypeError, ValueError), err:
10855
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
10856
                                   errors.ECODE_INVAL)
10857

    
10858
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10859
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10860
                                   errors.ECODE_INVAL)
10861

    
10862
      # Load and verify CA
10863
      try:
10864
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10865
      except OpenSSL.crypto.Error, err:
10866
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10867
                                   (err, ), errors.ECODE_INVAL)
10868

    
10869
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10870
      if errcode is not None:
10871
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10872
                                   (msg, ), errors.ECODE_INVAL)
10873

    
10874
      self.dest_x509_ca = cert
10875

    
10876
      # Verify target information
10877
      disk_info = []
10878
      for idx, disk_data in enumerate(self.op.target_node):
10879
        try:
10880
          (host, port, magic) = \
10881
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10882
        except errors.GenericError, err:
10883
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10884
                                     (idx, err), errors.ECODE_INVAL)
10885

    
10886
        disk_info.append((host, port, magic))
10887

    
10888
      assert len(disk_info) == len(self.op.target_node)
10889
      self.dest_disk_info = disk_info
10890

    
10891
    else:
10892
      raise errors.ProgrammerError("Unhandled export mode %r" %
10893
                                   self.op.mode)
10894

    
10895
    # instance disk type verification
10896
    # TODO: Implement export support for file-based disks
10897
    for disk in self.instance.disks:
10898
      if disk.dev_type == constants.LD_FILE:
10899
        raise errors.OpPrereqError("Export not supported for instances with"
10900
                                   " file-based disks", errors.ECODE_INVAL)
10901

    
10902
  def _CleanupExports(self, feedback_fn):
10903
    """Removes exports of current instance from all other nodes.
10904

10905
    If an instance in a cluster with nodes A..D was exported to node C, its
10906
    exports will be removed from the nodes A, B and D.
10907

10908
    """
10909
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10910

    
10911
    nodelist = self.cfg.GetNodeList()
10912
    nodelist.remove(self.dst_node.name)
10913

    
10914
    # on one-node clusters nodelist will be empty after the removal
10915
    # if we proceed the backup would be removed because OpBackupQuery
10916
    # substitutes an empty list with the full cluster node list.
10917
    iname = self.instance.name
10918
    if nodelist:
10919
      feedback_fn("Removing old exports for instance %s" % iname)
10920
      exportlist = self.rpc.call_export_list(nodelist)
10921
      for node in exportlist:
10922
        if exportlist[node].fail_msg:
10923
          continue
10924
        if iname in exportlist[node].payload:
10925
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10926
          if msg:
10927
            self.LogWarning("Could not remove older export for instance %s"
10928
                            " on node %s: %s", iname, node, msg)
10929

    
10930
  def Exec(self, feedback_fn):
10931
    """Export an instance to an image in the cluster.
10932

10933
    """
10934
    assert self.op.mode in constants.EXPORT_MODES
10935

    
10936
    instance = self.instance
10937
    src_node = instance.primary_node
10938

    
10939
    if self.op.shutdown:
10940
      # shutdown the instance, but not the disks
10941
      feedback_fn("Shutting down instance %s" % instance.name)
10942
      result = self.rpc.call_instance_shutdown(src_node, instance,
10943
                                               self.op.shutdown_timeout,
10944
                                               self.op.reason)
10945
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10946
      result.Raise("Could not shutdown instance %s on"
10947
                   " node %s" % (instance.name, src_node))
10948

    
10949
    # set the disks ID correctly since call_instance_start needs the
10950
    # correct drbd minor to create the symlinks
10951
    for disk in instance.disks:
10952
      self.cfg.SetDiskID(disk, src_node)
10953

    
10954
    activate_disks = (instance.admin_state != constants.ADMINST_UP)
10955

    
10956
    if activate_disks:
10957
      # Activate the instance disks if we'exporting a stopped instance
10958
      feedback_fn("Activating disks for %s" % instance.name)
10959
      _StartInstanceDisks(self, instance, None)
10960

    
10961
    try:
10962
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10963
                                                     instance)
10964

    
10965
      helper.CreateSnapshots()
10966
      try:
10967
        if (self.op.shutdown and
10968
            instance.admin_state == constants.ADMINST_UP and
10969
            not self.op.remove_instance):
10970
          assert not activate_disks
10971
          feedback_fn("Starting instance %s" % instance.name)
10972
          result = self.rpc.call_instance_start(src_node,
10973
                                                (instance, None, None), False,
10974
                                                 self.op.reason)
10975
          msg = result.fail_msg
10976
          if msg:
10977
            feedback_fn("Failed to start instance: %s" % msg)
10978
            _ShutdownInstanceDisks(self, instance)
10979
            raise errors.OpExecError("Could not start instance: %s" % msg)
10980

    
10981
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10982
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10983
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10984
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10985
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10986

    
10987
          (key_name, _, _) = self.x509_key_name
10988

    
10989
          dest_ca_pem = \
10990
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10991
                                            self.dest_x509_ca)
10992

    
10993
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10994
                                                     key_name, dest_ca_pem,
10995
                                                     timeouts)
10996
      finally:
10997
        helper.Cleanup()
10998

    
10999
      # Check for backwards compatibility
11000
      assert len(dresults) == len(instance.disks)
11001
      assert compat.all(isinstance(i, bool) for i in dresults), \
11002
             "Not all results are boolean: %r" % dresults
11003

    
11004
    finally:
11005
      if activate_disks:
11006
        feedback_fn("Deactivating disks for %s" % instance.name)
11007
        _ShutdownInstanceDisks(self, instance)
11008

    
11009
    if not (compat.all(dresults) and fin_resu):
11010
      failures = []
11011
      if not fin_resu:
11012
        failures.append("export finalization")
11013
      if not compat.all(dresults):
11014
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11015
                               if not dsk)
11016
        failures.append("disk export: disk(s) %s" % fdsk)
11017

    
11018
      raise errors.OpExecError("Export failed, errors in %s" %
11019
                               utils.CommaJoin(failures))
11020

    
11021
    # At this point, the export was successful, we can cleanup/finish
11022

    
11023
    # Remove instance if requested
11024
    if self.op.remove_instance:
11025
      feedback_fn("Removing instance %s" % instance.name)
11026
      _RemoveInstance(self, feedback_fn, instance,
11027
                      self.op.ignore_remove_failures)
11028

    
11029
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11030
      self._CleanupExports(feedback_fn)
11031

    
11032
    return fin_resu, dresults
11033

    
11034

    
11035
class LUBackupRemove(NoHooksLU):
11036
  """Remove exports related to the named instance.
11037

11038
  """
11039
  REQ_BGL = False
11040

    
11041
  def ExpandNames(self):
11042
    self.needed_locks = {
11043
      # We need all nodes to be locked in order for RemoveExport to work, but
11044
      # we don't need to lock the instance itself, as nothing will happen to it
11045
      # (and we can remove exports also for a removed instance)
11046
      locking.LEVEL_NODE: locking.ALL_SET,
11047

    
11048
      # Removing backups is quick, so blocking allocations is justified
11049
      locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11050
      }
11051

    
11052
    # Allocations should be stopped while this LU runs with node locks, but it
11053
    # doesn't have to be exclusive
11054
    self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
11055

    
11056
  def Exec(self, feedback_fn):
11057
    """Remove any export.
11058

11059
    """
11060
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11061
    # If the instance was not found we'll try with the name that was passed in.
11062
    # This will only work if it was an FQDN, though.
11063
    fqdn_warn = False
11064
    if not instance_name:
11065
      fqdn_warn = True
11066
      instance_name = self.op.instance_name
11067

    
11068
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11069
    exportlist = self.rpc.call_export_list(locked_nodes)
11070
    found = False
11071
    for node in exportlist:
11072
      msg = exportlist[node].fail_msg
11073
      if msg:
11074
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11075
        continue
11076
      if instance_name in exportlist[node].payload:
11077
        found = True
11078
        result = self.rpc.call_export_remove(node, instance_name)
11079
        msg = result.fail_msg
11080
        if msg:
11081
          logging.error("Could not remove export for instance %s"
11082
                        " on node %s: %s", instance_name, node, msg)
11083

    
11084
    if fqdn_warn and not found:
11085
      feedback_fn("Export not found. If trying to remove an export belonging"
11086
                  " to a deleted instance please use its Fully Qualified"
11087
                  " Domain Name.")
11088

    
11089

    
11090
class LUGroupAdd(LogicalUnit):
11091
  """Logical unit for creating node groups.
11092

11093
  """
11094
  HPATH = "group-add"
11095
  HTYPE = constants.HTYPE_GROUP
11096
  REQ_BGL = False
11097

    
11098
  def ExpandNames(self):
11099
    # We need the new group's UUID here so that we can create and acquire the
11100
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11101
    # that it should not check whether the UUID exists in the configuration.
11102
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11103
    self.needed_locks = {}
11104
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11105

    
11106
  def CheckPrereq(self):
11107
    """Check prerequisites.
11108

11109
    This checks that the given group name is not an existing node group
11110
    already.
11111

11112
    """
11113
    try:
11114
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11115
    except errors.OpPrereqError:
11116
      pass
11117
    else:
11118
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11119
                                 " node group (UUID: %s)" %
11120
                                 (self.op.group_name, existing_uuid),
11121
                                 errors.ECODE_EXISTS)
11122

    
11123
    if self.op.ndparams:
11124
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11125

    
11126
    if self.op.hv_state:
11127
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
11128
    else:
11129
      self.new_hv_state = None
11130

    
11131
    if self.op.disk_state:
11132
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
11133
    else:
11134
      self.new_disk_state = None
11135

    
11136
    if self.op.diskparams:
11137
      for templ in constants.DISK_TEMPLATES:
11138
        if templ in self.op.diskparams:
11139
          utils.ForceDictType(self.op.diskparams[templ],
11140
                              constants.DISK_DT_TYPES)
11141
      self.new_diskparams = self.op.diskparams
11142
      try:
11143
        utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
11144
      except errors.OpPrereqError, err:
11145
        raise errors.OpPrereqError("While verify diskparams options: %s" % err,
11146
                                   errors.ECODE_INVAL)
11147
    else:
11148
      self.new_diskparams = {}
11149

    
11150
    if self.op.ipolicy:
11151
      cluster = self.cfg.GetClusterInfo()
11152
      full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
11153
      try:
11154
        objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
11155
      except errors.ConfigurationError, err:
11156
        raise errors.OpPrereqError("Invalid instance policy: %s" % err,
11157
                                   errors.ECODE_INVAL)
11158

    
11159
  def BuildHooksEnv(self):
11160
    """Build hooks env.
11161

11162
    """
11163
    return {
11164
      "GROUP_NAME": self.op.group_name,
11165
      }
11166

    
11167
  def BuildHooksNodes(self):
11168
    """Build hooks nodes.
11169

11170
    """
11171
    mn = self.cfg.GetMasterNode()
11172
    return ([mn], [mn])
11173

    
11174
  def Exec(self, feedback_fn):
11175
    """Add the node group to the cluster.
11176

11177
    """
11178
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11179
                                  uuid=self.group_uuid,
11180
                                  alloc_policy=self.op.alloc_policy,
11181
                                  ndparams=self.op.ndparams,
11182
                                  diskparams=self.new_diskparams,
11183
                                  ipolicy=self.op.ipolicy,
11184
                                  hv_state_static=self.new_hv_state,
11185
                                  disk_state_static=self.new_disk_state)
11186

    
11187
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11188
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11189

    
11190

    
11191
class LUGroupAssignNodes(NoHooksLU):
11192
  """Logical unit for assigning nodes to groups.
11193

11194
  """
11195
  REQ_BGL = False
11196

    
11197
  def ExpandNames(self):
11198
    # These raise errors.OpPrereqError on their own:
11199
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11200
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11201

    
11202
    # We want to lock all the affected nodes and groups. We have readily
11203
    # available the list of nodes, and the *destination* group. To gather the
11204
    # list of "source" groups, we need to fetch node information later on.
11205
    self.needed_locks = {
11206
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11207
      locking.LEVEL_NODE: self.op.nodes,
11208
      }
11209

    
11210
  def DeclareLocks(self, level):
11211
    if level == locking.LEVEL_NODEGROUP:
11212
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11213

    
11214
      # Try to get all affected nodes' groups without having the group or node
11215
      # lock yet. Needs verification later in the code flow.
11216
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11217

    
11218
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11219

    
11220
  def CheckPrereq(self):
11221
    """Check prerequisites.
11222

11223
    """
11224
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11225
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11226
            frozenset(self.op.nodes))
11227

    
11228
    expected_locks = (set([self.group_uuid]) |
11229
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11230
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11231
    if actual_locks != expected_locks:
11232
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11233
                               " current groups are '%s', used to be '%s'" %
11234
                               (utils.CommaJoin(expected_locks),
11235
                                utils.CommaJoin(actual_locks)))
11236

    
11237
    self.node_data = self.cfg.GetAllNodesInfo()
11238
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11239
    instance_data = self.cfg.GetAllInstancesInfo()
11240

    
11241
    if self.group is None:
11242
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11243
                               (self.op.group_name, self.group_uuid))
11244

    
11245
    (new_splits, previous_splits) = \
11246
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11247
                                             for node in self.op.nodes],
11248
                                            self.node_data, instance_data)
11249

    
11250
    if new_splits:
11251
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11252

    
11253
      if not self.op.force:
11254
        raise errors.OpExecError("The following instances get split by this"
11255
                                 " change and --force was not given: %s" %
11256
                                 fmt_new_splits)
11257
      else:
11258
        self.LogWarning("This operation will split the following instances: %s",
11259
                        fmt_new_splits)
11260

    
11261
        if previous_splits:
11262
          self.LogWarning("In addition, these already-split instances continue"
11263
                          " to be split across groups: %s",
11264
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11265

    
11266
  def Exec(self, feedback_fn):
11267
    """Assign nodes to a new group.
11268

11269
    """
11270
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
11271

    
11272
    self.cfg.AssignGroupNodes(mods)
11273

    
11274
  @staticmethod
11275
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11276
    """Check for split instances after a node assignment.
11277

11278
    This method considers a series of node assignments as an atomic operation,
11279
    and returns information about split instances after applying the set of
11280
    changes.
11281

11282
    In particular, it returns information about newly split instances, and
11283
    instances that were already split, and remain so after the change.
11284

11285
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11286
    considered.
11287

11288
    @type changes: list of (node_name, new_group_uuid) pairs.
11289
    @param changes: list of node assignments to consider.
11290
    @param node_data: a dict with data for all nodes
11291
    @param instance_data: a dict with all instances to consider
11292
    @rtype: a two-tuple
11293
    @return: a list of instances that were previously okay and result split as a
11294
      consequence of this change, and a list of instances that were previously
11295
      split and this change does not fix.
11296

11297
    """
11298
    changed_nodes = dict((node, group) for node, group in changes
11299
                         if node_data[node].group != group)
11300

    
11301
    all_split_instances = set()
11302
    previously_split_instances = set()
11303

    
11304
    def InstanceNodes(instance):
11305
      return [instance.primary_node] + list(instance.secondary_nodes)
11306

    
11307
    for inst in instance_data.values():
11308
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11309
        continue
11310

    
11311
      instance_nodes = InstanceNodes(inst)
11312

    
11313
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11314
        previously_split_instances.add(inst.name)
11315

    
11316
      if len(set(changed_nodes.get(node, node_data[node].group)
11317
                 for node in instance_nodes)) > 1:
11318
        all_split_instances.add(inst.name)
11319

    
11320
    return (list(all_split_instances - previously_split_instances),
11321
            list(previously_split_instances & all_split_instances))
11322

    
11323

    
11324
class _GroupQuery(_QueryBase):
11325
  FIELDS = query.GROUP_FIELDS
11326

    
11327
  def ExpandNames(self, lu):
11328
    lu.needed_locks = {}
11329

    
11330
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11331
    self._cluster = lu.cfg.GetClusterInfo()
11332
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11333

    
11334
    if not self.names:
11335
      self.wanted = [name_to_uuid[name]
11336
                     for name in utils.NiceSort(name_to_uuid.keys())]
11337
    else:
11338
      # Accept names to be either names or UUIDs.
11339
      missing = []
11340
      self.wanted = []
11341
      all_uuid = frozenset(self._all_groups.keys())
11342

    
11343
      for name in self.names:
11344
        if name in all_uuid:
11345
          self.wanted.append(name)
11346
        elif name in name_to_uuid:
11347
          self.wanted.append(name_to_uuid[name])
11348
        else:
11349
          missing.append(name)
11350

    
11351
      if missing:
11352
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11353
                                   utils.CommaJoin(missing),
11354
                                   errors.ECODE_NOENT)
11355

    
11356
  def DeclareLocks(self, lu, level):
11357
    pass
11358

    
11359
  def _GetQueryData(self, lu):
11360
    """Computes the list of node groups and their attributes.
11361

11362
    """
11363
    do_nodes = query.GQ_NODE in self.requested_data
11364
    do_instances = query.GQ_INST in self.requested_data
11365

    
11366
    group_to_nodes = None
11367
    group_to_instances = None
11368

    
11369
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11370
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11371
    # latter GetAllInstancesInfo() is not enough, for we have to go through
11372
    # instance->node. Hence, we will need to process nodes even if we only need
11373
    # instance information.
11374
    if do_nodes or do_instances:
11375
      all_nodes = lu.cfg.GetAllNodesInfo()
11376
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11377
      node_to_group = {}
11378

    
11379
      for node in all_nodes.values():
11380
        if node.group in group_to_nodes:
11381
          group_to_nodes[node.group].append(node.name)
11382
          node_to_group[node.name] = node.group
11383

    
11384
      if do_instances:
11385
        all_instances = lu.cfg.GetAllInstancesInfo()
11386
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
11387

    
11388
        for instance in all_instances.values():
11389
          node = instance.primary_node
11390
          if node in node_to_group:
11391
            group_to_instances[node_to_group[node]].append(instance.name)
11392

    
11393
        if not do_nodes:
11394
          # Do not pass on node information if it was not requested.
11395
          group_to_nodes = None
11396

    
11397
    return query.GroupQueryData(self._cluster,
11398
                                [self._all_groups[uuid]
11399
                                 for uuid in self.wanted],
11400
                                group_to_nodes, group_to_instances,
11401
                                query.GQ_DISKPARAMS in self.requested_data)
11402

    
11403

    
11404
class LUGroupQuery(NoHooksLU):
11405
  """Logical unit for querying node groups.
11406

11407
  """
11408
  REQ_BGL = False
11409

    
11410
  def CheckArguments(self):
11411
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11412
                          self.op.output_fields, False)
11413

    
11414
  def ExpandNames(self):
11415
    self.gq.ExpandNames(self)
11416

    
11417
  def DeclareLocks(self, level):
11418
    self.gq.DeclareLocks(self, level)
11419

    
11420
  def Exec(self, feedback_fn):
11421
    return self.gq.OldStyleQuery(self)
11422

    
11423

    
11424
class LUGroupSetParams(LogicalUnit):
11425
  """Modifies the parameters of a node group.
11426

11427
  """
11428
  HPATH = "group-modify"
11429
  HTYPE = constants.HTYPE_GROUP
11430
  REQ_BGL = False
11431

    
11432
  def CheckArguments(self):
11433
    all_changes = [
11434
      self.op.ndparams,
11435
      self.op.diskparams,
11436
      self.op.alloc_policy,
11437
      self.op.hv_state,
11438
      self.op.disk_state,
11439
      self.op.ipolicy,
11440
      ]
11441

    
11442
    if all_changes.count(None) == len(all_changes):
11443
      raise errors.OpPrereqError("Please pass at least one modification",
11444
                                 errors.ECODE_INVAL)
11445

    
11446
  def ExpandNames(self):
11447
    # This raises errors.OpPrereqError on its own:
11448
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11449

    
11450
    self.needed_locks = {
11451
      locking.LEVEL_INSTANCE: [],
11452
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11453
      }
11454

    
11455
    self.share_locks[locking.LEVEL_INSTANCE] = 1
11456

    
11457
  def DeclareLocks(self, level):
11458
    if level == locking.LEVEL_INSTANCE:
11459
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
11460

    
11461
      # Lock instances optimistically, needs verification once group lock has
11462
      # been acquired
11463
      self.needed_locks[locking.LEVEL_INSTANCE] = \
11464
          self.cfg.GetNodeGroupInstances(self.group_uuid)
11465

    
11466
  @staticmethod
11467
  def _UpdateAndVerifyDiskParams(old, new):
11468
    """Updates and verifies disk parameters.
11469

11470
    """
11471
    new_params = _GetUpdatedParams(old, new)
11472
    utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
11473
    return new_params
11474

    
11475
  def CheckPrereq(self):
11476
    """Check prerequisites.
11477

11478
    """
11479
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11480

    
11481
    # Check if locked instances are still correct
11482
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
11483

    
11484
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11485
    cluster = self.cfg.GetClusterInfo()
11486

    
11487
    if self.group is None:
11488
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11489
                               (self.op.group_name, self.group_uuid))
11490

    
11491
    if self.op.ndparams:
11492
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11493
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
11494
      self.new_ndparams = new_ndparams
11495

    
11496
    if self.op.diskparams:
11497
      diskparams = self.group.diskparams
11498
      uavdp = self._UpdateAndVerifyDiskParams
11499
      # For each disktemplate subdict update and verify the values
11500
      new_diskparams = dict((dt,
11501
                             uavdp(diskparams.get(dt, {}),
11502
                                   self.op.diskparams[dt]))
11503
                            for dt in constants.DISK_TEMPLATES
11504
                            if dt in self.op.diskparams)
11505
      # As we've all subdicts of diskparams ready, lets merge the actual
11506
      # dict with all updated subdicts
11507
      self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
11508
      try:
11509
        utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
11510
      except errors.OpPrereqError, err:
11511
        raise errors.OpPrereqError("While verify diskparams options: %s" % err,
11512
                                   errors.ECODE_INVAL)
11513

    
11514
    if self.op.hv_state:
11515
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
11516
                                                 self.group.hv_state_static)
11517

    
11518
    if self.op.disk_state:
11519
      self.new_disk_state = \
11520
        _MergeAndVerifyDiskState(self.op.disk_state,
11521
                                 self.group.disk_state_static)
11522

    
11523
    if self.op.ipolicy:
11524
      self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
11525
                                            self.op.ipolicy,
11526
                                            group_policy=True)
11527

    
11528
      new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
11529
      inst_filter = lambda inst: inst.name in owned_instances
11530
      instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
11531
      gmi = ganeti.masterd.instance
11532
      violations = \
11533
          _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
11534
                                                                  self.group),
11535
                                        new_ipolicy, instances, self.cfg)
11536

    
11537
      if violations:
11538
        self.LogWarning("After the ipolicy change the following instances"
11539
                        " violate them: %s",
11540
                        utils.CommaJoin(violations))
11541

    
11542
  def BuildHooksEnv(self):
11543
    """Build hooks env.
11544

11545
    """
11546
    return {
11547
      "GROUP_NAME": self.op.group_name,
11548
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
11549
      }
11550

    
11551
  def BuildHooksNodes(self):
11552
    """Build hooks nodes.
11553

11554
    """
11555
    mn = self.cfg.GetMasterNode()
11556
    return ([mn], [mn])
11557

    
11558
  def Exec(self, feedback_fn):
11559
    """Modifies the node group.
11560

11561
    """
11562
    result = []
11563

    
11564
    if self.op.ndparams:
11565
      self.group.ndparams = self.new_ndparams
11566
      result.append(("ndparams", str(self.group.ndparams)))
11567

    
11568
    if self.op.diskparams:
11569
      self.group.diskparams = self.new_diskparams
11570
      result.append(("diskparams", str(self.group.diskparams)))
11571

    
11572
    if self.op.alloc_policy:
11573
      self.group.alloc_policy = self.op.alloc_policy
11574

    
11575
    if self.op.hv_state:
11576
      self.group.hv_state_static = self.new_hv_state
11577

    
11578
    if self.op.disk_state:
11579
      self.group.disk_state_static = self.new_disk_state
11580

    
11581
    if self.op.ipolicy:
11582
      self.group.ipolicy = self.new_ipolicy
11583

    
11584
    self.cfg.Update(self.group, feedback_fn)
11585
    return result
11586

    
11587

    
11588
class LUGroupRemove(LogicalUnit):
11589
  HPATH = "group-remove"
11590
  HTYPE = constants.HTYPE_GROUP
11591
  REQ_BGL = False
11592

    
11593
  def ExpandNames(self):
11594
    # This will raises errors.OpPrereqError on its own:
11595
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11596
    self.needed_locks = {
11597
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11598
      }
11599

    
11600
  def CheckPrereq(self):
11601
    """Check prerequisites.
11602

11603
    This checks that the given group name exists as a node group, that is
11604
    empty (i.e., contains no nodes), and that is not the last group of the
11605
    cluster.
11606

11607
    """
11608
    # Verify that the group is empty.
11609
    group_nodes = [node.name
11610
                   for node in self.cfg.GetAllNodesInfo().values()
11611
                   if node.group == self.group_uuid]
11612

    
11613
    if group_nodes:
11614
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11615
                                 " nodes: %s" %
11616
                                 (self.op.group_name,
11617
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11618
                                 errors.ECODE_STATE)
11619

    
11620
    # Verify the cluster would not be left group-less.
11621
    if len(self.cfg.GetNodeGroupList()) == 1:
11622
      raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
11623
                                 " removed" % self.op.group_name,
11624
                                 errors.ECODE_STATE)
11625

    
11626
  def BuildHooksEnv(self):
11627
    """Build hooks env.
11628

11629
    """
11630
    return {
11631
      "GROUP_NAME": self.op.group_name,
11632
      }
11633

    
11634
  def BuildHooksNodes(self):
11635
    """Build hooks nodes.
11636

11637
    """
11638
    mn = self.cfg.GetMasterNode()
11639
    return ([mn], [mn])
11640

    
11641
  def Exec(self, feedback_fn):
11642
    """Remove the node group.
11643

11644
    """
11645
    try:
11646
      self.cfg.RemoveNodeGroup(self.group_uuid)
11647
    except errors.ConfigurationError:
11648
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11649
                               (self.op.group_name, self.group_uuid))
11650

    
11651
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11652

    
11653

    
11654
class LUGroupRename(LogicalUnit):
11655
  HPATH = "group-rename"
11656
  HTYPE = constants.HTYPE_GROUP
11657
  REQ_BGL = False
11658

    
11659
  def ExpandNames(self):
11660
    # This raises errors.OpPrereqError on its own:
11661
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11662

    
11663
    self.needed_locks = {
11664
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11665
      }
11666

    
11667
  def CheckPrereq(self):
11668
    """Check prerequisites.
11669

11670
    Ensures requested new name is not yet used.
11671

11672
    """
11673
    try:
11674
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11675
    except errors.OpPrereqError:
11676
      pass
11677
    else:
11678
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11679
                                 " node group (UUID: %s)" %
11680
                                 (self.op.new_name, new_name_uuid),
11681
                                 errors.ECODE_EXISTS)
11682

    
11683
  def BuildHooksEnv(self):
11684
    """Build hooks env.
11685

11686
    """
11687
    return {
11688
      "OLD_NAME": self.op.group_name,
11689
      "NEW_NAME": self.op.new_name,
11690
      }
11691

    
11692
  def BuildHooksNodes(self):
11693
    """Build hooks nodes.
11694

11695
    """
11696
    mn = self.cfg.GetMasterNode()
11697

    
11698
    all_nodes = self.cfg.GetAllNodesInfo()
11699
    all_nodes.pop(mn, None)
11700

    
11701
    run_nodes = [mn]
11702
    run_nodes.extend(node.name for node in all_nodes.values()
11703
                     if node.group == self.group_uuid)
11704

    
11705
    return (run_nodes, run_nodes)
11706

    
11707
  def Exec(self, feedback_fn):
11708
    """Rename the node group.
11709

11710
    """
11711
    group = self.cfg.GetNodeGroup(self.group_uuid)
11712

    
11713
    if group is None:
11714
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11715
                               (self.op.group_name, self.group_uuid))
11716

    
11717
    group.name = self.op.new_name
11718
    self.cfg.Update(group, feedback_fn)
11719

    
11720
    return self.op.new_name
11721

    
11722

    
11723
class LUGroupEvacuate(LogicalUnit):
11724
  HPATH = "group-evacuate"
11725
  HTYPE = constants.HTYPE_GROUP
11726
  REQ_BGL = False
11727

    
11728
  def ExpandNames(self):
11729
    # This raises errors.OpPrereqError on its own:
11730
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11731

    
11732
    if self.op.target_groups:
11733
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11734
                                  self.op.target_groups)
11735
    else:
11736
      self.req_target_uuids = []
11737

    
11738
    if self.group_uuid in self.req_target_uuids:
11739
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
11740
                                 " as a target group (targets are %s)" %
11741
                                 (self.group_uuid,
11742
                                  utils.CommaJoin(self.req_target_uuids)),
11743
                                 errors.ECODE_INVAL)
11744

    
11745
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11746

    
11747
    self.share_locks = _ShareAll()
11748
    self.needed_locks = {
11749
      locking.LEVEL_INSTANCE: [],
11750
      locking.LEVEL_NODEGROUP: [],
11751
      locking.LEVEL_NODE: [],
11752
      }
11753

    
11754
  def DeclareLocks(self, level):
11755
    if level == locking.LEVEL_INSTANCE:
11756
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
11757

    
11758
      # Lock instances optimistically, needs verification once node and group
11759
      # locks have been acquired
11760
      self.needed_locks[locking.LEVEL_INSTANCE] = \
11761
        self.cfg.GetNodeGroupInstances(self.group_uuid)
11762

    
11763
    elif level == locking.LEVEL_NODEGROUP:
11764
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11765

    
11766
      if self.req_target_uuids:
11767
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
11768

    
11769
        # Lock all groups used by instances optimistically; this requires going
11770
        # via the node before it's locked, requiring verification later on
11771
        lock_groups.update(group_uuid
11772
                           for instance_name in
11773
                             self.owned_locks(locking.LEVEL_INSTANCE)
11774
                           for group_uuid in
11775
                             self.cfg.GetInstanceNodeGroups(instance_name))
11776
      else:
11777
        # No target groups, need to lock all of them
11778
        lock_groups = locking.ALL_SET
11779

    
11780
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11781

    
11782
    elif level == locking.LEVEL_NODE:
11783
      # This will only lock the nodes in the group to be evacuated which
11784
      # contain actual instances
11785
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11786
      self._LockInstancesNodes()
11787

    
11788
      # Lock all nodes in group to be evacuated and target groups
11789
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11790
      assert self.group_uuid in owned_groups
11791
      member_nodes = [node_name
11792
                      for group in owned_groups
11793
                      for node_name in self.cfg.GetNodeGroup(group).members]
11794
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11795

    
11796
  def CheckPrereq(self):
11797
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11798
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11799
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11800

    
11801
    assert owned_groups.issuperset(self.req_target_uuids)
11802
    assert self.group_uuid in owned_groups
11803

    
11804
    # Check if locked instances are still correct
11805
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
11806

    
11807
    # Get instance information
11808
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
11809

    
11810
    # Check if node groups for locked instances are still correct
11811
    _CheckInstancesNodeGroups(self.cfg, self.instances,
11812
                              owned_groups, owned_nodes, self.group_uuid)
11813

    
11814
    if self.req_target_uuids:
11815
      # User requested specific target groups
11816
      self.target_uuids = self.req_target_uuids
11817
    else:
11818
      # All groups except the one to be evacuated are potential targets
11819
      self.target_uuids = [group_uuid for group_uuid in owned_groups
11820
                           if group_uuid != self.group_uuid]
11821

    
11822
      if not self.target_uuids:
11823
        raise errors.OpPrereqError("There are no possible target groups",
11824
                                   errors.ECODE_INVAL)
11825

    
11826
  def BuildHooksEnv(self):
11827
    """Build hooks env.
11828

11829
    """
11830
    return {
11831
      "GROUP_NAME": self.op.group_name,
11832
      "TARGET_GROUPS": " ".join(self.target_uuids),
11833
      }
11834

    
11835
  def BuildHooksNodes(self):
11836
    """Build hooks nodes.
11837

11838
    """
11839
    mn = self.cfg.GetMasterNode()
11840

    
11841
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11842

    
11843
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
11844

    
11845
    return (run_nodes, run_nodes)
11846

    
11847
  def Exec(self, feedback_fn):
11848
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11849

    
11850
    assert self.group_uuid not in self.target_uuids
11851

    
11852
    req = iallocator.IAReqGroupChange(instances=instances,
11853
                                      target_groups=self.target_uuids)
11854
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11855

    
11856
    ial.Run(self.op.iallocator)
11857

    
11858
    if not ial.success:
11859
      raise errors.OpPrereqError("Can't compute group evacuation using"
11860
                                 " iallocator '%s': %s" %
11861
                                 (self.op.iallocator, ial.info),
11862
                                 errors.ECODE_NORES)
11863

    
11864
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11865

    
11866
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
11867
                 len(jobs), self.op.group_name)
11868

    
11869
    return ResultWithJobs(jobs)
11870

    
11871

    
11872
class LURestrictedCommand(NoHooksLU):
11873
  """Logical unit for executing restricted commands.
11874

11875
  """
11876
  REQ_BGL = False
11877

    
11878
  def ExpandNames(self):
11879
    if self.op.nodes:
11880
      self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11881

    
11882
    self.needed_locks = {
11883
      locking.LEVEL_NODE: self.op.nodes,
11884
      }
11885
    self.share_locks = {
11886
      locking.LEVEL_NODE: not self.op.use_locking,
11887
      }
11888

    
11889
  def CheckPrereq(self):
11890
    """Check prerequisites.
11891

11892
    """
11893

    
11894
  def Exec(self, feedback_fn):
11895
    """Execute restricted command and return output.
11896

11897
    """
11898
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11899

    
11900
    # Check if correct locks are held
11901
    assert set(self.op.nodes).issubset(owned_nodes)
11902

    
11903
    rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
11904

    
11905
    result = []
11906

    
11907
    for node_name in self.op.nodes:
11908
      nres = rpcres[node_name]
11909
      if nres.fail_msg:
11910
        msg = ("Command '%s' on node '%s' failed: %s" %
11911
               (self.op.command, node_name, nres.fail_msg))
11912
        result.append((False, msg))
11913
      else:
11914
        result.append((True, nres.payload))
11915

    
11916
    return result
11917

    
11918

    
11919
#: Query type implementations
11920
_QUERY_IMPL = {
11921
  constants.QR_CLUSTER: _ClusterQuery,
11922
  constants.QR_INSTANCE: _InstanceQuery,
11923
  constants.QR_NODE: _NodeQuery,
11924
  constants.QR_GROUP: _GroupQuery,
11925
  constants.QR_NETWORK: _NetworkQuery,
11926
  constants.QR_OS: _OsQuery,
11927
  constants.QR_EXTSTORAGE: _ExtStorageQuery,
11928
  constants.QR_EXPORT: _ExportQuery,
11929
  }
11930

    
11931
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
11932

    
11933

    
11934
def _GetQueryImplementation(name):
11935
  """Returns the implemtnation for a query type.
11936

11937
  @param name: Query type, must be one of L{constants.QR_VIA_OP}
11938

11939
  """
11940
  try:
11941
    return _QUERY_IMPL[name]
11942
  except KeyError:
11943
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11944
                               errors.ECODE_INVAL)
11945

    
11946

    
11947
def _CheckForConflictingIp(lu, ip, node):
11948
  """In case of conflicting IP address raise error.
11949

11950
  @type ip: string
11951
  @param ip: IP address
11952
  @type node: string
11953
  @param node: node name
11954

11955
  """
11956
  (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
11957
  if conf_net is not None:
11958
    raise errors.OpPrereqError(("The requested IP address (%s) belongs to"
11959
                                " network %s, but the target NIC does not." %
11960
                                (ip, conf_net)),
11961
                               errors.ECODE_STATE)
11962

    
11963
  return (None, None)