4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
39 from ganeti import utils
40 from ganeti import errors
41 from ganeti import hypervisor
42 from ganeti import locking
43 from ganeti import constants
44 from ganeti import objects
45 from ganeti import compat
46 from ganeti import masterd
47 from ganeti import netutils
48 from ganeti import query
49 from ganeti import qlang
50 from ganeti import opcodes
52 from ganeti import rpc
53 from ganeti import pathutils
54 from ganeti import network
55 from ganeti.masterd import iallocator
57 from ganeti.cmdlib.base import ResultWithJobs, LogicalUnit, NoHooksLU, \
58 _QueryBase
59 from ganeti.cmdlib.common import _ExpandInstanceName, _ExpandItemName, \
60 _ExpandNodeName, _ShareAll, _CheckNodeGroupInstances, _GetWantedNodes, \
61 _GetWantedInstances, _RunPostHook, _RedistributeAncillaryFiles, \
62 _MergeAndVerifyHvState, _MergeAndVerifyDiskState, _GetUpdatedIPolicy, \
63 _ComputeNewInstanceViolations, _GetUpdatedParams, _CheckOSParams, \
64 _CheckHVParams, _AdjustCandidatePool, _CheckNodePVs, \
65 _ComputeIPolicyInstanceViolation, _AnnotateDiskParams, _SupportsOob, \
66 _ComputeIPolicySpecViolation
68 from ganeti.cmdlib.cluster import LUClusterActivateMasterIp, \
69 LUClusterDeactivateMasterIp, LUClusterConfigQuery, LUClusterDestroy, \
70 LUClusterPostInit, _ClusterQuery, LUClusterQuery, LUClusterRedistConf, \
71 LUClusterRename, LUClusterRepairDiskSizes, LUClusterSetParams, \
72 LUClusterVerify, LUClusterVerifyConfig, LUClusterVerifyGroup, \
73 LUClusterVerifyDisks
74 from ganeti.cmdlib.tags import LUTagsGet, LUTagsSearch, LUTagsSet, LUTagsDel
75 from ganeti.cmdlib.network import LUNetworkAdd, LUNetworkRemove, \
76 LUNetworkSetParams, _NetworkQuery, LUNetworkQuery, LUNetworkConnect, \
77 LUNetworkDisconnect
78 from ganeti.cmdlib.test import LUTestDelay, LUTestJqueue, LUTestAllocator
80 import ganeti.masterd.instance # pylint: disable=W0611
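# Groupings of instance admin states, intended for use as the req_states
# argument of _CheckInstanceState() below.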
84 INSTANCE_DOWN = [constants.ADMINST_DOWN]
85 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
86 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
88 #: Instance status in which an instance can be marked as offline/online
89 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
90 constants.ADMINST_OFFLINE,
91 ]))
94 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
95 cur_group_uuid=None):
96 """Checks if node groups for locked instances are still correct.
98 @type cfg: L{config.ConfigWriter}
99 @param cfg: Cluster configuration
100 @type instances: dict; string as key, L{objects.Instance} as value
101 @param instances: Dictionary, instance name as key, instance object as value
102 @type owned_groups: iterable of string
103 @param owned_groups: List of owned groups
104 @type owned_nodes: iterable of string
105 @param owned_nodes: List of owned nodes
106 @type cur_group_uuid: string or None
107 @param cur_group_uuid: Optional group UUID to check against instance's groups
110 for (name, inst) in instances.items():
111 assert owned_nodes.issuperset(inst.all_nodes), \
112 "Instance %s's nodes changed while we kept the lock" % name
114 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
116 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
117 "Instance %s has no node in group %s" % (name, cur_group_uuid)
120 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
121 primary_only=False):
122 """Checks if the owned node groups are still correct for an instance.
124 @type cfg: L{config.ConfigWriter}
125 @param cfg: The cluster configuration
126 @type instance_name: string
127 @param instance_name: Instance name
128 @type owned_groups: set or frozenset
129 @param owned_groups: List of currently owned node groups
130 @type primary_only: boolean
131 @param primary_only: Whether to check node groups for only the primary node
134 inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)
136 if not owned_groups.issuperset(inst_groups):
137 raise errors.OpPrereqError("Instance %s's node groups changed since"
138 " locks were acquired, current groups"
139 " are '%s', owning groups '%s'; retry the"
140 " operation" %
141 (instance_name,
142 utils.CommaJoin(inst_groups),
143 utils.CommaJoin(owned_groups)),
144 errors.ECODE_STATE)
149 def _IsExclusiveStorageEnabledNode(cfg, node):
150 """Whether exclusive_storage is in effect for the given node.
152 @type cfg: L{config.ConfigWriter}
153 @param cfg: The cluster configuration
154 @type node: L{objects.Node}
155 @param node: The node
157 @return: The effective value of exclusive_storage
160 return cfg.GetNdParams(node)[constants.ND_EXCLUSIVE_STORAGE]
163 def _IsExclusiveStorageEnabledNodeName(cfg, nodename):
164 """Whether exclusive_storage is in effect for the given node.
166 @type cfg: L{config.ConfigWriter}
167 @param cfg: The cluster configuration
168 @type nodename: string
169 @param nodename: The node
171 @return: The effective value of exclusive_storage
172 @raise errors.OpPrereqError: if no node exists with the given name
175 ni = cfg.GetNodeInfo(nodename)
176 if ni is None:
177 raise errors.OpPrereqError("Invalid node name %s" % nodename,
178 errors.ECODE_NOENT)
179 return _IsExclusiveStorageEnabledNode(cfg, ni)
182 def _CopyLockList(names):
183 """Makes a copy of a list of lock names.
185 Handles L{locking.ALL_SET} correctly.
188 if names == locking.ALL_SET:
189 return locking.ALL_SET
190 else:
191 return names[:]
194 def _ReleaseLocks(lu, level, names=None, keep=None):
195 """Releases locks owned by an LU.
197 @type lu: L{LogicalUnit}
198 @param level: Lock level
199 @type names: list or None
200 @param names: Names of locks to release
201 @type keep: list or None
202 @param keep: Names of locks to retain
205 assert not (keep is not None and names is not None), \
206 "Only one of the 'names' and the 'keep' parameters can be given"
208 if names is not None:
209 should_release = names.__contains__
211 should_release = lambda name: name not in keep
213 should_release = None
215 owned = lu.owned_locks(level)
217 # Not owning any lock at this level, do nothing
224 # Determine which locks to release
226 if should_release(name):
231 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
233 # Release just some locks
234 lu.glm.release(level, names=release)
236 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
239 lu.glm.release(level)
241 assert not lu.glm.is_owned(level), "No locks should be owned"
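# Usage sketch (illustrative, not taken from a specific LU): once most node
# locks are no longer needed, an LU can either keep only its instance's nodes
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.instance.all_nodes)
# or drop an explicit set of names
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=[node_name])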
244 def _MapInstanceDisksToNodes(instances):
245 """Creates a map from (node, volume) to instance name.
247 @type instances: list of L{objects.Instance}
248 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
251 return dict(((node, vol), inst.name)
252 for inst in instances
253 for (node, vols) in inst.MapLVsByNode().items()
254 for vol in vols)
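# The resulting mapping looks like (values illustrative):
#   {("node1.example.com", "xenvg/disk0"): "inst1", ...}
# i.e. (node name, volume name) keys mapped to the owning instance's name.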
257 def _CheckOutputFields(static, dynamic, selected):
258 """Checks whether all selected fields are valid.
260 @type static: L{utils.FieldSet}
261 @param static: static fields set
262 @type dynamic: L{utils.FieldSet}
263 @param dynamic: dynamic fields set
270 delta = f.NonMatching(selected)
272 raise errors.OpPrereqError("Unknown output fields selected: %s"
273 % ",".join(delta), errors.ECODE_INVAL)
276 def _CheckParamsNotGlobal(params, glob_pars, kind, bad_levels, good_levels):
277 """Make sure that none of the given paramters is global.
279 If a global parameter is found, an L{errors.OpPrereqError} exception is
280 raised. This is used to avoid setting global parameters for individual nodes.
282 @type params: dictionary
283 @param params: Parameters to check
284 @type glob_pars: dictionary
285 @param glob_pars: Forbidden parameters
287 @param kind: Kind of parameters (e.g. "node")
288 @type bad_levels: string
289 @param bad_levels: Level(s) at which the parameters are forbidden (e.g.
291 @type good_levels: string
292 @param good_levels: Level(s) at which the parameters are allowed (e.g.
296 used_globals = glob_pars.intersection(params)
298 msg = ("The following %s parameters are global and cannot"
299 " be customized at %s level, please modify them at"
301 (kind, bad_levels, good_levels, utils.CommaJoin(used_globals)))
302 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
305 def _CheckNodeOnline(lu, node, msg=None):
306 """Ensure that a given node is online.
308 @param lu: the LU on behalf of which we make the check
309 @param node: the node to check
310 @param msg: if passed, should be a message to replace the default one
311 @raise errors.OpPrereqError: if the node is offline
315 msg = "Can't use offline node"
316 if lu.cfg.GetNodeInfo(node).offline:
317 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
320 def _CheckNodeNotDrained(lu, node):
321 """Ensure that a given node is not drained.
323 @param lu: the LU on behalf of which we make the check
324 @param node: the node to check
325 @raise errors.OpPrereqError: if the node is drained
328 if lu.cfg.GetNodeInfo(node).drained:
329 raise errors.OpPrereqError("Can't use drained node %s" % node,
333 def _CheckNodeVmCapable(lu, node):
334 """Ensure that a given node is vm capable.
336 @param lu: the LU on behalf of which we make the check
337 @param node: the node to check
338 @raise errors.OpPrereqError: if the node is not vm capable
341 if not lu.cfg.GetNodeInfo(node).vm_capable:
342 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
346 def _CheckNodeHasOS(lu, node, os_name, force_variant):
347 """Ensure that a node supports a given OS.
349 @param lu: the LU on behalf of which we make the check
350 @param node: the node to check
351 @param os_name: the OS to query about
352 @param force_variant: whether to ignore variant errors
353 @raise errors.OpPrereqError: if the node is not supporting the OS
356 result = lu.rpc.call_os_get(node, os_name)
357 result.Raise("OS '%s' not in supported OS list for node %s" %
358 (os_name, node),
359 prereq=True, ecode=errors.ECODE_INVAL)
360 if not force_variant:
361 _CheckOSVariant(result.payload, os_name)
364 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
365 """Ensure that a node has the given secondary ip.
367 @type lu: L{LogicalUnit}
368 @param lu: the LU on behalf of which we make the check
370 @param node: the node to check
371 @type secondary_ip: string
372 @param secondary_ip: the ip to check
373 @type prereq: boolean
374 @param prereq: whether to throw a prerequisite or an execute error
375 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
376 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
379 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
380 result.Raise("Failure checking secondary ip on node %s" % node,
381 prereq=prereq, ecode=errors.ECODE_ENVIRON)
382 if not result.payload:
383 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
384 " please fix and re-run this command" % secondary_ip)
385 if prereq:
386 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
387 else:
388 raise errors.OpExecError(msg)
391 def _GetClusterDomainSecret():
392 """Reads the cluster domain secret.
395 return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
399 def _CheckInstanceState(lu, instance, req_states, msg=None):
400 """Ensure that an instance is in one of the required states.
402 @param lu: the LU on behalf of which we make the check
403 @param instance: the instance to check
404 @param msg: if passed, should be a message to replace the default one
405 @raise errors.OpPrereqError: if the instance is not in the required state
409 msg = ("can't use instance from outside %s states" %
410 utils.CommaJoin(req_states))
411 if instance.admin_state not in req_states:
412 raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
413 (instance.name, instance.admin_state, msg),
416 if constants.ADMINST_UP not in req_states:
417 pnode = instance.primary_node
418 if not lu.cfg.GetNodeInfo(pnode).offline:
419 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
420 ins_l.Raise("Can't contact node %s for instance information" % pnode,
421 prereq=True, ecode=errors.ECODE_ENVIRON)
422 if instance.name in ins_l.payload:
423 raise errors.OpPrereqError("Instance %s is running, %s" %
424 (instance.name, msg), errors.ECODE_STATE)
426 lu.LogWarning("Primary node offline, ignoring check that instance"
430 def _ComputeIPolicyInstanceSpecViolation(
431 ipolicy, instance_spec, disk_template,
432 _compute_fn=_ComputeIPolicySpecViolation):
433 """Compute if an instance spec meets the specs of the ipolicy.
436 @param ipolicy: The ipolicy to verify against
437 @type instance_spec: dict
438 @param instance_spec: The instance spec to verify
439 @type disk_template: string
440 @param disk_template: the disk template of the instance
441 @param _compute_fn: The function to verify ipolicy (unittest only)
442 @see: L{_ComputeIPolicySpecViolation}
445 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
446 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
447 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
448 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
449 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
450 spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)
452 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
453 disk_sizes, spindle_use, disk_template)
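# Illustrative call (values are examples only): checking a prospective spec
# against a policy before creating an instance:
#   spec = {constants.ISPEC_MEM_SIZE: 512, constants.ISPEC_CPU_COUNT: 1,
#           constants.ISPEC_DISK_COUNT: 1, constants.ISPEC_DISK_SIZE: [1024],
#           constants.ISPEC_NIC_COUNT: 1}
#   violations = _ComputeIPolicyInstanceSpecViolation(ipolicy, spec,
#                                                     constants.DT_PLAIN)
# An empty result means the spec satisfies the policy.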
456 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
458 _compute_fn=_ComputeIPolicyInstanceViolation):
459 """Compute if instance meets the specs of the new target group.
461 @param ipolicy: The ipolicy to verify
462 @param instance: The instance object to verify
463 @param current_group: The current group of the instance
464 @param target_group: The new group of the instance
465 @type cfg: L{config.ConfigWriter}
466 @param cfg: Cluster configuration
467 @param _compute_fn: The function to verify ipolicy (unittest only)
468 @see: L{_ComputeIPolicySpecViolation}
471 if current_group == target_group:
474 return _compute_fn(ipolicy, instance, cfg)
477 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, cfg, ignore=False,
478 _compute_fn=_ComputeIPolicyNodeViolation):
479 """Checks that the target node is correct in terms of instance policy.
481 @param ipolicy: The ipolicy to verify
482 @param instance: The instance object to verify
483 @param node: The new node to relocate
484 @type cfg: L{config.ConfigWriter}
485 @param cfg: Cluster configuration
486 @param ignore: Ignore violations of the ipolicy
487 @param _compute_fn: The function to verify ipolicy (unittest only)
488 @see: L{_ComputeIPolicySpecViolation}
491 primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
492 res = _compute_fn(ipolicy, instance, primary_node.group, node.group, cfg)
495 msg = ("Instance does not meet target node group's (%s) instance"
496 " policy: %s") % (node.group, utils.CommaJoin(res))
500 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
503 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
504 minmem, maxmem, vcpus, nics, disk_template, disks,
505 bep, hvp, hypervisor_name, tags):
506 """Builds instance related env variables for hooks
508 This builds the hook environment from individual variables.
511 @param name: the name of the instance
512 @type primary_node: string
513 @param primary_node: the name of the instance's primary node
514 @type secondary_nodes: list
515 @param secondary_nodes: list of secondary nodes as strings
516 @type os_type: string
517 @param os_type: the name of the instance's OS
519 @param status: the desired status of the instance
521 @param minmem: the minimum memory size of the instance
523 @param maxmem: the maximum memory size of the instance
525 @param vcpus: the count of VCPUs the instance has
527 @param nics: list of tuples (name, uuid, ip, mac, mode, link, net, netinfo)
528 representing the NICs the instance has
529 @type disk_template: string
530 @param disk_template: the disk template of the instance
532 @param disks: list of tuples (name, uuid, size, mode)
534 @param bep: the backend parameters for the instance
536 @param hvp: the hypervisor parameters for the instance
537 @type hypervisor_name: string
538 @param hypervisor_name: the hypervisor for the instance
540 @param tags: list of instance tags as strings
542 @return: the hook environment for this instance
547 "INSTANCE_NAME": name,
548 "INSTANCE_PRIMARY": primary_node,
549 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
550 "INSTANCE_OS_TYPE": os_type,
551 "INSTANCE_STATUS": status,
552 "INSTANCE_MINMEM": minmem,
553 "INSTANCE_MAXMEM": maxmem,
554 # TODO(2.9) remove deprecated "memory" value
555 "INSTANCE_MEMORY": maxmem,
556 "INSTANCE_VCPUS": vcpus,
557 "INSTANCE_DISK_TEMPLATE": disk_template,
558 "INSTANCE_HYPERVISOR": hypervisor_name,
561 nic_count = len(nics)
562 for idx, (name, _, ip, mac, mode, link, net, netinfo) in enumerate(nics):
565 env["INSTANCE_NIC%d_NAME" % idx] = name
566 env["INSTANCE_NIC%d_IP" % idx] = ip
567 env["INSTANCE_NIC%d_MAC" % idx] = mac
568 env["INSTANCE_NIC%d_MODE" % idx] = mode
569 env["INSTANCE_NIC%d_LINK" % idx] = link
571 nobj = objects.Network.FromDict(netinfo)
572 env.update(nobj.HooksDict("INSTANCE_NIC%d_" % idx))
574 # FIXME: broken network reference: the instance NIC specifies a
575 # network, but the relevant network entry was not in the config. This
576 # should be made impossible.
577 env["INSTANCE_NIC%d_NETWORK_NAME" % idx] = net
578 if mode == constants.NIC_MODE_BRIDGED:
579 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
583 env["INSTANCE_NIC_COUNT"] = nic_count
586 disk_count = len(disks)
587 for idx, (name, size, mode) in enumerate(disks):
588 env["INSTANCE_DISK%d_NAME" % idx] = name
589 env["INSTANCE_DISK%d_SIZE" % idx] = size
590 env["INSTANCE_DISK%d_MODE" % idx] = mode
594 env["INSTANCE_DISK_COUNT"] = disk_count
599 env["INSTANCE_TAGS"] = " ".join(tags)
601 for source, kind in [(bep, "BE"), (hvp, "HV")]:
602 for key, value in source.items():
603 env["INSTANCE_%s_%s" % (kind, key)] = value
605 return env
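# The resulting variable names follow the patterns built above, e.g.
# INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC0_MAC, INSTANCE_DISK0_SIZE and
# INSTANCE_BE_<param>/INSTANCE_HV_<param> for backend/hypervisor parameters.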
608 def _NICToTuple(lu, nic):
609 """Build a tuple of nic information.
611 @type lu: L{LogicalUnit}
612 @param lu: the logical unit on whose behalf we execute
613 @type nic: L{objects.NIC}
614 @param nic: nic to convert to hooks tuple
617 cluster = lu.cfg.GetClusterInfo()
618 filled_params = cluster.SimpleFillNIC(nic.nicparams)
619 mode = filled_params[constants.NIC_MODE]
620 link = filled_params[constants.NIC_LINK]
623 nobj = lu.cfg.GetNetwork(nic.network)
624 netinfo = objects.Network.ToDict(nobj)
625 return (nic.name, nic.uuid, nic.ip, nic.mac, mode, link, nic.network, netinfo)
628 def _NICListToTuple(lu, nics):
629 """Build a list of nic information tuples.
631 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
632 value in LUInstanceQueryData.
634 @type lu: L{LogicalUnit}
635 @param lu: the logical unit on whose behalf we execute
636 @type nics: list of L{objects.NIC}
637 @param nics: list of nics to convert to hooks tuples
642 hooks_nics.append(_NICToTuple(lu, nic))
646 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
647 """Builds instance related env variables for hooks from an object.
649 @type lu: L{LogicalUnit}
650 @param lu: the logical unit on whose behalf we execute
651 @type instance: L{objects.Instance}
652 @param instance: the instance for which we should build the
655 @param override: dictionary with key/values that will override
658 @return: the hook environment dictionary
661 cluster = lu.cfg.GetClusterInfo()
662 bep = cluster.FillBE(instance)
663 hvp = cluster.FillHV(instance)
665 "name": instance.name,
666 "primary_node": instance.primary_node,
667 "secondary_nodes": instance.secondary_nodes,
668 "os_type": instance.os,
669 "status": instance.admin_state,
670 "maxmem": bep[constants.BE_MAXMEM],
671 "minmem": bep[constants.BE_MINMEM],
672 "vcpus": bep[constants.BE_VCPUS],
673 "nics": _NICListToTuple(lu, instance.nics),
674 "disk_template": instance.disk_template,
675 "disks": [(disk.name, disk.size, disk.mode)
676 for disk in instance.disks],
679 "hypervisor_name": instance.hypervisor,
680 "tags": instance.tags,
683 args.update(override)
684 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
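# Typical use from an LU's BuildHooksEnv() (illustrative sketch):
#   return _BuildInstanceHookEnvByObject(self, self.instance)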
687 def _DecideSelfPromotion(lu, exceptions=None):
688 """Decide whether I should promote myself as a master candidate.
691 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
692 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
693 # the new node will increase mc_max with one, so:
694 mc_should = min(mc_should + 1, cp_size)
695 return mc_now < mc_should
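# Worked example (illustrative numbers): with candidate_pool_size=10, mc_now=3
# and mc_should=4 reported by GetMasterCandidateStats(), the adjusted value is
# min(4 + 1, 10) = 5, so 3 < 5 and the node promotes itself.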
698 def _CheckNicsBridgesExist(lu, target_nics, target_node):
699 """Check that the bridges needed by a list of nics exist.
702 cluster = lu.cfg.GetClusterInfo()
703 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
704 brlist = [params[constants.NIC_LINK] for params in paramslist
705 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
707 result = lu.rpc.call_bridges_exist(target_node, brlist)
708 result.Raise("Error checking bridges on destination node '%s'" %
709 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
712 def _CheckInstanceBridgesExist(lu, instance, node=None):
713 """Check that the bridges needed by an instance exist.
717 node = instance.primary_node
718 _CheckNicsBridgesExist(lu, instance.nics, node)
721 def _CheckOSVariant(os_obj, name):
722 """Check whether an OS name conforms to the os variants specification.
724 @type os_obj: L{objects.OS}
725 @param os_obj: OS object to check
727 @param name: OS name passed by the user, to check for validity
730 variant = objects.OS.GetVariant(name)
731 if not os_obj.supported_variants:
733 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
734 " passed)" % (os_obj.name, variant),
738 raise errors.OpPrereqError("OS name must include a variant",
741 if variant not in os_obj.supported_variants:
742 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
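# Example (illustrative): for the name "debian-etch+default", GetVariant()
# yields "default", which must appear in os_obj.supported_variants; a bare
# "debian-etch" is rejected when the OS declares variants.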
745 def _GetNodeInstancesInner(cfg, fn):
746 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
749 def _GetNodeInstances(cfg, node_name):
750 """Returns a list of all primary and secondary instances on a node.
754 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
757 def _GetNodePrimaryInstances(cfg, node_name):
758 """Returns primary instances on a node.
761 return _GetNodeInstancesInner(cfg,
762 lambda inst: node_name == inst.primary_node)
765 def _GetNodeSecondaryInstances(cfg, node_name):
766 """Returns secondary instances on a node.
769 return _GetNodeInstancesInner(cfg,
770 lambda inst: node_name in inst.secondary_nodes)
773 def _GetStorageTypeArgs(cfg, storage_type):
774 """Returns the arguments for a storage type.
777 # Special case for file storage
778 if storage_type == constants.ST_FILE:
779 # storage.FileStorage wants a list of storage directories
780 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
785 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
788 for dev in instance.disks:
789 cfg.SetDiskID(dev, node_name)
791 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
793 result.Raise("Failed to get disk status from node %s" % node_name,
794 prereq=prereq, ecode=errors.ECODE_ENVIRON)
796 for idx, bdev_status in enumerate(result.payload):
797 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
803 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
804 """Check the sanity of iallocator and node arguments and use the
805 cluster-wide iallocator if appropriate.
807 Check that at most one of (iallocator, node) is specified. If none is
808 specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
809 then the LU's opcode's iallocator slot is filled with the cluster-wide
812 @type iallocator_slot: string
813 @param iallocator_slot: the name of the opcode iallocator slot
814 @type node_slot: string
815 @param node_slot: the name of the opcode target node slot
818 node = getattr(lu.op, node_slot, None)
819 ialloc = getattr(lu.op, iallocator_slot, None)
823 if node is not None and ialloc is not None:
824 raise errors.OpPrereqError("Do not specify both, iallocator and node",
826 elif ((node is None and ialloc is None) or
827 ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
828 default_iallocator = lu.cfg.GetDefaultIAllocator()
829 if default_iallocator:
830 setattr(lu.op, iallocator_slot, default_iallocator)
832 raise errors.OpPrereqError("No iallocator or node given and no"
833 " cluster-wide default iallocator found;"
834 " please specify either an iallocator or a"
835 " node, or set a cluster-wide default"
836 " iallocator", errors.ECODE_INVAL)
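# Behaviour summary (sketch of the cases above):
#   both node and iallocator given         -> OpPrereqError
#   neither given, or the default shortcut -> opcode's iallocator slot filled
#                                             with the cluster-wide default
#                                             (error if none is configured)
#   exactly one of the two given           -> left untouched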
839 def _GetDefaultIAllocator(cfg, ialloc):
840 """Decides on which iallocator to use.
842 @type cfg: L{config.ConfigWriter}
843 @param cfg: Cluster configuration object
844 @type ialloc: string or None
845 @param ialloc: Iallocator specified in opcode
847 @return: Iallocator name
851 # Use default iallocator
852 ialloc = cfg.GetDefaultIAllocator()
855 raise errors.OpPrereqError("No iallocator was specified, neither in the"
856 " opcode nor as a cluster-wide default",
862 def _CheckHostnameSane(lu, name):
863 """Ensures that a given hostname resolves to a 'sane' name.
865 The given name is required to be a prefix of the resolved hostname,
866 to prevent accidental mismatches.
868 @param lu: the logical unit on behalf of which we're checking
869 @param name: the name we should resolve and check
870 @return: the resolved hostname object
873 hostname = netutils.GetHostname(name=name)
874 if hostname.name != name:
875 lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
876 if not utils.MatchNameComponent(name, [hostname.name]):
877 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
878 " same as given hostname '%s'") %
879 (hostname.name, name), errors.ECODE_INVAL)
880 return hostname
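# Example (illustrative): a request for "inst7" that resolves to
# "inst7.example.com" is accepted; a resolution to "other.example.com" raises
# OpPrereqError, since the given name must match a prefix component of the
# resolved hostname.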
883 class LUGroupVerifyDisks(NoHooksLU):
884 """Verifies the status of all disks in a node group.
889 def ExpandNames(self):
890 # Raises errors.OpPrereqError on its own if group can't be found
891 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
893 self.share_locks = _ShareAll()
894 self.needed_locks = {
895 locking.LEVEL_INSTANCE: [],
896 locking.LEVEL_NODEGROUP: [],
897 locking.LEVEL_NODE: [],
899 # This opcode acquires all node locks in a group. LUClusterVerifyDisks
900 # starts one instance of this opcode for every group, which means all
901 # nodes will be locked for a short amount of time, so it's better to
902 # acquire the node allocation lock as well.
903 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
906 def DeclareLocks(self, level):
907 if level == locking.LEVEL_INSTANCE:
908 assert not self.needed_locks[locking.LEVEL_INSTANCE]
910 # Lock instances optimistically, needs verification once node and group
911 # locks have been acquired
912 self.needed_locks[locking.LEVEL_INSTANCE] = \
913 self.cfg.GetNodeGroupInstances(self.group_uuid)
915 elif level == locking.LEVEL_NODEGROUP:
916 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
918 self.needed_locks[locking.LEVEL_NODEGROUP] = \
919 set([self.group_uuid] +
920 # Lock all groups used by instances optimistically; this requires
921 # going via the node before it's locked, requiring verification
924 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
925 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
927 elif level == locking.LEVEL_NODE:
928 # This will only lock the nodes in the group to be verified which contain
929 # actual instances
930 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
931 self._LockInstancesNodes()
933 # Lock all nodes in group to be verified
934 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
935 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
936 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
938 def CheckPrereq(self):
939 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
940 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
941 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
943 assert self.group_uuid in owned_groups
945 # Check if locked instances are still correct
946 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
948 # Get instance information
949 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
951 # Check if node groups for locked instances are still correct
952 _CheckInstancesNodeGroups(self.cfg, self.instances,
953 owned_groups, owned_nodes, self.group_uuid)
955 def Exec(self, feedback_fn):
956 """Verify integrity of cluster disks.
958 @rtype: tuple of three items
959 @return: a tuple of (dict of node-to-node_error, list of instances
960 which need activate-disks, dict of instance: (node, volume) for
961 missing volumes)
965 res_instances = set()
968 nv_dict = _MapInstanceDisksToNodes(
969 [inst for inst in self.instances.values()
970 if inst.admin_state == constants.ADMINST_UP])
973 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
974 set(self.cfg.GetVmCapableNodeList()))
976 node_lvs = self.rpc.call_lv_list(nodes, [])
978 for (node, node_res) in node_lvs.items():
982 msg = node_res.fail_msg
984 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
985 res_nodes[node] = msg
988 for lv_name, (_, _, lv_online) in node_res.payload.items():
989 inst = nv_dict.pop((node, lv_name), None)
990 if not (lv_online or inst is None):
991 res_instances.add(inst)
993 # any leftover items in nv_dict are missing LVs, let's arrange the data
995 for key, inst in nv_dict.iteritems():
996 res_missing.setdefault(inst, []).append(list(key))
998 return (res_nodes, list(res_instances), res_missing)
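# Example return value (all names illustrative):
#   ({"node1": "rpc failure ..."},           # nodes that could not be queried
#    ["inst2"],                              # instances needing activate-disks
#    {"inst3": [["node2", "xenvg/disk1"]]})  # instances with missing volumes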
1001 def _WaitForSync(lu, instance, disks=None, oneshot=False):
1002 """Sleep and poll for an instance's disks to sync.
1005 if not instance.disks or disks is not None and not disks:
1008 disks = _ExpandCheckDisks(instance, disks)
1011 lu.LogInfo("Waiting for instance %s to sync disks", instance.name)
1013 node = instance.primary_node
1016 lu.cfg.SetDiskID(dev, node)
1018 # TODO: Convert to utils.Retry
1021 degr_retries = 10 # in seconds, as we sleep 1 second each time
1025 cumul_degraded = False
1026 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
1027 msg = rstats.fail_msg
1029 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1032 raise errors.RemoteError("Can't contact node %s for mirror data,"
1033 " aborting." % node)
1036 rstats = rstats.payload
1038 for i, mstat in enumerate(rstats):
1040 lu.LogWarning("Can't compute data for node %s/%s",
1041 node, disks[i].iv_name)
1044 cumul_degraded = (cumul_degraded or
1045 (mstat.is_degraded and mstat.sync_percent is None))
1046 if mstat.sync_percent is not None:
1048 if mstat.estimated_time is not None:
1049 rem_time = ("%s remaining (estimated)" %
1050 utils.FormatSeconds(mstat.estimated_time))
1051 max_time = mstat.estimated_time
1053 rem_time = "no time estimate"
1054 lu.LogInfo("- device %s: %5.2f%% done, %s",
1055 disks[i].iv_name, mstat.sync_percent, rem_time)
1057 # if we're done but degraded, let's do a few small retries, to
1058 # make sure we see a stable and not transient situation; therefore
1059 # we force restart of the loop
1060 if (done or oneshot) and cumul_degraded and degr_retries > 0:
1061 logging.info("Degraded disks found, %d retries left", degr_retries)
1069 time.sleep(min(60, max_time))
1072 lu.LogInfo("Instance %s's disks are in sync", instance.name)
1074 return not cumul_degraded
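# The boolean result is True when no disk was still degraded at the end of the
# wait loop, False otherwise.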
1077 def _BlockdevFind(lu, node, dev, instance):
1078 """Wrapper around call_blockdev_find to annotate diskparams.
1080 @param lu: A reference to the lu object
1081 @param node: The node to call out
1082 @param dev: The device to find
1083 @param instance: The instance object the device belongs to
1084 @return: The result of the rpc call
1087 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
1088 return lu.rpc.call_blockdev_find(node, disk)
1091 def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
1092 """Wrapper around L{_CheckDiskConsistencyInner}.
1095 (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
1096 return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
1097 ldisk=ldisk)
1100 def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
1102 """Check that mirrors are not degraded.
1104 @attention: The device has to be annotated already.
1106 The ldisk parameter, if True, will change the test from the
1107 is_degraded attribute (which represents overall non-ok status for
1108 the device(s)) to the ldisk (representing the local storage status).
1111 lu.cfg.SetDiskID(dev, node)
1115 if on_primary or dev.AssembleOnSecondary():
1116 rstats = lu.rpc.call_blockdev_find(node, dev)
1117 msg = rstats.fail_msg
1119 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1121 elif not rstats.payload:
1122 lu.LogWarning("Can't find disk on node %s", node)
1126 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
1128 result = result and not rstats.payload.is_degraded
1131 for child in dev.children:
1132 result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
1138 class LUOobCommand(NoHooksLU):
1139 """Logical unit for OOB handling.
1143 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
1145 def ExpandNames(self):
1146 """Gather locks we need.
1149 if self.op.node_names:
1150 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
1151 lock_names = self.op.node_names
1153 lock_names = locking.ALL_SET
1155 self.needed_locks = {
1156 locking.LEVEL_NODE: lock_names,
1159 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
1161 if not self.op.node_names:
1162 # Acquire node allocation lock only if all nodes are affected
1163 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
1165 def CheckPrereq(self):
1166 """Check prerequisites.
1169 - the node exists in the configuration
1172 Any errors are signaled by raising errors.OpPrereqError.
1176 self.master_node = self.cfg.GetMasterNode()
1178 assert self.op.power_delay >= 0.0
1180 if self.op.node_names:
1181 if (self.op.command in self._SKIP_MASTER and
1182 self.master_node in self.op.node_names):
1183 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
1184 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
1186 if master_oob_handler:
1187 additional_text = ("run '%s %s %s' if you want to operate on the"
1188 " master regardless") % (master_oob_handler,
1192 additional_text = "it does not support out-of-band operations"
1194 raise errors.OpPrereqError(("Operating on the master node %s is not"
1195 " allowed for %s; %s") %
1196 (self.master_node, self.op.command,
1197 additional_text), errors.ECODE_INVAL)
1199 self.op.node_names = self.cfg.GetNodeList()
1200 if self.op.command in self._SKIP_MASTER:
1201 self.op.node_names.remove(self.master_node)
1203 if self.op.command in self._SKIP_MASTER:
1204 assert self.master_node not in self.op.node_names
1206 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
1208 raise errors.OpPrereqError("Node %s not found" % node_name,
1211 self.nodes.append(node)
1213 if (not self.op.ignore_status and
1214 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
1215 raise errors.OpPrereqError(("Cannot power off node %s because it is"
1216 " not marked offline") % node_name,
1219 def Exec(self, feedback_fn):
1220 """Execute OOB and return result if we expect any.
1223 master_node = self.master_node
1226 for idx, node in enumerate(utils.NiceSort(self.nodes,
1227 key=lambda node: node.name)):
1228 node_entry = [(constants.RS_NORMAL, node.name)]
1229 ret.append(node_entry)
1231 oob_program = _SupportsOob(self.cfg, node)
1234 node_entry.append((constants.RS_UNAVAIL, None))
1237 logging.info("Executing out-of-band command '%s' using '%s' on %s",
1238 self.op.command, oob_program, node.name)
1239 result = self.rpc.call_run_oob(master_node, oob_program,
1240 self.op.command, node.name,
1244 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
1245 node.name, result.fail_msg)
1246 node_entry.append((constants.RS_NODATA, None))
1249 self._CheckPayload(result)
1250 except errors.OpExecError, err:
1251 self.LogWarning("Payload returned by node '%s' is not valid: %s",
1253 node_entry.append((constants.RS_NODATA, None))
1255 if self.op.command == constants.OOB_HEALTH:
1256 # For health we should log important events
1257 for item, status in result.payload:
1258 if status in [constants.OOB_STATUS_WARNING,
1259 constants.OOB_STATUS_CRITICAL]:
1260 self.LogWarning("Item '%s' on node '%s' has status '%s'",
1261 item, node.name, status)
1263 if self.op.command == constants.OOB_POWER_ON:
1265 elif self.op.command == constants.OOB_POWER_OFF:
1266 node.powered = False
1267 elif self.op.command == constants.OOB_POWER_STATUS:
1268 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
1269 if powered != node.powered:
1270 logging.warning(("Recorded power state (%s) of node '%s' does not"
1271 " match actual power state (%s)"), node.powered,
1274 # For configuration changing commands we should update the node
1275 if self.op.command in (constants.OOB_POWER_ON,
1276 constants.OOB_POWER_OFF):
1277 self.cfg.Update(node, feedback_fn)
1279 node_entry.append((constants.RS_NORMAL, result.payload))
1281 if (self.op.command == constants.OOB_POWER_ON and
1282 idx < len(self.nodes) - 1):
1283 time.sleep(self.op.power_delay)
1287 def _CheckPayload(self, result):
1288 """Checks if the payload is valid.
1290 @param result: RPC result
1291 @raises errors.OpExecError: If payload is not valid
1295 if self.op.command == constants.OOB_HEALTH:
1296 if not isinstance(result.payload, list):
1297 errs.append("command 'health' is expected to return a list but got %s" %
1298 type(result.payload))
1300 for item, status in result.payload:
1301 if status not in constants.OOB_STATUSES:
1302 errs.append("health item '%s' has invalid status '%s'" %
1305 if self.op.command == constants.OOB_POWER_STATUS:
1306 if not isinstance(result.payload, dict):
1307 errs.append("power-status is expected to return a dict but got %s" %
1308 type(result.payload))
1310 if self.op.command in [
1311 constants.OOB_POWER_ON,
1312 constants.OOB_POWER_OFF,
1313 constants.OOB_POWER_CYCLE,
1315 if result.payload is not None:
1316 errs.append("%s is expected to not return payload but got '%s'" %
1317 (self.op.command, result.payload))
1320 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
1321 utils.CommaJoin(errs))
1324 class _OsQuery(_QueryBase):
1325 FIELDS = query.OS_FIELDS
1327 def ExpandNames(self, lu):
1328 # Lock all nodes in shared mode
1329 # Temporary removal of locks, should be reverted later
1330 # TODO: reintroduce locks when they are lighter-weight
1331 lu.needed_locks = {}
1332 #self.share_locks[locking.LEVEL_NODE] = 1
1333 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1335 # The following variables interact with _QueryBase._GetNames
1337 self.wanted = self.names
1339 self.wanted = locking.ALL_SET
1341 self.do_locking = self.use_locking
1343 def DeclareLocks(self, lu, level):
1347 def _DiagnoseByOS(rlist):
1348 """Remaps a per-node return list into a per-os per-node dictionary
1350 @param rlist: a map with node names as keys and OS objects as values
1353 @return: a dictionary with osnames as keys and as value another
1354 map, with nodes as keys and tuples of (path, status, diagnose,
1355 variants, parameters, api_versions) as values, eg::
1357 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
1358 (/srv/..., False, "invalid api")],
1359 "node2": [(/srv/..., True, "", [], [])]}
1364 # we build here the list of nodes that didn't fail the RPC (at RPC
1365 # level), so that nodes with a non-responding node daemon don't
1366 # make all OSes invalid
1367 good_nodes = [node_name for node_name in rlist
1368 if not rlist[node_name].fail_msg]
1369 for node_name, nr in rlist.items():
1370 if nr.fail_msg or not nr.payload:
1372 for (name, path, status, diagnose, variants,
1373 params, api_versions) in nr.payload:
1374 if name not in all_os:
1375 # build a list of nodes for this os containing empty lists
1376 # for each node in node_list
1378 for nname in good_nodes:
1379 all_os[name][nname] = []
1380 # convert params from [name, help] to (name, help)
1381 params = [tuple(v) for v in params]
1382 all_os[name][node_name].append((path, status, diagnose,
1383 variants, params, api_versions))
1386 def _GetQueryData(self, lu):
1387 """Computes the list of OSes and their attributes.
1390 # Locking is not used
1391 assert not (compat.any(lu.glm.is_owned(level)
1392 for level in locking.LEVELS
1393 if level != locking.LEVEL_CLUSTER) or
1394 self.do_locking or self.use_locking)
1396 valid_nodes = [node.name
1397 for node in lu.cfg.GetAllNodesInfo().values()
1398 if not node.offline and node.vm_capable]
1399 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
1400 cluster = lu.cfg.GetClusterInfo()
1404 for (os_name, os_data) in pol.items():
1405 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
1406 hidden=(os_name in cluster.hidden_os),
1407 blacklisted=(os_name in cluster.blacklisted_os))
1411 api_versions = set()
1413 for idx, osl in enumerate(os_data.values()):
1414 info.valid = bool(info.valid and osl and osl[0][1])
1418 (node_variants, node_params, node_api) = osl[0][3:6]
1421 variants.update(node_variants)
1422 parameters.update(node_params)
1423 api_versions.update(node_api)
1425 # Filter out inconsistent values
1426 variants.intersection_update(node_variants)
1427 parameters.intersection_update(node_params)
1428 api_versions.intersection_update(node_api)
1430 info.variants = list(variants)
1431 info.parameters = list(parameters)
1432 info.api_versions = list(api_versions)
1434 data[os_name] = info
1436 # Prepare data in requested order
1437 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
1441 class LUOsDiagnose(NoHooksLU):
1442 """Logical unit for OS diagnose/query.
1448 def _BuildFilter(fields, names):
1449 """Builds a filter for querying OSes.
1452 name_filter = qlang.MakeSimpleFilter("name", names)
1454 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
1455 # respective field is not requested
1456 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
1457 for fname in ["hidden", "blacklisted"]
1458 if fname not in fields]
1459 if "valid" not in fields:
1460 status_filter.append([qlang.OP_TRUE, "valid"])
1463 status_filter.insert(0, qlang.OP_AND)
1465 status_filter = None
1467 if name_filter and status_filter:
1468 return [qlang.OP_AND, name_filter, status_filter]
1472 return status_filter
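# Example (illustrative): _BuildFilter(["name", "valid"], ["debian-etch"])
# yields a filter requiring name == "debian-etch" plus "not hidden" and
# "not blacklisted"; no validity clause is added because "valid" itself was
# requested as an output field.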
1474 def CheckArguments(self):
1475 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
1476 self.op.output_fields, False)
1478 def ExpandNames(self):
1479 self.oq.ExpandNames(self)
1481 def Exec(self, feedback_fn):
1482 return self.oq.OldStyleQuery(self)
1485 class _ExtStorageQuery(_QueryBase):
1486 FIELDS = query.EXTSTORAGE_FIELDS
1488 def ExpandNames(self, lu):
1489 # Lock all nodes in shared mode
1490 # Temporary removal of locks, should be reverted later
1491 # TODO: reintroduce locks when they are lighter-weight
1492 lu.needed_locks = {}
1493 #self.share_locks[locking.LEVEL_NODE] = 1
1494 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1496 # The following variables interact with _QueryBase._GetNames
1498 self.wanted = self.names
1500 self.wanted = locking.ALL_SET
1502 self.do_locking = self.use_locking
1504 def DeclareLocks(self, lu, level):
1508 def _DiagnoseByProvider(rlist):
1509 """Remaps a per-node return list into a per-provider per-node dictionary
1511 @param rlist: a map with node names as keys and ExtStorage objects as values
1514 @return: a dictionary with extstorage providers as keys and as
1515 value another map, with nodes as keys and tuples of
1516 (path, status, diagnose, parameters) as values, eg::
1518 {"provider1": {"node1": [(/usr/lib/..., True, "", [])]
1519 "node2": [(/srv/..., False, "missing file")]
1520 "node3": [(/srv/..., True, "", [])]
1525 # we build here the list of nodes that didn't fail the RPC (at RPC
1526 # level), so that nodes with a non-responding node daemon don't
1527 # make all OSes invalid
1528 good_nodes = [node_name for node_name in rlist
1529 if not rlist[node_name].fail_msg]
1530 for node_name, nr in rlist.items():
1531 if nr.fail_msg or not nr.payload:
1533 for (name, path, status, diagnose, params) in nr.payload:
1534 if name not in all_es:
1535 # build a list of nodes for this os containing empty lists
1536 # for each node in node_list
1538 for nname in good_nodes:
1539 all_es[name][nname] = []
1540 # convert params from [name, help] to (name, help)
1541 params = [tuple(v) for v in params]
1542 all_es[name][node_name].append((path, status, diagnose, params))
1545 def _GetQueryData(self, lu):
1546 """Computes the list of ExtStorage providers and their attributes.
1549 # Locking is not used
1550 assert not (compat.any(lu.glm.is_owned(level)
1551 for level in locking.LEVELS
1552 if level != locking.LEVEL_CLUSTER) or
1553 self.do_locking or self.use_locking)
1555 valid_nodes = [node.name
1556 for node in lu.cfg.GetAllNodesInfo().values()
1557 if not node.offline and node.vm_capable]
1558 pol = self._DiagnoseByProvider(lu.rpc.call_extstorage_diagnose(valid_nodes))
1562 nodegroup_list = lu.cfg.GetNodeGroupList()
1564 for (es_name, es_data) in pol.items():
1565 # For every provider compute the nodegroup validity.
1566 # To do this we need to check the validity of each node in es_data
1567 # and then construct the corresponding nodegroup dict:
1568 # { nodegroup1: status
1569 # nodegroup2: status
1572 for nodegroup in nodegroup_list:
1573 ndgrp = lu.cfg.GetNodeGroup(nodegroup)
1575 nodegroup_nodes = ndgrp.members
1576 nodegroup_name = ndgrp.name
1579 for node in nodegroup_nodes:
1580 if node in valid_nodes:
1581 if es_data[node] != []:
1582 node_status = es_data[node][0][1]
1583 node_statuses.append(node_status)
1585 node_statuses.append(False)
1587 if False in node_statuses:
1588 ndgrp_data[nodegroup_name] = False
1590 ndgrp_data[nodegroup_name] = True
1592 # Compute the provider's parameters
1594 for idx, esl in enumerate(es_data.values()):
1595 valid = bool(esl and esl[0][1])
1599 node_params = esl[0][3]
1602 parameters.update(node_params)
1604 # Filter out inconsistent values
1605 parameters.intersection_update(node_params)
1607 params = list(parameters)
1609 # Now fill all the info for this provider
1610 info = query.ExtStorageInfo(name=es_name, node_status=es_data,
1611 nodegroup_status=ndgrp_data,
1614 data[es_name] = info
1616 # Prepare data in requested order
1617 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
1621 class LUExtStorageDiagnose(NoHooksLU):
1622 """Logical unit for ExtStorage diagnose/query.
1627 def CheckArguments(self):
1628 self.eq = _ExtStorageQuery(qlang.MakeSimpleFilter("name", self.op.names),
1629 self.op.output_fields, False)
1631 def ExpandNames(self):
1632 self.eq.ExpandNames(self)
1634 def Exec(self, feedback_fn):
1635 return self.eq.OldStyleQuery(self)
1638 class LUNodeRemove(LogicalUnit):
1639 """Logical unit for removing a node.
1642 HPATH = "node-remove"
1643 HTYPE = constants.HTYPE_NODE
1645 def BuildHooksEnv(self):
1650 "OP_TARGET": self.op.node_name,
1651 "NODE_NAME": self.op.node_name,
1654 def BuildHooksNodes(self):
1655 """Build hooks nodes.
1657 This doesn't run on the target node in the pre phase as a failed
1658 node would then be impossible to remove.
1661 all_nodes = self.cfg.GetNodeList()
1663 all_nodes.remove(self.op.node_name)
1666 return (all_nodes, all_nodes)
1668 def CheckPrereq(self):
1669 """Check prerequisites.
1672 - the node exists in the configuration
1673 - it does not have primary or secondary instances
1674 - it's not the master
1676 Any errors are signaled by raising errors.OpPrereqError.
1679 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
1680 node = self.cfg.GetNodeInfo(self.op.node_name)
1681 assert node is not None
1683 masternode = self.cfg.GetMasterNode()
1684 if node.name == masternode:
1685 raise errors.OpPrereqError("Node is the master node, failover to another"
1686 " node is required", errors.ECODE_INVAL)
1688 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
1689 if node.name in instance.all_nodes:
1690 raise errors.OpPrereqError("Instance %s is still running on the node,"
1691 " please remove first" % instance_name,
1693 self.op.node_name = node.name
1696 def Exec(self, feedback_fn):
1697 """Removes the node from the cluster.
1701 logging.info("Stopping the node daemon and removing configs from node %s",
1704 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1706 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
1709 # Promote nodes to master candidate as needed
1710 _AdjustCandidatePool(self, exceptions=[node.name])
1711 self.context.RemoveNode(node.name)
1713 # Run post hooks on the node before it's removed
1714 _RunPostHook(self, node.name)
1716 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
1717 msg = result.fail_msg
1719 self.LogWarning("Errors encountered on the remote node while leaving"
1720 " the cluster: %s", msg)
1722 # Remove node from our /etc/hosts
1723 if self.cfg.GetClusterInfo().modify_etc_hosts:
1724 master_node = self.cfg.GetMasterNode()
1725 result = self.rpc.call_etc_hosts_modify(master_node,
1726 constants.ETC_HOSTS_REMOVE,
1728 result.Raise("Can't update hosts file with new host data")
1729 _RedistributeAncillaryFiles(self)
1732 class _NodeQuery(_QueryBase):
1733 FIELDS = query.NODE_FIELDS
1735 def ExpandNames(self, lu):
1736 lu.needed_locks = {}
1737 lu.share_locks = _ShareAll()
1740 self.wanted = _GetWantedNodes(lu, self.names)
1742 self.wanted = locking.ALL_SET
1744 self.do_locking = (self.use_locking and
1745 query.NQ_LIVE in self.requested_data)
1748 # If any non-static field is requested we need to lock the nodes
1749 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
1750 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
1752 def DeclareLocks(self, lu, level):
1755 def _GetQueryData(self, lu):
1756 """Computes the list of nodes and their attributes.
1759 all_info = lu.cfg.GetAllNodesInfo()
1761 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
1763 # Gather data as requested
1764 if query.NQ_LIVE in self.requested_data:
1765 # filter out non-vm_capable nodes
1766 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
1768 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, toquery_nodes)
1769 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
1770 [lu.cfg.GetHypervisorType()], es_flags)
1771 live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
1772 for (name, nresult) in node_data.items()
1773 if not nresult.fail_msg and nresult.payload)
1777 if query.NQ_INST in self.requested_data:
1778 node_to_primary = dict([(name, set()) for name in nodenames])
1779 node_to_secondary = dict([(name, set()) for name in nodenames])
1781 inst_data = lu.cfg.GetAllInstancesInfo()
1783 for inst in inst_data.values():
1784 if inst.primary_node in node_to_primary:
1785 node_to_primary[inst.primary_node].add(inst.name)
1786 for secnode in inst.secondary_nodes:
1787 if secnode in node_to_secondary:
1788 node_to_secondary[secnode].add(inst.name)
1790 node_to_primary = None
1791 node_to_secondary = None
1793 if query.NQ_OOB in self.requested_data:
1794 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
1795 for name, node in all_info.iteritems())
1799 if query.NQ_GROUP in self.requested_data:
1800 groups = lu.cfg.GetAllNodeGroupsInfo()
1804 return query.NodeQueryData([all_info[name] for name in nodenames],
1805 live_data, lu.cfg.GetMasterNode(),
1806 node_to_primary, node_to_secondary, groups,
1807 oob_support, lu.cfg.GetClusterInfo())
1810 class LUNodeQuery(NoHooksLU):
1811 """Logical unit for querying nodes.
1814 # pylint: disable=W0142
1817 def CheckArguments(self):
1818 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
1819 self.op.output_fields, self.op.use_locking)
1821 def ExpandNames(self):
1822 self.nq.ExpandNames(self)
1824 def DeclareLocks(self, level):
1825 self.nq.DeclareLocks(self, level)
1827 def Exec(self, feedback_fn):
1828 return self.nq.OldStyleQuery(self)
1831 class LUNodeQueryvols(NoHooksLU):
1832 """Logical unit for getting volumes on node(s).
1836 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
1837 _FIELDS_STATIC = utils.FieldSet("node")
1839 def CheckArguments(self):
1840 _CheckOutputFields(static=self._FIELDS_STATIC,
1841 dynamic=self._FIELDS_DYNAMIC,
1842 selected=self.op.output_fields)
1844 def ExpandNames(self):
1845 self.share_locks = _ShareAll()
1848 self.needed_locks = {
1849 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
1852 self.needed_locks = {
1853 locking.LEVEL_NODE: locking.ALL_SET,
1854 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
1857 def Exec(self, feedback_fn):
1858 """Computes the list of volumes and their attributes.
1861 nodenames = self.owned_locks(locking.LEVEL_NODE)
1862 volumes = self.rpc.call_node_volumes(nodenames)
1864 ilist = self.cfg.GetAllInstancesInfo()
1865 vol2inst = _MapInstanceDisksToNodes(ilist.values())
1868 for node in nodenames:
1869 nresult = volumes[node]
1872 msg = nresult.fail_msg
1874 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
1877 node_vols = sorted(nresult.payload,
1878 key=operator.itemgetter("dev"))
1880 for vol in node_vols:
1882 for field in self.op.output_fields:
1885 elif field == "phys":
1889 elif field == "name":
1891 elif field == "size":
1892 val = int(float(vol["size"]))
1893 elif field == "instance":
1894 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
1896 raise errors.ParameterError(field)
1897 node_output.append(str(val))
1899 output.append(node_output)
1904 class LUNodeQueryStorage(NoHooksLU):
1905 """Logical unit for getting information on storage units on node(s).
1908 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
1911 def CheckArguments(self):
1912 _CheckOutputFields(static=self._FIELDS_STATIC,
1913 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
1914 selected=self.op.output_fields)
1916 def ExpandNames(self):
1917 self.share_locks = _ShareAll()
1920 self.needed_locks = {
1921 locking.LEVEL_NODE: _GetWantedNodes(self, self.op.nodes),
1924 self.needed_locks = {
1925 locking.LEVEL_NODE: locking.ALL_SET,
1926 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
1929 def Exec(self, feedback_fn):
1930 """Computes the list of storage units and their attributes.
1933 self.nodes = self.owned_locks(locking.LEVEL_NODE)
1935 # Always get name to sort by
1936 if constants.SF_NAME in self.op.output_fields:
1937 fields = self.op.output_fields[:]
1939 fields = [constants.SF_NAME] + self.op.output_fields
1941 # Never ask for node or type as it's only known to the LU
1942 for extra in [constants.SF_NODE, constants.SF_TYPE]:
1943 while extra in fields:
1944 fields.remove(extra)
1946 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
1947 name_idx = field_idx[constants.SF_NAME]
1949 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
1950 data = self.rpc.call_storage_list(self.nodes,
1951 self.op.storage_type, st_args,
1952 self.op.name, fields)
1956 for node in utils.NiceSort(self.nodes):
1957 nresult = data[node]
1961 msg = nresult.fail_msg
1963 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
1966 rows = dict([(row[name_idx], row) for row in nresult.payload])
1968 for name in utils.NiceSort(rows.keys()):
1973 for field in self.op.output_fields:
1974 if field == constants.SF_NODE:
1976 elif field == constants.SF_TYPE:
1977 val = self.op.storage_type
1978 elif field in field_idx:
1979 val = row[field_idx[field]]
1981 raise errors.ParameterError(field)
1990 class _InstanceQuery(_QueryBase):
1991 FIELDS = query.INSTANCE_FIELDS
1993 def ExpandNames(self, lu):
1994 lu.needed_locks = {}
1995 lu.share_locks = _ShareAll()
1998 self.wanted = _GetWantedInstances(lu, self.names)
2000 self.wanted = locking.ALL_SET
2002 self.do_locking = (self.use_locking and
2003 query.IQ_LIVE in self.requested_data)
2005 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
2006 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
2007 lu.needed_locks[locking.LEVEL_NODE] = []
2008 lu.needed_locks[locking.LEVEL_NETWORK] = []
2009 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2011 self.do_grouplocks = (self.do_locking and
2012 query.IQ_NODES in self.requested_data)
2014 def DeclareLocks(self, lu, level):
2016 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
2017 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
2019 # Lock all groups used by instances optimistically; this requires going
2020 # via the node before it's locked, requiring verification later on
2021 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
2023 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
2024 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
2025 elif level == locking.LEVEL_NODE:
2026 lu._LockInstancesNodes() # pylint: disable=W0212
2028 elif level == locking.LEVEL_NETWORK:
2029 lu.needed_locks[locking.LEVEL_NETWORK] = \
2031 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
2032 for net_uuid in lu.cfg.GetInstanceNetworks(instance_name))
2035 def _CheckGroupLocks(lu):
2036 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
2037 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
2039 # Check if node groups for locked instances are still correct
2040 for instance_name in owned_instances:
2041 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
2043 def _GetQueryData(self, lu):
2044 """Computes the list of instances and their attributes.
2047 if self.do_grouplocks:
2048 self._CheckGroupLocks(lu)
2050 cluster = lu.cfg.GetClusterInfo()
2051 all_info = lu.cfg.GetAllInstancesInfo()
2053 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
2055 instance_list = [all_info[name] for name in instance_names]
2056 nodes = frozenset(itertools.chain(*(inst.all_nodes
2057 for inst in instance_list)))
2058 hv_list = list(set([inst.hypervisor for inst in instance_list]))
2061 wrongnode_inst = set()
2063 # Gather data as requested
2064 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
2066 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
2068 result = node_data[name]
2070 # offline nodes will be in both lists
2071 assert result.fail_msg
2072 offline_nodes.append(name)
2074 bad_nodes.append(name)
2075 elif result.payload:
2076 for inst in result.payload:
2077 if inst in all_info:
2078 if all_info[inst].primary_node == name:
2079 live_data.update(result.payload)
2081 wrongnode_inst.add(inst)
2083 # orphan instance; we don't list it here as we don't
2084 # handle this case yet in the output of instance listing
2085 logging.warning("Orphan instance '%s' found on node %s",
2087 # else no instance is alive
2091 if query.IQ_DISKUSAGE in self.requested_data:
2092 gmi = ganeti.masterd.instance
2093 disk_usage = dict((inst.name,
2094 gmi.ComputeDiskSize(inst.disk_template,
2095 [{constants.IDISK_SIZE: disk.size}
2096 for disk in inst.disks]))
2097 for inst in instance_list)
2101 if query.IQ_CONSOLE in self.requested_data:
2103 for inst in instance_list:
2104 if inst.name in live_data:
2105 # Instance is running
2106 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
2108 consinfo[inst.name] = None
2109 assert set(consinfo.keys()) == set(instance_names)
2113 if query.IQ_NODES in self.requested_data:
2114 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
2116 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
2117 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
2118 for uuid in set(map(operator.attrgetter("group"),
2124 if query.IQ_NETWORKS in self.requested_data:
2125 net_uuids = itertools.chain(*(lu.cfg.GetInstanceNetworks(i.name)
2126 for i in instance_list))
2127 networks = dict((uuid, lu.cfg.GetNetwork(uuid)) for uuid in net_uuids)
2131 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
2132 disk_usage, offline_nodes, bad_nodes,
2133 live_data, wrongnode_inst, consinfo,
2134 nodes, groups, networks)
2137 class LUQuery(NoHooksLU):
2138 """Query for resources/items of a certain kind.
2141 # pylint: disable=W0142
2144 def CheckArguments(self):
2145 qcls = _GetQueryImplementation(self.op.what)
2147 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
2149 def ExpandNames(self):
2150 self.impl.ExpandNames(self)
2152 def DeclareLocks(self, level):
2153 self.impl.DeclareLocks(self, level)
2155 def Exec(self, feedback_fn):
2156 return self.impl.NewStyleQuery(self)
2159 class LUQueryFields(NoHooksLU):
2160 """Query for resources/items of a certain kind.
2163 # pylint: disable=W0142
2166 def CheckArguments(self):
2167 self.qcls = _GetQueryImplementation(self.op.what)
2169 def ExpandNames(self):
2170 self.needed_locks = {}
2172 def Exec(self, feedback_fn):
2173 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
2176 class LUNodeModifyStorage(NoHooksLU):
2177 """Logical unit for modifying a storage volume on a node.
2182 def CheckArguments(self):
2183 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2185 storage_type = self.op.storage_type
2188 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2190 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
2191 " modified" % storage_type,
2194 diff = set(self.op.changes.keys()) - modifiable
2196 raise errors.OpPrereqError("The following fields cannot be modified for"
2197 " storage units of type '%s': %r" %
2198 (storage_type, list(diff)),
2201 def ExpandNames(self):
2202 self.needed_locks = {
2203 locking.LEVEL_NODE: self.op.node_name,
2206 def Exec(self, feedback_fn):
2207 """Modifies a storage volume on a node.
2210 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2211 result = self.rpc.call_storage_modify(self.op.node_name,
2212 self.op.storage_type, st_args,
2213 self.op.name, self.op.changes)
2214 result.Raise("Failed to modify storage unit '%s' on %s" %
2215 (self.op.name, self.op.node_name))
2218 class LUNodeAdd(LogicalUnit):
2219 """Logical unit for adding a node to the cluster.
2223 HTYPE = constants.HTYPE_NODE
2224 _NFLAGS = ["master_capable", "vm_capable"]
2226 def CheckArguments(self):
2227 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
2228 # validate/normalize the node name
2229 self.hostname = netutils.GetHostname(name=self.op.node_name,
2230 family=self.primary_ip_family)
2231 self.op.node_name = self.hostname.name
2233 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
2234 raise errors.OpPrereqError("Cannot readd the master node",
2237 if self.op.readd and self.op.group:
2238 raise errors.OpPrereqError("Cannot pass a node group when a node is"
2239 " being readded", errors.ECODE_INVAL)
2241 def BuildHooksEnv(self):
2244 This will run on all nodes before, and on all nodes + the new node after.
2248 "OP_TARGET": self.op.node_name,
2249 "NODE_NAME": self.op.node_name,
2250 "NODE_PIP": self.op.primary_ip,
2251 "NODE_SIP": self.op.secondary_ip,
2252 "MASTER_CAPABLE": str(self.op.master_capable),
2253 "VM_CAPABLE": str(self.op.vm_capable),
2256 def BuildHooksNodes(self):
2257 """Build hooks nodes.
2260 # Exclude added node
2261 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
2262 post_nodes = pre_nodes + [self.op.node_name, ]
2264 return (pre_nodes, post_nodes)
2266 def CheckPrereq(self):
2267 """Check prerequisites.
2270 - the new node is not already in the config
2272 - its parameters (single/dual homed) match the cluster
2274 Any errors are signaled by raising errors.OpPrereqError.
2278 hostname = self.hostname
2279 node = hostname.name
2280 primary_ip = self.op.primary_ip = hostname.ip
2281 if self.op.secondary_ip is None:
2282 if self.primary_ip_family == netutils.IP6Address.family:
2283 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
2284 " IPv4 address must be given as secondary",
2286 self.op.secondary_ip = primary_ip
2288 secondary_ip = self.op.secondary_ip
2289 if not netutils.IP4Address.IsValid(secondary_ip):
2290 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
2291 " address" % secondary_ip, errors.ECODE_INVAL)
2293 node_list = cfg.GetNodeList()
2294 if not self.op.readd and node in node_list:
2295 raise errors.OpPrereqError("Node %s is already in the configuration" %
2296 node, errors.ECODE_EXISTS)
2297 elif self.op.readd and node not in node_list:
2298 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
2301 self.changed_primary_ip = False
2303 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
2304 if self.op.readd and node == existing_node_name:
2305 if existing_node.secondary_ip != secondary_ip:
2306 raise errors.OpPrereqError("Readded node doesn't have the same IP"
2307 " address configuration as before",
2309 if existing_node.primary_ip != primary_ip:
2310 self.changed_primary_ip = True
2314 if (existing_node.primary_ip == primary_ip or
2315 existing_node.secondary_ip == primary_ip or
2316 existing_node.primary_ip == secondary_ip or
2317 existing_node.secondary_ip == secondary_ip):
2318 raise errors.OpPrereqError("New node ip address(es) conflict with"
2319 " existing node %s" % existing_node.name,
2320 errors.ECODE_NOTUNIQUE)
2322 # After this 'if' block, None is no longer a valid value for the
2323 # _capable op attributes
2325 old_node = self.cfg.GetNodeInfo(node)
2326 assert old_node is not None, "Can't retrieve locked node %s" % node
2327 for attr in self._NFLAGS:
2328 if getattr(self.op, attr) is None:
2329 setattr(self.op, attr, getattr(old_node, attr))
2331 for attr in self._NFLAGS:
2332 if getattr(self.op, attr) is None:
2333 setattr(self.op, attr, True)
2335 if self.op.readd and not self.op.vm_capable:
2336 pri, sec = cfg.GetNodeInstances(node)
2338 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
2339 " flag set to false, but it already holds"
2340 " instances" % node,
2343 # check that the type of the node (single versus dual homed) is the
2344 # same as for the master
2345 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2346 master_singlehomed = myself.secondary_ip == myself.primary_ip
2347 newbie_singlehomed = secondary_ip == primary_ip
2348 if master_singlehomed != newbie_singlehomed:
2349 if master_singlehomed:
2350 raise errors.OpPrereqError("The master has no secondary ip but the"
2351 " new node has one",
2354 raise errors.OpPrereqError("The master has a secondary ip but the"
2355 " new node doesn't have one",
2358 # checks reachability
2359 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2360 raise errors.OpPrereqError("Node not reachable by ping",
2361 errors.ECODE_ENVIRON)
2363 if not newbie_singlehomed:
2364 # check reachability from my secondary ip to newbie's secondary ip
2365 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2366 source=myself.secondary_ip):
2367 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2368 " based ping to node daemon port",
2369 errors.ECODE_ENVIRON)
2376 if self.op.master_capable:
2377 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
2379 self.master_candidate = False
2382 self.new_node = old_node
2384 node_group = cfg.LookupNodeGroup(self.op.group)
2385 self.new_node = objects.Node(name=node,
2386 primary_ip=primary_ip,
2387 secondary_ip=secondary_ip,
2388 master_candidate=self.master_candidate,
2389 offline=False, drained=False,
2390 group=node_group, ndparams={})
2392 if self.op.ndparams:
2393 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2394 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
2395 "node", "cluster or group")
2397 if self.op.hv_state:
2398 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
2400 if self.op.disk_state:
2401 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
2403 # TODO: If we need to have multiple DnsOnlyRunner we probably should make
2404 # it a property on the base class.
2405 rpcrunner = rpc.DnsOnlyRunner()
2406 result = rpcrunner.call_version([node])[node]
2407 result.Raise("Can't get version information from node %s" % node)
2408 if constants.PROTOCOL_VERSION == result.payload:
2409 logging.info("Communication to node %s fine, sw version %s match",
2410 node, result.payload)
2412 raise errors.OpPrereqError("Version mismatch master version %s,"
2413 " node version %s" %
2414 (constants.PROTOCOL_VERSION, result.payload),
2415 errors.ECODE_ENVIRON)
2417 vg_name = cfg.GetVGName()
2418 if vg_name is not None:
2419 vparams = {constants.NV_PVLIST: [vg_name]}
2420 excl_stor = _IsExclusiveStorageEnabledNode(cfg, self.new_node)
2421 cname = self.cfg.GetClusterName()
2422 result = rpcrunner.call_node_verify_light([node], vparams, cname)[node]
2423 (errmsgs, _) = _CheckNodePVs(result.payload, excl_stor)
2425 raise errors.OpPrereqError("Checks on node PVs failed: %s" %
2426 "; ".join(errmsgs), errors.ECODE_ENVIRON)
2428 def Exec(self, feedback_fn):
2429 """Adds the new node to the cluster.
2432 new_node = self.new_node
2433 node = new_node.name
2435 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
2438 # We are adding a new node, so we assume it's powered
2439 new_node.powered = True
2441 # for re-adds, reset the offline/drained/master-candidate flags;
2442 # we need to reset here, otherwise offline would prevent RPC calls
2443 # later in the procedure; this also means that if the re-add
2444 # fails, we are left with a non-offlined, broken node
2446 new_node.drained = new_node.offline = False # pylint: disable=W0201
2447 self.LogInfo("Readding a node, the offline/drained flags were reset")
2448 # if we demote the node, we do cleanup later in the procedure
2449 new_node.master_candidate = self.master_candidate
2450 if self.changed_primary_ip:
2451 new_node.primary_ip = self.op.primary_ip
2453 # copy the master/vm_capable flags
2454 for attr in self._NFLAGS:
2455 setattr(new_node, attr, getattr(self.op, attr))
2457 # notify the user about any possible mc promotion
2458 if new_node.master_candidate:
2459 self.LogInfo("Node will be a master candidate")
2461 if self.op.ndparams:
2462 new_node.ndparams = self.op.ndparams
2464 new_node.ndparams = {}
2466 if self.op.hv_state:
2467 new_node.hv_state_static = self.new_hv_state
2469 if self.op.disk_state:
2470 new_node.disk_state_static = self.new_disk_state
2472 # Add node to our /etc/hosts, and add key to known_hosts
2473 if self.cfg.GetClusterInfo().modify_etc_hosts:
2474 master_node = self.cfg.GetMasterNode()
2475 result = self.rpc.call_etc_hosts_modify(master_node,
2476 constants.ETC_HOSTS_ADD,
2479 result.Raise("Can't update hosts file with new host data")
2481 if new_node.secondary_ip != new_node.primary_ip:
2482 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
2485 node_verify_list = [self.cfg.GetMasterNode()]
2486 node_verify_param = {
2487 constants.NV_NODELIST: ([node], {}),
2488 # TODO: do a node-net-test as well?
2491 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2492 self.cfg.GetClusterName())
2493 for verifier in node_verify_list:
2494 result[verifier].Raise("Cannot communicate with node %s" % verifier)
2495 nl_payload = result[verifier].payload[constants.NV_NODELIST]
2497 for failed in nl_payload:
2498 feedback_fn("ssh/hostname verification failed"
2499 " (checking from %s): %s" %
2500 (verifier, nl_payload[failed]))
2501 raise errors.OpExecError("ssh/hostname verification failed")
2504 _RedistributeAncillaryFiles(self)
2505 self.context.ReaddNode(new_node)
2506 # make sure we redistribute the config
2507 self.cfg.Update(new_node, feedback_fn)
2508 # and make sure the new node will not have old files around
2509 if not new_node.master_candidate:
2510 result = self.rpc.call_node_demote_from_mc(new_node.name)
2511 msg = result.fail_msg
2513 self.LogWarning("Node failed to demote itself from master"
2514 " candidate status: %s" % msg)
2516 _RedistributeAncillaryFiles(self, additional_nodes=[node],
2517 additional_vm=self.op.vm_capable)
2518 self.context.AddNode(new_node, self.proc.GetECId())
2521 class LUNodeSetParams(LogicalUnit):
2522 """Modifies the parameters of a node.
2524 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
2525 to the node role (as _ROLE_*)
2526 @cvar _R2F: a dictionary from node role to tuples of flags
2527 @cvar _FLAGS: a list of attribute names corresponding to the flags
2530 HPATH = "node-modify"
2531 HTYPE = constants.HTYPE_NODE
2533 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
2535 (True, False, False): _ROLE_CANDIDATE,
2536 (False, True, False): _ROLE_DRAINED,
2537 (False, False, True): _ROLE_OFFLINE,
2538 (False, False, False): _ROLE_REGULAR,
2540 _R2F = dict((v, k) for k, v in _F2R.items())
2541 _FLAGS = ["master_candidate", "drained", "offline"]
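  # Illustrative note: with the tuple order given by _FLAGS above, a node that
  # is a master candidate and neither drained nor offline maps to
  # _F2R[(True, False, False)] == _ROLE_CANDIDATE, and _R2F simply inverts the
  # mapping, so _R2F[_ROLE_CANDIDATE] == (True, False, False).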
2543 def CheckArguments(self):
2544 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2545 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
2546 self.op.master_capable, self.op.vm_capable,
2547 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
2549 if all_mods.count(None) == len(all_mods):
2550 raise errors.OpPrereqError("Please pass at least one modification",
2552 if all_mods.count(True) > 1:
2553 raise errors.OpPrereqError("Can't set the node into more than one"
2554 " state at the same time",
2557 # Boolean value that tells us whether we might be demoting from MC
2558 self.might_demote = (self.op.master_candidate is False or
2559 self.op.offline is True or
2560 self.op.drained is True or
2561 self.op.master_capable is False)
2563 if self.op.secondary_ip:
2564 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
2565 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
2566 " address" % self.op.secondary_ip,
2569 self.lock_all = self.op.auto_promote and self.might_demote
2570 self.lock_instances = self.op.secondary_ip is not None
2572 def _InstanceFilter(self, instance):
2573 """Filter for getting affected instances.
2576 return (instance.disk_template in constants.DTS_INT_MIRROR and
2577 self.op.node_name in instance.all_nodes)
2579 def ExpandNames(self):
2581 self.needed_locks = {
2582 locking.LEVEL_NODE: locking.ALL_SET,
2584 # Block allocations when all nodes are locked
2585 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
2588 self.needed_locks = {
2589 locking.LEVEL_NODE: self.op.node_name,
2592 # Since modifying a node can have severe effects on currently running
2593 # operations, the resource lock is at least acquired in shared mode
2594 self.needed_locks[locking.LEVEL_NODE_RES] = \
2595 self.needed_locks[locking.LEVEL_NODE]
2597 # Acquire all locks except the node locks in shared mode; the shared locks
2598 # are only needed for read-only access
2599 self.share_locks = _ShareAll()
2600 self.share_locks[locking.LEVEL_NODE] = 0
2601 self.share_locks[locking.LEVEL_NODE_RES] = 0
2602 self.share_locks[locking.LEVEL_NODE_ALLOC] = 0
2604 if self.lock_instances:
2605 self.needed_locks[locking.LEVEL_INSTANCE] = \
2606 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
2608 def BuildHooksEnv(self):
2611 This runs on the master node.
2615 "OP_TARGET": self.op.node_name,
2616 "MASTER_CANDIDATE": str(self.op.master_candidate),
2617 "OFFLINE": str(self.op.offline),
2618 "DRAINED": str(self.op.drained),
2619 "MASTER_CAPABLE": str(self.op.master_capable),
2620 "VM_CAPABLE": str(self.op.vm_capable),
2623 def BuildHooksNodes(self):
2624 """Build hooks nodes.
2627 nl = [self.cfg.GetMasterNode(), self.op.node_name]
2630 def CheckPrereq(self):
2631 """Check prerequisites.
2633 This only checks the instance list against the existing names.
2636 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2638 if self.lock_instances:
2639 affected_instances = \
2640 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
2642 # Verify instance locks
2643 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
2644 wanted_instances = frozenset(affected_instances.keys())
2645 if wanted_instances - owned_instances:
2646 raise errors.OpPrereqError("Instances affected by changing node %s's"
2647 " secondary IP address have changed since"
2648 " locks were acquired, wanted '%s', have"
2649 " '%s'; retry the operation" %
2651 utils.CommaJoin(wanted_instances),
2652 utils.CommaJoin(owned_instances)),
2655 affected_instances = None
2657 if (self.op.master_candidate is not None or
2658 self.op.drained is not None or
2659 self.op.offline is not None):
2660 # we can't change the master's node flags
2661 if self.op.node_name == self.cfg.GetMasterNode():
2662 raise errors.OpPrereqError("The master role can be changed"
2663 " only via master-failover",
2666 if self.op.master_candidate and not node.master_capable:
2667 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
2668 " it a master candidate" % node.name,
2671 if self.op.vm_capable is False:
2672 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
2674 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
2675 " the vm_capable flag" % node.name,
2678 if node.master_candidate and self.might_demote and not self.lock_all:
2679 assert not self.op.auto_promote, "auto_promote set but lock_all not"
2680 # check if, after removing the current node, we're missing master candidates
2682 (mc_remaining, mc_should, _) = \
2683 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
2684 if mc_remaining < mc_should:
2685 raise errors.OpPrereqError("Not enough master candidates, please"
2686 " pass auto promote option to allow"
2687 " promotion (--auto-promote or RAPI"
2688 " auto_promote=True)", errors.ECODE_STATE)
2690 self.old_flags = old_flags = (node.master_candidate,
2691 node.drained, node.offline)
2692 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
2693 self.old_role = old_role = self._F2R[old_flags]
2695 # Check for ineffective changes
2696 for attr in self._FLAGS:
2697 if (getattr(self.op, attr) is False and getattr(node, attr) is False):
2698 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
2699 setattr(self.op, attr, None)
2701 # Past this point, any flag change to False means a transition
2702 # away from the respective state, as only real changes are kept
2704 # TODO: We might query the real power state if it supports OOB
2705 if _SupportsOob(self.cfg, node):
2706 if self.op.offline is False and not (node.powered or
2707 self.op.powered is True):
2708 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
2709 " offline status can be reset") %
2710 self.op.node_name, errors.ECODE_STATE)
2711 elif self.op.powered is not None:
2712 raise errors.OpPrereqError(("Unable to change powered state for node %s"
2713 " as it does not support out-of-band"
2714 " handling") % self.op.node_name,
2717 # If we're being de-offlined/drained, we'll promote ourselves to MC if needed
2718 if (self.op.drained is False or self.op.offline is False or
2719 (self.op.master_capable and not node.master_capable)):
2720 if _DecideSelfPromotion(self):
2721 self.op.master_candidate = True
2722 self.LogInfo("Auto-promoting node to master candidate")
2724 # If we're no longer master capable, we'll demote ourselves from MC
2725 if self.op.master_capable is False and node.master_candidate:
2726 self.LogInfo("Demoting from master candidate")
2727 self.op.master_candidate = False
2730 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
2731 if self.op.master_candidate:
2732 new_role = self._ROLE_CANDIDATE
2733 elif self.op.drained:
2734 new_role = self._ROLE_DRAINED
2735 elif self.op.offline:
2736 new_role = self._ROLE_OFFLINE
2737 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
2738 # False is still in new flags, which means we're un-setting (the current) flags
2740 new_role = self._ROLE_REGULAR
2741 else: # no new flags, nothing, keep old role
2744 self.new_role = new_role
2746 if old_role == self._ROLE_OFFLINE and new_role != old_role:
2747 # Trying to transition out of offline status
2748 result = self.rpc.call_version([node.name])[node.name]
2750 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
2751 " to report its version: %s" %
2752 (node.name, result.fail_msg),
2755 self.LogWarning("Transitioning node from offline to online state"
2756 " without using re-add. Please make sure the node is healthy!")
2759 # When changing the secondary ip, verify if this is a single-homed to
2760 # multi-homed transition or vice versa, and apply the relevant warnings and checks
2762 if self.op.secondary_ip:
2763 # Ok even without locking, because this can't be changed by any LU
2764 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
2765 master_singlehomed = master.secondary_ip == master.primary_ip
2766 if master_singlehomed and self.op.secondary_ip != node.primary_ip:
2767 if self.op.force and node.name == master.name:
2768 self.LogWarning("Transitioning from single-homed to multi-homed"
2769 " cluster; all nodes will require a secondary IP address")
2772 raise errors.OpPrereqError("Changing the secondary ip on a"
2773 " single-homed cluster requires the"
2774 " --force option to be passed, and the"
2775 " target node to be the master",
2777 elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
2778 if self.op.force and node.name == master.name:
2779 self.LogWarning("Transitioning from multi-homed to single-homed"
2780 " cluster; secondary IP addresses will have to be removed")
2783 raise errors.OpPrereqError("Cannot set the secondary IP to be the"
2784 " same as the primary IP on a multi-homed"
2785 " cluster, unless the --force option is"
2786 " passed, and the target node is the"
2787 " master", errors.ECODE_INVAL)
2789 assert not (frozenset(affected_instances) -
2790 self.owned_locks(locking.LEVEL_INSTANCE))
2793 if affected_instances:
2794 msg = ("Cannot change secondary IP address: offline node has"
2795 " instances (%s) configured to use it" %
2796 utils.CommaJoin(affected_instances.keys()))
2797 raise errors.OpPrereqError(msg, errors.ECODE_STATE)
2799 # On online nodes, check that no instances are running, and that
2800 # the node has the new ip and we can reach it.
2801 for instance in affected_instances.values():
2802 _CheckInstanceState(self, instance, INSTANCE_DOWN,
2803 msg="cannot change secondary ip")
2805 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
2806 if master.name != node.name:
2807 # check reachability from master secondary ip to new secondary ip
2808 if not netutils.TcpPing(self.op.secondary_ip,
2809 constants.DEFAULT_NODED_PORT,
2810 source=master.secondary_ip):
2811 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2812 " based ping to node daemon port",
2813 errors.ECODE_ENVIRON)
2815 if self.op.ndparams:
2816 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
2817 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
2818 _CheckParamsNotGlobal(self.op.ndparams, constants.NDC_GLOBALS, "node",
2819 "node", "cluster or group")
2820 self.new_ndparams = new_ndparams
2822 if self.op.hv_state:
2823 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
2824 self.node.hv_state_static)
2826 if self.op.disk_state:
2827 self.new_disk_state = \
2828 _MergeAndVerifyDiskState(self.op.disk_state,
2829 self.node.disk_state_static)
2831 def Exec(self, feedback_fn):
2836 old_role = self.old_role
2837 new_role = self.new_role
2841 if self.op.ndparams:
2842 node.ndparams = self.new_ndparams
2844 if self.op.powered is not None:
2845 node.powered = self.op.powered
2847 if self.op.hv_state:
2848 node.hv_state_static = self.new_hv_state
2850 if self.op.disk_state:
2851 node.disk_state_static = self.new_disk_state
2853 for attr in ["master_capable", "vm_capable"]:
2854 val = getattr(self.op, attr)
2856 setattr(node, attr, val)
2857 result.append((attr, str(val)))
2859 if new_role != old_role:
2860 # Tell the node to demote itself, if no longer MC and not offline
2861 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
2862 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
2864 self.LogWarning("Node failed to demote itself: %s", msg)
2866 new_flags = self._R2F[new_role]
2867 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
2869 result.append((desc, str(nf)))
2870 (node.master_candidate, node.drained, node.offline) = new_flags
2872 # we locked all nodes, so we adjust the candidate pool before updating this node
2874 _AdjustCandidatePool(self, [node.name])
2876 if self.op.secondary_ip:
2877 node.secondary_ip = self.op.secondary_ip
2878 result.append(("secondary_ip", self.op.secondary_ip))
2880 # this will trigger configuration file update, if needed
2881 self.cfg.Update(node, feedback_fn)
2883 # this will trigger job queue propagation or cleanup if the mc flag changed
2885 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
2886 self.context.ReaddNode(node)
2891 class LUNodePowercycle(NoHooksLU):
2892 """Powercycles a node.
2897 def CheckArguments(self):
2898 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2899 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
2900 raise errors.OpPrereqError("The node is the master and the force"
2901 " parameter was not set",
2904 def ExpandNames(self):
2905 """Locking for PowercycleNode.
2907 This is a last-resort option and shouldn't block on other
2908 jobs. Therefore, we grab no locks.
2911 self.needed_locks = {}
2913 def Exec(self, feedback_fn):
2917 result = self.rpc.call_node_powercycle(self.op.node_name,
2918 self.cfg.GetHypervisorType())
2919 result.Raise("Failed to schedule the reboot")
2920 return result.payload
2923 class LUInstanceActivateDisks(NoHooksLU):
2924 """Bring up an instance's disks.
2929 def ExpandNames(self):
2930 self._ExpandAndLockInstance()
2931 self.needed_locks[locking.LEVEL_NODE] = []
2932 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2934 def DeclareLocks(self, level):
2935 if level == locking.LEVEL_NODE:
2936 self._LockInstancesNodes()
2938 def CheckPrereq(self):
2939 """Check prerequisites.
2941 This checks that the instance is in the cluster.
2944 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2945 assert self.instance is not None, \
2946 "Cannot retrieve locked instance %s" % self.op.instance_name
2947 _CheckNodeOnline(self, self.instance.primary_node)
2949 def Exec(self, feedback_fn):
2950 """Activate the disks.
2953 disks_ok, disks_info = \
2954 _AssembleInstanceDisks(self, self.instance,
2955 ignore_size=self.op.ignore_size)
2957 raise errors.OpExecError("Cannot activate block devices")
2959 if self.op.wait_for_sync:
2960 if not _WaitForSync(self, self.instance):
2961 raise errors.OpExecError("Some disks of the instance are degraded!")
2966 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
2968 """Prepare the block devices for an instance.
2970 This sets up the block devices on all nodes.
2972 @type lu: L{LogicalUnit}
2973 @param lu: the logical unit on whose behalf we execute
2974 @type instance: L{objects.Instance}
2975 @param instance: the instance for whose disks we assemble
2976 @type disks: list of L{objects.Disk} or None
2977 @param disks: which disks to assemble (or all, if None)
2978 @type ignore_secondaries: boolean
2979 @param ignore_secondaries: if true, errors on secondary nodes
2980 won't result in an error return from the function
2981 @type ignore_size: boolean
2982 @param ignore_size: if true, the current known size of the disk
2983 will not be used during the disk activation, useful for cases
2984 when the size is wrong
2985 @return: False if the operation failed, otherwise a list of
2986 (host, instance_visible_name, node_visible_name)
2987 with the mapping from node devices to instance devices
2992 iname = instance.name
2993 disks = _ExpandCheckDisks(instance, disks)
2995 # With the two-pass mechanism we try to reduce the window of
2996 # opportunity for the race condition of switching DRBD to primary
2997 # before handshaking occurred, but we do not eliminate it
2999 # The proper fix would be to wait (with some limits) until the
3000 # connection has been made and drbd transitions from WFConnection
3001 # into any other network-connected state (Connected, SyncTarget,
3004 # 1st pass, assemble on all nodes in secondary mode
3005 for idx, inst_disk in enumerate(disks):
3006 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3008 node_disk = node_disk.Copy()
3009 node_disk.UnsetSize()
3010 lu.cfg.SetDiskID(node_disk, node)
3011 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
3013 msg = result.fail_msg
3015 is_offline_secondary = (node in instance.secondary_nodes and
3017 lu.LogWarning("Could not prepare block device %s on node %s"
3018 " (is_primary=False, pass=1): %s",
3019 inst_disk.iv_name, node, msg)
3020 if not (ignore_secondaries or is_offline_secondary):
3023 # FIXME: race condition on drbd migration to primary
3025 # 2nd pass, do only the primary node
3026 for idx, inst_disk in enumerate(disks):
3029 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3030 if node != instance.primary_node:
3033 node_disk = node_disk.Copy()
3034 node_disk.UnsetSize()
3035 lu.cfg.SetDiskID(node_disk, node)
3036 result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
3038 msg = result.fail_msg
3040 lu.LogWarning("Could not prepare block device %s on node %s"
3041 " (is_primary=True, pass=2): %s",
3042 inst_disk.iv_name, node, msg)
3045 dev_path = result.payload
3047 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3049 # leave the disks configured for the primary node
3050 # this is a workaround that would be fixed better by
3051 # improving the logical/physical id handling
3053 lu.cfg.SetDiskID(disk, instance.primary_node)
3055 return disks_ok, device_info
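# Hedged usage sketch (hypothetical helper, not called by any LU): one way a
# logical unit could assemble every disk of an instance and log where each
# device became visible. Assumes `lu` is a LogicalUnit and `instance` an
# objects.Instance already fetched from the configuration.
def _ExampleAssembleAllDisks(lu, instance):
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance,
                                                 ignore_size=True)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  for node, iv_name, dev_path in device_info:
    # each entry maps a node device to the instance-visible device
    lu.LogInfo("Disk %s of instance %s visible on node %s as %s",
               iv_name, instance.name, node, dev_path)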
3058 def _StartInstanceDisks(lu, instance, force):
3059 """Start the disks of an instance.
3062 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3063 ignore_secondaries=force)
3065 _ShutdownInstanceDisks(lu, instance)
3066 if force is not None and not force:
3068 hint=("If the message above refers to a secondary node,"
3069 " you can retry the operation using '--force'"))
3070 raise errors.OpExecError("Disk consistency error")
3073 class LUInstanceDeactivateDisks(NoHooksLU):
3074 """Shutdown an instance's disks.
3079 def ExpandNames(self):
3080 self._ExpandAndLockInstance()
3081 self.needed_locks[locking.LEVEL_NODE] = []
3082 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3084 def DeclareLocks(self, level):
3085 if level == locking.LEVEL_NODE:
3086 self._LockInstancesNodes()
3088 def CheckPrereq(self):
3089 """Check prerequisites.
3091 This checks that the instance is in the cluster.
3094 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3095 assert self.instance is not None, \
3096 "Cannot retrieve locked instance %s" % self.op.instance_name
3098 def Exec(self, feedback_fn):
3099 """Deactivate the disks
3102 instance = self.instance
3104 _ShutdownInstanceDisks(self, instance)
3106 _SafeShutdownInstanceDisks(self, instance)
3109 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
3110 """Shutdown block devices of an instance.
3112 This function checks if an instance is running, before calling
3113 _ShutdownInstanceDisks.
3116 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
3117 _ShutdownInstanceDisks(lu, instance, disks=disks)
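# Hedged usage sketch (hypothetical, not called anywhere): shut down only the
# first disk of an instance; the state check in the function above refuses to
# act, raising OpPrereqError, while the instance is not administratively down.
def _ExampleShutdownFirstDisk(lu, instance):
  _SafeShutdownInstanceDisks(lu, instance, disks=[instance.disks[0]])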
3120 def _ExpandCheckDisks(instance, disks):
3121 """Return the instance disks selected by the disks list
3123 @type disks: list of L{objects.Disk} or None
3124 @param disks: selected disks
3125 @rtype: list of L{objects.Disk}
3126 @return: selected instance disks to act on
3130 return instance.disks
3132 if not set(disks).issubset(instance.disks):
3133 raise errors.ProgrammerError("Can only act on disks belonging to the"
3138 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
3139 """Shutdown block devices of an instance.
3141 This does the shutdown on all nodes of the instance.
3143 If ignore_primary is false, errors on the primary node cause the shutdown to be reported as failed.
3148 disks = _ExpandCheckDisks(instance, disks)
3151 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3152 lu.cfg.SetDiskID(top_disk, node)
3153 result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
3154 msg = result.fail_msg
3156 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3157 disk.iv_name, node, msg)
3158 if ((node == instance.primary_node and not ignore_primary) or
3159 (node != instance.primary_node and not result.offline)):
3164 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3165 """Checks if a node has enough free memory.
3167 This function checks if a given node has the needed amount of free
3168 memory. In case the node has less memory or we cannot get the
3169 information from the node, this function raises an OpPrereqError exception.
3172 @type lu: C{LogicalUnit}
3173 @param lu: a logical unit from which we get configuration data
3175 @param node: the node to check
3176 @type reason: C{str}
3177 @param reason: string to use in the error message
3178 @type requested: C{int}
3179 @param requested: the amount of memory in MiB to check for
3180 @type hypervisor_name: C{str}
3181 @param hypervisor_name: the hypervisor to ask for memory stats
3183 @return: node current free memory
3184 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3185 we cannot check the node
3188 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name], False)
3189 nodeinfo[node].Raise("Can't get data from node %s" % node,
3190 prereq=True, ecode=errors.ECODE_ENVIRON)
3191 (_, _, (hv_info, )) = nodeinfo[node].payload
3193 free_mem = hv_info.get("memory_free", None)
3194 if not isinstance(free_mem, int):
3195 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3196 " was '%s'" % (node, free_mem),
3197 errors.ECODE_ENVIRON)
3198 if requested > free_mem:
3199 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3200 " needed %s MiB, available %s MiB" %
3201 (node, reason, requested, free_mem),
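# Hedged usage sketch (made-up node name; the hypervisor constant is only for
# illustration): require 2 GiB of free memory on a node before starting an
# instance, keeping the reported value for logging.
def _ExampleRequireFreeMemory(lu):
  free_mem = _CheckNodeFreeMemory(lu, "node1.example.com",
                                  "starting instance inst1.example.com",
                                  2048, constants.HT_XEN_PVM)
  lu.LogInfo("Node reports %s MiB of free memory", free_mem)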
3206 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
3207 """Checks if nodes have enough free disk space in all the VGs.
3209 This function checks if all given nodes have the needed amount of
3210 free disk. In case any node has less disk or we cannot get the
3211 information from the node, this function raises an OpPrereqError exception.
3214 @type lu: C{LogicalUnit}
3215 @param lu: a logical unit from which we get configuration data
3216 @type nodenames: C{list}
3217 @param nodenames: the list of node names to check
3218 @type req_sizes: C{dict}
3219 @param req_sizes: the hash of vg and corresponding amount of disk in
3221 @raise errors.OpPrereqError: if the node doesn't have enough disk,
3222 or we cannot check the node
3225 for vg, req_size in req_sizes.items():
3226 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
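# Hedged usage sketch (made-up node and volume group names): req_sizes maps
# each volume group to the amount of disk space, in MiB, that every listed
# node must have available in that group.
def _ExampleRequireFreeDisk(lu):
  _CheckNodesFreeDiskPerVG(lu, ["node1.example.com", "node2.example.com"],
                           {"xenvg": 10240, "datavg": 2048})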
3229 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
3230 """Checks if nodes have enough free disk space in the specified VG.
3232 This function checks if all given nodes have the needed amount of
3233 free disk. In case any node has less disk or we cannot get the
3234 information from the node, this function raises an OpPrereqError exception.
3237 @type lu: C{LogicalUnit}
3238 @param lu: a logical unit from which we get configuration data
3239 @type nodenames: C{list}
3240 @param nodenames: the list of node names to check
3242 @param vg: the volume group to check
3243 @type requested: C{int}
3244 @param requested: the amount of disk in MiB to check for
3245 @raise errors.OpPrereqError: if the node doesn't have enough disk,
3246 or we cannot check the node
3249 es_flags = rpc.GetExclusiveStorageForNodeNames(lu.cfg, nodenames)
3250 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None, es_flags)
3251 for node in nodenames:
3252 info = nodeinfo[node]
3253 info.Raise("Cannot get current information from node %s" % node,
3254 prereq=True, ecode=errors.ECODE_ENVIRON)
3255 (_, (vg_info, ), _) = info.payload
3256 vg_free = vg_info.get("vg_free", None)
3257 if not isinstance(vg_free, int):
3258 raise errors.OpPrereqError("Can't compute free disk space on node"
3259 " %s for vg %s, result was '%s'" %
3260 (node, vg, vg_free), errors.ECODE_ENVIRON)
3261 if requested > vg_free:
3262 raise errors.OpPrereqError("Not enough disk space on target node %s"
3263 " vg %s: required %d MiB, available %d MiB" %
3264 (node, vg, requested, vg_free),
3268 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
3269 """Checks if nodes have enough physical CPUs
3271 This function checks if all given nodes have the needed number of
3272 physical CPUs. In case any node has less CPUs or we cannot get the
3273 information from the node, this function raises an OpPrereqError exception.
3276 @type lu: C{LogicalUnit}
3277 @param lu: a logical unit from which we get configuration data
3278 @type nodenames: C{list}
3279 @param nodenames: the list of node names to check
3280 @type requested: C{int}
3281 @param requested: the minimum acceptable number of physical CPUs
3282 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
3283 or we cannot check the node
3286 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name], None)
3287 for node in nodenames:
3288 info = nodeinfo[node]
3289 info.Raise("Cannot get current information from node %s" % node,
3290 prereq=True, ecode=errors.ECODE_ENVIRON)
3291 (_, _, (hv_info, )) = info.payload
3292 num_cpus = hv_info.get("cpu_total", None)
3293 if not isinstance(num_cpus, int):
3294 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
3295 " on node %s, result was '%s'" %
3296 (node, num_cpus), errors.ECODE_ENVIRON)
3297 if requested > num_cpus:
3298 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
3299 "required" % (node, num_cpus, requested),
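# Hedged usage sketch (hypothetical values; the hypervisor constant is only
# for illustration): require at least four physical CPUs on every listed node.
def _ExampleRequirePhysicalCpus(lu):
  _CheckNodesPhysicalCPUs(lu, ["node1.example.com", "node2.example.com"],
                          4, constants.HT_KVM)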
3303 class LUInstanceStartup(LogicalUnit):
3304 """Starts an instance.
3307 HPATH = "instance-start"
3308 HTYPE = constants.HTYPE_INSTANCE
3311 def CheckArguments(self):
3313 if self.op.beparams:
3314 # fill the beparams dict
3315 objects.UpgradeBeParams(self.op.beparams)
3316 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3318 def ExpandNames(self):
3319 self._ExpandAndLockInstance()
3320 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3322 def DeclareLocks(self, level):
3323 if level == locking.LEVEL_NODE_RES:
3324 self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
3326 def BuildHooksEnv(self):
3329 This runs on master, primary and secondary nodes of the instance.
3333 "FORCE": self.op.force,
3336 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3340 def BuildHooksNodes(self):
3341 """Build hooks nodes.
3344 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3347 def CheckPrereq(self):
3348 """Check prerequisites.
3350 This checks that the instance is in the cluster.
3353 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3354 assert self.instance is not None, \
3355 "Cannot retrieve locked instance %s" % self.op.instance_name
3358 if self.op.hvparams:
3359 # check hypervisor parameter syntax (locally)
3360 cluster = self.cfg.GetClusterInfo()
3361 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
3362 filled_hvp = cluster.FillHV(instance)
3363 filled_hvp.update(self.op.hvparams)
3364 hv_type = hypervisor.GetHypervisorClass(instance.hypervisor)
3365 hv_type.CheckParameterSyntax(filled_hvp)
3366 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3368 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
3370 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
3372 if self.primary_offline and self.op.ignore_offline_nodes:
3373 self.LogWarning("Ignoring offline primary node")
3375 if self.op.hvparams or self.op.beparams:
3376 self.LogWarning("Overridden parameters are ignored")
3378 _CheckNodeOnline(self, instance.primary_node)
3380 bep = self.cfg.GetClusterInfo().FillBE(instance)
3381 bep.update(self.op.beparams)
3383 # check that the bridges exist
3384 _CheckInstanceBridgesExist(self, instance)
3386 remote_info = self.rpc.call_instance_info(instance.primary_node,
3388 instance.hypervisor)
3389 remote_info.Raise("Error checking node %s" % instance.primary_node,
3390 prereq=True, ecode=errors.ECODE_ENVIRON)
3391 if not remote_info.payload: # not running already
3392 _CheckNodeFreeMemory(self, instance.primary_node,
3393 "starting instance %s" % instance.name,
3394 bep[constants.BE_MINMEM], instance.hypervisor)
3396 def Exec(self, feedback_fn):
3397 """Start the instance.
3400 instance = self.instance
3401 force = self.op.force
3402 reason = self.op.reason
3404 if not self.op.no_remember:
3405 self.cfg.MarkInstanceUp(instance.name)
3407 if self.primary_offline:
3408 assert self.op.ignore_offline_nodes
3409 self.LogInfo("Primary node offline, marked instance as started")
3411 node_current = instance.primary_node
3413 _StartInstanceDisks(self, instance, force)
3416 self.rpc.call_instance_start(node_current,
3417 (instance, self.op.hvparams,
3419 self.op.startup_paused, reason)
3420 msg = result.fail_msg
3422 _ShutdownInstanceDisks(self, instance)
3423 raise errors.OpExecError("Could not start instance: %s" % msg)
3426 class LUInstanceReboot(LogicalUnit):
3427 """Reboot an instance.
3430 HPATH = "instance-reboot"
3431 HTYPE = constants.HTYPE_INSTANCE
3434 def ExpandNames(self):
3435 self._ExpandAndLockInstance()
3437 def BuildHooksEnv(self):
3440 This runs on master, primary and secondary nodes of the instance.
3444 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3445 "REBOOT_TYPE": self.op.reboot_type,
3446 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
3449 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3453 def BuildHooksNodes(self):
3454 """Build hooks nodes.
3457 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3460 def CheckPrereq(self):
3461 """Check prerequisites.
3463 This checks that the instance is in the cluster.
3466 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3467 assert self.instance is not None, \
3468 "Cannot retrieve locked instance %s" % self.op.instance_name
3469 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
3470 _CheckNodeOnline(self, instance.primary_node)
3472 # check that the bridges exist
3473 _CheckInstanceBridgesExist(self, instance)
3475 def Exec(self, feedback_fn):
3476 """Reboot the instance.
3479 instance = self.instance
3480 ignore_secondaries = self.op.ignore_secondaries
3481 reboot_type = self.op.reboot_type
3482 reason = self.op.reason
3484 remote_info = self.rpc.call_instance_info(instance.primary_node,
3486 instance.hypervisor)
3487 remote_info.Raise("Error checking node %s" % instance.primary_node)
3488 instance_running = bool(remote_info.payload)
3490 node_current = instance.primary_node
3492 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3493 constants.INSTANCE_REBOOT_HARD]:
3494 for disk in instance.disks:
3495 self.cfg.SetDiskID(disk, node_current)
3496 result = self.rpc.call_instance_reboot(node_current, instance,
3498 self.op.shutdown_timeout, reason)
3499 result.Raise("Could not reboot instance")
3501 if instance_running:
3502 result = self.rpc.call_instance_shutdown(node_current, instance,
3503 self.op.shutdown_timeout,
3505 result.Raise("Could not shutdown instance for full reboot")
3506 _ShutdownInstanceDisks(self, instance)
3508 self.LogInfo("Instance %s was already stopped, starting now",
3510 _StartInstanceDisks(self, instance, ignore_secondaries)
3511 result = self.rpc.call_instance_start(node_current,
3512 (instance, None, None), False,
3514 msg = result.fail_msg
3516 _ShutdownInstanceDisks(self, instance)
3517 raise errors.OpExecError("Could not start instance for"
3518 " full reboot: %s" % msg)
3520 self.cfg.MarkInstanceUp(instance.name)
3523 class LUInstanceShutdown(LogicalUnit):
3524 """Shutdown an instance.
3527 HPATH = "instance-stop"
3528 HTYPE = constants.HTYPE_INSTANCE
3531 def ExpandNames(self):
3532 self._ExpandAndLockInstance()
3534 def BuildHooksEnv(self):
3537 This runs on master, primary and secondary nodes of the instance.
3540 env = _BuildInstanceHookEnvByObject(self, self.instance)
3541 env["TIMEOUT"] = self.op.timeout
3544 def BuildHooksNodes(self):
3545 """Build hooks nodes.
3548 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3551 def CheckPrereq(self):
3552 """Check prerequisites.
3554 This checks that the instance is in the cluster.
3557 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3558 assert self.instance is not None, \
3559 "Cannot retrieve locked instance %s" % self.op.instance_name
3561 if not self.op.force:
3562 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
3564 self.LogWarning("Ignoring offline instance check")
3566 self.primary_offline = \
3567 self.cfg.GetNodeInfo(self.instance.primary_node).offline
3569 if self.primary_offline and self.op.ignore_offline_nodes:
3570 self.LogWarning("Ignoring offline primary node")
3572 _CheckNodeOnline(self, self.instance.primary_node)
3574 def Exec(self, feedback_fn):
3575 """Shutdown the instance.
3578 instance = self.instance
3579 node_current = instance.primary_node
3580 timeout = self.op.timeout
3581 reason = self.op.reason
3583 # If the instance is offline we shouldn't mark it as down, as that
3584 # resets the offline flag.
3585 if not self.op.no_remember and instance.admin_state in INSTANCE_ONLINE:
3586 self.cfg.MarkInstanceDown(instance.name)
3588 if self.primary_offline:
3589 assert self.op.ignore_offline_nodes
3590 self.LogInfo("Primary node offline, marked instance as stopped")
3592 result = self.rpc.call_instance_shutdown(node_current, instance, timeout,
3594 msg = result.fail_msg
3596 self.LogWarning("Could not shutdown instance: %s", msg)
3598 _ShutdownInstanceDisks(self, instance)
3601 class LUInstanceReinstall(LogicalUnit):
3602 """Reinstall an instance.
3605 HPATH = "instance-reinstall"
3606 HTYPE = constants.HTYPE_INSTANCE
3609 def ExpandNames(self):
3610 self._ExpandAndLockInstance()
3612 def BuildHooksEnv(self):
3615 This runs on master, primary and secondary nodes of the instance.
3618 return _BuildInstanceHookEnvByObject(self, self.instance)
3620 def BuildHooksNodes(self):
3621 """Build hooks nodes.
3624 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3627 def CheckPrereq(self):
3628 """Check prerequisites.
3630 This checks that the instance is in the cluster and is not running.
3633 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3634 assert instance is not None, \
3635 "Cannot retrieve locked instance %s" % self.op.instance_name
3636 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
3637 " offline, cannot reinstall")
3639 if instance.disk_template == constants.DT_DISKLESS:
3640 raise errors.OpPrereqError("Instance '%s' has no disks" %
3641 self.op.instance_name,
3643 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
3645 if self.op.os_type is not None:
3647 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
3648 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
3649 instance_os = self.op.os_type
3651 instance_os = instance.os
3653 nodelist = list(instance.all_nodes)
3655 if self.op.osparams:
3656 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
3657 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
3658 self.os_inst = i_osdict # the new dict (without defaults)
3662 self.instance = instance
3664 def Exec(self, feedback_fn):
3665 """Reinstall the instance.
3668 inst = self.instance
3670 if self.op.os_type is not None:
3671 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3672 inst.os = self.op.os_type
3673 # Write to configuration
3674 self.cfg.Update(inst, feedback_fn)
3676 _StartInstanceDisks(self, inst, None)
3678 feedback_fn("Running the instance OS create scripts...")
3679 # FIXME: pass debug option from opcode to backend
3680 result = self.rpc.call_instance_os_add(inst.primary_node,
3681 (inst, self.os_inst), True,
3682 self.op.debug_level)
3683 result.Raise("Could not install OS for instance %s on node %s" %
3684 (inst.name, inst.primary_node))
3686 _ShutdownInstanceDisks(self, inst)
3689 class LUInstanceRecreateDisks(LogicalUnit):
3690 """Recreate an instance's missing disks.
3693 HPATH = "instance-recreate-disks"
3694 HTYPE = constants.HTYPE_INSTANCE
3697 _MODIFYABLE = compat.UniqueFrozenset([
3698 constants.IDISK_SIZE,
3699 constants.IDISK_MODE,
3702 # New or changed disk parameters may have different semantics
3703 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
3704 constants.IDISK_ADOPT,
3706 # TODO: Implement support for changing the VG while recreating
3708 constants.IDISK_METAVG,
3709 constants.IDISK_PROVIDER,
3710 constants.IDISK_NAME,
3713 def _RunAllocator(self):
3714 """Run the allocator based on input opcode.
3717 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
3720 # The allocator should actually run in "relocate" mode, but current
3721 # allocators don't support relocating all the nodes of an instance at
3722 # the same time. As a workaround we use "allocate" mode, but this is
3723 # suboptimal for two reasons:
3724 # - The instance name passed to the allocator is present in the list of
3725 # existing instances, so there could be a conflict within the
3726 # internal structures of the allocator. This doesn't happen with the
3727 # current allocators, but it's a liability.
3728 # - The allocator counts the resources used by the instance twice: once
3729 # because the instance exists already, and once because it tries to
3730 # allocate a new instance.
3731 # The allocator could choose some of the nodes on which the instance is
3732 # running, but that's not a problem. If the instance nodes are broken,
3733 # they should already be marked as drained or offline, and hence
3734 # skipped by the allocator. If instance disks have been lost for other
3735 # reasons, then recreating the disks on the same nodes should be fine.
3736 disk_template = self.instance.disk_template
3737 spindle_use = be_full[constants.BE_SPINDLE_USE]
3738 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
3739 disk_template=disk_template,
3740 tags=list(self.instance.GetTags()),
3741 os=self.instance.os,
3743 vcpus=be_full[constants.BE_VCPUS],
3744 memory=be_full[constants.BE_MAXMEM],
3745 spindle_use=spindle_use,
3746 disks=[{constants.IDISK_SIZE: d.size,
3747 constants.IDISK_MODE: d.mode}
3748 for d in self.instance.disks],
3749 hypervisor=self.instance.hypervisor,
3750 node_whitelist=None)
3751 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
3753 ial.Run(self.op.iallocator)
3755 assert req.RequiredNodes() == len(self.instance.all_nodes)
3758 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
3759 " %s" % (self.op.iallocator, ial.info),
3762 self.op.nodes = ial.result
3763 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
3764 self.op.instance_name, self.op.iallocator,
3765 utils.CommaJoin(ial.result))
3767 def CheckArguments(self):
3768 if self.op.disks and ht.TNonNegativeInt(self.op.disks[0]):
3769 # Normalize and convert deprecated list of disk indices
3770 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
3772 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
3774 raise errors.OpPrereqError("Some disks have been specified more than"
3775 " once: %s" % utils.CommaJoin(duplicates),
3778 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
3779 # when neither iallocator nor nodes are specified
3780 if self.op.iallocator or self.op.nodes:
3781 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
3783 for (idx, params) in self.op.disks:
3784 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
3785 unsupported = frozenset(params.keys()) - self._MODIFYABLE
3787 raise errors.OpPrereqError("Parameters for disk %s try to change"
3788 " unmodifiable parameter(s): %s" %
3789 (idx, utils.CommaJoin(unsupported)),
3792 def ExpandNames(self):
3793 self._ExpandAndLockInstance()
3794 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3797 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
3798 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
3800 self.needed_locks[locking.LEVEL_NODE] = []
3801 if self.op.iallocator:
3802 # iallocator will select a new node in the same group
3803 self.needed_locks[locking.LEVEL_NODEGROUP] = []
3804 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
3806 self.needed_locks[locking.LEVEL_NODE_RES] = []
3808 def DeclareLocks(self, level):
3809 if level == locking.LEVEL_NODEGROUP:
3810 assert self.op.iallocator is not None
3811 assert not self.op.nodes
3812 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3813 self.share_locks[locking.LEVEL_NODEGROUP] = 1
3814 # Lock the primary group used by the instance optimistically; this
3815 # requires going via the node before it's locked, requiring
3816 # verification later on
3817 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3818 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
3820 elif level == locking.LEVEL_NODE:
3821 # If an allocator is used, then we lock all the nodes in the current
3822 # instance group, as we don't know yet which ones will be selected;
3823 # if we replace the nodes without using an allocator, locks are
3824 # already declared in ExpandNames; otherwise, we need to lock all the
3825 # instance nodes for disk re-creation
3826 if self.op.iallocator:
3827 assert not self.op.nodes
3828 assert not self.needed_locks[locking.LEVEL_NODE]
3829 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
3831 # Lock member nodes of the group of the primary node
3832 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
3833 self.needed_locks[locking.LEVEL_NODE].extend(
3834 self.cfg.GetNodeGroup(group_uuid).members)
3836 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
3837 elif not self.op.nodes:
3838 self._LockInstancesNodes(primary_only=False)
3839 elif level == locking.LEVEL_NODE_RES:
3841 self.needed_locks[locking.LEVEL_NODE_RES] = \
3842 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
3844 def BuildHooksEnv(self):
3847 This runs on master, primary and secondary nodes of the instance.
3850 return _BuildInstanceHookEnvByObject(self, self.instance)
3852 def BuildHooksNodes(self):
3853 """Build hooks nodes.
3856 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3859 def CheckPrereq(self):
3860 """Check prerequisites.
3862 This checks that the instance is in the cluster and is not running.
3865 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3866 assert instance is not None, \
3867 "Cannot retrieve locked instance %s" % self.op.instance_name
3869 if len(self.op.nodes) != len(instance.all_nodes):
3870 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
3871 " %d replacement nodes were specified" %
3872 (instance.name, len(instance.all_nodes),
3873 len(self.op.nodes)),
3875 assert instance.disk_template != constants.DT_DRBD8 or \
3876 len(self.op.nodes) == 2
3877 assert instance.disk_template != constants.DT_PLAIN or \
3878 len(self.op.nodes) == 1
3879 primary_node = self.op.nodes[0]
3881 primary_node = instance.primary_node
3882 if not self.op.iallocator:
3883 _CheckNodeOnline(self, primary_node)
3885 if instance.disk_template == constants.DT_DISKLESS:
3886 raise errors.OpPrereqError("Instance '%s' has no disks" %
3887 self.op.instance_name, errors.ECODE_INVAL)
3889 # Verify if node group locks are still correct
3890 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
3892 # Node group locks are acquired only for the primary node (and only
3893 # when the allocator is used)
3894 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
3897 # if we replace nodes *and* the old primary is offline, we don't
3898 # check the instance state
3899 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
3900 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
3901 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
3902 msg="cannot recreate disks")
3905 self.disks = dict(self.op.disks)
3907 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
3909 maxidx = max(self.disks.keys())
3910 if maxidx >= len(instance.disks):
3911 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
3914 if ((self.op.nodes or self.op.iallocator) and
3915 sorted(self.disks.keys()) != range(len(instance.disks))):
3916 raise errors.OpPrereqError("Can't recreate disks partially and"
3917 " change the nodes at the same time",
3920 self.instance = instance
3922 if self.op.iallocator:
3923 self._RunAllocator()
3924 # Release unneeded node and node resource locks
3925 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
3926 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
3927 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
3929 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
3931 def Exec(self, feedback_fn):
3932 """Recreate the disks.
3935 instance = self.instance
3937 assert (self.owned_locks(locking.LEVEL_NODE) ==
3938 self.owned_locks(locking.LEVEL_NODE_RES))
3941 mods = [] # keeps track of needed changes
3943 for idx, disk in enumerate(instance.disks):
3945 changes = self.disks[idx]
3947 # Disk should not be recreated
3951 # update secondaries for disks, if needed
3952 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
3953 # need to update the nodes and minors
3954 assert len(self.op.nodes) == 2
3955 assert len(disk.logical_id) == 6 # otherwise disk internals
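# For reference, a DRBD8 logical_id is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, shared_secret); below, only the
# node names and the minors change, the port and the secret are reused.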
3957 (_, _, old_port, _, _, old_secret) = disk.logical_id
3958 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
3959 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
3960 new_minors[0], new_minors[1], old_secret)
3961 assert len(disk.logical_id) == len(new_id)
3965 mods.append((idx, new_id, changes))
3967 # now that we have passed all asserts above, we can apply the mods
3968 # in a single run (to avoid partial changes)
3969 for idx, new_id, changes in mods:
3970 disk = instance.disks[idx]
3971 if new_id is not None:
3972 assert disk.dev_type == constants.LD_DRBD8
3973 disk.logical_id = new_id
3975 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
3976 mode=changes.get(constants.IDISK_MODE, None))
3978 # change primary node, if needed
3980 instance.primary_node = self.op.nodes[0]
3981 self.LogWarning("Changing the instance's nodes, you will have to"
3982 " remove any disks left on the older nodes manually")
3985 self.cfg.Update(instance, feedback_fn)
3987 # All touched nodes must be locked
3988 mylocks = self.owned_locks(locking.LEVEL_NODE)
3989 assert mylocks.issuperset(frozenset(instance.all_nodes))
3990 _CreateDisks(self, instance, to_skip=to_skip)
3993 class LUInstanceRename(LogicalUnit):
3994 """Rename an instance.
3997 HPATH = "instance-rename"
3998 HTYPE = constants.HTYPE_INSTANCE
4000 def CheckArguments(self):
4004 if self.op.ip_check and not self.op.name_check:
4005 # TODO: make the ip check more flexible and not depend on the name check
4006 raise errors.OpPrereqError("IP address check requires a name check",
4009 def BuildHooksEnv(self):
4012 This runs on master, primary and secondary nodes of the instance.
4015 env = _BuildInstanceHookEnvByObject(self, self.instance)
4016 env["INSTANCE_NEW_NAME"] = self.op.new_name
4019 def BuildHooksNodes(self):
4020 """Build hooks nodes.
4023 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4026 def CheckPrereq(self):
4027 """Check prerequisites.
4029 This checks that the instance is in the cluster and is not running.
4032 self.op.instance_name = _ExpandInstanceName(self.cfg,
4033 self.op.instance_name)
4034 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4035 assert instance is not None
4036 _CheckNodeOnline(self, instance.primary_node)
4037 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
4038 msg="cannot rename")
4039 self.instance = instance
4041 new_name = self.op.new_name
4042 if self.op.name_check:
4043 hostname = _CheckHostnameSane(self, new_name)
4044 new_name = self.op.new_name = hostname.name
4045 if (self.op.ip_check and
4046 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
4047 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4048 (hostname.ip, new_name),
4049 errors.ECODE_NOTUNIQUE)
4051 instance_list = self.cfg.GetInstanceList()
4052 if new_name in instance_list and new_name != instance.name:
4053 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4054 new_name, errors.ECODE_EXISTS)
4056 def Exec(self, feedback_fn):
4057 """Rename the instance.
4060 inst = self.instance
4061 old_name = inst.name
4063 rename_file_storage = False
4064 if (inst.disk_template in constants.DTS_FILEBASED and
4065 self.op.new_name != inst.name):
4066 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4067 rename_file_storage = True
4069 self.cfg.RenameInstance(inst.name, self.op.new_name)
4070 # Change the instance lock. This is definitely safe while we hold the BGL.
4071 # Otherwise the new lock would have to be added in acquired mode.
4073 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
4074 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
4075 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4077 # re-read the instance from the configuration after rename
4078 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4080 if rename_file_storage:
4081 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4082 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4083 old_file_storage_dir,
4084 new_file_storage_dir)
4085 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4086 " (but the instance has been renamed in Ganeti)" %
4087 (inst.primary_node, old_file_storage_dir,
4088 new_file_storage_dir))
4090 _StartInstanceDisks(self, inst, None)
4091 # update info on disks
4092 info = _GetInstanceInfoText(inst)
4093 for (idx, disk) in enumerate(inst.disks):
4094 for node in inst.all_nodes:
4095 self.cfg.SetDiskID(disk, node)
4096 result = self.rpc.call_blockdev_setinfo(node, disk, info)
4098 self.LogWarning("Error setting info on node %s for disk %s: %s",
4099 node, idx, result.fail_msg)
4101 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4102 old_name, self.op.debug_level)
4103 msg = result.fail_msg
4105 msg = ("Could not run OS rename script for instance %s on node %s"
4106 " (but the instance has been renamed in Ganeti): %s" %
4107 (inst.name, inst.primary_node, msg))
4108 self.LogWarning(msg)
4110 _ShutdownInstanceDisks(self, inst)
4115 class LUInstanceRemove(LogicalUnit):
4116 """Remove an instance.
4119 HPATH = "instance-remove"
4120 HTYPE = constants.HTYPE_INSTANCE
4123 def ExpandNames(self):
4124 self._ExpandAndLockInstance()
4125 self.needed_locks[locking.LEVEL_NODE] = []
4126 self.needed_locks[locking.LEVEL_NODE_RES] = []
4127 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4129 def DeclareLocks(self, level):
4130 if level == locking.LEVEL_NODE:
4131 self._LockInstancesNodes()
4132 elif level == locking.LEVEL_NODE_RES:
4134 self.needed_locks[locking.LEVEL_NODE_RES] = \
4135 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
4137 def BuildHooksEnv(self):
4140 This runs on master, primary and secondary nodes of the instance.
4143 env = _BuildInstanceHookEnvByObject(self, self.instance)
4144 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4147 def BuildHooksNodes(self):
4148 """Build hooks nodes.
4151 nl = [self.cfg.GetMasterNode()]
4152 nl_post = list(self.instance.all_nodes) + nl
4153 return (nl, nl_post)
4155 def CheckPrereq(self):
4156 """Check prerequisites.
4158 This checks that the instance is in the cluster.
4161 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4162 assert self.instance is not None, \
4163 "Cannot retrieve locked instance %s" % self.op.instance_name
4165 def Exec(self, feedback_fn):
4166 """Remove the instance.
4169 instance = self.instance
4170 logging.info("Shutting down instance %s on node %s",
4171 instance.name, instance.primary_node)
4173 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4174 self.op.shutdown_timeout,
4176 msg = result.fail_msg
4178 if self.op.ignore_failures:
4179 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4181 raise errors.OpExecError("Could not shutdown instance %s on"
4183 (instance.name, instance.primary_node, msg))
4185 assert (self.owned_locks(locking.LEVEL_NODE) ==
4186 self.owned_locks(locking.LEVEL_NODE_RES))
4187 assert not (set(instance.all_nodes) -
4188 self.owned_locks(locking.LEVEL_NODE)), \
4189 "Not owning correct locks"
4191 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4194 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4195 """Utility function to remove an instance.
4198 logging.info("Removing block devices for instance %s", instance.name)
4200 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
4201 if not ignore_failures:
4202 raise errors.OpExecError("Can't remove instance's disks")
4203 feedback_fn("Warning: can't remove instance's disks")
4205 logging.info("Removing instance %s out of cluster config", instance.name)
4207 lu.cfg.RemoveInstance(instance.name)
4209 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4210 "Instance lock removal conflict"
4212 # Remove lock for the instance
4213 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4216 class LUInstanceQuery(NoHooksLU):
4217 """Logical unit for querying instances.
4220 # pylint: disable=W0142
4223 def CheckArguments(self):
4224 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
4225 self.op.output_fields, self.op.use_locking)
4227 def ExpandNames(self):
4228 self.iq.ExpandNames(self)
4230 def DeclareLocks(self, level):
4231 self.iq.DeclareLocks(self, level)
4233 def Exec(self, feedback_fn):
4234 return self.iq.OldStyleQuery(self)
4237 def _ExpandNamesForMigration(lu):
4238 """Expands names for use with L{TLMigrateInstance}.
4240 @type lu: L{LogicalUnit}
4243 if lu.op.target_node is not None:
4244 lu.op.target_node = _ExpandNodeName(lu.cfg, lu.op.target_node)
4246 lu.needed_locks[locking.LEVEL_NODE] = []
4247 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4249 lu.needed_locks[locking.LEVEL_NODE_RES] = []
4250 lu.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
4252 # The node allocation lock is actually only needed for externally replicated
4253 # instances (e.g. sharedfile or RBD) and if an iallocator is used.
4254 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = []
4257 def _DeclareLocksForMigration(lu, level):
4258 """Declares locks for L{TLMigrateInstance}.
4260 @type lu: L{LogicalUnit}
4261 @param level: Lock level
4264 if level == locking.LEVEL_NODE_ALLOC:
4265 assert lu.op.instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4267 instance = lu.cfg.GetInstanceInfo(lu.op.instance_name)
4269 # Node locks are already declared here rather than at LEVEL_NODE as we need
4270 # the instance object anyway to declare the node allocation lock.
4271 if instance.disk_template in constants.DTS_EXT_MIRROR:
4272 if lu.op.target_node is None:
4273 lu.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4274 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
4276 lu.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
4278 del lu.recalculate_locks[locking.LEVEL_NODE]
4280 lu._LockInstancesNodes() # pylint: disable=W0212
4282 elif level == locking.LEVEL_NODE:
4283 # Node locks are declared together with the node allocation lock
4284 assert (lu.needed_locks[locking.LEVEL_NODE] or
4285 lu.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET)
4287 elif level == locking.LEVEL_NODE_RES:
4289 lu.needed_locks[locking.LEVEL_NODE_RES] = \
4290 _CopyLockList(lu.needed_locks[locking.LEVEL_NODE])
4293 class LUInstanceFailover(LogicalUnit):
4294 """Failover an instance.
4297 HPATH = "instance-failover"
4298 HTYPE = constants.HTYPE_INSTANCE
4301 def CheckArguments(self):
4302 """Check the arguments.
4305 self.iallocator = getattr(self.op, "iallocator", None)
4306 self.target_node = getattr(self.op, "target_node", None)
4308 def ExpandNames(self):
4309 self._ExpandAndLockInstance()
4310 _ExpandNamesForMigration(self)
4313 TLMigrateInstance(self, self.op.instance_name, False, True, False,
4314 self.op.ignore_consistency, True,
4315 self.op.shutdown_timeout, self.op.ignore_ipolicy)
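# For readability, the positional arguments above correspond to
# TLMigrateInstance(lu, instance_name, cleanup=False, failover=True,
# fallback=False, ignore_consistency=self.op.ignore_consistency,
# allow_runtime_changes=True, shutdown_timeout=self.op.shutdown_timeout,
# ignore_ipolicy=self.op.ignore_ipolicy).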
4317 self.tasklets = [self._migrater]
4319 def DeclareLocks(self, level):
4320 _DeclareLocksForMigration(self, level)
4322 def BuildHooksEnv(self):
4325 This runs on master, primary and secondary nodes of the instance.
4328 instance = self._migrater.instance
4329 source_node = instance.primary_node
4330 target_node = self.op.target_node
4332 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4333 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4334 "OLD_PRIMARY": source_node,
4335 "NEW_PRIMARY": target_node,
4338 if instance.disk_template in constants.DTS_INT_MIRROR:
4339 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
4340 env["NEW_SECONDARY"] = source_node
4342 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
4344 env.update(_BuildInstanceHookEnvByObject(self, instance))
4348 def BuildHooksNodes(self):
4349 """Build hooks nodes.
4352 instance = self._migrater.instance
4353 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4354 return (nl, nl + [instance.primary_node])
4357 class LUInstanceMigrate(LogicalUnit):
4358 """Migrate an instance.
4360 This is migration without shutting down, compared to the failover,
4361 which is done with shutdown.
4364 HPATH = "instance-migrate"
4365 HTYPE = constants.HTYPE_INSTANCE
4368 def ExpandNames(self):
4369 self._ExpandAndLockInstance()
4370 _ExpandNamesForMigration(self)
4373 TLMigrateInstance(self, self.op.instance_name, self.op.cleanup,
4374 False, self.op.allow_failover, False,
4375 self.op.allow_runtime_changes,
4376 constants.DEFAULT_SHUTDOWN_TIMEOUT,
4377 self.op.ignore_ipolicy)
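# For readability, the positional arguments above correspond to
# TLMigrateInstance(lu, instance_name, cleanup=self.op.cleanup,
# failover=False, fallback=self.op.allow_failover, ignore_consistency=False,
# allow_runtime_changes=self.op.allow_runtime_changes,
# shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
# ignore_ipolicy=self.op.ignore_ipolicy).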
4379 self.tasklets = [self._migrater]
4381 def DeclareLocks(self, level):
4382 _DeclareLocksForMigration(self, level)
4384 def BuildHooksEnv(self):
4387 This runs on master, primary and secondary nodes of the instance.
4390 instance = self._migrater.instance
4391 source_node = instance.primary_node
4392 target_node = self.op.target_node
4393 env = _BuildInstanceHookEnvByObject(self, instance)
4395 "MIGRATE_LIVE": self._migrater.live,
4396 "MIGRATE_CLEANUP": self.op.cleanup,
4397 "OLD_PRIMARY": source_node,
4398 "NEW_PRIMARY": target_node,
4399 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
4402 if instance.disk_template in constants.DTS_INT_MIRROR:
4403 env["OLD_SECONDARY"] = target_node
4404 env["NEW_SECONDARY"] = source_node
4406 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
4410 def BuildHooksNodes(self):
4411 """Build hooks nodes.
4414 instance = self._migrater.instance
4415 snodes = list(instance.secondary_nodes)
4416 nl = [self.cfg.GetMasterNode(), instance.primary_node] + snodes
4420 class LUInstanceMove(LogicalUnit):
4421 """Move an instance by data-copying.
4424 HPATH = "instance-move"
4425 HTYPE = constants.HTYPE_INSTANCE
4428 def ExpandNames(self):
4429 self._ExpandAndLockInstance()
4430 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
4431 self.op.target_node = target_node
4432 self.needed_locks[locking.LEVEL_NODE] = [target_node]
4433 self.needed_locks[locking.LEVEL_NODE_RES] = []
4434 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4436 def DeclareLocks(self, level):
4437 if level == locking.LEVEL_NODE:
4438 self._LockInstancesNodes(primary_only=True)
4439 elif level == locking.LEVEL_NODE_RES:
4441 self.needed_locks[locking.LEVEL_NODE_RES] = \
4442 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
4444 def BuildHooksEnv(self):
4447 This runs on master, primary and secondary nodes of the instance.
4451 "TARGET_NODE": self.op.target_node,
4452 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4454 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4457 def BuildHooksNodes(self):
4458 """Build hooks nodes.
4462 self.cfg.GetMasterNode(),
4463 self.instance.primary_node,
4464 self.op.target_node,
4468 def CheckPrereq(self):
4469 """Check prerequisites.
4471 This checks that the instance is in the cluster.
4474 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4475 assert self.instance is not None, \
4476 "Cannot retrieve locked instance %s" % self.op.instance_name
4478 if instance.disk_template not in constants.DTS_COPYABLE:
4479 raise errors.OpPrereqError("Disk template %s not suitable for copying" %
4480 instance.disk_template, errors.ECODE_STATE)
4482 node = self.cfg.GetNodeInfo(self.op.target_node)
4483 assert node is not None, \
4484 "Cannot retrieve locked node %s" % self.op.target_node
4486 self.target_node = target_node = node.name
4488 if target_node == instance.primary_node:
4489 raise errors.OpPrereqError("Instance %s is already on the node %s" %
4490 (instance.name, target_node),
4493 bep = self.cfg.GetClusterInfo().FillBE(instance)
4495 for idx, dsk in enumerate(instance.disks):
4496 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4497 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4498 " cannot copy" % idx, errors.ECODE_STATE)
4500 _CheckNodeOnline(self, target_node)
4501 _CheckNodeNotDrained(self, target_node)
4502 _CheckNodeVmCapable(self, target_node)
4503 cluster = self.cfg.GetClusterInfo()
4504 group_info = self.cfg.GetNodeGroup(node.group)
4505 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
4506 _CheckTargetNodeIPolicy(self, ipolicy, instance, node, self.cfg,
4507 ignore=self.op.ignore_ipolicy)
4509 if instance.admin_state == constants.ADMINST_UP:
4510 # check memory requirements on the secondary node
4511 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4512 instance.name, bep[constants.BE_MAXMEM],
4513 instance.hypervisor)
4515 self.LogInfo("Not checking memory on the secondary node as"
4516 " instance will not be started")
4518 # check bridge existence
4519 _CheckInstanceBridgesExist(self, instance, node=target_node)
4521 def Exec(self, feedback_fn):
4522 """Move an instance.
4524 The move is done by shutting it down on its present node, copying
4525 the data over (slow) and starting it on the new node.
4528 instance = self.instance
4530 source_node = instance.primary_node
4531 target_node = self.target_node
4533 self.LogInfo("Shutting down instance %s on source node %s",
4534 instance.name, source_node)
4536 assert (self.owned_locks(locking.LEVEL_NODE) ==
4537 self.owned_locks(locking.LEVEL_NODE_RES))
4539 result = self.rpc.call_instance_shutdown(source_node, instance,
4540 self.op.shutdown_timeout,
4542 msg = result.fail_msg
4544 if self.op.ignore_consistency:
4545 self.LogWarning("Could not shutdown instance %s on node %s."
4546 " Proceeding anyway. Please make sure node"
4547 " %s is down. Error details: %s",
4548 instance.name, source_node, source_node, msg)
4550 raise errors.OpExecError("Could not shutdown instance %s on"
4552 (instance.name, source_node, msg))
4554 # create the target disks
4556 _CreateDisks(self, instance, target_node=target_node)
4557 except errors.OpExecError:
4558 self.LogWarning("Device creation failed")
4559 self.cfg.ReleaseDRBDMinors(instance.name)
4562 cluster_name = self.cfg.GetClusterInfo().cluster_name
4565 # activate, get path, copy the data over
4566 for idx, disk in enumerate(instance.disks):
4567 self.LogInfo("Copying data for disk %d", idx)
4568 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
4569 instance.name, True, idx)
4571 self.LogWarning("Can't assemble newly created disk %d: %s",
4572 idx, result.fail_msg)
4573 errs.append(result.fail_msg)
4575 dev_path = result.payload
4576 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
4577 target_node, dev_path,
4580 self.LogWarning("Can't copy data over for disk %d: %s",
4581 idx, result.fail_msg)
4582 errs.append(result.fail_msg)
4586 self.LogWarning("Some disks failed to copy, aborting")
4588 _RemoveDisks(self, instance, target_node=target_node)
4590 self.cfg.ReleaseDRBDMinors(instance.name)
4591 raise errors.OpExecError("Errors during disk copy: %s" %
4594 instance.primary_node = target_node
4595 self.cfg.Update(instance, feedback_fn)
4597 self.LogInfo("Removing the disks on the original node")
4598 _RemoveDisks(self, instance, target_node=source_node)
4600 # Only start the instance if it's marked as up
4601 if instance.admin_state == constants.ADMINST_UP:
4602 self.LogInfo("Starting instance %s on node %s",
4603 instance.name, target_node)
4605 disks_ok, _ = _AssembleInstanceDisks(self, instance,
4606 ignore_secondaries=True)
4608 _ShutdownInstanceDisks(self, instance)
4609 raise errors.OpExecError("Can't activate the instance's disks")
4611 result = self.rpc.call_instance_start(target_node,
4612 (instance, None, None), False,
4614 msg = result.fail_msg
4616 _ShutdownInstanceDisks(self, instance)
4617 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4618 (instance.name, target_node, msg))
4621 class LUNodeMigrate(LogicalUnit):
4622 """Migrate all instances from a node.
4625 HPATH = "node-migrate"
4626 HTYPE = constants.HTYPE_NODE
4629 def CheckArguments(self):
4632 def ExpandNames(self):
4633 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4635 self.share_locks = _ShareAll()
4636 self.needed_locks = {
4637 locking.LEVEL_NODE: [self.op.node_name],
4640 def BuildHooksEnv(self):
4643 This runs on the master, the primary and all the secondaries.
4647 "NODE_NAME": self.op.node_name,
4648 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
4651 def BuildHooksNodes(self):
4652 """Build hooks nodes.
4655 nl = [self.cfg.GetMasterNode()]
4658 def CheckPrereq(self):
4661 def Exec(self, feedback_fn):
4662 # Prepare jobs for migrating instances
4663 allow_runtime_changes = self.op.allow_runtime_changes
4665 [opcodes.OpInstanceMigrate(instance_name=inst.name,
4668 iallocator=self.op.iallocator,
4669 target_node=self.op.target_node,
4670 allow_runtime_changes=allow_runtime_changes,
4671 ignore_ipolicy=self.op.ignore_ipolicy)]
4672 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)]
4674 # TODO: Run iallocator in this opcode and pass correct placement options to
4675 # OpInstanceMigrate. Since other jobs can modify the cluster between
4676 # running the iallocator and the actual migration, a good consistency model
4677 # will have to be found.
4679 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
4680 frozenset([self.op.node_name]))
4682 return ResultWithJobs(jobs)
4685 class TLMigrateInstance(Tasklet):
4686 """Tasklet class for instance migration.
4689 @ivar live: whether the migration will be done live or non-live;
4690 this variable is initialized only after CheckPrereq has run
4691 @type cleanup: boolean
4692 @ivar cleanup: Whether we clean up from a failed migration
4693 @type iallocator: string
4694 @ivar iallocator: The iallocator used to determine target_node
4695 @type target_node: string
4696 @ivar target_node: If given, the target_node to reallocate the instance to
4697 @type failover: boolean
4698 @ivar failover: Whether operation results in failover or migration
4699 @type fallback: boolean
4700 @ivar fallback: Whether fallback to failover is allowed if migration not
4702 @type ignore_consistency: boolean
4703 @ivar ignore_consistency: Whether we should ignore consistency between source
4705 @type shutdown_timeout: int
4706 @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
4707 @type ignore_ipolicy: bool
4708 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
4713 _MIGRATION_POLL_INTERVAL = 1 # seconds
4714 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
4716 def __init__(self, lu, instance_name, cleanup, failover, fallback,
4717 ignore_consistency, allow_runtime_changes, shutdown_timeout,
4719 """Initializes this class.
4722 Tasklet.__init__(self, lu)
4725 self.instance_name = instance_name
4726 self.cleanup = cleanup
4727 self.live = False # will be overridden later
4728 self.failover = failover
4729 self.fallback = fallback
4730 self.ignore_consistency = ignore_consistency
4731 self.shutdown_timeout = shutdown_timeout
4732 self.ignore_ipolicy = ignore_ipolicy
4733 self.allow_runtime_changes = allow_runtime_changes
4735 def CheckPrereq(self):
4736 """Check prerequisites.
4738 This checks that the instance is in the cluster.
4741 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
4742 instance = self.cfg.GetInstanceInfo(instance_name)
4743 assert instance is not None
4744 self.instance = instance
4745 cluster = self.cfg.GetClusterInfo()
4747 if (not self.cleanup and
4748 not instance.admin_state == constants.ADMINST_UP and
4749 not self.failover and self.fallback):
4750 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
4751 " switching to failover")
4752 self.failover = True
4754 if instance.disk_template not in constants.DTS_MIRRORED:
4759 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
4760 " %s" % (instance.disk_template, text),
4763 if instance.disk_template in constants.DTS_EXT_MIRROR:
4764 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
4766 if self.lu.op.iallocator:
4767 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
4768 self._RunAllocator()
4770 # We set self.target_node as it is required by
4772 self.target_node = self.lu.op.target_node
4774 # Check that the target node is correct in terms of instance policy
4775 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
4776 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
4777 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
4779 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
4780 ignore=self.ignore_ipolicy)
4782 # self.target_node is already populated, either directly or by the
4784 target_node = self.target_node
4785 if self.target_node == instance.primary_node:
4786 raise errors.OpPrereqError("Cannot migrate instance %s"
4787 " to its primary (%s)" %
4788 (instance.name, instance.primary_node),
4791 if len(self.lu.tasklets) == 1:
4792 # It is safe to release locks only when we're the only tasklet
4794 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
4795 keep=[instance.primary_node, self.target_node])
4796 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
4799 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
4801 secondary_nodes = instance.secondary_nodes
4802 if not secondary_nodes:
4803 raise errors.ConfigurationError("No secondary node but using"
4804 " %s disk template" %
4805 instance.disk_template)
4806 target_node = secondary_nodes[0]
4807 if self.lu.op.iallocator or (self.lu.op.target_node and
4808 self.lu.op.target_node != target_node):
4810 text = "failed over"
4813 raise errors.OpPrereqError("Instances with disk template %s cannot"
4814 " be %s to arbitrary nodes"
4815 " (neither an iallocator nor a target"
4816 " node can be passed)" %
4817 (instance.disk_template, text),
4819 nodeinfo = self.cfg.GetNodeInfo(target_node)
4820 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
4821 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
4823 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo, self.cfg,
4824 ignore=self.ignore_ipolicy)
4826 i_be = cluster.FillBE(instance)
4828 # check memory requirements on the secondary node
4829 if (not self.cleanup and
4830 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
4831 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
4832 "migrating instance %s" %
4834 i_be[constants.BE_MINMEM],
4835 instance.hypervisor)
4837 self.lu.LogInfo("Not checking memory on the secondary node as"
4838 " instance will not be started")
4840 # check if failover must be forced instead of migration
4841 if (not self.cleanup and not self.failover and
4842 i_be[constants.BE_ALWAYS_FAILOVER]):
4843 self.lu.LogInfo("Instance configured to always failover; fallback"
4845 self.failover = True
4847 # check bridge existence
4848 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
4850 if not self.cleanup:
4851 _CheckNodeNotDrained(self.lu, target_node)
4852 if not self.failover:
4853 result = self.rpc.call_instance_migratable(instance.primary_node,
4855 if result.fail_msg and self.fallback:
4856 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
4858 self.failover = True
4860 result.Raise("Can't migrate, please use failover",
4861 prereq=True, ecode=errors.ECODE_STATE)
4863 assert not (self.failover and self.cleanup)
4865 if not self.failover:
4866 if self.lu.op.live is not None and self.lu.op.mode is not None:
4867 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
4868 " parameters are accepted",
4870 if self.lu.op.live is not None:
4872 self.lu.op.mode = constants.HT_MIGRATION_LIVE
4874 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
4875 # reset the 'live' parameter to None so that repeated
4876 # invocations of CheckPrereq do not raise an exception
4877 self.lu.op.live = None
4878 elif self.lu.op.mode is None:
4879 # read the default value from the hypervisor
4880 i_hv = cluster.FillHV(self.instance, skip_globals=False)
4881 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
4883 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
4885 # Failover is never live
4888 if not (self.failover or self.cleanup):
4889 remote_info = self.rpc.call_instance_info(instance.primary_node,
4891 instance.hypervisor)
4892 remote_info.Raise("Error checking instance on node %s" %
4893 instance.primary_node)
4894 instance_running = bool(remote_info.payload)
4895 if instance_running:
4896 self.current_mem = int(remote_info.payload["memory"])
4898 def _RunAllocator(self):
4899 """Run the allocator based on input opcode.
4902 assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
4904 # FIXME: add a self.ignore_ipolicy option
4905 req = iallocator.IAReqRelocate(name=self.instance_name,
4906 relocate_from=[self.instance.primary_node])
4907 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
4909 ial.Run(self.lu.op.iallocator)
4912 raise errors.OpPrereqError("Can't compute nodes using"
4913 " iallocator '%s': %s" %
4914 (self.lu.op.iallocator, ial.info),
4916 self.target_node = ial.result[0]
4917 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
4918 self.instance_name, self.lu.op.iallocator,
4919 utils.CommaJoin(ial.result))
4921 def _WaitUntilSync(self):
4922 """Poll with custom rpc for disk sync.
4924 This uses our own step-based rpc call.
4927 self.feedback_fn("* wait until resync is done")
4931 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4933 (self.instance.disks,
4936 for node, nres in result.items():
4937 nres.Raise("Cannot resync disks on node %s" % node)
4938 node_done, node_percent = nres.payload
4939 all_done = all_done and node_done
4940 if node_percent is not None:
4941 min_percent = min(min_percent, node_percent)
4943 if min_percent < 100:
4944 self.feedback_fn(" - progress: %.1f%%" % min_percent)
4947 def _EnsureSecondary(self, node):
4948 """Demote a node to secondary.
4951 self.feedback_fn("* switching node %s to secondary mode" % node)
4953 for dev in self.instance.disks:
4954 self.cfg.SetDiskID(dev, node)
4956 result = self.rpc.call_blockdev_close(node, self.instance.name,
4957 self.instance.disks)
4958 result.Raise("Cannot change disk to secondary on node %s" % node)
4960 def _GoStandalone(self):
4961 """Disconnect from the network.
4964 self.feedback_fn("* changing into standalone mode")
4965 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
4966 self.instance.disks)
4967 for node, nres in result.items():
4968 nres.Raise("Cannot disconnect disks node %s" % node)
4970 def _GoReconnect(self, multimaster):
4971 """Reconnect to the network.
4977 msg = "single-master"
4978 self.feedback_fn("* changing disks into %s mode" % msg)
4979 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
4980 (self.instance.disks, self.instance),
4981 self.instance.name, multimaster)
4982 for node, nres in result.items():
4983 nres.Raise("Cannot change disks config on node %s" % node)
4985 def _ExecCleanup(self):
4986 """Try to cleanup after a failed migration.
4988 The cleanup is done by:
4989 - check that the instance is running only on one node
4990 (and update the config if needed)
4991 - change disks on its secondary node to secondary
4992 - wait until disks are fully synchronized
4993 - disconnect from the network
4994 - change disks into single-master mode
4995 - wait again until disks are fully synchronized
4998 instance = self.instance
4999 target_node = self.target_node
5000 source_node = self.source_node
5002 # check running on only one node
5003 self.feedback_fn("* checking where the instance actually runs"
5004 " (if this hangs, the hypervisor might be in"
5006 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5007 for node, result in ins_l.items():
5008 result.Raise("Can't contact node %s" % node)
5010 runningon_source = instance.name in ins_l[source_node].payload
5011 runningon_target = instance.name in ins_l[target_node].payload
5013 if runningon_source and runningon_target:
5014 raise errors.OpExecError("Instance seems to be running on two nodes,"
5015 " or the hypervisor is confused; you will have"
5016 " to ensure manually that it runs only on one"
5017 " and restart this operation")
5019 if not (runningon_source or runningon_target):
5020 raise errors.OpExecError("Instance does not seem to be running at all;"
5021 " in this case it's safer to repair by"
5022 " running 'gnt-instance stop' to ensure disk"
5023 " shutdown, and then restarting it")
5025 if runningon_target:
5026 # the migration has actually succeeded, we need to update the config
5027 self.feedback_fn("* instance running on secondary node (%s),"
5028 " updating config" % target_node)
5029 instance.primary_node = target_node
5030 self.cfg.Update(instance, self.feedback_fn)
5031 demoted_node = source_node
5033 self.feedback_fn("* instance confirmed to be running on its"
5034 " primary node (%s)" % source_node)
5035 demoted_node = target_node
5037 if instance.disk_template in constants.DTS_INT_MIRROR:
5038 self._EnsureSecondary(demoted_node)
5040 self._WaitUntilSync()
5041 except errors.OpExecError:
5042 # we ignore errors here, since if the device is standalone, it
5043 # won't be able to sync
5045 self._GoStandalone()
5046 self._GoReconnect(False)
5047 self._WaitUntilSync()
5049 self.feedback_fn("* done")
5051 def _RevertDiskStatus(self):
5052 """Try to revert the disk status after a failed migration.
5055 target_node = self.target_node
5056 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
5060 self._EnsureSecondary(target_node)
5061 self._GoStandalone()
5062 self._GoReconnect(False)
5063 self._WaitUntilSync()
5064 except errors.OpExecError, err:
5065 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
5066 " please try to recover the instance manually;"
5067 " error '%s'" % str(err))
5069 def _AbortMigration(self):
5070 """Call the hypervisor code to abort a started migration.
5073 instance = self.instance
5074 target_node = self.target_node
5075 source_node = self.source_node
5076 migration_info = self.migration_info
5078 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
5082 abort_msg = abort_result.fail_msg
5084 logging.error("Aborting migration failed on target node %s: %s",
5085 target_node, abort_msg)
5086 # Don't raise an exception here, as we still have to try to revert the
5087 # disk status, even if this step failed.
5089 abort_result = self.rpc.call_instance_finalize_migration_src(
5090 source_node, instance, False, self.live)
5091 abort_msg = abort_result.fail_msg
5093 logging.error("Aborting migration failed on source node %s: %s",
5094 source_node, abort_msg)
5096 def _ExecMigration(self):
5097 """Migrate an instance.
5099 The migrate is done by:
5100 - change the disks into dual-master mode
5101 - wait until disks are fully synchronized again
5102 - migrate the instance
5103 - change disks on the new secondary node (the old primary) to secondary
5104 - wait until disks are fully synchronized
5105 - change disks into single-master mode
5108 instance = self.instance
5109 target_node = self.target_node
5110 source_node = self.source_node
5112 # Check for hypervisor version mismatch and warn the user.
5113 nodeinfo = self.rpc.call_node_info([source_node, target_node],
5114 None, [self.instance.hypervisor], False)
5115 for ninfo in nodeinfo.values():
5116 ninfo.Raise("Unable to retrieve node information from node '%s'" %
5118 (_, _, (src_info, )) = nodeinfo[source_node].payload
5119 (_, _, (dst_info, )) = nodeinfo[target_node].payload
5121 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
5122 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
5123 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
5124 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
5125 if src_version != dst_version:
5126 self.feedback_fn("* warning: hypervisor version mismatch between"
5127 " source (%s) and target (%s) node" %
5128 (src_version, dst_version))
5130 self.feedback_fn("* checking disk consistency between source and target")
5131 for (idx, dev) in enumerate(instance.disks):
5132 if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
5133 raise errors.OpExecError("Disk %s is degraded or not fully"
5134 " synchronized on target node,"
5135 " aborting migration" % idx)
5137 if self.current_mem > self.tgt_free_mem:
5138 if not self.allow_runtime_changes:
5139 raise errors.OpExecError("Memory ballooning not allowed and not enough"
5140 " free memory to fit instance %s on target"
5141 " node %s (have %dMB, need %dMB)" %
5142 (instance.name, target_node,
5143 self.tgt_free_mem, self.current_mem))
5144 self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
5145 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
5148 rpcres.Raise("Cannot modify instance runtime memory")
5150 # First get the migration information from the remote node
5151 result = self.rpc.call_migration_info(source_node, instance)
5152 msg = result.fail_msg
5154 log_err = ("Failed fetching source migration information from %s: %s" %
5156 logging.error(log_err)
5157 raise errors.OpExecError(log_err)
5159 self.migration_info = migration_info = result.payload
5161 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
5162 # Then switch the disks to master/master mode
5163 self._EnsureSecondary(target_node)
5164 self._GoStandalone()
5165 self._GoReconnect(True)
5166 self._WaitUntilSync()
5168 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5169 result = self.rpc.call_accept_instance(target_node,
5172 self.nodes_ip[target_node])
5174 msg = result.fail_msg
5176 logging.error("Instance pre-migration failed, trying to revert"
5177 " disk status: %s", msg)
5178 self.feedback_fn("Pre-migration failed, aborting")
5179 self._AbortMigration()
5180 self._RevertDiskStatus()
5181 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5182 (instance.name, msg))
5184 self.feedback_fn("* migrating instance to %s" % target_node)
5185 result = self.rpc.call_instance_migrate(source_node, instance,
5186 self.nodes_ip[target_node],
5188 msg = result.fail_msg
5190 logging.error("Instance migration failed, trying to revert"
5191 " disk status: %s", msg)
5192 self.feedback_fn("Migration failed, aborting")
5193 self._AbortMigration()
5194 self._RevertDiskStatus()
5195 raise errors.OpExecError("Could not migrate instance %s: %s" %
5196 (instance.name, msg))
5198 self.feedback_fn("* starting memory transfer")
5199 last_feedback = time.time()
5201 result = self.rpc.call_instance_get_migration_status(source_node,
5203 msg = result.fail_msg
5204 ms = result.payload # MigrationStatus instance
5205 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
5206 logging.error("Instance migration failed, trying to revert"
5207 " disk status: %s", msg)
5208 self.feedback_fn("Migration failed, aborting")
5209 self._AbortMigration()
5210 self._RevertDiskStatus()
5212 msg = "hypervisor returned failure"
5213 raise errors.OpExecError("Could not migrate instance %s: %s" %
5214 (instance.name, msg))
5216 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
5217 self.feedback_fn("* memory transfer complete")
5220 if (utils.TimeoutExpired(last_feedback,
5221 self._MIGRATION_FEEDBACK_INTERVAL) and
5222 ms.transferred_ram is not None):
5223 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
5224 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
5225 last_feedback = time.time()
5227 time.sleep(self._MIGRATION_POLL_INTERVAL)
5229 result = self.rpc.call_instance_finalize_migration_src(source_node,
5233 msg = result.fail_msg
5235 logging.error("Instance migration succeeded, but finalization failed"
5236 " on the source node: %s", msg)
5237 raise errors.OpExecError("Could not finalize instance migration: %s" %
5240 instance.primary_node = target_node
5242 # distribute new instance config to the other nodes
5243 self.cfg.Update(instance, self.feedback_fn)
5245 result = self.rpc.call_instance_finalize_migration_dst(target_node,
5249 msg = result.fail_msg
5251 logging.error("Instance migration succeeded, but finalization failed"
5252 " on the target node: %s", msg)
5253 raise errors.OpExecError("Could not finalize instance migration: %s" %
5256 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
5257 self._EnsureSecondary(source_node)
5258 self._WaitUntilSync()
5259 self._GoStandalone()
5260 self._GoReconnect(False)
5261 self._WaitUntilSync()
5263 # If the instance's disk template is `rbd' or `ext' and there was a
5264 # successful migration, unmap the device from the source node.
5265 if self.instance.disk_template in (constants.DT_RBD, constants.DT_EXT):
5266 disks = _ExpandCheckDisks(instance, instance.disks)
5267 self.feedback_fn("* unmapping instance's disks from %s" % source_node)
5269 result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
5270 msg = result.fail_msg
5272 logging.error("Migration was successful, but couldn't unmap the"
5273 " block device %s on source node %s: %s",
5274 disk.iv_name, source_node, msg)
5275 logging.error("You need to unmap the device %s manually on %s",
5276 disk.iv_name, source_node)
5278 self.feedback_fn("* done")
5280 def _ExecFailover(self):
5281 """Failover an instance.
5283 The failover is done by shutting it down on its present node and
5284 starting it on the secondary.
5287 instance = self.instance
5288 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5290 source_node = instance.primary_node
5291 target_node = self.target_node
5293 if instance.admin_state == constants.ADMINST_UP:
5294 self.feedback_fn("* checking disk consistency between source and target")
5295 for (idx, dev) in enumerate(instance.disks):
5296 # for drbd, these are drbd over lvm
5297 if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
5299 if primary_node.offline:
5300 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
5302 (primary_node.name, idx, target_node))
5303 elif not self.ignore_consistency:
5304 raise errors.OpExecError("Disk %s is degraded on target node,"
5305 " aborting failover" % idx)
5307 self.feedback_fn("* not checking disk consistency as instance is not"
5310 self.feedback_fn("* shutting down instance on source node")
5311 logging.info("Shutting down instance %s on node %s",
5312 instance.name, source_node)
5314 result = self.rpc.call_instance_shutdown(source_node, instance,
5315 self.shutdown_timeout,
5317 msg = result.fail_msg
5319 if self.ignore_consistency or primary_node.offline:
5320 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
5321 " proceeding anyway; please make sure node"
5322 " %s is down; error details: %s",
5323 instance.name, source_node, source_node, msg)
5325 raise errors.OpExecError("Could not shutdown instance %s on"
5327 (instance.name, source_node, msg))
5329 self.feedback_fn("* deactivating the instance's disks on source node")
5330 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
5331 raise errors.OpExecError("Can't shut down the instance's disks")
5333 instance.primary_node = target_node
5334 # distribute new instance config to the other nodes
5335 self.cfg.Update(instance, self.feedback_fn)
5337 # Only start the instance if it's marked as up
5338 if instance.admin_state == constants.ADMINST_UP:
5339 self.feedback_fn("* activating the instance's disks on target node %s" %
5341 logging.info("Starting instance %s on node %s",
5342 instance.name, target_node)
5344 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
5345 ignore_secondaries=True)
5347 _ShutdownInstanceDisks(self.lu, instance)
5348 raise errors.OpExecError("Can't activate the instance's disks")
5350 self.feedback_fn("* starting the instance on the target node %s" %
5352 result = self.rpc.call_instance_start(target_node, (instance, None, None),
5353 False, self.lu.op.reason)
5354 msg = result.fail_msg
5356 _ShutdownInstanceDisks(self.lu, instance)
5357 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5358 (instance.name, target_node, msg))
5360 def Exec(self, feedback_fn):
5361 """Perform the migration.
5364 self.feedback_fn = feedback_fn
5365 self.source_node = self.instance.primary_node
5367 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
5368 if self.instance.disk_template in constants.DTS_INT_MIRROR:
5369 self.target_node = self.instance.secondary_nodes[0]
5370 # Otherwise self.target_node has been populated either
5371 # directly, or through an iallocator.
5373 self.all_nodes = [self.source_node, self.target_node]
5374 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
5375 in self.cfg.GetMultiNodeInfo(self.all_nodes))
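# nodes_ip maps each node name to its secondary (replication) IP and is
# what the migration/DRBD RPCs below use, e.g. (example values only)
# {"node1.example.com": "192.0.2.10", "node2.example.com": "192.0.2.11"}.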
5378 feedback_fn("Failover instance %s" % self.instance.name)
5379 self._ExecFailover()
5381 feedback_fn("Migrating instance %s" % self.instance.name)
5384 return self._ExecCleanup()
5386 return self._ExecMigration()
5389 def _CreateBlockDev(lu, node, instance, device, force_create, info,
5391 """Wrapper around L{_CreateBlockDevInner}.
5393 This method annotates the root device first.
5396 (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
5397 excl_stor = _IsExclusiveStorageEnabledNodeName(lu.cfg, node)
5398 return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
5399 force_open, excl_stor)
5402 def _CreateBlockDevInner(lu, node, instance, device, force_create,
5403 info, force_open, excl_stor):
5404 """Create a tree of block devices on a given node.
5406 If this device type has to be created on secondaries, create it and
5409 If not, just recurse to children keeping the same 'force' value.
5411 @attention: The device has to be annotated already.
5413 @param lu: the lu on whose behalf we execute
5414 @param node: the node on which to create the device
5415 @type instance: L{objects.Instance}
5416 @param instance: the instance which owns the device
5417 @type device: L{objects.Disk}
5418 @param device: the device to create
5419 @type force_create: boolean
5420 @param force_create: whether to force creation of this device; this
5421 will be changed to True whenever we find a device which has
5422 CreateOnSecondary() attribute
5423 @param info: the extra 'metadata' we should attach to the device
5424 (this will be represented as a LVM tag)
5425 @type force_open: boolean
5426 @param force_open: this parameter will be passed to the
5427 L{backend.BlockdevCreate} function where it specifies
5428 whether we run on primary or not, and it affects both
5429 the child assembly and the device's own Open() execution
5430 @type excl_stor: boolean
5431 @param excl_stor: Whether exclusive_storage is active for the node
5433 @return: list of created devices
5435 created_devices = []
5437 if device.CreateOnSecondary():
5441 for child in device.children:
5442 devs = _CreateBlockDevInner(lu, node, instance, child, force_create,
5443 info, force_open, excl_stor)
5444 created_devices.extend(devs)
5446 if not force_create:
5447 return created_devices
5449 _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
5451 # The device has been completely created, so there is no point in keeping
5452 # its subdevices in the list. We just add the device itself instead.
5453 created_devices = [(node, device)]
5454 return created_devices
5456 except errors.DeviceCreationError, e:
5457 e.created_devices.extend(created_devices)
5459 except errors.OpExecError, e:
5460 raise errors.DeviceCreationError(str(e), created_devices)
5463 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open,
5465 """Create a single block device on a given node.
5467 This will not recurse over children of the device, so they must be
5470 @param lu: the lu on whose behalf we execute
5471 @param node: the node on which to create the device
5472 @type instance: L{objects.Instance}
5473 @param instance: the instance which owns the device
5474 @type device: L{objects.Disk}
5475 @param device: the device to create
5476 @param info: the extra 'metadata' we should attach to the device
5477 (this will be represented as a LVM tag)
5478 @type force_open: boolean
5479 @param force_open: this parameter will be passed to the
5480 L{backend.BlockdevCreate} function where it specifies
5481 whether we run on primary or not, and it affects both
5482 the child assembly and the device's own Open() execution
5483 @type excl_stor: boolean
5484 @param excl_stor: Whether exclusive_storage is active for the node
5487 lu.cfg.SetDiskID(device, node)
5488 result = lu.rpc.call_blockdev_create(node, device, device.size,
5489 instance.name, force_open, info,
5491 result.Raise("Can't create block device %s on"
5492 " node %s for instance %s" % (device, node, instance.name))
5493 if device.physical_id is None:
5494 device.physical_id = result.payload
5497 def _GenerateUniqueNames(lu, exts):
5498 """Generate a suitable LV name.
5500 This will generate a logical volume name for the given instance.
5505 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5506 results.append("%s%s" % (new_id, val))
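# Illustrative example (UUIDs are made up): _GenerateUniqueNames(lu,
# [".disk0", ".disk1"]) returns something like
#   ["d2b48a5c-....disk0", "7f10c3e1-....disk1"]
# i.e. one freshly generated unique ID per requested extension.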
5510 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
5511 iv_name, p_minor, s_minor):
5512 """Generate a drbd8 device complete with its children.
5515 assert len(vgnames) == len(names) == 2
5516 port = lu.cfg.AllocatePort()
5517 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5519 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5520 logical_id=(vgnames[0], names[0]),
5522 dev_data.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5523 dev_meta = objects.Disk(dev_type=constants.LD_LV,
5524 size=constants.DRBD_META_SIZE,
5525 logical_id=(vgnames[1], names[1]),
5527 dev_meta.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5528 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5529 logical_id=(primary, secondary, port,
5532 children=[dev_data, dev_meta],
5533 iv_name=iv_name, params={})
5534 drbd_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
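# The resulting disk tree for a single DRBD8 disk looks roughly like
# (sketch, field values depend on the caller):
#   DRBD8, logical_id=(primary, secondary, port, p_minor, s_minor, secret)
#     +- LV data child, size=size, logical_id=(vgnames[0], names[0])
#     +- LV meta child, size=DRBD_META_SIZE, logical_id=(vgnames[1], names[1])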
5538 _DISK_TEMPLATE_NAME_PREFIX = {
5539 constants.DT_PLAIN: "",
5540 constants.DT_RBD: ".rbd",
5541 constants.DT_EXT: ".ext",
5545 _DISK_TEMPLATE_DEVICE_TYPE = {
5546 constants.DT_PLAIN: constants.LD_LV,
5547 constants.DT_FILE: constants.LD_FILE,
5548 constants.DT_SHARED_FILE: constants.LD_FILE,
5549 constants.DT_BLOCK: constants.LD_BLOCKDEV,
5550 constants.DT_RBD: constants.LD_RBD,
5551 constants.DT_EXT: constants.LD_EXT,
5555 def _GenerateDiskTemplate(
5556 lu, template_name, instance_name, primary_node, secondary_nodes,
5557 disk_info, file_storage_dir, file_driver, base_index,
5558 feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
5559 _req_shr_file_storage=opcodes.RequireSharedFileStorage):
5560 """Generate the entire disk layout for a given template type.
5563 vgname = lu.cfg.GetVGName()
5564 disk_count = len(disk_info)
5567 if template_name == constants.DT_DISKLESS:
5569 elif template_name == constants.DT_DRBD8:
5570 if len(secondary_nodes) != 1:
5571 raise errors.ProgrammerError("Wrong template configuration")
5572 remote_node = secondary_nodes[0]
5573 minors = lu.cfg.AllocateDRBDMinor(
5574 [primary_node, remote_node] * len(disk_info), instance_name)
5576 (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
5578 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
5581 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5582 for i in range(disk_count)]):
5583 names.append(lv_prefix + "_data")
5584 names.append(lv_prefix + "_meta")
5585 for idx, disk in enumerate(disk_info):
5586 disk_index = idx + base_index
5587 data_vg = disk.get(constants.IDISK_VG, vgname)
5588 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
5589 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5590 disk[constants.IDISK_SIZE],
5592 names[idx * 2:idx * 2 + 2],
5593 "disk/%d" % disk_index,
5594 minors[idx * 2], minors[idx * 2 + 1])
5595 disk_dev.mode = disk[constants.IDISK_MODE]
5596 disk_dev.name = disk.get(constants.IDISK_NAME, None)
5597 disks.append(disk_dev)
5600 raise errors.ProgrammerError("Wrong template configuration")
5602 if template_name == constants.DT_FILE:
5604 elif template_name == constants.DT_SHARED_FILE:
5605 _req_shr_file_storage()
5607 name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
5608 if name_prefix is None:
5611 names = _GenerateUniqueNames(lu, ["%s.disk%s" %
5612 (name_prefix, base_index + i)
5613 for i in range(disk_count)])
5615 if template_name == constants.DT_PLAIN:
5617 def logical_id_fn(idx, _, disk):
5618 vg = disk.get(constants.IDISK_VG, vgname)
5619 return (vg, names[idx])
5621 elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
5623 lambda _, disk_index, disk: (file_driver,
5624 "%s/disk%d" % (file_storage_dir,
5626 elif template_name == constants.DT_BLOCK:
5628 lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
5629 disk[constants.IDISK_ADOPT])
5630 elif template_name == constants.DT_RBD:
5631 logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
5632 elif template_name == constants.DT_EXT:
5633 def logical_id_fn(idx, _, disk):
5634 provider = disk.get(constants.IDISK_PROVIDER, None)
5635 if provider is None:
5636 raise errors.ProgrammerError("Disk template is %s, but '%s' is"
5637 " not found", constants.DT_EXT,
5638 constants.IDISK_PROVIDER)
5639 return (provider, names[idx])
5641 raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
5643 dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
5645 for idx, disk in enumerate(disk_info):
# Only for the Ext template, add disk_info to params
5648 if template_name == constants.DT_EXT:
5649 params[constants.IDISK_PROVIDER] = disk[constants.IDISK_PROVIDER]
5651 if key not in constants.IDISK_PARAMS:
5652 params[key] = disk[key]
5653 disk_index = idx + base_index
5654 size = disk[constants.IDISK_SIZE]
5655 feedback_fn("* disk %s, size %s" %
5656 (disk_index, utils.FormatUnit(size, "h")))
5657 disk_dev = objects.Disk(dev_type=dev_type, size=size,
5658 logical_id=logical_id_fn(idx, disk_index, disk),
5659 iv_name="disk/%d" % disk_index,
5660 mode=disk[constants.IDISK_MODE],
5662 disk_dev.name = disk.get(constants.IDISK_NAME, None)
5663 disk_dev.uuid = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5664 disks.append(disk_dev)
5669 def _GetInstanceInfoText(instance):
5670 """Compute that text that should be added to the disk's metadata.
5673 return "originstname+%s" % instance.name
5676 def _CalcEta(time_taken, written, total_size):
5677 """Calculates the ETA based on size written and total size.
5679 @param time_taken: The time taken so far
5680 @param written: amount written so far
5681 @param total_size: The total size of data to be written
5682 @return: The remaining time in seconds
5685 avg_time = time_taken / float(written)
5686 return (total_size - written) * avg_time
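# Illustrative example for _CalcEta (hypothetical numbers): after 30 seconds
# with 512 MiB written out of 2048 MiB, avg_time is 30 / 512.0 seconds per
# MiB and the returned ETA is (2048 - 512) * avg_time = 90 seconds.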
5689 def _WipeDisks(lu, instance, disks=None):
5690 """Wipes instance disks.
5692 @type lu: L{LogicalUnit}
5693 @param lu: the logical unit on whose behalf we execute
5694 @type instance: L{objects.Instance}
@param instance: the instance whose disks we should wipe
5696 @type disks: None or list of tuple of (number, L{objects.Disk}, number)
@param disks: Disk details; tuple contains disk index, disk object and the
start offset
5701 node = instance.primary_node
5704 disks = [(idx, disk, 0)
5705 for (idx, disk) in enumerate(instance.disks)]
5707 for (_, device, _) in disks:
5708 lu.cfg.SetDiskID(device, node)
5710 logging.info("Pausing synchronization of disks of instance '%s'",
5712 result = lu.rpc.call_blockdev_pause_resume_sync(node,
5713 (map(compat.snd, disks),
5716 result.Raise("Failed to pause disk synchronization on node '%s'" % node)
5718 for idx, success in enumerate(result.payload):
5720 logging.warn("Pausing synchronization of disk %s of instance '%s'"
5721 " failed", idx, instance.name)
5724 for (idx, device, offset) in disks:
5725 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
5726 # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
5728 int(min(constants.MAX_WIPE_CHUNK,
5729 device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
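# Worked example with assumed constant values (not necessarily the actual
# ones): if MIN_WIPE_CHUNK_PERCENT were 10 and MAX_WIPE_CHUNK were 1024 MiB,
# a 100 GiB (102400 MiB) disk would get min(1024, 102400 * 0.10) = 1024 MiB
# chunks, while a 5 GiB disk would get 512 MiB chunks.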
5733 start_time = time.time()
5738 info_text = (" (from %s to %s)" %
5739 (utils.FormatUnit(offset, "h"),
5740 utils.FormatUnit(size, "h")))
5742 lu.LogInfo("* Wiping disk %s%s", idx, info_text)
5744 logging.info("Wiping disk %d for instance %s on node %s using"
5745 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
5747 while offset < size:
5748 wipe_size = min(wipe_chunk_size, size - offset)
5750 logging.debug("Wiping disk %d, offset %s, chunk %s",
5751 idx, offset, wipe_size)
5753 result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
5755 result.Raise("Could not wipe disk %d at offset %d for size %d" %
5756 (idx, offset, wipe_size))
5760 if now - last_output >= 60:
5761 eta = _CalcEta(now - start_time, offset, size)
5762 lu.LogInfo(" - done: %.1f%% ETA: %s",
5763 offset / float(size) * 100, utils.FormatSeconds(eta))
5766 logging.info("Resuming synchronization of disks for instance '%s'",
5769 result = lu.rpc.call_blockdev_pause_resume_sync(node,
5770 (map(compat.snd, disks),
5775 lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
5776 node, result.fail_msg)
5778 for idx, success in enumerate(result.payload):
5780 lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
5781 " failed", idx, instance.name)
5784 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5785 """Create all disks for an instance.
5787 This abstracts away some work from AddInstance.
5789 @type lu: L{LogicalUnit}
5790 @param lu: the logical unit on whose behalf we execute
5791 @type instance: L{objects.Instance}
5792 @param instance: the instance whose disks we should create
5794 @param to_skip: list of indices to skip
5795 @type target_node: string
5796 @param target_node: if passed, overrides the target node for creation
5798 @return: the success of the creation
5801 info = _GetInstanceInfoText(instance)
5802 if target_node is None:
5803 pnode = instance.primary_node
5804 all_nodes = instance.all_nodes
5809 if instance.disk_template in constants.DTS_FILEBASED:
5810 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5811 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5813 result.Raise("Failed to create directory '%s' on"
5814 " node %s" % (file_storage_dir, pnode))
5817 # Note: this needs to be kept in sync with adding of disks in
5818 # LUInstanceSetParams
5819 for idx, device in enumerate(instance.disks):
5820 if to_skip and idx in to_skip:
5822 logging.info("Creating disk %s for instance '%s'", idx, instance.name)
5824 for node in all_nodes:
5825 f_create = node == pnode
5827 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5828 disks_created.append((node, device))
5829 except errors.OpExecError:
5830 logging.warning("Creating disk %s for instance '%s' failed",
5832 except errors.DeviceCreationError, e:
5833 logging.warning("Creating disk %s for instance '%s' failed",
5835 disks_created.extend(e.created_devices)
5836 for (node, disk) in disks_created:
5837 lu.cfg.SetDiskID(disk, node)
5838 result = lu.rpc.call_blockdev_remove(node, disk)
5840 logging.warning("Failed to remove newly-created disk %s on node %s:"
5841 " %s", device, node, result.fail_msg)
5842 raise errors.OpExecError(e.message)
5845 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
5846 """Remove all disks for an instance.
5848 This abstracts away some work from `AddInstance()` and
5849 `RemoveInstance()`. Note that in case some of the devices couldn't
5850 be removed, the removal will continue with the other ones.
5852 @type lu: L{LogicalUnit}
5853 @param lu: the logical unit on whose behalf we execute
5854 @type instance: L{objects.Instance}
5855 @param instance: the instance whose disks we should remove
5856 @type target_node: string
5857 @param target_node: used to override the node on which to remove the disks
5859 @return: the success of the removal
5862 logging.info("Removing block devices for instance %s", instance.name)
5865 ports_to_release = set()
5866 anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
5867 for (idx, device) in enumerate(anno_disks):
5869 edata = [(target_node, device)]
5871 edata = device.ComputeNodeTree(instance.primary_node)
5872 for node, disk in edata:
5873 lu.cfg.SetDiskID(disk, node)
5874 result = lu.rpc.call_blockdev_remove(node, disk)
5876 lu.LogWarning("Could not remove disk %s on node %s,"
5877 " continuing anyway: %s", idx, node, result.fail_msg)
5878 if not (result.offline and node != instance.primary_node):
5881 # if this is a DRBD disk, return its port to the pool
5882 if device.dev_type in constants.LDS_DRBD:
5883 ports_to_release.add(device.logical_id[2])
5885 if all_result or ignore_failures:
5886 for port in ports_to_release:
5887 lu.cfg.AddTcpUdpPort(port)
5889 if instance.disk_template in constants.DTS_FILEBASED:
5890 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5894 tgt = instance.primary_node
5895 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5897 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5898 file_storage_dir, instance.primary_node, result.fail_msg)
5904 def _ComputeDiskSizePerVG(disk_template, disks):
5905 """Compute disk size requirements in the volume group
5908 def _compute(disks, payload):
5909 """Universal algorithm.
5914 vgs[disk[constants.IDISK_VG]] = \
vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
5919 # Required free disk space as a function of disk and swap space
5921 constants.DT_DISKLESS: {},
5922 constants.DT_PLAIN: _compute(disks, 0),
5923 # 128 MB are added for drbd metadata for each disk
5924 constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
5925 constants.DT_FILE: {},
5926 constants.DT_SHARED_FILE: {},
5929 if disk_template not in req_size_dict:
5930 raise errors.ProgrammerError("Disk template '%s' size requirement"
5931 " is unknown" % disk_template)
5933 return req_size_dict[disk_template]
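# Illustrative example for _ComputeDiskSizePerVG (hypothetical values): two
# DT_PLAIN disks of 10240 MiB and 20480 MiB, both in volume group "xenvg",
# yield {"xenvg": 30720}; with DT_DRBD8 each disk additionally contributes
# constants.DRBD_META_SIZE (128 MB per disk, as noted above) to its VG.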
5936 def _CreateInstanceAllocRequest(op, disks, nics, beparams, node_whitelist):
5937 """Wrapper around IAReqInstanceAlloc.
5939 @param op: The instance opcode
5940 @param disks: The computed disks
5941 @param nics: The computed nics
@param beparams: The fully filled beparams
5943 @param node_whitelist: List of nodes which should appear as online to the
5944 allocator (unless the node is already marked offline)
5946 @returns: A filled L{iallocator.IAReqInstanceAlloc}
5949 spindle_use = beparams[constants.BE_SPINDLE_USE]
5950 return iallocator.IAReqInstanceAlloc(name=op.instance_name,
5951 disk_template=op.disk_template,
5954 vcpus=beparams[constants.BE_VCPUS],
5955 memory=beparams[constants.BE_MAXMEM],
5956 spindle_use=spindle_use,
5958 nics=[n.ToDict() for n in nics],
5959 hypervisor=op.hypervisor,
5960 node_whitelist=node_whitelist)
5963 def _ComputeNics(op, cluster, default_ip, cfg, ec_id):
5964 """Computes the nics.
5966 @param op: The instance opcode
5967 @param cluster: Cluster configuration object
5968 @param default_ip: The default ip to assign
5969 @param cfg: An instance of the configuration object
5970 @param ec_id: Execution context ID
@returns: The built-up NICs
5977 nic_mode_req = nic.get(constants.INIC_MODE, None)
5978 nic_mode = nic_mode_req
5979 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
5980 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5982 net = nic.get(constants.INIC_NETWORK, None)
5983 link = nic.get(constants.NIC_LINK, None)
5984 ip = nic.get(constants.INIC_IP, None)
5986 if net is None or net.lower() == constants.VALUE_NONE:
5989 if nic_mode_req is not None or link is not None:
5990 raise errors.OpPrereqError("If network is given, no mode or link"
5991 " is allowed to be passed",
5994 # ip validity checks
5995 if ip is None or ip.lower() == constants.VALUE_NONE:
5997 elif ip.lower() == constants.VALUE_AUTO:
5998 if not op.name_check:
5999 raise errors.OpPrereqError("IP address set to auto but name checks"
6000 " have been skipped",
6004 # We defer pool operations until later, so that the iallocator has
# filled in the instance's node(s)
6006 if ip.lower() == constants.NIC_IP_POOL:
6008 raise errors.OpPrereqError("if ip=pool, parameter network"
6009 " must be passed too",
6012 elif not netutils.IPAddress.IsValid(ip):
6013 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
6018 # TODO: check the ip address for uniqueness
6019 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6020 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6023 # MAC address verification
6024 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
6025 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6026 mac = utils.NormalizeAndValidateMac(mac)
6029 # TODO: We need to factor this out
6030 cfg.ReserveMAC(mac, ec_id)
6031 except errors.ReservationError:
6032 raise errors.OpPrereqError("MAC address %s already in use"
6033 " in cluster" % mac,
6034 errors.ECODE_NOTUNIQUE)
6036 # Build nic parameters
6039 nicparams[constants.NIC_MODE] = nic_mode
6041 nicparams[constants.NIC_LINK] = link
6043 check_params = cluster.SimpleFillNIC(nicparams)
6044 objects.NIC.CheckParameterSyntax(check_params)
6045 net_uuid = cfg.LookupNetwork(net)
6046 name = nic.get(constants.INIC_NAME, None)
6047 if name is not None and name.lower() == constants.VALUE_NONE:
6049 nic_obj = objects.NIC(mac=mac, ip=nic_ip, name=name,
6050 network=net_uuid, nicparams=nicparams)
6051 nic_obj.uuid = cfg.GenerateUniqueID(ec_id)
6052 nics.append(nic_obj)
6057 def _ComputeDisks(op, default_vg):
6058 """Computes the instance disks.
6060 @param op: The instance opcode
6061 @param default_vg: The default_vg to assume
6063 @return: The computed disks
6067 for disk in op.disks:
6068 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
6069 if mode not in constants.DISK_ACCESS_SET:
6070 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6071 mode, errors.ECODE_INVAL)
6072 size = disk.get(constants.IDISK_SIZE, None)
6074 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6077 except (TypeError, ValueError):
6078 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6081 ext_provider = disk.get(constants.IDISK_PROVIDER, None)
6082 if ext_provider and op.disk_template != constants.DT_EXT:
6083 raise errors.OpPrereqError("The '%s' option is only valid for the %s"
6084 " disk template, not %s" %
6085 (constants.IDISK_PROVIDER, constants.DT_EXT,
6086 op.disk_template), errors.ECODE_INVAL)
6088 data_vg = disk.get(constants.IDISK_VG, default_vg)
6089 name = disk.get(constants.IDISK_NAME, None)
6090 if name is not None and name.lower() == constants.VALUE_NONE:
6093 constants.IDISK_SIZE: size,
6094 constants.IDISK_MODE: mode,
6095 constants.IDISK_VG: data_vg,
6096 constants.IDISK_NAME: name,
6099 if constants.IDISK_METAVG in disk:
6100 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
6101 if constants.IDISK_ADOPT in disk:
6102 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
6104 # For extstorage, demand the `provider' option and add any
6105 # additional parameters (ext-params) to the dict
6106 if op.disk_template == constants.DT_EXT:
6108 new_disk[constants.IDISK_PROVIDER] = ext_provider
6110 if key not in constants.IDISK_PARAMS:
6111 new_disk[key] = disk[key]
6113 raise errors.OpPrereqError("Missing provider for template '%s'" %
6114 constants.DT_EXT, errors.ECODE_INVAL)
6116 disks.append(new_disk)
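# Illustrative example for _ComputeDisks (hypothetical request): an opcode
# disk specification of {"size": 10240, "mode": "rw"} for a DT_PLAIN instance
# is normalised to {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
# constants.IDISK_VG: default_vg, constants.IDISK_NAME: None}.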
6121 def _ComputeFullBeParams(op, cluster):
6122 """Computes the full beparams.
6124 @param op: The instance opcode
6125 @param cluster: The cluster config object
6127 @return: The fully filled beparams
6130 default_beparams = cluster.beparams[constants.PP_DEFAULT]
6131 for param, value in op.beparams.iteritems():
6132 if value == constants.VALUE_AUTO:
6133 op.beparams[param] = default_beparams[param]
6134 objects.UpgradeBeParams(op.beparams)
6135 utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
6136 return cluster.SimpleFillBE(op.beparams)
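# Illustrative example for _ComputeFullBeParams (hypothetical values): if the
# opcode specifies {constants.BE_VCPUS: constants.VALUE_AUTO} and the cluster
# default is 4, the "auto" value is first replaced by 4; the dict is then
# upgraded, type-checked and merged with the remaining cluster defaults by
# SimpleFillBE.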
6139 def _CheckOpportunisticLocking(op):
6140 """Generate error if opportunistic locking is not possible.
6143 if op.opportunistic_locking and not op.iallocator:
6144 raise errors.OpPrereqError("Opportunistic locking is only available in"
6145 " combination with an instance allocator",
6149 class LUInstanceCreate(LogicalUnit):
6150 """Create an instance.
6153 HPATH = "instance-add"
6154 HTYPE = constants.HTYPE_INSTANCE
6157 def CheckArguments(self):
6161 # do not require name_check to ease forward/backward compatibility
6163 if self.op.no_install and self.op.start:
6164 self.LogInfo("No-installation mode selected, disabling startup")
6165 self.op.start = False
6166 # validate/normalize the instance name
6167 self.op.instance_name = \
6168 netutils.Hostname.GetNormalizedName(self.op.instance_name)
6170 if self.op.ip_check and not self.op.name_check:
6171 # TODO: make the ip check more flexible and not depend on the name check
6172 raise errors.OpPrereqError("Cannot do IP address check without a name"
6173 " check", errors.ECODE_INVAL)
6175 # check nics' parameter names
6176 for nic in self.op.nics:
6177 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6178 # check that NIC's parameters names are unique and valid
6179 utils.ValidateDeviceNames("NIC", self.op.nics)
6181 # check that disk's names are unique and valid
6182 utils.ValidateDeviceNames("disk", self.op.disks)
6184 cluster = self.cfg.GetClusterInfo()
6185 if not self.op.disk_template in cluster.enabled_disk_templates:
6186 raise errors.OpPrereqError("Cannot create an instance with disk template"
6187 " '%s', because it is not enabled in the"
6188 " cluster. Enabled disk templates are: %s." %
6189 (self.op.disk_template,
6190 ",".join(cluster.enabled_disk_templates)))
6192 # check disks. parameter names and consistent adopt/no-adopt strategy
6193 has_adopt = has_no_adopt = False
6194 for disk in self.op.disks:
6195 if self.op.disk_template != constants.DT_EXT:
6196 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6197 if constants.IDISK_ADOPT in disk:
6201 if has_adopt and has_no_adopt:
6202 raise errors.OpPrereqError("Either all disks are adopted or none is",
6205 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6206 raise errors.OpPrereqError("Disk adoption is not supported for the"
6207 " '%s' disk template" %
6208 self.op.disk_template,
6210 if self.op.iallocator is not None:
6211 raise errors.OpPrereqError("Disk adoption not allowed with an"
6212 " iallocator script", errors.ECODE_INVAL)
6213 if self.op.mode == constants.INSTANCE_IMPORT:
6214 raise errors.OpPrereqError("Disk adoption not allowed for"
6215 " instance import", errors.ECODE_INVAL)
6217 if self.op.disk_template in constants.DTS_MUST_ADOPT:
6218 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
6219 " but no 'adopt' parameter given" %
6220 self.op.disk_template,
6223 self.adopt_disks = has_adopt
6225 # instance name verification
6226 if self.op.name_check:
6227 self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
6228 self.op.instance_name = self.hostname1.name
6229 # used in CheckPrereq for ip ping check
6230 self.check_ip = self.hostname1.ip
6232 self.check_ip = None
6234 # file storage checks
6235 if (self.op.file_driver and
6236 not self.op.file_driver in constants.FILE_DRIVER):
6237 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6238 self.op.file_driver, errors.ECODE_INVAL)
6240 if self.op.disk_template == constants.DT_FILE:
6241 opcodes.RequireFileStorage()
6242 elif self.op.disk_template == constants.DT_SHARED_FILE:
6243 opcodes.RequireSharedFileStorage()
6245 ### Node/iallocator related checks
6246 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6248 if self.op.pnode is not None:
6249 if self.op.disk_template in constants.DTS_INT_MIRROR:
6250 if self.op.snode is None:
6251 raise errors.OpPrereqError("The networked disk templates need"
6252 " a mirror node", errors.ECODE_INVAL)
6254 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6256 self.op.snode = None
6258 _CheckOpportunisticLocking(self.op)
6260 self._cds = _GetClusterDomainSecret()
6262 if self.op.mode == constants.INSTANCE_IMPORT:
6263 # On import force_variant must be True, because if we forced it at
6264 # initial install, our only chance when importing it back is that it
6266 self.op.force_variant = True
6268 if self.op.no_install:
6269 self.LogInfo("No-installation mode has no effect during import")
6271 elif self.op.mode == constants.INSTANCE_CREATE:
6272 if self.op.os_type is None:
6273 raise errors.OpPrereqError("No guest OS specified",
6275 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6276 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6277 " installation" % self.op.os_type,
6279 if self.op.disk_template is None:
6280 raise errors.OpPrereqError("No disk template specified",
6283 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6284 # Check handshake to ensure both clusters have the same domain secret
6285 src_handshake = self.op.source_handshake
6286 if not src_handshake:
6287 raise errors.OpPrereqError("Missing source handshake",
6290 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6293 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6296 # Load and check source CA
6297 self.source_x509_ca_pem = self.op.source_x509_ca
6298 if not self.source_x509_ca_pem:
6299 raise errors.OpPrereqError("Missing source X509 CA",
6303 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6305 except OpenSSL.crypto.Error, err:
6306 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6307 (err, ), errors.ECODE_INVAL)
6309 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6310 if errcode is not None:
6311 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6314 self.source_x509_ca = cert
6316 src_instance_name = self.op.source_instance_name
6317 if not src_instance_name:
6318 raise errors.OpPrereqError("Missing source instance name",
6321 self.source_instance_name = \
6322 netutils.GetHostname(name=src_instance_name).name
6325 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6326 self.op.mode, errors.ECODE_INVAL)
6328 def ExpandNames(self):
6329 """ExpandNames for CreateInstance.
6331 Figure out the right locks for instance creation.
6334 self.needed_locks = {}
6336 instance_name = self.op.instance_name
6337 # this is just a preventive check, but someone might still add this
6338 # instance in the meantime, and creation will fail at lock-add time
6339 if instance_name in self.cfg.GetInstanceList():
6340 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6341 instance_name, errors.ECODE_EXISTS)
6343 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6345 if self.op.iallocator:
6346 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
6347 # specifying a group on instance creation and then selecting nodes from
6349 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6350 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
6352 if self.op.opportunistic_locking:
6353 self.opportunistic_locks[locking.LEVEL_NODE] = True
6354 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
6356 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6357 nodelist = [self.op.pnode]
6358 if self.op.snode is not None:
6359 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6360 nodelist.append(self.op.snode)
6361 self.needed_locks[locking.LEVEL_NODE] = nodelist
6363 # in case of import lock the source node too
6364 if self.op.mode == constants.INSTANCE_IMPORT:
6365 src_node = self.op.src_node
6366 src_path = self.op.src_path
6368 if src_path is None:
6369 self.op.src_path = src_path = self.op.instance_name
6371 if src_node is None:
6372 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6373 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
6374 self.op.src_node = None
6375 if os.path.isabs(src_path):
6376 raise errors.OpPrereqError("Importing an instance from a path"
6377 " requires a source node option",
6380 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6381 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6382 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6383 if not os.path.isabs(src_path):
6384 self.op.src_path = src_path = \
6385 utils.PathJoin(pathutils.EXPORT_DIR, src_path)
6387 self.needed_locks[locking.LEVEL_NODE_RES] = \
6388 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
6390 def _RunAllocator(self):
6391 """Run the allocator based on input opcode.
6394 if self.op.opportunistic_locking:
6395 # Only consider nodes for which a lock is held
6396 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
6398 node_whitelist = None
6400 #TODO Export network to iallocator so that it chooses a pnode
6401 # in a nodegroup that has the desired network connected to
6402 req = _CreateInstanceAllocRequest(self.op, self.disks,
6403 self.nics, self.be_full,
6405 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
6407 ial.Run(self.op.iallocator)
6410 # When opportunistic locks are used only a temporary failure is generated
6411 if self.op.opportunistic_locking:
6412 ecode = errors.ECODE_TEMP_NORES
6414 ecode = errors.ECODE_NORES
6416 raise errors.OpPrereqError("Can't compute nodes using"
6417 " iallocator '%s': %s" %
6418 (self.op.iallocator, ial.info),
6421 self.op.pnode = ial.result[0]
6422 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6423 self.op.instance_name, self.op.iallocator,
6424 utils.CommaJoin(ial.result))
6426 assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"
6428 if req.RequiredNodes() == 2:
6429 self.op.snode = ial.result[1]
6431 def BuildHooksEnv(self):
6434 This runs on master, primary and secondary nodes of the instance.
6438 "ADD_MODE": self.op.mode,
6440 if self.op.mode == constants.INSTANCE_IMPORT:
6441 env["SRC_NODE"] = self.op.src_node
6442 env["SRC_PATH"] = self.op.src_path
6443 env["SRC_IMAGES"] = self.src_images
6445 env.update(_BuildInstanceHookEnv(
6446 name=self.op.instance_name,
6447 primary_node=self.op.pnode,
6448 secondary_nodes=self.secondaries,
6449 status=self.op.start,
6450 os_type=self.op.os_type,
6451 minmem=self.be_full[constants.BE_MINMEM],
6452 maxmem=self.be_full[constants.BE_MAXMEM],
6453 vcpus=self.be_full[constants.BE_VCPUS],
6454 nics=_NICListToTuple(self, self.nics),
6455 disk_template=self.op.disk_template,
6456 disks=[(d[constants.IDISK_NAME], d[constants.IDISK_SIZE],
6457 d[constants.IDISK_MODE]) for d in self.disks],
6460 hypervisor_name=self.op.hypervisor,
6466 def BuildHooksNodes(self):
6467 """Build hooks nodes.
6470 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
6473 def _ReadExportInfo(self):
6474 """Reads the export information from disk.
6476 It will override the opcode source node and path with the actual
6477 information, if these two were not specified before.
6479 @return: the export information
6482 assert self.op.mode == constants.INSTANCE_IMPORT
6484 src_node = self.op.src_node
6485 src_path = self.op.src_path
6487 if src_node is None:
6488 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
6489 exp_list = self.rpc.call_export_list(locked_nodes)
6491 for node in exp_list:
6492 if exp_list[node].fail_msg:
6494 if src_path in exp_list[node].payload:
6496 self.op.src_node = src_node = node
6497 self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
6501 raise errors.OpPrereqError("No export found for relative path %s" %
6502 src_path, errors.ECODE_INVAL)
6504 _CheckNodeOnline(self, src_node)
6505 result = self.rpc.call_export_info(src_node, src_path)
6506 result.Raise("No export or invalid export found in dir %s" % src_path)
6508 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6509 if not export_info.has_section(constants.INISECT_EXP):
6510 raise errors.ProgrammerError("Corrupted export config",
6511 errors.ECODE_ENVIRON)
6513 ei_version = export_info.get(constants.INISECT_EXP, "version")
6514 if (int(ei_version) != constants.EXPORT_VERSION):
6515 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6516 (ei_version, constants.EXPORT_VERSION),
6517 errors.ECODE_ENVIRON)
6520 def _ReadExportParams(self, einfo):
6521 """Use export parameters as defaults.
6523 In case the opcode doesn't specify (as in override) some instance
6524 parameters, then try to use them from the export information, if
6528 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6530 if self.op.disk_template is None:
6531 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6532 self.op.disk_template = einfo.get(constants.INISECT_INS,
6534 if self.op.disk_template not in constants.DISK_TEMPLATES:
6535 raise errors.OpPrereqError("Disk template specified in configuration"
6536 " file is not one of the allowed values:"
6538 " ".join(constants.DISK_TEMPLATES),
6541 raise errors.OpPrereqError("No disk template specified and the export"
6542 " is missing the disk_template information",
6545 if not self.op.disks:
6547 # TODO: import the disk iv_name too
6548 for idx in range(constants.MAX_DISKS):
6549 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
6550 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6551 disks.append({constants.IDISK_SIZE: disk_sz})
6552 self.op.disks = disks
6553 if not disks and self.op.disk_template != constants.DT_DISKLESS:
6554 raise errors.OpPrereqError("No disk info specified and the export"
6555 " is missing the disk information",
6558 if not self.op.nics:
6560 for idx in range(constants.MAX_NICS):
6561 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
6563 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6564 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6571 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
6572 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
6574 if (self.op.hypervisor is None and
6575 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6576 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6578 if einfo.has_section(constants.INISECT_HYP):
6579 # use the export parameters but do not override the ones
6580 # specified by the user
6581 for name, value in einfo.items(constants.INISECT_HYP):
6582 if name not in self.op.hvparams:
6583 self.op.hvparams[name] = value
6585 if einfo.has_section(constants.INISECT_BEP):
6586 # use the parameters, without overriding
6587 for name, value in einfo.items(constants.INISECT_BEP):
6588 if name not in self.op.beparams:
6589 self.op.beparams[name] = value
6590 # Compatibility for the old "memory" be param
6591 if name == constants.BE_MEMORY:
6592 if constants.BE_MAXMEM not in self.op.beparams:
6593 self.op.beparams[constants.BE_MAXMEM] = value
6594 if constants.BE_MINMEM not in self.op.beparams:
6595 self.op.beparams[constants.BE_MINMEM] = value
6597 # try to read the parameters old style, from the main section
6598 for name in constants.BES_PARAMETERS:
6599 if (name not in self.op.beparams and
6600 einfo.has_option(constants.INISECT_INS, name)):
6601 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6603 if einfo.has_section(constants.INISECT_OSP):
6604 # use the parameters, without overriding
6605 for name, value in einfo.items(constants.INISECT_OSP):
6606 if name not in self.op.osparams:
6607 self.op.osparams[name] = value
6609 def _RevertToDefaults(self, cluster):
6610 """Revert the instance parameters to the default values.
6614 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6615 for name in self.op.hvparams.keys():
6616 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6617 del self.op.hvparams[name]
6619 be_defs = cluster.SimpleFillBE({})
6620 for name in self.op.beparams.keys():
6621 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6622 del self.op.beparams[name]
6624 nic_defs = cluster.SimpleFillNIC({})
6625 for nic in self.op.nics:
6626 for name in constants.NICS_PARAMETERS:
6627 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6630 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6631 for name in self.op.osparams.keys():
6632 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6633 del self.op.osparams[name]
6635 def _CalculateFileStorageDir(self):
6636 """Calculate final instance file storage dir.
6639 # file storage dir calculation/check
6640 self.instance_file_storage_dir = None
6641 if self.op.disk_template in constants.DTS_FILEBASED:
6642 # build the full file storage dir path
6645 if self.op.disk_template == constants.DT_SHARED_FILE:
6646 get_fsd_fn = self.cfg.GetSharedFileStorageDir
6648 get_fsd_fn = self.cfg.GetFileStorageDir
6650 cfg_storagedir = get_fsd_fn()
6651 if not cfg_storagedir:
6652 raise errors.OpPrereqError("Cluster file storage dir not defined",
6654 joinargs.append(cfg_storagedir)
6656 if self.op.file_storage_dir is not None:
6657 joinargs.append(self.op.file_storage_dir)
6659 joinargs.append(self.op.instance_name)
6661 # pylint: disable=W0142
6662 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
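# Illustrative example (hypothetical paths): with a cluster storage dir of
# "/srv/ganeti/file-storage", an opcode file_storage_dir of "web" and an
# instance named "inst1.example.com", the resulting directory is
# "/srv/ganeti/file-storage/web/inst1.example.com".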
6664 def CheckPrereq(self): # pylint: disable=R0914
6665 """Check prerequisites.
6668 self._CalculateFileStorageDir()
6670 if self.op.mode == constants.INSTANCE_IMPORT:
6671 export_info = self._ReadExportInfo()
6672 self._ReadExportParams(export_info)
6673 self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
6675 self._old_instance_name = None
6677 if (not self.cfg.GetVGName() and
6678 self.op.disk_template not in constants.DTS_NOT_LVM):
6679 raise errors.OpPrereqError("Cluster does not support lvm-based"
6680 " instances", errors.ECODE_STATE)
6682 if (self.op.hypervisor is None or
6683 self.op.hypervisor == constants.VALUE_AUTO):
6684 self.op.hypervisor = self.cfg.GetHypervisorType()
6686 cluster = self.cfg.GetClusterInfo()
6687 enabled_hvs = cluster.enabled_hypervisors
6688 if self.op.hypervisor not in enabled_hvs:
6689 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6691 (self.op.hypervisor, ",".join(enabled_hvs)),
6694 # Check tag validity
6695 for tag in self.op.tags:
6696 objects.TaggableObject.ValidateTag(tag)
6698 # check hypervisor parameter syntax (locally)
6699 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6700 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6702 hv_type = hypervisor.GetHypervisorClass(self.op.hypervisor)
6703 hv_type.CheckParameterSyntax(filled_hvp)
6704 self.hv_full = filled_hvp
6705 # check that we don't specify global parameters on an instance
6706 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS, "hypervisor",
6707 "instance", "cluster")
6709 # fill and remember the beparams dict
6710 self.be_full = _ComputeFullBeParams(self.op, cluster)
6712 # build os parameters
6713 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6715 # now that hvp/bep are in final format, let's reset to defaults,
6717 if self.op.identify_defaults:
6718 self._RevertToDefaults(cluster)
6721 self.nics = _ComputeNics(self.op, cluster, self.check_ip, self.cfg,
6722 self.proc.GetECId())
6724 # disk checks/pre-build
6725 default_vg = self.cfg.GetVGName()
6726 self.disks = _ComputeDisks(self.op, default_vg)
6728 if self.op.mode == constants.INSTANCE_IMPORT:
6730 for idx in range(len(self.disks)):
6731 option = "disk%d_dump" % idx
6732 if export_info.has_option(constants.INISECT_INS, option):
6733 # FIXME: are the old os-es, disk sizes, etc. useful?
6734 export_name = export_info.get(constants.INISECT_INS, option)
6735 image = utils.PathJoin(self.op.src_path, export_name)
6736 disk_images.append(image)
6738 disk_images.append(False)
6740 self.src_images = disk_images
6742 if self.op.instance_name == self._old_instance_name:
6743 for idx, nic in enumerate(self.nics):
6744 if nic.mac == constants.VALUE_AUTO:
6745 nic_mac_ini = "nic%d_mac" % idx
6746 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6748 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6750 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6751 if self.op.ip_check:
6752 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6753 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6754 (self.check_ip, self.op.instance_name),
6755 errors.ECODE_NOTUNIQUE)
6757 #### mac address generation
# By generating the MAC address here, both the allocator and the hooks get
6759 # the real final mac address rather than the 'auto' or 'generate' value.
6760 # There is a race condition between the generation and the instance object
6761 # creation, which means that we know the mac is valid now, but we're not
6762 # sure it will be when we actually add the instance. If things go bad
6763 # adding the instance will abort because of a duplicate mac, and the
6764 # creation job will fail.
6765 for nic in self.nics:
6766 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6767 nic.mac = self.cfg.GenerateMAC(nic.network, self.proc.GetECId())
6771 if self.op.iallocator is not None:
6772 self._RunAllocator()
6774 # Release all unneeded node locks
6775 keep_locks = filter(None, [self.op.pnode, self.op.snode, self.op.src_node])
6776 _ReleaseLocks(self, locking.LEVEL_NODE, keep=keep_locks)
6777 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=keep_locks)
6778 _ReleaseLocks(self, locking.LEVEL_NODE_ALLOC)
6780 assert (self.owned_locks(locking.LEVEL_NODE) ==
6781 self.owned_locks(locking.LEVEL_NODE_RES)), \
6782 "Node locks differ from node resource locks"
6784 #### node related checks
6786 # check primary node
6787 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6788 assert self.pnode is not None, \
6789 "Cannot retrieve locked node %s" % self.op.pnode
6791 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6792 pnode.name, errors.ECODE_STATE)
6794 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6795 pnode.name, errors.ECODE_STATE)
6796 if not pnode.vm_capable:
6797 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
6798 " '%s'" % pnode.name, errors.ECODE_STATE)
6800 self.secondaries = []
6802 # Fill in any IPs from IP pools. This must happen here, because we need to
6803 # know the nic's primary node, as specified by the iallocator
6804 for idx, nic in enumerate(self.nics):
6805 net_uuid = nic.network
6806 if net_uuid is not None:
6807 nobj = self.cfg.GetNetwork(net_uuid)
6808 netparams = self.cfg.GetGroupNetParams(net_uuid, self.pnode.name)
6809 if netparams is None:
6810 raise errors.OpPrereqError("No netparams found for network"
6811 " %s. Propably not connected to"
6812 " node's %s nodegroup" %
6813 (nobj.name, self.pnode.name),
6815 self.LogInfo("NIC/%d inherits netparams %s" %
6816 (idx, netparams.values()))
6817 nic.nicparams = dict(netparams)
6818 if nic.ip is not None:
6819 if nic.ip.lower() == constants.NIC_IP_POOL:
6821 nic.ip = self.cfg.GenerateIp(net_uuid, self.proc.GetECId())
6822 except errors.ReservationError:
6823 raise errors.OpPrereqError("Unable to get a free IP for NIC %d"
6824 " from the address pool" % idx,
6826 self.LogInfo("Chose IP %s from network %s", nic.ip, nobj.name)
6829 self.cfg.ReserveIp(net_uuid, nic.ip, self.proc.GetECId())
6830 except errors.ReservationError:
6831 raise errors.OpPrereqError("IP address %s already in use"
6832 " or does not belong to network %s" %
6833 (nic.ip, nobj.name),
6834 errors.ECODE_NOTUNIQUE)
6836 # net is None, ip None or given
6837 elif self.op.conflicts_check:
6838 _CheckForConflictingIp(self, nic.ip, self.pnode.name)
6840 # mirror node verification
6841 if self.op.disk_template in constants.DTS_INT_MIRROR:
6842 if self.op.snode == pnode.name:
6843 raise errors.OpPrereqError("The secondary node cannot be the"
6844 " primary node", errors.ECODE_INVAL)
6845 _CheckNodeOnline(self, self.op.snode)
6846 _CheckNodeNotDrained(self, self.op.snode)
6847 _CheckNodeVmCapable(self, self.op.snode)
6848 self.secondaries.append(self.op.snode)
6850 snode = self.cfg.GetNodeInfo(self.op.snode)
6851 if pnode.group != snode.group:
6852 self.LogWarning("The primary and secondary nodes are in two"
6853 " different node groups; the disk parameters"
6854 " from the first disk's node group will be"
6857 if not self.op.disk_template in constants.DTS_EXCL_STORAGE:
6859 if self.op.disk_template in constants.DTS_INT_MIRROR:
6861 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
6862 if compat.any(map(has_es, nodes)):
6863 raise errors.OpPrereqError("Disk template %s not supported with"
6864 " exclusive storage" % self.op.disk_template,
6867 nodenames = [pnode.name] + self.secondaries
6869 if not self.adopt_disks:
6870 if self.op.disk_template == constants.DT_RBD:
6871 # _CheckRADOSFreeSpace() is just a placeholder.
6872 # Any function that checks prerequisites can be placed here.
6873 # Check if there is enough space on the RADOS cluster.
6874 _CheckRADOSFreeSpace()
6875 elif self.op.disk_template == constants.DT_EXT:
6876 # FIXME: Function that checks prereqs if needed
6879 # Check lv size requirements, if not adopting
6880 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
6881 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
6883 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
6884 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
6885 disk[constants.IDISK_ADOPT])
6886 for disk in self.disks])
6887 if len(all_lvs) != len(self.disks):
6888 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6890 for lv_name in all_lvs:
6892 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
6893 # to ReserveLV uses the same syntax
6894 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6895 except errors.ReservationError:
6896 raise errors.OpPrereqError("LV named %s used by another instance" %
6897 lv_name, errors.ECODE_NOTUNIQUE)
6899 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
6900 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
6902 node_lvs = self.rpc.call_lv_list([pnode.name],
6903 vg_names.payload.keys())[pnode.name]
6904 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6905 node_lvs = node_lvs.payload
6907 delta = all_lvs.difference(node_lvs.keys())
6909 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6910 utils.CommaJoin(delta),
6912 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6914 raise errors.OpPrereqError("Online logical volumes found, cannot"
6915 " adopt: %s" % utils.CommaJoin(online_lvs),
6917 # update the size of disk based on what is found
6918 for dsk in self.disks:
6919 dsk[constants.IDISK_SIZE] = \
6920 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
6921 dsk[constants.IDISK_ADOPT])][0]))
6923 elif self.op.disk_template == constants.DT_BLOCK:
6924 # Normalize and de-duplicate device paths
6925 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
6926 for disk in self.disks])
6927 if len(all_disks) != len(self.disks):
6928 raise errors.OpPrereqError("Duplicate disk names given for adoption",
6930 baddisks = [d for d in all_disks
6931 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
6933 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
6934 " cannot be adopted" %
6935 (utils.CommaJoin(baddisks),
6936 constants.ADOPTABLE_BLOCKDEV_ROOT),
6939 node_disks = self.rpc.call_bdev_sizes([pnode.name],
6940 list(all_disks))[pnode.name]
6941 node_disks.Raise("Cannot get block device information from node %s" %
6943 node_disks = node_disks.payload
6944 delta = all_disks.difference(node_disks.keys())
6946 raise errors.OpPrereqError("Missing block device(s): %s" %
6947 utils.CommaJoin(delta),
6949 for dsk in self.disks:
6950 dsk[constants.IDISK_SIZE] = \
6951 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
6953 # Verify instance specs
6954 spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
6956 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
6957 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
6958 constants.ISPEC_DISK_COUNT: len(self.disks),
6959 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
6960 for disk in self.disks],
6961 constants.ISPEC_NIC_COUNT: len(self.nics),
6962 constants.ISPEC_SPINDLE_USE: spindle_use,
6965 group_info = self.cfg.GetNodeGroup(pnode.group)
6966 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
6967 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec,
6968 self.op.disk_template)
6969 if not self.op.ignore_ipolicy and res:
6970 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
6971 (pnode.group, group_info.name, utils.CommaJoin(res)))
6972 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
6974 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6976 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6977 # check OS parameters (remotely)
6978 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
6980 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6982 #TODO: _CheckExtParams (remotely)
6983 # Check parameters for extstorage
6985 # memory check on primary node
6986 #TODO(dynmem): use MINMEM for checking
6988 _CheckNodeFreeMemory(self, self.pnode.name,
6989 "creating instance %s" % self.op.instance_name,
6990 self.be_full[constants.BE_MAXMEM],
6993 self.dry_run_result = list(nodenames)
6995 def Exec(self, feedback_fn):
6996 """Create and add the instance to the cluster.
6999 instance = self.op.instance_name
7000 pnode_name = self.pnode.name
7002 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
7003 self.owned_locks(locking.LEVEL_NODE)), \
7004 "Node locks differ from node resource locks"
7005 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7007 ht_kind = self.op.hypervisor
7008 if ht_kind in constants.HTS_REQ_PORT:
7009 network_port = self.cfg.AllocatePort()
# This is ugly, but we have a chicken-and-egg problem here
7014 # We can only take the group disk parameters, as the instance
7015 # has no disks yet (we are generating them right here).
7016 node = self.cfg.GetNodeInfo(pnode_name)
7017 nodegroup = self.cfg.GetNodeGroup(node.group)
7018 disks = _GenerateDiskTemplate(self,
7019 self.op.disk_template,
7020 instance, pnode_name,
7023 self.instance_file_storage_dir,
7024 self.op.file_driver,
7027 self.cfg.GetGroupDiskParams(nodegroup))
7029 iobj = objects.Instance(name=instance, os=self.op.os_type,
7030 primary_node=pnode_name,
7031 nics=self.nics, disks=disks,
7032 disk_template=self.op.disk_template,
7033 admin_state=constants.ADMINST_DOWN,
7034 network_port=network_port,
7035 beparams=self.op.beparams,
7036 hvparams=self.op.hvparams,
7037 hypervisor=self.op.hypervisor,
7038 osparams=self.op.osparams,
7042 for tag in self.op.tags:
7045 if self.adopt_disks:
7046 if self.op.disk_template == constants.DT_PLAIN:
7047 # rename LVs to the newly-generated names; we need to construct
7048 # 'fake' LV disks with the old data, plus the new unique_id
7049 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7051 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7052 rename_to.append(t_dsk.logical_id)
7053 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
7054 self.cfg.SetDiskID(t_dsk, pnode_name)
7055 result = self.rpc.call_blockdev_rename(pnode_name,
7056 zip(tmp_disks, rename_to))
7057 result.Raise("Failed to rename adoped LVs")
7059 feedback_fn("* creating instance disks...")
7061 _CreateDisks(self, iobj)
7062 except errors.OpExecError:
7063 self.LogWarning("Device creation failed")
7064 self.cfg.ReleaseDRBDMinors(instance)
7067 feedback_fn("adding instance %s to cluster config" % instance)
7069 self.cfg.AddInstance(iobj, self.proc.GetECId())
7071 # Declare that we don't want to remove the instance lock anymore, as we've
7072 # added the instance to the config
7073 del self.remove_locks[locking.LEVEL_INSTANCE]
7075 if self.op.mode == constants.INSTANCE_IMPORT:
7076 # Release unused nodes
7077 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
7080 _ReleaseLocks(self, locking.LEVEL_NODE)
7083 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
7084 feedback_fn("* wiping instance disks...")
7086 _WipeDisks(self, iobj)
7087 except errors.OpExecError, err:
7088 logging.exception("Wiping disks failed")
7089 self.LogWarning("Wiping instance disks failed (%s)", err)
7093 # Something is already wrong with the disks, don't do anything else
7095 elif self.op.wait_for_sync:
7096 disk_abort = not _WaitForSync(self, iobj)
7097 elif iobj.disk_template in constants.DTS_INT_MIRROR:
7098 # make sure the disks are not degraded (still sync-ing is ok)
7099 feedback_fn("* checking mirrors status")
7100 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7105 _RemoveDisks(self, iobj)
7106 self.cfg.RemoveInstance(iobj.name)
7107 # Make sure the instance lock gets removed
7108 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7109 raise errors.OpExecError("There are some degraded disks for"
7112 # Release all node resource locks
7113 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
7115 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7116 # we need to set the disks ID to the primary node, since the
# preceding code might or might not have done it, depending on
7118 # disk template and other options
7119 for disk in iobj.disks:
7120 self.cfg.SetDiskID(disk, pnode_name)
7121 if self.op.mode == constants.INSTANCE_CREATE:
7122 if not self.op.no_install:
7123 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
7124 not self.op.wait_for_sync)
7126 feedback_fn("* pausing disk sync to install instance OS")
7127 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
7130 for idx, success in enumerate(result.payload):
7132 logging.warn("pause-sync of instance %s for disk %d failed",
7135 feedback_fn("* running the instance OS create scripts...")
7136 # FIXME: pass debug option from opcode to backend
7138 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
7139 self.op.debug_level)
7141 feedback_fn("* resuming disk sync")
7142 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
7145 for idx, success in enumerate(result.payload):
7147 logging.warn("resume-sync of instance %s for disk %d failed",
7150 os_add_result.Raise("Could not add os for instance %s"
7151 " on node %s" % (instance, pnode_name))
7154 if self.op.mode == constants.INSTANCE_IMPORT:
7155 feedback_fn("* running the instance OS import scripts...")
7159 for idx, image in enumerate(self.src_images):
7163 # FIXME: pass debug option from opcode to backend
7164 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7165 constants.IEIO_FILE, (image, ),
7166 constants.IEIO_SCRIPT,
7167 (iobj.disks[idx], idx),
7169 transfers.append(dt)
7172 masterd.instance.TransferInstanceData(self, feedback_fn,
7173 self.op.src_node, pnode_name,
7174 self.pnode.secondary_ip,
7176 if not compat.all(import_result):
7177 self.LogWarning("Some disks for instance %s on node %s were not"
7178 " imported successfully" % (instance, pnode_name))
7180 rename_from = self._old_instance_name
7182 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7183 feedback_fn("* preparing remote import...")
7184 # The source cluster will stop the instance before attempting to make
7185 # a connection. In some cases stopping an instance can take a long
7186 # time, hence the shutdown timeout is added to the connection
7188 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
7189 self.op.source_shutdown_timeout)
7190 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7192 assert iobj.primary_node == self.pnode.name
7194 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
7195 self.source_x509_ca,
7196 self._cds, timeouts)
7197 if not compat.all(disk_results):
7198 # TODO: Should the instance still be started, even if some disks
7199 # failed to import (valid for local imports, too)?
7200 self.LogWarning("Some disks for instance %s on node %s were not"
7201 " imported successfully" % (instance, pnode_name))
7203 rename_from = self.source_instance_name
7206 # also checked in the prereq part
7207 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7210 # Run rename script on newly imported instance
7211 assert iobj.name == instance
7212 feedback_fn("Running rename script for %s" % instance)
7213 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7215 self.op.debug_level)
7217 self.LogWarning("Failed to run rename script for %s on node"
7218 " %s: %s" % (instance, pnode_name, result.fail_msg))
7220 assert not self.owned_locks(locking.LEVEL_NODE_RES)
7223 iobj.admin_state = constants.ADMINST_UP
7224 self.cfg.Update(iobj, feedback_fn)
7225 logging.info("Starting instance %s on node %s", instance, pnode_name)
7226 feedback_fn("* starting instance...")
7227 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
7228 False, self.op.reason)
7229 result.Raise("Could not start instance")
7231 return list(iobj.all_nodes)
7234 class LUInstanceMultiAlloc(NoHooksLU):
7235 """Allocates multiple instances at the same time.
7240 def CheckArguments(self):
7245 for inst in self.op.instances:
7246 if inst.iallocator is not None:
7247 raise errors.OpPrereqError("iallocator are not allowed to be set on"
7248 " instance objects", errors.ECODE_INVAL)
7249 nodes.append(bool(inst.pnode))
7250 if inst.disk_template in constants.DTS_INT_MIRROR:
7251 nodes.append(bool(inst.snode))
7253 has_nodes = compat.any(nodes)
7254 if compat.all(nodes) ^ has_nodes:
7255 raise errors.OpPrereqError("There are instance objects providing"
7256 " pnode/snode while others do not",
7259 if self.op.iallocator is None:
7260 default_iallocator = self.cfg.GetDefaultIAllocator()
7261 if default_iallocator and has_nodes:
7262 self.op.iallocator = default_iallocator
7264 raise errors.OpPrereqError("No iallocator or nodes on the instances"
7265 " given and no cluster-wide default"
7266 " iallocator found; please specify either"
7267 " an iallocator or nodes on the instances"
7268 " or set a cluster-wide default iallocator",
7271 _CheckOpportunisticLocking(self.op)
7273 dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
7275 raise errors.OpPrereqError("There are duplicate instance names: %s" %
7276 utils.CommaJoin(dups), errors.ECODE_INVAL)
7278 def ExpandNames(self):
7279 """Calculate the locks.
7282 self.share_locks = _ShareAll()
7283 self.needed_locks = {
7284 # iallocator will select nodes and even if no iallocator is used,
7285 # collisions with LUInstanceCreate should be avoided
7286 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
7289 if self.op.iallocator:
7290 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7291 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
7293 if self.op.opportunistic_locking:
7294 self.opportunistic_locks[locking.LEVEL_NODE] = True
7295 self.opportunistic_locks[locking.LEVEL_NODE_RES] = True
7298 for inst in self.op.instances:
7299 inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
7300 nodeslist.append(inst.pnode)
7301 if inst.snode is not None:
7302 inst.snode = _ExpandNodeName(self.cfg, inst.snode)
7303 nodeslist.append(inst.snode)
7305 self.needed_locks[locking.LEVEL_NODE] = nodeslist
7306 # Lock resources of instance's primary and secondary nodes (copy to
# prevent accidental modification)
7308 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)
7310 def CheckPrereq(self):
7311 """Check prerequisite.
7314 cluster = self.cfg.GetClusterInfo()
7315 default_vg = self.cfg.GetVGName()
7316 ec_id = self.proc.GetECId()
7318 if self.op.opportunistic_locking:
7319 # Only consider nodes for which a lock is held
7320 node_whitelist = list(self.owned_locks(locking.LEVEL_NODE))
7322 node_whitelist = None
7324 insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
7325 _ComputeNics(op, cluster, None,
7327 _ComputeFullBeParams(op, cluster),
7329 for op in self.op.instances]
7331 req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
7332 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7334 ial.Run(self.op.iallocator)
7337 raise errors.OpPrereqError("Can't compute nodes using"
7338 " iallocator '%s': %s" %
7339 (self.op.iallocator, ial.info),
7342 self.ia_result = ial.result
7345 self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
7346 constants.JOB_IDS_KEY: [],
7349 def _ConstructPartialResult(self):
7350 """Contructs the partial result.
7353 (allocatable, failed) = self.ia_result
7355 opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
7356 map(compat.fst, allocatable),
7357 opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
7360 def Exec(self, feedback_fn):
7361 """Executes the opcode.
7364 op2inst = dict((op.instance_name, op) for op in self.op.instances)
7365 (allocatable, failed) = self.ia_result
7368 for (name, nodes) in allocatable:
7369 op = op2inst.pop(name)
7372 (op.pnode, op.snode) = nodes
7378 missing = set(op2inst.keys()) - set(failed)
7379 assert not missing, \
7380 "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)
7382 return ResultWithJobs(jobs, **self._ConstructPartialResult())
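# The returned value bundles one creation job per instance the iallocator
# could place together with the partial result built above, so callers see
# both the submitted jobs and which requests could not be allocated.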
7385 def _CheckRADOSFreeSpace():
7386 """Compute disk size requirements inside the RADOS cluster.
7389 # For the RADOS cluster we assume there is always enough space.
7393 class LUInstanceConsole(NoHooksLU):
7394 """Connect to an instance's console.
7396 This is somewhat special in that it returns the command line that
7397 you need to run on the master node in order to connect to the
7403 def ExpandNames(self):
7404 self.share_locks = _ShareAll()
7405 self._ExpandAndLockInstance()
7407 def CheckPrereq(self):
7408 """Check prerequisites.
7410 This checks that the instance is in the cluster.
7413 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7414 assert self.instance is not None, \
7415 "Cannot retrieve locked instance %s" % self.op.instance_name
7416 _CheckNodeOnline(self, self.instance.primary_node)
7418 def Exec(self, feedback_fn):
7419 """Connect to the console of an instance
7422 instance = self.instance
7423 node = instance.primary_node
7425 node_insts = self.rpc.call_instance_list([node],
7426 [instance.hypervisor])[node]
7427 node_insts.Raise("Can't get node information from %s" % node)
7429 if instance.name not in node_insts.payload:
7430 if instance.admin_state == constants.ADMINST_UP:
7431 state = constants.INSTST_ERRORDOWN
7432 elif instance.admin_state == constants.ADMINST_DOWN:
7433 state = constants.INSTST_ADMINDOWN
7435 state = constants.INSTST_ADMINOFFLINE
7436 raise errors.OpExecError("Instance %s is not running (state %s)" %
7437 (instance.name, state))
7439 logging.debug("Connecting to console of %s on %s", instance.name, node)
7441 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
7444 def _GetInstanceConsole(cluster, instance):
7445 """Returns console information for an instance.
7447 @type cluster: L{objects.Cluster}
7448 @type instance: L{objects.Instance}
7452 hyper = hypervisor.GetHypervisorClass(instance.hypervisor)
7453 # beparams and hvparams are passed separately, to avoid editing the
7454 # instance and then saving the defaults in the instance itself.
7455 hvparams = cluster.FillHV(instance)
7456 beparams = cluster.FillBE(instance)
7457 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
7459 assert console.instance == instance.name
7460 assert console.Validate()
7462 return console.ToDict()
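# Per the LU docstring above, clients use this dictionary to build the
# command line that has to be run on the master node to reach the
# instance's console; which fields are meaningful depends on the
# hypervisor's GetInstanceConsole implementation (descriptive note only).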
7465 class LUInstanceReplaceDisks(LogicalUnit):
7466 """Replace the disks of an instance.
7469 HPATH = "mirrors-replace"
7470 HTYPE = constants.HTYPE_INSTANCE
7473 def CheckArguments(self):
7477 remote_node = self.op.remote_node
7478 ialloc = self.op.iallocator
7479 if self.op.mode == constants.REPLACE_DISK_CHG:
7480 if remote_node is None and ialloc is None:
7481 raise errors.OpPrereqError("When changing the secondary either an"
7482 " iallocator script must be used or the"
7483 " new node given", errors.ECODE_INVAL)
7485 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
7487 elif remote_node is not None or ialloc is not None:
7488 # Not replacing the secondary
7489 raise errors.OpPrereqError("The iallocator and new node options can"
7490 " only be used when changing the"
7491 " secondary node", errors.ECODE_INVAL)
7493 def ExpandNames(self):
7494 self._ExpandAndLockInstance()
7496 assert locking.LEVEL_NODE not in self.needed_locks
7497 assert locking.LEVEL_NODE_RES not in self.needed_locks
7498 assert locking.LEVEL_NODEGROUP not in self.needed_locks
7500 assert self.op.iallocator is None or self.op.remote_node is None, \
7501 "Conflicting options"
7503 if self.op.remote_node is not None:
7504 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7506 # Warning: do not remove the locking of the new secondary here
7507 # unless DRBD8.AddChildren is changed to work in parallel;
7508 # currently it doesn't since parallel invocations of
7509 # FindUnusedMinor will conflict
7510 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7511 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7513 self.needed_locks[locking.LEVEL_NODE] = []
7514 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7516 if self.op.iallocator is not None:
7517 # iallocator will select a new node in the same group
7518 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7519 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
7521 self.needed_locks[locking.LEVEL_NODE_RES] = []
7523 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7524 self.op.iallocator, self.op.remote_node,
7525 self.op.disks, self.op.early_release,
7526 self.op.ignore_ipolicy)
7528 self.tasklets = [self.replacer]
7530 def DeclareLocks(self, level):
7531 if level == locking.LEVEL_NODEGROUP:
7532 assert self.op.remote_node is None
7533 assert self.op.iallocator is not None
7534 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7536 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7537 # Lock all groups used by instance optimistically; this requires going
7538 # via the node before it's locked, requiring verification later on
7539 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7540 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
7542 elif level == locking.LEVEL_NODE:
7543 if self.op.iallocator is not None:
7544 assert self.op.remote_node is None
7545 assert not self.needed_locks[locking.LEVEL_NODE]
7546 assert locking.NAL in self.owned_locks(locking.LEVEL_NODE_ALLOC)
7548 # Lock member nodes of all locked groups
7549 self.needed_locks[locking.LEVEL_NODE] = \
7551 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
7552 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
7554 assert not self.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7556 self._LockInstancesNodes()
7558 elif level == locking.LEVEL_NODE_RES:
7560 self.needed_locks[locking.LEVEL_NODE_RES] = \
7561 self.needed_locks[locking.LEVEL_NODE]
7563 def BuildHooksEnv(self):
7566 This runs on the master, the primary and all the secondaries.
7569 instance = self.replacer.instance
7571 "MODE": self.op.mode,
7572 "NEW_SECONDARY": self.op.remote_node,
7573 "OLD_SECONDARY": instance.secondary_nodes[0],
7575 env.update(_BuildInstanceHookEnvByObject(self, instance))
7578 def BuildHooksNodes(self):
7579 """Build hooks nodes.
7582 instance = self.replacer.instance
7584 self.cfg.GetMasterNode(),
7585 instance.primary_node,
7587 if self.op.remote_node is not None:
7588 nl.append(self.op.remote_node)
7591 def CheckPrereq(self):
7592 """Check prerequisites.
7595 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
7596 self.op.iallocator is None)
7598 # Verify if node group locks are still correct
7599 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7601 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
7603 return LogicalUnit.CheckPrereq(self)
7606 class TLReplaceDisks(Tasklet):
7607 """Replaces disks for an instance.
7609 Note: Locking is not within the scope of this class.
7612 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7613 disks, early_release, ignore_ipolicy):
7614 """Initializes this class.
7617 Tasklet.__init__(self, lu)
7620 self.instance_name = instance_name
7622 self.iallocator_name = iallocator_name
7623 self.remote_node = remote_node
7625 self.early_release = early_release
7626 self.ignore_ipolicy = ignore_ipolicy
7629 self.instance = None
7630 self.new_node = None
7631 self.target_node = None
7632 self.other_node = None
7633 self.remote_node_info = None
7634 self.node_secondary_ip = None
7637 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7638 """Compute a new secondary node using an IAllocator.
7641 req = iallocator.IAReqRelocate(name=instance_name,
7642 relocate_from=list(relocate_from))
7643 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
7645 ial.Run(iallocator_name)
7648 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7649 " %s" % (iallocator_name, ial.info),
7652 remote_node_name = ial.result[0]
7654 lu.LogInfo("Selected new secondary for instance '%s': %s",
7655 instance_name, remote_node_name)
7657 return remote_node_name
7659 def _FindFaultyDisks(self, node_name):
7660 """Wrapper for L{_FindFaultyInstanceDisks}.
7663 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7666 def _CheckDisksActivated(self, instance):
7667 """Checks if the instance disks are activated.
7669 @param instance: The instance to check disks
7670 @return: True if they are activated, False otherwise
7673 nodes = instance.all_nodes
7675 for idx, dev in enumerate(instance.disks):
7677 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
7678 self.cfg.SetDiskID(dev, node)
7680 result = _BlockdevFind(self, node, dev, instance)
7684 elif result.fail_msg or not result.payload:
7689 def CheckPrereq(self):
7690 """Check prerequisites.
7692 This checks that the instance is in the cluster.
7695 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7696 assert instance is not None, \
7697 "Cannot retrieve locked instance %s" % self.instance_name
7699 if instance.disk_template != constants.DT_DRBD8:
7700 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7701 " instances", errors.ECODE_INVAL)
7703 if len(instance.secondary_nodes) != 1:
7704 raise errors.OpPrereqError("The instance has a strange layout,"
7705 " expected one secondary but found %d" %
7706 len(instance.secondary_nodes),
7709 instance = self.instance
7710 secondary_node = instance.secondary_nodes[0]
7712 if self.iallocator_name is None:
7713 remote_node = self.remote_node
7715 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7716 instance.name, instance.secondary_nodes)
7718 if remote_node is None:
7719 self.remote_node_info = None
7721 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
7722 "Remote node '%s' is not locked" % remote_node
7724 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7725 assert self.remote_node_info is not None, \
7726 "Cannot retrieve locked node %s" % remote_node
7728 if remote_node == self.instance.primary_node:
7729 raise errors.OpPrereqError("The specified node is the primary node of"
7730 " the instance", errors.ECODE_INVAL)
7732 if remote_node == secondary_node:
7733 raise errors.OpPrereqError("The specified node is already the"
7734 " secondary node of the instance",
7737 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7738 constants.REPLACE_DISK_CHG):
7739 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7742 if self.mode == constants.REPLACE_DISK_AUTO:
7743 if not self._CheckDisksActivated(instance):
7744 raise errors.OpPrereqError("Please run activate-disks on instance %s"
7745 " first" % self.instance_name,
7747 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7748 faulty_secondary = self._FindFaultyDisks(secondary_node)
7750 if faulty_primary and faulty_secondary:
7751 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7752 " one node and can not be repaired"
7753 " automatically" % self.instance_name,
7757 self.disks = faulty_primary
7758 self.target_node = instance.primary_node
7759 self.other_node = secondary_node
7760 check_nodes = [self.target_node, self.other_node]
7761 elif faulty_secondary:
7762 self.disks = faulty_secondary
7763 self.target_node = secondary_node
7764 self.other_node = instance.primary_node
7765 check_nodes = [self.target_node, self.other_node]
7771 # Non-automatic modes
7772 if self.mode == constants.REPLACE_DISK_PRI:
7773 self.target_node = instance.primary_node
7774 self.other_node = secondary_node
7775 check_nodes = [self.target_node, self.other_node]
7777 elif self.mode == constants.REPLACE_DISK_SEC:
7778 self.target_node = secondary_node
7779 self.other_node = instance.primary_node
7780 check_nodes = [self.target_node, self.other_node]
7782 elif self.mode == constants.REPLACE_DISK_CHG:
7783 self.new_node = remote_node
7784 self.other_node = instance.primary_node
7785 self.target_node = secondary_node
7786 check_nodes = [self.new_node, self.other_node]
7788 _CheckNodeNotDrained(self.lu, remote_node)
7789 _CheckNodeVmCapable(self.lu, remote_node)
7791 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7792 assert old_node_info is not None
7793 if old_node_info.offline and not self.early_release:
7794 # doesn't make sense to delay the release
7795 self.early_release = True
7796 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7797 " early-release mode", secondary_node)
7800 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7803 # If not specified, all disks should be replaced
7805 self.disks = range(len(self.instance.disks))
7807 # TODO: This is ugly, but right now we can't distinguish between internal
7808 # submitted opcode and external one. We should fix that.
7809 if self.remote_node_info:
7810 # We change the node, lets verify it still meets instance policy
7811 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
7812 cluster = self.cfg.GetClusterInfo()
7813 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
7815 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
7816 self.cfg, ignore=self.ignore_ipolicy)
7818 for node in check_nodes:
7819 _CheckNodeOnline(self.lu, node)
7821 touched_nodes = frozenset(node_name for node_name in [self.new_node,
7824 if node_name is not None)
7826 # Release unneeded node and node resource locks
7827 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
7828 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
7829 _ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)
7831 # Release any owned node group
7832 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
7834 # Check whether disks are valid
7835 for disk_idx in self.disks:
7836 instance.FindDisk(disk_idx)
7838 # Get secondary node IP addresses
7839 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
7840 in self.cfg.GetMultiNodeInfo(touched_nodes))
7842 def Exec(self, feedback_fn):
7843 """Execute disk replacement.
7845 This dispatches the disk replacement to the appropriate handler.
7849 # Verify owned locks before starting operation
7850 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
7851 assert set(owned_nodes) == set(self.node_secondary_ip), \
7852 ("Incorrect node locks, owning %s, expected %s" %
7853 (owned_nodes, self.node_secondary_ip.keys()))
7854 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
7855 self.lu.owned_locks(locking.LEVEL_NODE_RES))
7856 assert not self.lu.glm.is_owned(locking.LEVEL_NODE_ALLOC)
7858 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
7859 assert list(owned_instances) == [self.instance_name], \
7860 "Instance '%s' not locked" % self.instance_name
7862 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
7863 "Should not own any node group lock at this point"
7866 feedback_fn("No disks need replacement for instance '%s'" %
7870 feedback_fn("Replacing disk(s) %s for instance '%s'" %
7871 (utils.CommaJoin(self.disks), self.instance.name))
7872 feedback_fn("Current primary node: %s" % self.instance.primary_node)
7873 feedback_fn("Current seconary node: %s" %
7874 utils.CommaJoin(self.instance.secondary_nodes))
7876 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
7878 # Activate the instance disks if we're replacing them on a down instance
7880 _StartInstanceDisks(self.lu, self.instance, True)
7883 # Should we replace the secondary node?
7884 if self.new_node is not None:
7885 fn = self._ExecDrbd8Secondary
7887 fn = self._ExecDrbd8DiskOnly
7889 result = fn(feedback_fn)
7891 # Deactivate the instance disks if we're replacing them on a
7894 _SafeShutdownInstanceDisks(self.lu, self.instance)
7896 assert not self.lu.owned_locks(locking.LEVEL_NODE)
7899 # Verify owned locks
7900 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
7901 nodes = frozenset(self.node_secondary_ip)
7902 assert ((self.early_release and not owned_nodes) or
7903 (not self.early_release and not (set(owned_nodes) - nodes))), \
7904 ("Not owning the correct locks, early_release=%s, owned=%r,"
7905 " nodes=%r" % (self.early_release, owned_nodes, nodes))
7909 def _CheckVolumeGroup(self, nodes):
7910 self.lu.LogInfo("Checking volume groups")
7912 vgname = self.cfg.GetVGName()
7914 # Make sure volume group exists on all involved nodes
7915 results = self.rpc.call_vg_list(nodes)
7917 raise errors.OpExecError("Can't list volume groups on the nodes")
7921 res.Raise("Error checking node %s" % node)
7922 if vgname not in res.payload:
7923 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7926 def _CheckDisksExistence(self, nodes):
7927 # Check disk existence
7928 for idx, dev in enumerate(self.instance.disks):
7929 if idx not in self.disks:
7933 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
7934 self.cfg.SetDiskID(dev, node)
7936 result = _BlockdevFind(self, node, dev, self.instance)
7938 msg = result.fail_msg
7939 if msg or not result.payload:
7941 msg = "disk not found"
7942 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7945 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7946 for idx, dev in enumerate(self.instance.disks):
7947 if idx not in self.disks:
7950 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7953 if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
7954 on_primary, ldisk=ldisk):
7955 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7956 " replace disks for instance %s" %
7957 (node_name, self.instance.name))
7959 def _CreateNewStorage(self, node_name):
7960 """Create new storage on the primary or secondary node.
7962 This is only used for same-node replaces, not for changing the
7963 secondary node, hence we don't want to modify the existing disk.
7968 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
7969 for idx, dev in enumerate(disks):
7970 if idx not in self.disks:
7973 self.lu.LogInfo("Adding storage on %s for disk/%d", node_name, idx)
7975 self.cfg.SetDiskID(dev, node_name)
7977 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7978 names = _GenerateUniqueNames(self.lu, lv_names)
7980 (data_disk, meta_disk) = dev.children
7981 vg_data = data_disk.logical_id[0]
7982 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7983 logical_id=(vg_data, names[0]),
7984 params=data_disk.params)
7985 vg_meta = meta_disk.logical_id[0]
7986 lv_meta = objects.Disk(dev_type=constants.LD_LV,
7987 size=constants.DRBD_META_SIZE,
7988 logical_id=(vg_meta, names[1]),
7989 params=meta_disk.params)
7991 new_lvs = [lv_data, lv_meta]
7992 old_lvs = [child.Copy() for child in dev.children]
7993 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7994 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, node_name)
7996 # we pass force_create=True to force the LVM creation
7997 for new_lv in new_lvs:
7998 _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
7999 _GetInstanceInfoText(self.instance), False,
8004 def _CheckDevices(self, node_name, iv_names):
8005 for name, (dev, _, _) in iv_names.iteritems():
8006 self.cfg.SetDiskID(dev, node_name)
8008 result = _BlockdevFind(self, node_name, dev, self.instance)
8010 msg = result.fail_msg
8011 if msg or not result.payload:
8013 msg = "disk not found"
8014 raise errors.OpExecError("Can't find DRBD device %s: %s" %
8017 if result.payload.is_degraded:
8018 raise errors.OpExecError("DRBD device %s is degraded!" % name)
8020 def _RemoveOldStorage(self, node_name, iv_names):
8021 for name, (_, old_lvs, _) in iv_names.iteritems():
8022 self.lu.LogInfo("Remove logical volumes for %s", name)
8025 self.cfg.SetDiskID(lv, node_name)
8027 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8029 self.lu.LogWarning("Can't remove old LV: %s", msg,
8030 hint="remove unused LVs manually")
8032 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
8033 """Replace a disk on the primary or secondary for DRBD 8.
8035 The algorithm for replace is quite complicated:
8037 1. for each disk to be replaced:
8039 1. create new LVs on the target node with unique names
8040 1. detach old LVs from the drbd device
8041 1. rename old LVs to name_replaced.<time_t>
8042 1. rename new LVs to old LVs
8043 1. attach the new LVs (with the old names now) to the drbd device
8045 1. wait for sync across all devices
8047 1. for each modified disk:
8049 1. remove old LVs (which have the name name_replaces.<time_t>)
8051 Failures are not very well handled.
8056 # Step: check device activation
8057 self.lu.LogStep(1, steps_total, "Check device existence")
8058 self._CheckDisksExistence([self.other_node, self.target_node])
8059 self._CheckVolumeGroup([self.target_node, self.other_node])
8061 # Step: check other node consistency
8062 self.lu.LogStep(2, steps_total, "Check peer consistency")
8063 self._CheckDisksConsistency(self.other_node,
8064 self.other_node == self.instance.primary_node,
8067 # Step: create new storage
8068 self.lu.LogStep(3, steps_total, "Allocate new storage")
8069 iv_names = self._CreateNewStorage(self.target_node)
8071 # Step: for each lv, detach+rename*2+attach
8072 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8073 for dev, old_lvs, new_lvs in iv_names.itervalues():
8074 self.lu.LogInfo("Detaching %s drbd from local storage", dev.iv_name)
8076 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8078 result.Raise("Can't detach drbd from local storage on node"
8079 " %s for device %s" % (self.target_node, dev.iv_name))
8081 #cfg.Update(instance)
8083 # ok, we created the new LVs, so now we know we have the needed
8084 # storage; as such, we proceed on the target node to rename
8085 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8086 # using the assumption that logical_id == physical_id (which in
8087 # turn is the unique_id on that node)
8089 # FIXME(iustin): use a better name for the replaced LVs
8090 temp_suffix = int(time.time())
8091 ren_fn = lambda d, suff: (d.physical_id[0],
8092 d.physical_id[1] + "_replaced-%s" % suff)
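# As a concrete illustration only: an old LV whose physical_id names it
# "<vg>/<uuid>.disk0_data" would be renamed to
# "<vg>/<uuid>.disk0_data_replaced-<time_t>", freeing the original name for
# the freshly created LV (the real names come from _GenerateUniqueNames, so
# the exact form shown here is hypothetical).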
8094 # Build the rename list based on what LVs exist on the node
8095 rename_old_to_new = []
8096 for to_ren in old_lvs:
8097 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8098 if not result.fail_msg and result.payload:
8100 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8102 self.lu.LogInfo("Renaming the old LVs on the target node")
8103 result = self.rpc.call_blockdev_rename(self.target_node,
8105 result.Raise("Can't rename old LVs on node %s" % self.target_node)
8107 # Now we rename the new LVs to the old LVs
8108 self.lu.LogInfo("Renaming the new LVs on the target node")
8109 rename_new_to_old = [(new, old.physical_id)
8110 for old, new in zip(old_lvs, new_lvs)]
8111 result = self.rpc.call_blockdev_rename(self.target_node,
8113 result.Raise("Can't rename new LVs on node %s" % self.target_node)
8115 # Intermediate steps of in memory modifications
8116 for old, new in zip(old_lvs, new_lvs):
8117 new.logical_id = old.logical_id
8118 self.cfg.SetDiskID(new, self.target_node)
8120 # We need to modify old_lvs so that removal later removes the
8121 # right LVs, not the newly added ones; note that old_lvs is a
8123 for disk in old_lvs:
8124 disk.logical_id = ren_fn(disk, temp_suffix)
8125 self.cfg.SetDiskID(disk, self.target_node)
8127 # Now that the new lvs have the old name, we can add them to the device
8128 self.lu.LogInfo("Adding new mirror component on %s", self.target_node)
8129 result = self.rpc.call_blockdev_addchildren(self.target_node,
8130 (dev, self.instance), new_lvs)
8131 msg = result.fail_msg
8133 for new_lv in new_lvs:
8134 msg2 = self.rpc.call_blockdev_remove(self.target_node,
8137 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8138 hint=("cleanup manually the unused logical"
8140 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8142 cstep = itertools.count(5)
8144 if self.early_release:
8145 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8146 self._RemoveOldStorage(self.target_node, iv_names)
8147 # TODO: Check if releasing locks early still makes sense
8148 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
8150 # Release all resource locks except those used by the instance
8151 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
8152 keep=self.node_secondary_ip.keys())
8154 # Release all node locks while waiting for sync
8155 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
8157 # TODO: Can the instance lock be downgraded here? Take the optional disk
8158 # shutdown in the caller into consideration.
8161 # This can fail as the old devices are degraded and _WaitForSync
8162 # does a combined result over all disks, so we don't check its return value
8163 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
8164 _WaitForSync(self.lu, self.instance)
8166 # Check all devices manually
8167 self._CheckDevices(self.instance.primary_node, iv_names)
8169 # Step: remove old storage
8170 if not self.early_release:
8171 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8172 self._RemoveOldStorage(self.target_node, iv_names)
8174 def _ExecDrbd8Secondary(self, feedback_fn):
8175 """Replace the secondary node for DRBD 8.
8177 The algorithm for replace is quite complicated:
8178 - for all disks of the instance:
8179 - create new LVs on the new node with same names
8180 - shutdown the drbd device on the old secondary
8181 - disconnect the drbd network on the primary
8182 - create the drbd device on the new secondary
8183 - network attach the drbd on the primary, using an artifice:
8184 the drbd code for Attach() will connect to the network if it
8185 finds a device which is connected to the good local disks but
8187 - wait for sync across all devices
8188 - remove all disks from the old secondary
8190 Failures are not very well handled.
8195 pnode = self.instance.primary_node
8197 # Step: check device activation
8198 self.lu.LogStep(1, steps_total, "Check device existence")
8199 self._CheckDisksExistence([self.instance.primary_node])
8200 self._CheckVolumeGroup([self.instance.primary_node])
8202 # Step: check other node consistency
8203 self.lu.LogStep(2, steps_total, "Check peer consistency")
8204 self._CheckDisksConsistency(self.instance.primary_node, True, True)
8206 # Step: create new storage
8207 self.lu.LogStep(3, steps_total, "Allocate new storage")
8208 disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
8209 excl_stor = _IsExclusiveStorageEnabledNodeName(self.lu.cfg, self.new_node)
8210 for idx, dev in enumerate(disks):
8211 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8212 (self.new_node, idx))
8213 # we pass force_create=True to force LVM creation
8214 for new_lv in dev.children:
8215 _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
8216 True, _GetInstanceInfoText(self.instance), False,
8219 # Step 4: drbd minors and drbd setup changes
8220 # after this, we must manually remove the drbd minors on both the
8221 # error and the success paths
8222 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8223 minors = self.cfg.AllocateDRBDMinor([self.new_node
8224 for dev in self.instance.disks],
8226 logging.debug("Allocated minors %r", minors)
8229 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8230 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8231 (self.new_node, idx))
8232 # create new devices on new_node; note that we create two IDs:
8233 # one without port, so the drbd will be activated without
8234 # networking information on the new node at this stage, and one
8235 # with network, for the latter activation in step 4
8236 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8237 if self.instance.primary_node == o_node1:
8240 assert self.instance.primary_node == o_node2, "Three-node instance?"
8243 new_alone_id = (self.instance.primary_node, self.new_node, None,
8244 p_minor, new_minor, o_secret)
8245 new_net_id = (self.instance.primary_node, self.new_node, o_port,
8246 p_minor, new_minor, o_secret)
8248 iv_names[idx] = (dev, dev.children, new_net_id)
8249 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8251 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8252 logical_id=new_alone_id,
8253 children=dev.children,
8256 (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
8259 _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
8261 _GetInstanceInfoText(self.instance), False,
8263 except errors.GenericError:
8264 self.cfg.ReleaseDRBDMinors(self.instance.name)
8267 # We have new devices, shutdown the drbd on the old secondary
8268 for idx, dev in enumerate(self.instance.disks):
8269 self.lu.LogInfo("Shutting down drbd for disk/%d on old node", idx)
8270 self.cfg.SetDiskID(dev, self.target_node)
8271 msg = self.rpc.call_blockdev_shutdown(self.target_node,
8272 (dev, self.instance)).fail_msg
8274 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8275 "node: %s" % (idx, msg),
8276 hint=("Please cleanup this device manually as"
8277 " soon as possible"))
8279 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8280 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
8281 self.instance.disks)[pnode]
8283 msg = result.fail_msg
8285 # detaches didn't succeed (unlikely)
8286 self.cfg.ReleaseDRBDMinors(self.instance.name)
8287 raise errors.OpExecError("Can't detach the disks from the network on"
8288 " old node: %s" % (msg,))
8290 # if we managed to detach at least one, we update all the disks of
8291 # the instance to point to the new secondary
8292 self.lu.LogInfo("Updating instance configuration")
8293 for dev, _, new_logical_id in iv_names.itervalues():
8294 dev.logical_id = new_logical_id
8295 self.cfg.SetDiskID(dev, self.instance.primary_node)
8297 self.cfg.Update(self.instance, feedback_fn)
8299 # Release all node locks (the configuration has been updated)
8300 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
8302 # and now perform the drbd attach
8303 self.lu.LogInfo("Attaching primary drbds to new secondary"
8304 " (standalone => connected)")
8305 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8307 self.node_secondary_ip,
8308 (self.instance.disks, self.instance),
8311 for to_node, to_result in result.items():
8312 msg = to_result.fail_msg
8314 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8316 hint=("please do a gnt-instance info to see the"
8317 " status of disks"))
8319 cstep = itertools.count(5)
8321 if self.early_release:
8322 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8323 self._RemoveOldStorage(self.target_node, iv_names)
8324 # TODO: Check if releasing locks early still makes sense
8325 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
8327 # Release all resource locks except those used by the instance
8328 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
8329 keep=self.node_secondary_ip.keys())
8331 # TODO: Can the instance lock be downgraded here? Take the optional disk
8332 # shutdown in the caller into consideration.
8335 # This can fail as the old devices are degraded and _WaitForSync
8336 # does a combined result over all disks, so we don't check its return value
8337 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
8338 _WaitForSync(self.lu, self.instance)
8340 # Check all devices manually
8341 self._CheckDevices(self.instance.primary_node, iv_names)
8343 # Step: remove old storage
8344 if not self.early_release:
8345 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
8346 self._RemoveOldStorage(self.target_node, iv_names)
8349 class LURepairNodeStorage(NoHooksLU):
8350 """Repairs the volume group on a node.
8355 def CheckArguments(self):
8356 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8358 storage_type = self.op.storage_type
8360 if (constants.SO_FIX_CONSISTENCY not in
8361 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8362 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8363 " repaired" % storage_type,
8366 def ExpandNames(self):
8367 self.needed_locks = {
8368 locking.LEVEL_NODE: [self.op.node_name],
8371 def _CheckFaultyDisks(self, instance, node_name):
8372 """Ensure faulty disks abort the opcode or at least warn."""
8374 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8376 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8377 " node '%s'" % (instance.name, node_name),
8379 except errors.OpPrereqError, err:
8380 if self.op.ignore_consistency:
8381 self.LogWarning(str(err.args[0]))
8385 def CheckPrereq(self):
8386 """Check prerequisites.
8389 # Check whether any instance on this node has faulty disks
8390 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8391 if inst.admin_state != constants.ADMINST_UP:
8393 check_nodes = set(inst.all_nodes)
8394 check_nodes.discard(self.op.node_name)
8395 for inst_node_name in check_nodes:
8396 self._CheckFaultyDisks(inst, inst_node_name)
8398 def Exec(self, feedback_fn):
8399 feedback_fn("Repairing storage unit '%s' on %s ..." %
8400 (self.op.name, self.op.node_name))
8402 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8403 result = self.rpc.call_storage_execute(self.op.node_name,
8404 self.op.storage_type, st_args,
8406 constants.SO_FIX_CONSISTENCY)
8407 result.Raise("Failed to repair storage unit '%s' on %s" %
8408 (self.op.name, self.op.node_name))
8411 class LUNodeEvacuate(NoHooksLU):
8412 """Evacuates instances off a list of nodes.
8417 _MODE2IALLOCATOR = {
8418 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
8419 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
8420 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
8422 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
8423 assert (frozenset(_MODE2IALLOCATOR.values()) ==
8424 constants.IALLOCATOR_NEVAC_MODES)
8426 def CheckArguments(self):
8427 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8429 def ExpandNames(self):
8430 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8432 if self.op.remote_node is not None:
8433 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8434 assert self.op.remote_node
8436 if self.op.remote_node == self.op.node_name:
8437 raise errors.OpPrereqError("Can not use evacuated node as a new"
8438 " secondary node", errors.ECODE_INVAL)
8440 if self.op.mode != constants.NODE_EVAC_SEC:
8441 raise errors.OpPrereqError("Without the use of an iallocator only"
8442 " secondary instances can be evacuated",
8446 self.share_locks = _ShareAll()
8447 self.needed_locks = {
8448 locking.LEVEL_INSTANCE: [],
8449 locking.LEVEL_NODEGROUP: [],
8450 locking.LEVEL_NODE: [],
8453 # Determine nodes (via group) optimistically, needs verification once locks
8454 # have been acquired
8455 self.lock_nodes = self._DetermineNodes()
8457 def _DetermineNodes(self):
8458 """Gets the list of nodes to operate on.
8461 if self.op.remote_node is None:
8462 # Iallocator will choose any node(s) in the same group
8463 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
8465 group_nodes = frozenset([self.op.remote_node])
8467 # Determine nodes to be locked
8468 return set([self.op.node_name]) | group_nodes
8470 def _DetermineInstances(self):
8471 """Builds list of instances to operate on.
8474 assert self.op.mode in constants.NODE_EVAC_MODES
8476 if self.op.mode == constants.NODE_EVAC_PRI:
8477 # Primary instances only
8478 inst_fn = _GetNodePrimaryInstances
8479 assert self.op.remote_node is None, \
8480 "Evacuating primary instances requires iallocator"
8481 elif self.op.mode == constants.NODE_EVAC_SEC:
8482 # Secondary instances only
8483 inst_fn = _GetNodeSecondaryInstances
8486 assert self.op.mode == constants.NODE_EVAC_ALL
8487 inst_fn = _GetNodeInstances
8488 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
8490 raise errors.OpPrereqError("Due to an issue with the iallocator"
8491 " interface it is not possible to evacuate"
8492 " all instances at once; specify explicitly"
8493 " whether to evacuate primary or secondary"
8497 return inst_fn(self.cfg, self.op.node_name)
8499 def DeclareLocks(self, level):
8500 if level == locking.LEVEL_INSTANCE:
8501 # Lock instances optimistically, needs verification once node and group
8502 # locks have been acquired
8503 self.needed_locks[locking.LEVEL_INSTANCE] = \
8504 set(i.name for i in self._DetermineInstances())
8506 elif level == locking.LEVEL_NODEGROUP:
8507 # Lock node groups for all potential target nodes optimistically, needs
8508 # verification once nodes have been acquired
8509 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8510 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
8512 elif level == locking.LEVEL_NODE:
8513 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
8515 def CheckPrereq(self):
8517 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
8518 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
8519 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
8521 need_nodes = self._DetermineNodes()
8523 if not owned_nodes.issuperset(need_nodes):
8524 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
8525 " locks were acquired, current nodes are"
8526 " are '%s', used to be '%s'; retry the"
8529 utils.CommaJoin(need_nodes),
8530 utils.CommaJoin(owned_nodes)),
8533 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
8534 if owned_groups != wanted_groups:
8535 raise errors.OpExecError("Node groups changed since locks were acquired,"
8536 " current groups are '%s', used to be '%s';"
8537 " retry the operation" %
8538 (utils.CommaJoin(wanted_groups),
8539 utils.CommaJoin(owned_groups)))
8541 # Determine affected instances
8542 self.instances = self._DetermineInstances()
8543 self.instance_names = [i.name for i in self.instances]
8545 if set(self.instance_names) != owned_instances:
8546 raise errors.OpExecError("Instances on node '%s' changed since locks"
8547 " were acquired, current instances are '%s',"
8548 " used to be '%s'; retry the operation" %
8550 utils.CommaJoin(self.instance_names),
8551 utils.CommaJoin(owned_instances)))
8553 if self.instance_names:
8554 self.LogInfo("Evacuating instances from node '%s': %s",
8556 utils.CommaJoin(utils.NiceSort(self.instance_names)))
8558 self.LogInfo("No instances to evacuate from node '%s'",
8561 if self.op.remote_node is not None:
8562 for i in self.instances:
8563 if i.primary_node == self.op.remote_node:
8564 raise errors.OpPrereqError("Node %s is the primary node of"
8565 " instance %s, cannot use it as"
8567 (self.op.remote_node, i.name),
8570 def Exec(self, feedback_fn):
8571 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
8573 if not self.instance_names:
8574 # No instances to evacuate
8577 elif self.op.iallocator is not None:
8578 # TODO: Implement relocation to other group
8579 evac_mode = self._MODE2IALLOCATOR[self.op.mode]
8580 req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
8581 instances=list(self.instance_names))
8582 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8584 ial.Run(self.op.iallocator)
8587 raise errors.OpPrereqError("Can't compute node evacuation using"
8588 " iallocator '%s': %s" %
8589 (self.op.iallocator, ial.info),
8592 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
8594 elif self.op.remote_node is not None:
8595 assert self.op.mode == constants.NODE_EVAC_SEC
8597 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
8598 remote_node=self.op.remote_node,
8600 mode=constants.REPLACE_DISK_CHG,
8601 early_release=self.op.early_release)]
8602 for instance_name in self.instance_names]
8605 raise errors.ProgrammerError("No iallocator or remote node")
8607 return ResultWithJobs(jobs)
8610 def _SetOpEarlyRelease(early_release, op):
8611 """Sets C{early_release} flag on opcodes if available.
8615 op.early_release = early_release
8616 except AttributeError:
8617 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
8622 def _NodeEvacDest(use_nodes, group, nodes):
8623 """Returns group or nodes depending on caller's choice.
8627 return utils.CommaJoin(nodes)
8632 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
8633 """Unpacks the result of change-group and node-evacuate iallocator requests.
8635 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
8636 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
8638 @type lu: L{LogicalUnit}
8639 @param lu: Logical unit instance
8640 @type alloc_result: tuple/list
8641 @param alloc_result: Result from iallocator
8642 @type early_release: bool
8643 @param early_release: Whether to release locks early if possible
8644 @type use_nodes: bool
8645 @param use_nodes: Whether to display node names instead of groups
8648 (moved, failed, jobs) = alloc_result
8651 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
8652 for (name, reason) in failed)
8653 lu.LogWarning("Unable to evacuate instances %s", failreason)
8654 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
8657 lu.LogInfo("Instances to be moved: %s",
8658 utils.CommaJoin("%s (to %s)" %
8659 (name, _NodeEvacDest(use_nodes, group, nodes))
8660 for (name, group, nodes) in moved))
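# "jobs" as returned by the iallocator is a list of job definitions, each a
# list of serialized opcodes; the return below rebuilds the opcode objects
# and propagates the early_release flag to those that support it.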
8662 return [map(compat.partial(_SetOpEarlyRelease, early_release),
8663 map(opcodes.OpCode.LoadOpCode, ops))
8667 def _DiskSizeInBytesToMebibytes(lu, size):
8668 """Converts a disk size in bytes to mebibytes.
8670 Warns and rounds up if the size isn't an even multiple of 1 MiB.
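For example, a size of 1073741825 bytes (1 GiB plus one byte) gives
divmod(size, 1024 * 1024) == (1024, 1); the function then warns that
1048575 bytes will not be wiped and rounds the result up to 1025 MiB.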
8673 (mib, remainder) = divmod(size, 1024 * 1024)
8676 lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
8677 " to not overwrite existing data (%s bytes will not be"
8678 " wiped)", (1024 * 1024) - remainder)
8684 class LUInstanceGrowDisk(LogicalUnit):
8685 """Grow a disk of an instance.
8689 HTYPE = constants.HTYPE_INSTANCE
8692 def ExpandNames(self):
8693 self._ExpandAndLockInstance()
8694 self.needed_locks[locking.LEVEL_NODE] = []
8695 self.needed_locks[locking.LEVEL_NODE_RES] = []
8696 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8697 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
8699 def DeclareLocks(self, level):
8700 if level == locking.LEVEL_NODE:
8701 self._LockInstancesNodes()
8702 elif level == locking.LEVEL_NODE_RES:
8704 self.needed_locks[locking.LEVEL_NODE_RES] = \
8705 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
8707 def BuildHooksEnv(self):
8710 This runs on the master, the primary and all the secondaries.
8714 "DISK": self.op.disk,
8715 "AMOUNT": self.op.amount,
8716 "ABSOLUTE": self.op.absolute,
8718 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8721 def BuildHooksNodes(self):
8722 """Build hooks nodes.
8725 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8728 def CheckPrereq(self):
8729 """Check prerequisites.
8731 This checks that the instance is in the cluster.
8734 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8735 assert instance is not None, \
8736 "Cannot retrieve locked instance %s" % self.op.instance_name
8737 nodenames = list(instance.all_nodes)
8738 for node in nodenames:
8739 _CheckNodeOnline(self, node)
8741 self.instance = instance
8743 if instance.disk_template not in constants.DTS_GROWABLE:
8744 raise errors.OpPrereqError("Instance's disk layout does not support"
8745 " growing", errors.ECODE_INVAL)
8747 self.disk = instance.FindDisk(self.op.disk)
8749 if self.op.absolute:
8750 self.target = self.op.amount
8751 self.delta = self.target - self.disk.size
8753 raise errors.OpPrereqError("Requested size (%s) is smaller than "
8754 "current disk size (%s)" %
8755 (utils.FormatUnit(self.target, "h"),
8756 utils.FormatUnit(self.disk.size, "h")),
8759 self.delta = self.op.amount
8760 self.target = self.disk.size + self.delta
8762 raise errors.OpPrereqError("Requested increment (%s) is negative" %
8763 utils.FormatUnit(self.delta, "h"),
8766 self._CheckDiskSpace(nodenames, self.disk.ComputeGrowth(self.delta))
8768 def _CheckDiskSpace(self, nodenames, req_vgspace):
8769 template = self.instance.disk_template
8770 if template not in constants.DTS_NO_FREE_SPACE_CHECK:
8771 # TODO: check the free disk space for file, when that feature will be
8773 nodes = map(self.cfg.GetNodeInfo, nodenames)
8774 es_nodes = filter(lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n),
8777 # With exclusive storage we need to do something smarter than just looking
8778 # at free space; for now, let's simply abort the operation.
8779 raise errors.OpPrereqError("Cannot grow disks when exclusive_storage"
8780 " is enabled", errors.ECODE_STATE)
8781 _CheckNodesFreeDiskPerVG(self, nodenames, req_vgspace)
8783 def Exec(self, feedback_fn):
8784 """Execute disk grow.
8787 instance = self.instance
8790 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
8791 assert (self.owned_locks(locking.LEVEL_NODE) ==
8792 self.owned_locks(locking.LEVEL_NODE_RES))
8794 wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks
8796 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8798 raise errors.OpExecError("Cannot activate block device to grow")
8800 feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
8801 (self.op.disk, instance.name,
8802 utils.FormatUnit(self.delta, "h"),
8803 utils.FormatUnit(self.target, "h")))
8805 # First run all grow ops in dry-run mode
8806 for node in instance.all_nodes:
8807 self.cfg.SetDiskID(disk, node)
8808 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
8810 result.Raise("Dry-run grow request failed to node %s" % node)
8813 # Get disk size from primary node for wiping
8814 result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
8815 result.Raise("Failed to retrieve disk size from node '%s'" %
8816 instance.primary_node)
8818 (disk_size_in_bytes, ) = result.payload
8820 if disk_size_in_bytes is None:
8821 raise errors.OpExecError("Failed to retrieve disk size from primary"
8822 " node '%s'" % instance.primary_node)
8824 old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)
8826 assert old_disk_size >= disk.size, \
8827 ("Retrieved disk size too small (got %s, should be at least %s)" %
8828 (old_disk_size, disk.size))
8830 old_disk_size = None
8832 # We know that (as far as we can test) operations across different
8833 # nodes will succeed, time to run it for real on the backing storage
8834 for node in instance.all_nodes:
8835 self.cfg.SetDiskID(disk, node)
8836 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
8838 result.Raise("Grow request failed to node %s" % node)
8840 # And now execute it for logical storage, on the primary node
8841 node = instance.primary_node
8842 self.cfg.SetDiskID(disk, node)
8843 result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
8845 result.Raise("Grow request failed to node %s" % node)
8847 disk.RecordGrow(self.delta)
8848 self.cfg.Update(instance, feedback_fn)
8850 # Changes have been recorded, release node lock
8851 _ReleaseLocks(self, locking.LEVEL_NODE)
8853 # Downgrade lock while waiting for sync
8854 self.glm.downgrade(locking.LEVEL_INSTANCE)
8856 assert wipe_disks ^ (old_disk_size is None)
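# Invariant: the original disk size was recorded earlier exactly when disk
# wiping is enabled, so the wipe below always knows where the pre-existing
# data ends.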
8859 assert instance.disks[self.op.disk] == disk
8861 # Wipe newly added disk space
8862 _WipeDisks(self, instance,
8863 disks=[(self.op.disk, disk, old_disk_size)])
8865 if self.op.wait_for_sync:
8866 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8868 self.LogWarning("Disk syncing has not returned a good status; check"
8870 if instance.admin_state != constants.ADMINST_UP:
8871 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8872 elif instance.admin_state != constants.ADMINST_UP:
8873 self.LogWarning("Not shutting down the disk even if the instance is"
8874 " not supposed to be running because no wait for"
8875 " sync mode was requested")
8877 assert self.owned_locks(locking.LEVEL_NODE_RES)
8878 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
8881 class LUInstanceQueryData(NoHooksLU):
8882 """Query runtime instance data.
8887 def ExpandNames(self):
8888 self.needed_locks = {}
8890 # Use locking if requested or when non-static information is wanted
8891 if not (self.op.static or self.op.use_locking):
8892 self.LogWarning("Non-static data requested, locks need to be acquired")
8893 self.op.use_locking = True
8895 if self.op.instances or not self.op.use_locking:
8896 # Expand instance names right here
8897 self.wanted_names = _GetWantedInstances(self, self.op.instances)
8899 # Will use acquired locks
8900 self.wanted_names = None
8902 if self.op.use_locking:
8903 self.share_locks = _ShareAll()
8905 if self.wanted_names is None:
8906 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8908 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8910 self.needed_locks[locking.LEVEL_NODEGROUP] = []
8911 self.needed_locks[locking.LEVEL_NODE] = []
8912 self.needed_locks[locking.LEVEL_NETWORK] = []
8913 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8915 def DeclareLocks(self, level):
8916 if self.op.use_locking:
8917 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
8918 if level == locking.LEVEL_NODEGROUP:
8920 # Lock all groups used by instances optimistically; this requires going
8921 # via the node before it's locked, requiring verification later on
8922 self.needed_locks[locking.LEVEL_NODEGROUP] = \
8923 frozenset(group_uuid
8924 for instance_name in owned_instances
8926 self.cfg.GetInstanceNodeGroups(instance_name))
8928 elif level == locking.LEVEL_NODE:
8929 self._LockInstancesNodes()
8931 elif level == locking.LEVEL_NETWORK:
8932 self.needed_locks[locking.LEVEL_NETWORK] = \
8934 for instance_name in owned_instances
8936 self.cfg.GetInstanceNetworks(instance_name))
8938 def CheckPrereq(self):
8939 """Check prerequisites.
8941 This only checks the optional instance list against the existing names.
8944 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
8945 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
8946 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
8947 owned_networks = frozenset(self.owned_locks(locking.LEVEL_NETWORK))
8949 if self.wanted_names is None:
8950 assert self.op.use_locking, "Locking was not used"
8951 self.wanted_names = owned_instances
8953 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
8955 if self.op.use_locking:
8956 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
8959 assert not (owned_instances or owned_groups or
8960 owned_nodes or owned_networks)
8962 self.wanted_instances = instances.values()
8964 def _ComputeBlockdevStatus(self, node, instance, dev):
8965 """Returns the status of a block device
8968 if self.op.static or not node:
8971 self.cfg.SetDiskID(dev, node)
8973 result = self.rpc.call_blockdev_find(node, dev)
8977 result.Raise("Can't compute disk status for %s" % instance.name)
8979 status = result.payload
8983 return (status.dev_path, status.major, status.minor,
8984 status.sync_percent, status.estimated_time,
8985 status.is_degraded, status.ldisk_status)
8987 def _ComputeDiskStatus(self, instance, snode, dev):
8988 """Compute block device status.
8991 (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)
8993 return self._ComputeDiskStatusInner(instance, snode, anno_dev)
8995 def _ComputeDiskStatusInner(self, instance, snode, dev):
8996 """Compute block device status.
8998 @attention: The device has to be annotated already.
9001 if dev.dev_type in constants.LDS_DRBD:
9002 # we change the snode then (otherwise we use the one passed in)
9003 if dev.logical_id[0] == instance.primary_node:
9004 snode = dev.logical_id[1]
9006 snode = dev.logical_id[0]
9008 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9010 dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)
9013 dev_children = map(compat.partial(self._ComputeDiskStatusInner,
9020 "iv_name": dev.iv_name,
9021 "dev_type": dev.dev_type,
9022 "logical_id": dev.logical_id,
9023 "physical_id": dev.physical_id,
9024 "pstatus": dev_pstatus,
9025 "sstatus": dev_sstatus,
9026 "children": dev_children,
9033 def Exec(self, feedback_fn):
9034 """Gather and return data"""
9037 cluster = self.cfg.GetClusterInfo()
9039 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
9040 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
9042 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
9043 for node in nodes.values()))
9045 group2name_fn = lambda uuid: groups[uuid].name
9046 for instance in self.wanted_instances:
9047 pnode = nodes[instance.primary_node]
9049 if self.op.static or pnode.offline:
9052 self.LogWarning("Primary node %s is marked offline, returning static"
9053 " information only for instance %s" %
9054 (pnode.name, instance.name))
9056 remote_info = self.rpc.call_instance_info(instance.primary_node,
9058 instance.hypervisor)
9059 remote_info.Raise("Error checking node %s" % instance.primary_node)
9060 remote_info = remote_info.payload
9061 if remote_info and "state" in remote_info:
9064 if instance.admin_state == constants.ADMINST_UP:
9065 remote_state = "down"
9067 remote_state = instance.admin_state
9069 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
9072 snodes_group_uuids = [nodes[snode_name].group
9073 for snode_name in instance.secondary_nodes]
9075 result[instance.name] = {
9076 "name": instance.name,
9077 "config_state": instance.admin_state,
9078 "run_state": remote_state,
9079 "pnode": instance.primary_node,
9080 "pnode_group_uuid": pnode.group,
9081 "pnode_group_name": group2name_fn(pnode.group),
9082 "snodes": instance.secondary_nodes,
9083 "snodes_group_uuids": snodes_group_uuids,
9084 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
9086 # this happens to be the same format used for hooks
9087 "nics": _NICListToTuple(self, instance.nics),
9088 "disk_template": instance.disk_template,
9090 "hypervisor": instance.hypervisor,
9091 "network_port": instance.network_port,
9092 "hv_instance": instance.hvparams,
9093 "hv_actual": cluster.FillHV(instance, skip_globals=True),
9094 "be_instance": instance.beparams,
9095 "be_actual": cluster.FillBE(instance),
9096 "os_instance": instance.osparams,
9097 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9098 "serial_no": instance.serial_no,
9099 "mtime": instance.mtime,
9100 "ctime": instance.ctime,
9101 "uuid": instance.uuid,
9107 def PrepareContainerMods(mods, private_fn):
9108 """Prepares a list of container modifications by adding a private data field.
9110 @type mods: list of tuples; (operation, index, parameters)
9111 @param mods: List of modifications
9112 @type private_fn: callable or None
9113 @param private_fn: Callable for constructing a private data field for a
9118 if private_fn is None:
9123 return [(op, idx, params, fn()) for (op, idx, params) in mods]
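# Illustrative only: with mods=[(constants.DDM_ADD, -1, params)] and a
# private_fn such as _InstNicModPrivate, the result is
# [(constants.DDM_ADD, -1, params, <private data object>)], ready to be
# consumed by ApplyContainerMods below.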
9126 def GetItemFromContainer(identifier, kind, container):
9127 """Return the item refered by the identifier.
9129 @type identifier: string
9130 @param identifier: Item index or name or UUID
9132 @param kind: One-word item description
9133 @type container: list
9134 @param container: Container to get the item from
9139 idx = int(identifier)
9142 absidx = len(container) - 1
9144 raise IndexError("Not accepting negative indices other than -1")
9145 elif idx > len(container):
9146 raise IndexError("Got %s index %s, but there are only %s" %
9147 (kind, idx, len(container)))
9150 return (absidx, container[idx])
9154 for idx, item in enumerate(container):
9155 if item.uuid == identifier or item.name == identifier:
9158 raise errors.OpPrereqError("Cannot find %s with identifier %s" %
9159 (kind, identifier), errors.ECODE_NOENT)
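# An illustrative sketch of identifier resolution (container and names
# hypothetical): a numeric string is treated as an index, -1 meaning the
# last item, and anything else is matched against the items' name and uuid
# attributes.
#
#   GetItemFromContainer("0", "NIC", nics)        # -> (0, nics[0])
#   GetItemFromContainer("-1", "NIC", nics)       # -> (len(nics) - 1, nics[-1])
#   GetItemFromContainer("eth-front", "NIC", nics)
#   # -> (idx, nic) whose name or uuid is "eth-front", else OpPrereqError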
9162 #: Type description for changes as returned by L{ApplyContainerMods}'s
9164 _TApplyContModsCbChanges = \
9165 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
9171 def ApplyContainerMods(kind, container, chgdesc, mods,
9172 create_fn, modify_fn, remove_fn):
9173 """Applies descriptions in C{mods} to C{container}.
9176 @param kind: One-word item description
9177 @type container: list
9178 @param container: Container to modify
9179 @type chgdesc: None or list
9180 @param chgdesc: List of applied changes
9182 @param mods: Modifications as returned by L{PrepareContainerMods}
9183 @type create_fn: callable
9184 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
9185 receives absolute item index, parameters and private data object as added
9186 by L{PrepareContainerMods}, returns tuple containing new item and changes
9188 @type modify_fn: callable
9189 @param modify_fn: Callback for modifying an existing item
9190 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
9191 and private data object as added by L{PrepareContainerMods}, returns
9193 @type remove_fn: callable
9194 @param remove_fn: Callback on removing item; receives absolute item index,
9195 item and private data object as added by L{PrepareContainerMods}
9198 for (op, identifier, params, private) in mods:
9201 if op == constants.DDM_ADD:
9202 # Calculate where item will be added
9203 # When adding an item, identifier can only be an index
9205 idx = int(identifier)
9207 raise errors.OpPrereqError("Only possitive integer or -1 is accepted as"
9208 " identifier for %s" % constants.DDM_ADD,
9211 addidx = len(container)
9214 raise IndexError("Not accepting negative indices other than -1")
9215 elif idx > len(container):
9216 raise IndexError("Got %s index %s, but there are only %s" %
9217 (kind, idx, len(container)))
9220 if create_fn is None:
9223 (item, changes) = create_fn(addidx, params, private)
9226 container.append(item)
9229 assert idx <= len(container)
9230 # list.insert does so before the specified index
9231 container.insert(idx, item)
9233 # Retrieve existing item
9234 (absidx, item) = GetItemFromContainer(identifier, kind, container)
9236 if op == constants.DDM_REMOVE:
9239 if remove_fn is not None:
9240 remove_fn(absidx, item, private)
9242 changes = [("%s/%s" % (kind, absidx), "remove")]
9244 assert container[absidx] == item
9245 del container[absidx]
9246 elif op == constants.DDM_MODIFY:
9247 if modify_fn is not None:
9248 changes = modify_fn(absidx, item, params, private)
9250 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
9252 assert _TApplyContModsCbChanges(changes)
9254 if not (chgdesc is None or changes is None):
9255 chgdesc.extend(changes)
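# A small sketch of how the callbacks are wired (callback and values
# hypothetical): create_fn/modify_fn return change descriptions that are
# collected in chgdesc, while the container itself is modified in place.
#
#   def _create(addidx, params, private):
#     disk = objects.Disk(size=params[constants.IDISK_SIZE])
#     return (disk, [("disk/%d" % addidx, "add")])
#
#   chgdesc = []
#   mods = PrepareContainerMods([(constants.DDM_ADD, -1,
#                                 {constants.IDISK_SIZE: 1024})], None)
#   ApplyContainerMods("disk", disks, chgdesc, mods, _create, None, None)
#   # disks gains one entry; chgdesc == [("disk/<new index>", "add")]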
9258 def _UpdateIvNames(base_index, disks):
9259 """Updates the C{iv_name} attribute of disks.
9261 @type disks: list of L{objects.Disk}
9264 for (idx, disk) in enumerate(disks):
9265 disk.iv_name = "disk/%s" % (base_index + idx, )
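# For example (numbers illustrative): with base_index=2 and three disks the
# disks end up named "disk/2", "disk/3" and "disk/4"; LUInstanceSetParams
# calls this with base_index=0 to renumber the whole list after changes.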
9268 class _InstNicModPrivate:
9269 """Data structure for network interface modifications.
9271 Used by L{LUInstanceSetParams}.
9279 class LUInstanceSetParams(LogicalUnit):
9280 """Modifies an instances's parameters.
9283 HPATH = "instance-modify"
9284 HTYPE = constants.HTYPE_INSTANCE
9288 def _UpgradeDiskNicMods(kind, mods, verify_fn):
9289 assert ht.TList(mods)
9290 assert not mods or len(mods[0]) in (2, 3)
9292 if mods and len(mods[0]) == 2:
9296 for op, params in mods:
9297 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
9298 result.append((op, -1, params))
9302 raise errors.OpPrereqError("Only one %s add or remove operation is"
9303 " supported at a time" % kind,
9306 result.append((constants.DDM_MODIFY, op, params))
9308 assert verify_fn(result)
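# A sketch of the upgrade performed above (mods hypothetical), from the old
# two-element format to the (op, identifier, params) format used internally:
#
#   [("add", {"size": 1024})]   -> [(constants.DDM_ADD, -1, {"size": 1024})]
#   [("remove", {})]            -> [(constants.DDM_REMOVE, -1, {})]
#   [(0, {"mode": "ro"})]       -> [(constants.DDM_MODIFY, 0, {"mode": "ro"})]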
9315 def _CheckMods(kind, mods, key_types, item_fn):
9316 """Ensures requested disk/NIC modifications are valid.
9319 for (op, _, params) in mods:
9320 assert ht.TDict(params)
9322 # If 'key_types' is an empty dict, we assume we have an
9323 # 'ext' template and thus do not ForceDictType
9325 utils.ForceDictType(params, key_types)
9327 if op == constants.DDM_REMOVE:
9329 raise errors.OpPrereqError("No settings should be passed when"
9330 " removing a %s" % kind,
9332 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
9335 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
9338 def _VerifyDiskModification(op, params):
9339 """Verifies a disk modification.
9342 if op == constants.DDM_ADD:
9343 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9344 if mode not in constants.DISK_ACCESS_SET:
9345 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9348 size = params.get(constants.IDISK_SIZE, None)
9350 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
9351 constants.IDISK_SIZE, errors.ECODE_INVAL)
9355 except (TypeError, ValueError), err:
9356 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
9359 params[constants.IDISK_SIZE] = size
9360 name = params.get(constants.IDISK_NAME, None)
9361 if name is not None and name.lower() == constants.VALUE_NONE:
9362 params[constants.IDISK_NAME] = None
9364 elif op == constants.DDM_MODIFY:
9365 if constants.IDISK_SIZE in params:
9366 raise errors.OpPrereqError("Disk size change not possible, use"
9367 " grow-disk", errors.ECODE_INVAL)
9369 raise errors.OpPrereqError("Disk modification doesn't support"
9370 " additional arbitrary parameters",
9372 name = params.get(constants.IDISK_NAME, None)
9373 if name is not None and name.lower() == constants.VALUE_NONE:
9374 params[constants.IDISK_NAME] = None
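# Example parameter sets (values illustrative) that pass this check: an add
# must carry a size and may carry an access mode, while a modify may only
# touch the mode and/or name (size changes go through grow-disk).
#
#   (constants.DDM_ADD,    {constants.IDISK_SIZE: 1024,
#                           constants.IDISK_MODE: constants.DISK_RDWR})
#   (constants.DDM_MODIFY, {constants.IDISK_MODE: constants.DISK_RDONLY})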
9377 def _VerifyNicModification(op, params):
9378 """Verifies a network interface modification.
9381 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
9382 ip = params.get(constants.INIC_IP, None)
9383 name = params.get(constants.INIC_NAME, None)
9384 req_net = params.get(constants.INIC_NETWORK, None)
9385 link = params.get(constants.NIC_LINK, None)
9386 mode = params.get(constants.NIC_MODE, None)
9387 if name is not None and name.lower() == constants.VALUE_NONE:
9388 params[constants.INIC_NAME] = None
9389 if req_net is not None:
9390 if req_net.lower() == constants.VALUE_NONE:
9391 params[constants.INIC_NETWORK] = None
9393 elif link is not None or mode is not None:
9394 raise errors.OpPrereqError("If network is given"
9395 " mode or link should not",
9398 if op == constants.DDM_ADD:
9399 macaddr = params.get(constants.INIC_MAC, None)
9401 params[constants.INIC_MAC] = constants.VALUE_AUTO
9404 if ip.lower() == constants.VALUE_NONE:
9405 params[constants.INIC_IP] = None
9407 if ip.lower() == constants.NIC_IP_POOL:
9408 if op == constants.DDM_ADD and req_net is None:
9409 raise errors.OpPrereqError("If ip=pool, parameter network"
9413 if not netutils.IPAddress.IsValid(ip):
9414 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9417 if constants.INIC_MAC in params:
9418 macaddr = params[constants.INIC_MAC]
9419 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9420 macaddr = utils.NormalizeAndValidateMac(macaddr)
9422 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
9423 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9424 " modifying an existing NIC",
9427 def CheckArguments(self):
9428 if not (self.op.nics or self.op.disks or self.op.disk_template or
9429 self.op.hvparams or self.op.beparams or self.op.os_name or
9430 self.op.offline is not None or self.op.runtime_mem or
9432 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9434 if self.op.hvparams:
9435 _CheckParamsNotGlobal(self.op.hvparams, constants.HVC_GLOBALS,
9436 "hypervisor", "instance", "cluster")
9438 self.op.disks = self._UpgradeDiskNicMods(
9439 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
9440 self.op.nics = self._UpgradeDiskNicMods(
9441 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
9443 if self.op.disks and self.op.disk_template is not None:
9444 raise errors.OpPrereqError("Disk template conversion and other disk"
9445 " changes not supported at the same time",
9448 if (self.op.disk_template and
9449 self.op.disk_template in constants.DTS_INT_MIRROR and
9450 self.op.remote_node is None):
9451 raise errors.OpPrereqError("Changing the disk template to a mirrored"
9452 " one requires specifying a secondary node",
9455 # Check NIC modifications
9456 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
9457 self._VerifyNicModification)
9460 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9462 def ExpandNames(self):
9463 self._ExpandAndLockInstance()
9464 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9465 # Can't even acquire node locks in shared mode as upcoming changes in
9466 # Ganeti 2.6 will start to modify the node object on disk conversion
9467 self.needed_locks[locking.LEVEL_NODE] = []
9468 self.needed_locks[locking.LEVEL_NODE_RES] = []
9469 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9470 # Lock node group to look up the ipolicy
9471 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9473 def DeclareLocks(self, level):
9474 if level == locking.LEVEL_NODEGROUP:
9475 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9476 # Acquire locks for the instance's nodegroups optimistically. Needs
9477 # to be verified in CheckPrereq
9478 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9479 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9480 elif level == locking.LEVEL_NODE:
9481 self._LockInstancesNodes()
9482 if self.op.disk_template and self.op.remote_node:
9483 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9484 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9485 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
9487 self.needed_locks[locking.LEVEL_NODE_RES] = \
9488 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
9490 def BuildHooksEnv(self):
9493 This runs on the master, primary and secondaries.
9497 if constants.BE_MINMEM in self.be_new:
9498 args["minmem"] = self.be_new[constants.BE_MINMEM]
9499 if constants.BE_MAXMEM in self.be_new:
9500 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
9501 if constants.BE_VCPUS in self.be_new:
9502 args["vcpus"] = self.be_new[constants.BE_VCPUS]
9503 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9504 # information at all.
9506 if self._new_nics is not None:
9509 for nic in self._new_nics:
9510 n = copy.deepcopy(nic)
9511 nicparams = self.cluster.SimpleFillNIC(n.nicparams)
9512 n.nicparams = nicparams
9513 nics.append(_NICToTuple(self, n))
9517 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9518 if self.op.disk_template:
9519 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9520 if self.op.runtime_mem:
9521 env["RUNTIME_MEMORY"] = self.op.runtime_mem
9525 def BuildHooksNodes(self):
9526 """Build hooks nodes.
9529 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9532 def _PrepareNicModification(self, params, private, old_ip, old_net_uuid,
9533 old_params, cluster, pnode):
9535 update_params_dict = dict([(key, params[key])
9536 for key in constants.NICS_PARAMETERS
9539 req_link = update_params_dict.get(constants.NIC_LINK, None)
9540 req_mode = update_params_dict.get(constants.NIC_MODE, None)
9543 new_net_uuid_or_name = params.get(constants.INIC_NETWORK, old_net_uuid)
9544 if new_net_uuid_or_name:
9545 new_net_uuid = self.cfg.LookupNetwork(new_net_uuid_or_name)
9546 new_net_obj = self.cfg.GetNetwork(new_net_uuid)
9549 old_net_obj = self.cfg.GetNetwork(old_net_uuid)
9552 netparams = self.cfg.GetGroupNetParams(new_net_uuid, pnode)
9554 raise errors.OpPrereqError("No netparams found for the network"
9555 " %s, probably not connected" %
9556 new_net_obj.name, errors.ECODE_INVAL)
9557 new_params = dict(netparams)
9559 new_params = _GetUpdatedParams(old_params, update_params_dict)
9561 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
9563 new_filled_params = cluster.SimpleFillNIC(new_params)
9564 objects.NIC.CheckParameterSyntax(new_filled_params)
9566 new_mode = new_filled_params[constants.NIC_MODE]
9567 if new_mode == constants.NIC_MODE_BRIDGED:
9568 bridge = new_filled_params[constants.NIC_LINK]
9569 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
9571 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
9573 self.warn.append(msg)
9575 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9577 elif new_mode == constants.NIC_MODE_ROUTED:
9578 ip = params.get(constants.INIC_IP, old_ip)
9580 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
9581 " on a routed NIC", errors.ECODE_INVAL)
9583 elif new_mode == constants.NIC_MODE_OVS:
9584 # TODO: check OVS link
9585 self.LogInfo("OVS links are currently not checked for correctness")
9587 if constants.INIC_MAC in params:
9588 mac = params[constants.INIC_MAC]
9590 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
9592 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9593 # otherwise generate the MAC address
9594 params[constants.INIC_MAC] = \
9595 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
9597 # or validate/reserve the current one
9599 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9600 except errors.ReservationError:
9601 raise errors.OpPrereqError("MAC address '%s' already in use"
9602 " in cluster" % mac,
9603 errors.ECODE_NOTUNIQUE)
9604 elif new_net_uuid != old_net_uuid:
9606 def get_net_prefix(net_uuid):
9609 nobj = self.cfg.GetNetwork(net_uuid)
9610 mac_prefix = nobj.mac_prefix
9614 new_prefix = get_net_prefix(new_net_uuid)
9615 old_prefix = get_net_prefix(old_net_uuid)
9616 if old_prefix != new_prefix:
9617 params[constants.INIC_MAC] = \
9618 self.cfg.GenerateMAC(new_net_uuid, self.proc.GetECId())
9620 # if there is a change in (ip, network) tuple
9621 new_ip = params.get(constants.INIC_IP, old_ip)
9622 if (new_ip, new_net_uuid) != (old_ip, old_net_uuid):
9624 # if IP is pool then require a network and generate one IP
9625 if new_ip.lower() == constants.NIC_IP_POOL:
9628 new_ip = self.cfg.GenerateIp(new_net_uuid, self.proc.GetECId())
9629 except errors.ReservationError:
9630 raise errors.OpPrereqError("Unable to get a free IP"
9631 " from the address pool",
9633 self.LogInfo("Chose IP %s from network %s",
9636 params[constants.INIC_IP] = new_ip
9638 raise errors.OpPrereqError("ip=pool, but no network found",
9640 # Reserve the new IP in the new network, if any
9643 self.cfg.ReserveIp(new_net_uuid, new_ip, self.proc.GetECId())
9644 self.LogInfo("Reserving IP %s in network %s",
9645 new_ip, new_net_obj.name)
9646 except errors.ReservationError:
9647 raise errors.OpPrereqError("IP %s not available in network %s" %
9648 (new_ip, new_net_obj.name),
9649 errors.ECODE_NOTUNIQUE)
9650 # new network is None so check if new IP is a conflicting IP
9651 elif self.op.conflicts_check:
9652 _CheckForConflictingIp(self, new_ip, pnode)
9654 # release old IP if old network is not None
9655 if old_ip and old_net_uuid:
9657 self.cfg.ReleaseIp(old_net_uuid, old_ip, self.proc.GetECId())
9658 except errors.AddressPoolError:
9659 logging.warning("Release IP %s not contained in network %s",
9660 old_ip, old_net_obj.name)
9662 # there are no changes in (ip, network) tuple and old network is not None
9663 elif (old_net_uuid is not None and
9664 (req_link is not None or req_mode is not None)):
9665 raise errors.OpPrereqError("Not allowed to change link or mode of"
9666 " a NIC that is connected to a network",
9669 private.params = new_params
9670 private.filled = new_filled_params
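# In short, the MAC handling above works as follows: an explicitly given MAC
# is validated and reserved, "auto"/"generate" request a freshly generated
# one, and when only the network changes a new MAC is generated if the MAC
# prefixes of the old and new networks differ.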
9672 def _PreCheckDiskTemplate(self, pnode_info):
9673 """CheckPrereq checks related to a new disk template."""
9674 # Arguments are passed to avoid configuration lookups
9675 instance = self.instance
9676 pnode = instance.primary_node
9677 cluster = self.cluster
9678 if instance.disk_template == self.op.disk_template:
9679 raise errors.OpPrereqError("Instance already has disk template %s" %
9680 instance.disk_template, errors.ECODE_INVAL)
9682 if (instance.disk_template,
9683 self.op.disk_template) not in self._DISK_CONVERSIONS:
9684 raise errors.OpPrereqError("Unsupported disk template conversion from"
9685 " %s to %s" % (instance.disk_template,
9686 self.op.disk_template),
9688 _CheckInstanceState(self, instance, INSTANCE_DOWN,
9689 msg="cannot change disk template")
9690 if self.op.disk_template in constants.DTS_INT_MIRROR:
9691 if self.op.remote_node == pnode:
9692 raise errors.OpPrereqError("Given new secondary node %s is the same"
9693 " as the primary node of the instance" %
9694 self.op.remote_node, errors.ECODE_STATE)
9695 _CheckNodeOnline(self, self.op.remote_node)
9696 _CheckNodeNotDrained(self, self.op.remote_node)
9697 # FIXME: here we assume that the old instance type is DT_PLAIN
9698 assert instance.disk_template == constants.DT_PLAIN
9699 disks = [{constants.IDISK_SIZE: d.size,
9700 constants.IDISK_VG: d.logical_id[0]}
9701 for d in instance.disks]
9702 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9703 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9705 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
9706 snode_group = self.cfg.GetNodeGroup(snode_info.group)
9707 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
9709 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info, self.cfg,
9710 ignore=self.op.ignore_ipolicy)
9711 if pnode_info.group != snode_info.group:
9712 self.LogWarning("The primary and secondary nodes are in two"
9713 " different node groups; the disk parameters"
9714 " from the first disk's node group will be"
9717 if self.op.disk_template not in constants.DTS_EXCL_STORAGE:
9718 # Make sure none of the nodes require exclusive storage
9719 nodes = [pnode_info]
9720 if self.op.disk_template in constants.DTS_INT_MIRROR:
9722 nodes.append(snode_info)
9723 has_es = lambda n: _IsExclusiveStorageEnabledNode(self.cfg, n)
9724 if compat.any(map(has_es, nodes)):
9725 errmsg = ("Cannot convert disk template from %s to %s when exclusive"
9726 " storage is enabled" % (instance.disk_template,
9727 self.op.disk_template))
9728 raise errors.OpPrereqError(errmsg, errors.ECODE_STATE)
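# Only the conversions listed in _DISK_CONVERSIONS at the end of this class
# are accepted here, i.e. plain <-> drbd; converting plain to drbd
# additionally requires op.remote_node, which becomes the DRBD secondary.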
9730 def CheckPrereq(self):
9731 """Check prerequisites.
9733 This only checks the instance list against the existing names.
9736 assert self.op.instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
9737 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9739 cluster = self.cluster = self.cfg.GetClusterInfo()
9740 assert self.instance is not None, \
9741 "Cannot retrieve locked instance %s" % self.op.instance_name
9743 pnode = instance.primary_node
9747 if (self.op.pnode is not None and self.op.pnode != pnode and
9749 # verify that the instance is not up
9750 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9751 instance.hypervisor)
9752 if instance_info.fail_msg:
9753 self.warn.append("Can't get instance runtime information: %s" %
9754 instance_info.fail_msg)
9755 elif instance_info.payload:
9756 raise errors.OpPrereqError("Instance is still running on %s" % pnode,
9759 assert pnode in self.owned_locks(locking.LEVEL_NODE)
9760 nodelist = list(instance.all_nodes)
9761 pnode_info = self.cfg.GetNodeInfo(pnode)
9762 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
9764 #_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9765 assert pnode_info.group in self.owned_locks(locking.LEVEL_NODEGROUP)
9766 group_info = self.cfg.GetNodeGroup(pnode_info.group)
9768 # dictionary with instance information after the modification
9771 # Check disk modifications. This is done here and not in CheckArguments
9772 # (as with NICs), because we need to know the instance's disk template
9773 if instance.disk_template == constants.DT_EXT:
9774 self._CheckMods("disk", self.op.disks, {},
9775 self._VerifyDiskModification)
9777 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
9778 self._VerifyDiskModification)
9780 # Prepare disk/NIC modifications
9781 self.diskmod = PrepareContainerMods(self.op.disks, None)
9782 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
9784 # Check the validity of the `provider' parameter
9785 if instance.disk_template == constants.DT_EXT:
9786 for mod in self.diskmod:
9787 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
9788 if mod[0] == constants.DDM_ADD:
9789 if ext_provider is None:
9790 raise errors.OpPrereqError("Instance template is '%s' and parameter"
9791 " '%s' missing, during disk add" %
9793 constants.IDISK_PROVIDER),
9795 elif mod[0] == constants.DDM_MODIFY:
9797 raise errors.OpPrereqError("Parameter '%s' is invalid during disk"
9799 constants.IDISK_PROVIDER,
9802 for mod in self.diskmod:
9803 ext_provider = mod[2].get(constants.IDISK_PROVIDER, None)
9804 if ext_provider is not None:
9805 raise errors.OpPrereqError("Parameter '%s' is only valid for"
9806 " instances of type '%s'" %
9807 (constants.IDISK_PROVIDER,
9812 if self.op.os_name and not self.op.force:
9813 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9814 self.op.force_variant)
9815 instance_os = self.op.os_name
9817 instance_os = instance.os
9819 assert not (self.op.disk_template and self.op.disks), \
9820 "Can't modify disk template and apply disk changes at the same time"
9822 if self.op.disk_template:
9823 self._PreCheckDiskTemplate(pnode_info)
9825 # hvparams processing
9826 if self.op.hvparams:
9827 hv_type = instance.hypervisor
9828 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9829 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9830 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9833 hypervisor.GetHypervisorClass(hv_type).CheckParameterSyntax(hv_new)
9834 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9835 self.hv_proposed = self.hv_new = hv_new # the new actual values
9836 self.hv_inst = i_hvdict # the new dict (without defaults)
9838 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
9840 self.hv_new = self.hv_inst = {}
9842 # beparams processing
9843 if self.op.beparams:
9844 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9846 objects.UpgradeBeParams(i_bedict)
9847 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9848 be_new = cluster.SimpleFillBE(i_bedict)
9849 self.be_proposed = self.be_new = be_new # the new actual values
9850 self.be_inst = i_bedict # the new dict (without defaults)
9852 self.be_new = self.be_inst = {}
9853 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
9854 be_old = cluster.FillBE(instance)
9856 # CPU param validation -- checking every time a parameter is
9857 # changed to cover all cases where either CPU mask or vcpus have
9859 if (constants.BE_VCPUS in self.be_proposed and
9860 constants.HV_CPU_MASK in self.hv_proposed):
9862 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
9863 # Verify mask is consistent with number of vCPUs. Can skip this
9864 # test if only 1 entry in the CPU mask, which means same mask
9865 # is applied to all vCPUs.
9866 if (len(cpu_list) > 1 and
9867 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
9868 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
9870 (self.be_proposed[constants.BE_VCPUS],
9871 self.hv_proposed[constants.HV_CPU_MASK]),
9874 # Only perform this test if a new CPU mask is given
9875 if constants.HV_CPU_MASK in self.hv_new:
9876 # Calculate the largest CPU number requested
9877 max_requested_cpu = max(map(max, cpu_list))
9878 # Check that all of the instance's nodes have enough physical CPUs to
9879 # satisfy the requested CPU mask
9880 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
9881 max_requested_cpu + 1, instance.hypervisor)
9883 # osparams processing
9884 if self.op.osparams:
9885 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9886 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9887 self.os_inst = i_osdict # the new dict (without defaults)
9891 #TODO(dynmem): do the appropriate check involving MINMEM
9892 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
9893 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
9894 mem_check_list = [pnode]
9895 if be_new[constants.BE_AUTO_BALANCE]:
9896 # either we changed auto_balance to yes or it was from before
9897 mem_check_list.extend(instance.secondary_nodes)
9898 instance_info = self.rpc.call_instance_info(pnode, instance.name,
9899 instance.hypervisor)
9900 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9901 [instance.hypervisor], False)
9902 pninfo = nodeinfo[pnode]
9903 msg = pninfo.fail_msg
9905 # Assume the primary node is unreachable and go ahead
9906 self.warn.append("Can't get info from primary node %s: %s" %
9909 (_, _, (pnhvinfo, )) = pninfo.payload
9910 if not isinstance(pnhvinfo.get("memory_free", None), int):
9911 self.warn.append("Node data from primary node %s doesn't contain"
9912 " free memory information" % pnode)
9913 elif instance_info.fail_msg:
9914 self.warn.append("Can't get instance runtime information: %s" %
9915 instance_info.fail_msg)
9917 if instance_info.payload:
9918 current_mem = int(instance_info.payload["memory"])
9920 # Assume instance not running
9921 # (there is a slight race condition here, but it's not very
9922 # probable, and we have no other way to check)
9923 # TODO: Describe race condition
9925 #TODO(dynmem): do the appropriate check involving MINMEM
9926 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
9927 pnhvinfo["memory_free"])
9929 raise errors.OpPrereqError("This change will prevent the instance"
9930 " from starting, due to %d MB of memory"
9931 " missing on its primary node" %
9932 miss_mem, errors.ECODE_NORES)
9934 if be_new[constants.BE_AUTO_BALANCE]:
9935 for node, nres in nodeinfo.items():
9936 if node not in instance.secondary_nodes:
9938 nres.Raise("Can't get info from secondary node %s" % node,
9939 prereq=True, ecode=errors.ECODE_STATE)
9940 (_, _, (nhvinfo, )) = nres.payload
9941 if not isinstance(nhvinfo.get("memory_free", None), int):
9942 raise errors.OpPrereqError("Secondary node %s didn't return free"
9943 " memory information" % node,
9945 #TODO(dynmem): do the appropriate check involving MINMEM
9946 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
9947 raise errors.OpPrereqError("This change will prevent the instance"
9948 " from failover to its secondary node"
9949 " %s, due to not enough memory" % node,
9952 if self.op.runtime_mem:
9953 remote_info = self.rpc.call_instance_info(instance.primary_node,
9955 instance.hypervisor)
9956 remote_info.Raise("Error checking node %s" % instance.primary_node)
9957 if not remote_info.payload: # not running already
9958 raise errors.OpPrereqError("Instance %s is not running" %
9959 instance.name, errors.ECODE_STATE)
9961 current_memory = remote_info.payload["memory"]
9962 if (not self.op.force and
9963 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
9964 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
9965 raise errors.OpPrereqError("Instance %s must have memory between %d"
9966 " and %d MB of memory unless --force is"
9969 self.be_proposed[constants.BE_MINMEM],
9970 self.be_proposed[constants.BE_MAXMEM]),
9973 delta = self.op.runtime_mem - current_memory
9975 _CheckNodeFreeMemory(self, instance.primary_node,
9976 "ballooning memory for instance %s" %
9977 instance.name, delta, instance.hypervisor)
9979 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9980 raise errors.OpPrereqError("Disk operations not supported for"
9981 " diskless instances", errors.ECODE_INVAL)
9983 def _PrepareNicCreate(_, params, private):
9984 self._PrepareNicModification(params, private, None, None,
9988 def _PrepareNicMod(_, nic, params, private):
9989 self._PrepareNicModification(params, private, nic.ip, nic.network,
9990 nic.nicparams, cluster, pnode)
9993 def _PrepareNicRemove(_, params, __):
9995 net = params.network
9996 if net is not None and ip is not None:
9997 self.cfg.ReleaseIp(net, ip, self.proc.GetECId())
9999 # Verify NIC changes (operating on copy)
10000 nics = instance.nics[:]
10001 ApplyContainerMods("NIC", nics, None, self.nicmod,
10002 _PrepareNicCreate, _PrepareNicMod, _PrepareNicRemove)
10003 if len(nics) > constants.MAX_NICS:
10004 raise errors.OpPrereqError("Instance has too many network interfaces"
10005 " (%d), cannot add more" % constants.MAX_NICS,
10006 errors.ECODE_STATE)
10008 def _PrepareDiskMod(_, disk, params, __):
10009 disk.name = params.get(constants.IDISK_NAME, None)
10011 # Verify disk changes (operating on a copy)
10012 disks = copy.deepcopy(instance.disks)
10013 ApplyContainerMods("disk", disks, None, self.diskmod, None, _PrepareDiskMod,
10015 utils.ValidateDeviceNames("disk", disks)
10016 if len(disks) > constants.MAX_DISKS:
10017 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
10018 " more" % constants.MAX_DISKS,
10019 errors.ECODE_STATE)
10020 disk_sizes = [disk.size for disk in instance.disks]
10021 disk_sizes.extend(params["size"] for (op, idx, params, private) in
10022 self.diskmod if op == constants.DDM_ADD)
10023 ispec[constants.ISPEC_DISK_COUNT] = len(disk_sizes)
10024 ispec[constants.ISPEC_DISK_SIZE] = disk_sizes
10026 if self.op.offline is not None and self.op.offline:
10027 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE,
10028 msg="can't change to offline")
10030 # Pre-compute NIC changes (necessary to use result in hooks)
10031 self._nic_chgdesc = []
10033 # Operate on copies as this is still in prereq
10034 nics = [nic.Copy() for nic in instance.nics]
10035 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
10036 self._CreateNewNic, self._ApplyNicMods, None)
10037 # Verify that NIC names are unique and valid
10038 utils.ValidateDeviceNames("NIC", nics)
10039 self._new_nics = nics
10040 ispec[constants.ISPEC_NIC_COUNT] = len(self._new_nics)
10042 self._new_nics = None
10043 ispec[constants.ISPEC_NIC_COUNT] = len(instance.nics)
10045 if not self.op.ignore_ipolicy:
10046 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
10049 # Fill ispec with backend parameters
10050 ispec[constants.ISPEC_SPINDLE_USE] = \
10051 self.be_new.get(constants.BE_SPINDLE_USE, None)
10052 ispec[constants.ISPEC_CPU_COUNT] = self.be_new.get(constants.BE_VCPUS,
10055 # Copy ispec to verify parameters with min/max values separately
10056 if self.op.disk_template:
10057 new_disk_template = self.op.disk_template
10059 new_disk_template = instance.disk_template
10060 ispec_max = ispec.copy()
10061 ispec_max[constants.ISPEC_MEM_SIZE] = \
10062 self.be_new.get(constants.BE_MAXMEM, None)
10063 res_max = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_max,
10065 ispec_min = ispec.copy()
10066 ispec_min[constants.ISPEC_MEM_SIZE] = \
10067 self.be_new.get(constants.BE_MINMEM, None)
10068 res_min = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec_min,
10071 if (res_max or res_min):
10072 # FIXME: Improve error message by including information about whether
10073 # the upper or lower limit of the parameter fails the ipolicy.
10074 msg = ("Instance allocation to group %s (%s) violates policy: %s" %
10075 (group_info, group_info.name,
10076 utils.CommaJoin(set(res_max + res_min))))
10077 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
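# A sketch of the ispec built above (numbers hypothetical): the proposed
# instance is checked against the group ipolicy twice, once with BE_MAXMEM
# and once with BE_MINMEM as the memory size, so both ends of the range must
# fit the policy.
#
#   {constants.ISPEC_DISK_COUNT: 1,
#    constants.ISPEC_DISK_SIZE: [1024],
#    constants.ISPEC_NIC_COUNT: 1,
#    constants.ISPEC_CPU_COUNT: 2,
#    constants.ISPEC_SPINDLE_USE: 1,
#    constants.ISPEC_MEM_SIZE: 2048}   # ispec_max; ispec_min uses BE_MINMEM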
10079 def _ConvertPlainToDrbd(self, feedback_fn):
10080 """Converts an instance from plain to drbd.
10083 feedback_fn("Converting template to drbd")
10084 instance = self.instance
10085 pnode = instance.primary_node
10086 snode = self.op.remote_node
10088 assert instance.disk_template == constants.DT_PLAIN
10090 # create a fake disk info for _GenerateDiskTemplate
10091 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10092 constants.IDISK_VG: d.logical_id[0],
10093 constants.IDISK_NAME: d.name}
10094 for d in instance.disks]
10095 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10096 instance.name, pnode, [snode],
10097 disk_info, None, None, 0, feedback_fn,
10099 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
10101 p_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, pnode)
10102 s_excl_stor = _IsExclusiveStorageEnabledNodeName(self.cfg, snode)
10103 info = _GetInstanceInfoText(instance)
10104 feedback_fn("Creating additional volumes...")
10105 # first, create the missing data and meta devices
10106 for disk in anno_disks:
10107 # unfortunately this is... not too nice
10108 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10109 info, True, p_excl_stor)
10110 for child in disk.children:
10111 _CreateSingleBlockDev(self, snode, instance, child, info, True,
10113 # at this stage, all new LVs have been created, we can rename the
10115 feedback_fn("Renaming original volumes...")
10116 rename_list = [(o, n.children[0].logical_id)
10117 for (o, n) in zip(instance.disks, new_disks)]
10118 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10119 result.Raise("Failed to rename original LVs")
10121 feedback_fn("Initializing DRBD devices...")
10122 # all child devices are in place, we can now create the DRBD devices
10124 for disk in anno_disks:
10125 for (node, excl_stor) in [(pnode, p_excl_stor), (snode, s_excl_stor)]:
10126 f_create = node == pnode
10127 _CreateSingleBlockDev(self, node, instance, disk, info, f_create,
10129 except errors.GenericError, e:
10130 feedback_fn("Initializing of DRBD devices failed;"
10131 " renaming back original volumes...")
10132 for disk in new_disks:
10133 self.cfg.SetDiskID(disk, pnode)
10134 rename_back_list = [(n.children[0], o.logical_id)
10135 for (n, o) in zip(new_disks, instance.disks)]
10136 result = self.rpc.call_blockdev_rename(pnode, rename_back_list)
10137 result.Raise("Failed to rename LVs back after error %s" % str(e))
10140 # at this point, the instance has been modified
10141 instance.disk_template = constants.DT_DRBD8
10142 instance.disks = new_disks
10143 self.cfg.Update(instance, feedback_fn)
10145 # Release node locks while waiting for sync
10146 _ReleaseLocks(self, locking.LEVEL_NODE)
10148 # disks are created, waiting for sync
10149 disk_abort = not _WaitForSync(self, instance,
10150 oneshot=not self.op.wait_for_sync)
10152 raise errors.OpExecError("There are some degraded disks for"
10153 " this instance, please cleanup manually")
10155 # Node resource locks will be released by caller
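# Layout of the disks created above: each new top-level disk is a DRBD8
# device whose children are [data LV, meta LV]; the original plain LV is
# renamed into the data child, so only the meta devices (and the secondary
# node's copies) have to be created from scratch.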
10157 def _ConvertDrbdToPlain(self, feedback_fn):
10158 """Converts an instance from drbd to plain.
10161 instance = self.instance
10163 assert len(instance.secondary_nodes) == 1
10164 assert instance.disk_template == constants.DT_DRBD8
10166 pnode = instance.primary_node
10167 snode = instance.secondary_nodes[0]
10168 feedback_fn("Converting template to plain")
10170 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
10171 new_disks = [d.children[0] for d in instance.disks]
10173 # copy over size, mode and name
10174 for parent, child in zip(old_disks, new_disks):
10175 child.size = parent.size
10176 child.mode = parent.mode
10177 child.name = parent.name
10179 # this is a DRBD disk, return its port to the pool
10180 # NOTE: this must be done right before the call to cfg.Update!
10181 for disk in old_disks:
10182 tcp_port = disk.logical_id[2]
10183 self.cfg.AddTcpUdpPort(tcp_port)
10185 # update instance structure
10186 instance.disks = new_disks
10187 instance.disk_template = constants.DT_PLAIN
10188 _UpdateIvNames(0, instance.disks)
10189 self.cfg.Update(instance, feedback_fn)
10191 # Release locks in case removing disks takes a while
10192 _ReleaseLocks(self, locking.LEVEL_NODE)
10194 feedback_fn("Removing volumes on the secondary node...")
10195 for disk in old_disks:
10196 self.cfg.SetDiskID(disk, snode)
10197 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10199 self.LogWarning("Could not remove block device %s on node %s,"
10200 " continuing anyway: %s", disk.iv_name, snode, msg)
10202 feedback_fn("Removing unneeded volumes on the primary node...")
10203 for idx, disk in enumerate(old_disks):
10204 meta = disk.children[1]
10205 self.cfg.SetDiskID(meta, pnode)
10206 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10208 self.LogWarning("Could not remove metadata for disk %d on node %s,"
10209 " continuing anyway: %s", idx, pnode, msg)
10211 def _CreateNewDisk(self, idx, params, _):
10212 """Creates a new disk.
10215 instance = self.instance
10218 if instance.disk_template in constants.DTS_FILEBASED:
10219 (file_driver, file_path) = instance.disks[0].logical_id
10220 file_path = os.path.dirname(file_path)
10222 file_driver = file_path = None
10225 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
10226 instance.primary_node, instance.secondary_nodes,
10227 [params], file_path, file_driver, idx,
10228 self.Log, self.diskparams)[0]
10230 info = _GetInstanceInfoText(instance)
10232 logging.info("Creating volume %s for instance %s",
10233 disk.iv_name, instance.name)
10234 # Note: this needs to be kept in sync with _CreateDisks
10236 for node in instance.all_nodes:
10237 f_create = (node == instance.primary_node)
10239 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
10240 except errors.OpExecError, err:
10241 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
10242 disk.iv_name, disk, node, err)
10244 if self.cluster.prealloc_wipe_disks:
10246 _WipeDisks(self, instance,
10247 disks=[(idx, disk, 0)])
10250 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
10254 def _ModifyDisk(idx, disk, params, _):
10255 """Modifies a disk.
10259 mode = params.get(constants.IDISK_MODE, None)
10262 changes.append(("disk.mode/%d" % idx, disk.mode))
10264 name = params.get(constants.IDISK_NAME, None)
10266 changes.append(("disk.name/%d" % idx, disk.name))
10270 def _RemoveDisk(self, idx, root, _):
10274 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
10275 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
10276 self.cfg.SetDiskID(disk, node)
10277 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10279 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
10280 " continuing anyway", idx, node, msg)
10282 # if this is a DRBD disk, return its port to the pool
10283 if root.dev_type in constants.LDS_DRBD:
10284 self.cfg.AddTcpUdpPort(root.logical_id[2])
10286 def _CreateNewNic(self, idx, params, private):
10287 """Creates data structure for a new network interface.
10290 mac = params[constants.INIC_MAC]
10291 ip = params.get(constants.INIC_IP, None)
10292 net = params.get(constants.INIC_NETWORK, None)
10293 name = params.get(constants.INIC_NAME, None)
10294 net_uuid = self.cfg.LookupNetwork(net)
10295 #TODO: not private.filled?? can a nic have no nicparams??
10296 nicparams = private.filled
10297 nobj = objects.NIC(mac=mac, ip=ip, network=net_uuid, name=name,
10298 nicparams=nicparams)
10299 nobj.uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10303 "add:mac=%s,ip=%s,mode=%s,link=%s,network=%s" %
10304 (mac, ip, private.filled[constants.NIC_MODE],
10305 private.filled[constants.NIC_LINK],
10309 def _ApplyNicMods(self, idx, nic, params, private):
10310 """Modifies a network interface.
10315 for key in [constants.INIC_MAC, constants.INIC_IP, constants.INIC_NAME]:
10317 changes.append(("nic.%s/%d" % (key, idx), params[key]))
10318 setattr(nic, key, params[key])
10320 new_net = params.get(constants.INIC_NETWORK, nic.network)
10321 new_net_uuid = self.cfg.LookupNetwork(new_net)
10322 if new_net_uuid != nic.network:
10323 changes.append(("nic.network/%d" % idx, new_net))
10324 nic.network = new_net_uuid
10327 nic.nicparams = private.filled
10329 for (key, val) in nic.nicparams.items():
10330 changes.append(("nic.%s/%d" % (key, idx), val))
10334 def Exec(self, feedback_fn):
10335 """Modifies an instance.
10337 All parameters take effect only at the next restart of the instance.
10340 # Process here the warnings from CheckPrereq, as we don't have a
10341 # feedback_fn there.
10342 # TODO: Replace with self.LogWarning
10343 for warn in self.warn:
10344 feedback_fn("WARNING: %s" % warn)
10346 assert ((self.op.disk_template is None) ^
10347 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
10348 "Not owning any node resource locks"
10351 instance = self.instance
10355 instance.primary_node = self.op.pnode
10358 if self.op.runtime_mem:
10359 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
10361 self.op.runtime_mem)
10362 rpcres.Raise("Cannot modify instance runtime memory")
10363 result.append(("runtime_memory", self.op.runtime_mem))
10365 # Apply disk changes
10366 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
10367 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
10368 _UpdateIvNames(0, instance.disks)
10370 if self.op.disk_template:
10372 check_nodes = set(instance.all_nodes)
10373 if self.op.remote_node:
10374 check_nodes.add(self.op.remote_node)
10375 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
10376 owned = self.owned_locks(level)
10377 assert not (check_nodes - owned), \
10378 ("Not owning the correct locks, owning %r, expected at least %r" %
10379 (owned, check_nodes))
10381 r_shut = _ShutdownInstanceDisks(self, instance)
10383 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10384 " proceed with disk template conversion")
10385 mode = (instance.disk_template, self.op.disk_template)
10387 self._DISK_CONVERSIONS[mode](self, feedback_fn)
10389 self.cfg.ReleaseDRBDMinors(instance.name)
10391 result.append(("disk_template", self.op.disk_template))
10393 assert instance.disk_template == self.op.disk_template, \
10394 ("Expected disk template '%s', found '%s'" %
10395 (self.op.disk_template, instance.disk_template))
10397 # Release node and resource locks if there are any (they might already have
10398 # been released during disk conversion)
10399 _ReleaseLocks(self, locking.LEVEL_NODE)
10400 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10402 # Apply NIC changes
10403 if self._new_nics is not None:
10404 instance.nics = self._new_nics
10405 result.extend(self._nic_chgdesc)
10408 if self.op.hvparams:
10409 instance.hvparams = self.hv_inst
10410 for key, val in self.op.hvparams.iteritems():
10411 result.append(("hv/%s" % key, val))
10414 if self.op.beparams:
10415 instance.beparams = self.be_inst
10416 for key, val in self.op.beparams.iteritems():
10417 result.append(("be/%s" % key, val))
10420 if self.op.os_name:
10421 instance.os = self.op.os_name
10424 if self.op.osparams:
10425 instance.osparams = self.os_inst
10426 for key, val in self.op.osparams.iteritems():
10427 result.append(("os/%s" % key, val))
10429 if self.op.offline is None:
10432 elif self.op.offline:
10433 # Mark instance as offline
10434 self.cfg.MarkInstanceOffline(instance.name)
10435 result.append(("admin_state", constants.ADMINST_OFFLINE))
10437 # Mark instance as online, but stopped
10438 self.cfg.MarkInstanceDown(instance.name)
10439 result.append(("admin_state", constants.ADMINST_DOWN))
10441 self.cfg.Update(instance, feedback_fn, self.proc.GetECId())
10443 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
10444 self.owned_locks(locking.LEVEL_NODE)), \
10445 "All node locks should have been released by now"
10449 _DISK_CONVERSIONS = {
10450 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10451 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10455 class LUInstanceChangeGroup(LogicalUnit):
10456 HPATH = "instance-change-group"
10457 HTYPE = constants.HTYPE_INSTANCE
10460 def ExpandNames(self):
10461 self.share_locks = _ShareAll()
10463 self.needed_locks = {
10464 locking.LEVEL_NODEGROUP: [],
10465 locking.LEVEL_NODE: [],
10466 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
10469 self._ExpandAndLockInstance()
10471 if self.op.target_groups:
10472 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
10473 self.op.target_groups)
10475 self.req_target_uuids = None
10477 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
10479 def DeclareLocks(self, level):
10480 if level == locking.LEVEL_NODEGROUP:
10481 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10483 if self.req_target_uuids:
10484 lock_groups = set(self.req_target_uuids)
10486 # Lock all groups used by instance optimistically; this requires going
10487 # via the node before it's locked, requiring verification later on
10488 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10489 lock_groups.update(instance_groups)
10491 # No target groups, need to lock all of them
10492 lock_groups = locking.ALL_SET
10494 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
10496 elif level == locking.LEVEL_NODE:
10497 if self.req_target_uuids:
10498 # Lock all nodes used by instances
10499 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10500 self._LockInstancesNodes()
10502 # Lock all nodes in all potential target groups
10503 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
10504 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
10505 member_nodes = [node_name
10506 for group in lock_groups
10507 for node_name in self.cfg.GetNodeGroup(group).members]
10508 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
10510 # Lock all nodes as all groups are potential targets
10511 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10513 def CheckPrereq(self):
10514 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
10515 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
10516 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
10518 assert (self.req_target_uuids is None or
10519 owned_groups.issuperset(self.req_target_uuids))
10520 assert owned_instances == set([self.op.instance_name])
10522 # Get instance information
10523 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10525 # Check if node groups for locked instance are still correct
10526 assert owned_nodes.issuperset(self.instance.all_nodes), \
10527 ("Instance %s's nodes changed while we kept the lock" %
10528 self.op.instance_name)
10530 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
10533 if self.req_target_uuids:
10534 # User requested specific target groups
10535 self.target_uuids = frozenset(self.req_target_uuids)
10537 # All groups except those used by the instance are potential targets
10538 self.target_uuids = owned_groups - inst_groups
10540 conflicting_groups = self.target_uuids & inst_groups
10541 if conflicting_groups:
10542 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
10543 " used by the instance '%s'" %
10544 (utils.CommaJoin(conflicting_groups),
10545 self.op.instance_name),
10546 errors.ECODE_INVAL)
10548 if not self.target_uuids:
10549 raise errors.OpPrereqError("There are no possible target groups",
10550 errors.ECODE_INVAL)
10552 def BuildHooksEnv(self):
10553 """Build hooks env.
10556 assert self.target_uuids
10559 "TARGET_GROUPS": " ".join(self.target_uuids),
10562 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10566 def BuildHooksNodes(self):
10567 """Build hooks nodes.
10570 mn = self.cfg.GetMasterNode()
10571 return ([mn], [mn])
10573 def Exec(self, feedback_fn):
10574 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
10576 assert instances == [self.op.instance_name], "Instance not locked"
10578 req = iallocator.IAReqGroupChange(instances=instances,
10579 target_groups=list(self.target_uuids))
10580 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
10582 ial.Run(self.op.iallocator)
10584 if not ial.success:
10585 raise errors.OpPrereqError("Can't compute solution for changing group of"
10586 " instance '%s' using iallocator '%s': %s" %
10587 (self.op.instance_name, self.op.iallocator,
10588 ial.info), errors.ECODE_NORES)
10590 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
10592 self.LogInfo("Iallocator returned %s job(s) for changing group of"
10593 " instance '%s'", len(jobs), self.op.instance_name)
10595 return ResultWithJobs(jobs)
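# The iallocator request issued above is an IAReqGroupChange for exactly the
# locked instance, restricted to self.target_uuids; the allocator's answer
# is turned into jobs by _LoadNodeEvacResult and submitted via
# ResultWithJobs.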
10598 class LUBackupQuery(NoHooksLU):
10599 """Query the exports list
10604 def CheckArguments(self):
10605 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
10606 ["node", "export"], self.op.use_locking)
10608 def ExpandNames(self):
10609 self.expq.ExpandNames(self)
10611 def DeclareLocks(self, level):
10612 self.expq.DeclareLocks(self, level)
10614 def Exec(self, feedback_fn):
10617 for (node, expname) in self.expq.OldStyleQuery(self):
10618 if expname is None:
10619 result[node] = False
10621 result.setdefault(node, []).append(expname)
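# Shape of the returned dictionary (node names hypothetical): nodes whose
# export list could not be retrieved map to False, the others map to the
# list of export names found on them, e.g.
#
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": False}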
10626 class _ExportQuery(_QueryBase):
10627 FIELDS = query.EXPORT_FIELDS
10629 #: The node name is not a unique key for this query
10630 SORT_FIELD = "node"
10632 def ExpandNames(self, lu):
10633 lu.needed_locks = {}
10635 # The following variables interact with _QueryBase._GetNames
10637 self.wanted = _GetWantedNodes(lu, self.names)
10639 self.wanted = locking.ALL_SET
10641 self.do_locking = self.use_locking
10643 if self.do_locking:
10644 lu.share_locks = _ShareAll()
10645 lu.needed_locks = {
10646 locking.LEVEL_NODE: self.wanted,
10650 lu.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10652 def DeclareLocks(self, lu, level):
10655 def _GetQueryData(self, lu):
10656 """Computes the list of nodes and their attributes.
10659 # Locking is not used
10661 assert not (compat.any(lu.glm.is_owned(level)
10662 for level in locking.LEVELS
10663 if level != locking.LEVEL_CLUSTER) or
10664 self.do_locking or self.use_locking)
10666 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
10670 for (node, nres) in lu.rpc.call_export_list(nodes).items():
10672 result.append((node, None))
10674 result.extend((node, expname) for expname in nres.payload)
10679 class LUBackupPrepare(NoHooksLU):
10680 """Prepares an instance for an export and returns useful information.
10685 def ExpandNames(self):
10686 self._ExpandAndLockInstance()
10688 def CheckPrereq(self):
10689 """Check prerequisites.
10692 instance_name = self.op.instance_name
10694 self.instance = self.cfg.GetInstanceInfo(instance_name)
10695 assert self.instance is not None, \
10696 "Cannot retrieve locked instance %s" % self.op.instance_name
10697 _CheckNodeOnline(self, self.instance.primary_node)
10699 self._cds = _GetClusterDomainSecret()
10701 def Exec(self, feedback_fn):
10702 """Prepares an instance for an export.
10705 instance = self.instance
10707 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10708 salt = utils.GenerateSecret(8)
10710 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10711 result = self.rpc.call_x509_cert_create(instance.primary_node,
10712 constants.RIE_CERT_VALIDITY)
10713 result.Raise("Can't create X509 key and certificate on %s" % result.node)
10715 (name, cert_pem) = result.payload
10717 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10721 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10722 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10724 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10730 class LUBackupExport(LogicalUnit):
10731 """Export an instance to an image in the cluster.
10734 HPATH = "instance-export"
10735 HTYPE = constants.HTYPE_INSTANCE
10738 def CheckArguments(self):
10739 """Check the arguments.
10742 self.x509_key_name = self.op.x509_key_name
10743 self.dest_x509_ca_pem = self.op.destination_x509_ca
10745 if self.op.mode == constants.EXPORT_MODE_REMOTE:
10746 if not self.x509_key_name:
10747 raise errors.OpPrereqError("Missing X509 key name for encryption",
10748 errors.ECODE_INVAL)
10750 if not self.dest_x509_ca_pem:
10751 raise errors.OpPrereqError("Missing destination X509 CA",
10752 errors.ECODE_INVAL)
10754 def ExpandNames(self):
10755 self._ExpandAndLockInstance()
10757 # Lock all nodes for local exports
10758 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10759 # FIXME: lock only instance primary and destination node
10761 # Sad but true, for now we have to lock all nodes, as we don't know where
10762 # the previous export might be, and in this LU we search for it and
10763 # remove it from its current node. In the future we could fix this by:
10764 # - making a tasklet to search (share-lock all), then create the
10765 # new one, then one to remove, after
10766 # - removing the removal operation altogether
10767 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10769 # Allocations should be stopped while this LU runs with node locks, but
10770 # it doesn't have to be exclusive
10771 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
10772 self.needed_locks[locking.LEVEL_NODE_ALLOC] = locking.ALL_SET
10774 def DeclareLocks(self, level):
10775 """Last minute lock declaration."""
10776 # All nodes are locked anyway, so nothing to do here.
10778 def BuildHooksEnv(self):
10779 """Build hooks env.
10781 This will run on the master, primary node and target node.
10785 "EXPORT_MODE": self.op.mode,
10786 "EXPORT_NODE": self.op.target_node,
10787 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10788 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10789 # TODO: Generic function for boolean env variables
10790 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10793 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10797 def BuildHooksNodes(self):
10798 """Build hooks nodes.
10801 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10803 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10804 nl.append(self.op.target_node)
10808 def CheckPrereq(self):
10809 """Check prerequisites.
10811 This checks that the instance and node names are valid.
10814 instance_name = self.op.instance_name
10816 self.instance = self.cfg.GetInstanceInfo(instance_name)
10817 assert self.instance is not None, \
10818 "Cannot retrieve locked instance %s" % self.op.instance_name
10819 _CheckNodeOnline(self, self.instance.primary_node)
10821 if (self.op.remove_instance and
10822 self.instance.admin_state == constants.ADMINST_UP and
10823 not self.op.shutdown):
10824 raise errors.OpPrereqError("Can not remove instance without shutting it"
10825 " down before", errors.ECODE_STATE)
10827 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10828 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10829 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10830 assert self.dst_node is not None
10832 _CheckNodeOnline(self, self.dst_node.name)
10833 _CheckNodeNotDrained(self, self.dst_node.name)
10836 self.dest_disk_info = None
10837 self.dest_x509_ca = None
10839 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10840 self.dst_node = None
10842 if len(self.op.target_node) != len(self.instance.disks):
10843 raise errors.OpPrereqError(("Received destination information for %s"
10844 " disks, but instance %s has %s disks") %
10845 (len(self.op.target_node), instance_name,
10846 len(self.instance.disks)),
10847 errors.ECODE_INVAL)
10849 cds = _GetClusterDomainSecret()
10851 # Check X509 key name
10853 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10854 except (TypeError, ValueError), err:
10855 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
10856 errors.ECODE_INVAL)
10858 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10859 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10860 errors.ECODE_INVAL)
10862 # Load and verify CA
10864 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10865 except OpenSSL.crypto.Error, err:
10866 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10867 (err, ), errors.ECODE_INVAL)
10869 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10870 if errcode is not None:
10871 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10872 (msg, ), errors.ECODE_INVAL)
10874 self.dest_x509_ca = cert
10876 # Verify target information
10878 for idx, disk_data in enumerate(self.op.target_node):
10880 (host, port, magic) = \
10881 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10882 except errors.GenericError, err:
10883 raise errors.OpPrereqError("Target info for disk %s: %s" %
10884 (idx, err), errors.ECODE_INVAL)
10886 disk_info.append((host, port, magic))
10888 assert len(disk_info) == len(self.op.target_node)
10889 self.dest_disk_info = disk_info
10892 raise errors.ProgrammerError("Unhandled export mode %r" %
10895 # instance disk type verification
10896 # TODO: Implement export support for file-based disks
10897 for disk in self.instance.disks:
10898 if disk.dev_type == constants.LD_FILE:
10899 raise errors.OpPrereqError("Export not supported for instances with"
10900 " file-based disks", errors.ECODE_INVAL)
10902 def _CleanupExports(self, feedback_fn):
10903 """Removes exports of current instance from all other nodes.
10905 If an instance in a cluster with nodes A..D was exported to node C, its
10906 exports will be removed from the nodes A, B and D.
10909 assert self.op.mode != constants.EXPORT_MODE_REMOTE
10911 nodelist = self.cfg.GetNodeList()
10912 nodelist.remove(self.dst_node.name)
10914 # on one-node clusters nodelist will be empty after the removal
10915 # if we proceeded, the backup would be removed because OpBackupQuery
10916 # substitutes an empty list with the full cluster node list.
10917 iname = self.instance.name
10918 if nodelist:
10919 feedback_fn("Removing old exports for instance %s" % iname)
10920 exportlist = self.rpc.call_export_list(nodelist)
10921 for node in exportlist:
10922 if exportlist[node].fail_msg:
10923 continue
10924 if iname in exportlist[node].payload:
10925 msg = self.rpc.call_export_remove(node, iname).fail_msg
10926 if msg:
10927 self.LogWarning("Could not remove older export for instance %s"
10928 " on node %s: %s", iname, node, msg)
10930 def Exec(self, feedback_fn):
10931 """Export an instance to an image in the cluster.
10934 assert self.op.mode in constants.EXPORT_MODES
10936 instance = self.instance
10937 src_node = instance.primary_node
10939 if self.op.shutdown:
10940 # shutdown the instance, but not the disks
10941 feedback_fn("Shutting down instance %s" % instance.name)
10942 result = self.rpc.call_instance_shutdown(src_node, instance,
10943 self.op.shutdown_timeout,
10945 # TODO: Maybe ignore failures if ignore_remove_failures is set
10946 result.Raise("Could not shutdown instance %s on"
10947 " node %s" % (instance.name, src_node))
10949 # set the disks ID correctly since call_instance_start needs the
10950 # correct drbd minor to create the symlinks
10951 for disk in instance.disks:
10952 self.cfg.SetDiskID(disk, src_node)
10954 activate_disks = (instance.admin_state != constants.ADMINST_UP)
10956 if activate_disks:
10957 # Activate the instance disks if we're exporting a stopped instance
10958 feedback_fn("Activating disks for %s" % instance.name)
10959 _StartInstanceDisks(self, instance, None)
10962 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10965 helper.CreateSnapshots()
10967 if (self.op.shutdown and
10968 instance.admin_state == constants.ADMINST_UP and
10969 not self.op.remove_instance):
10970 assert not activate_disks
10971 feedback_fn("Starting instance %s" % instance.name)
10972 result = self.rpc.call_instance_start(src_node,
10973 (instance, None, None), False,
10975 msg = result.fail_msg
10976 if msg:
10977 feedback_fn("Failed to start instance: %s" % msg)
10978 _ShutdownInstanceDisks(self, instance)
10979 raise errors.OpExecError("Could not start instance: %s" % msg)
10981 if self.op.mode == constants.EXPORT_MODE_LOCAL:
10982 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10983 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10984 connect_timeout = constants.RIE_CONNECT_TIMEOUT
10985 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10987 (key_name, _, _) = self.x509_key_name
10989 dest_ca_pem = \
10990 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10991 self.dest_x509_ca)
10993 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10994 key_name, dest_ca_pem,
10995 timeouts)
10999 # Check for backwards compatibility
11000 assert len(dresults) == len(instance.disks)
11001 assert compat.all(isinstance(i, bool) for i in dresults), \
11002 "Not all results are boolean: %r" % dresults
11006 feedback_fn("Deactivating disks for %s" % instance.name)
11007 _ShutdownInstanceDisks(self, instance)
11009 if not (compat.all(dresults) and fin_resu):
11010 failures = []
11011 if not fin_resu:
11012 failures.append("export finalization")
11013 if not compat.all(dresults):
11014 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11015 if not dsk)
11016 failures.append("disk export: disk(s) %s" % fdsk)
11018 raise errors.OpExecError("Export failed, errors in %s" %
11019 utils.CommaJoin(failures))
11021 # At this point, the export was successful, we can cleanup/finish
11023 # Remove instance if requested
11024 if self.op.remove_instance:
11025 feedback_fn("Removing instance %s" % instance.name)
11026 _RemoveInstance(self, feedback_fn, instance,
11027 self.op.ignore_remove_failures)
11029 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11030 self._CleanupExports(feedback_fn)
11032 return fin_resu, dresults
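  # Illustrative sketch (editor's addition): the returned pair is the export
  # finalization status plus one boolean per disk, e.g. a two-disk instance
  # yields something like (True, [True, True]); any False value would already
  # have triggered the OpExecError above, so a normal return implies success.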
11035 class LUBackupRemove(NoHooksLU):
11036 """Remove exports related to the named instance.
11041 def ExpandNames(self):
11042 self.needed_locks = {
11043 # We need all nodes to be locked in order for RemoveExport to work, but
11044 # we don't need to lock the instance itself, as nothing will happen to it
11045 # (and we can remove exports also for a removed instance)
11046 locking.LEVEL_NODE: locking.ALL_SET,
11048 # Removing backups is quick, so blocking allocations is justified
11049 locking.LEVEL_NODE_ALLOC: locking.ALL_SET,
11052 # Allocations should be stopped while this LU runs with node locks, but it
11053 # doesn't have to be exclusive
11054 self.share_locks[locking.LEVEL_NODE_ALLOC] = 1
11056 def Exec(self, feedback_fn):
11057 """Remove any export.
11060 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11061 # If the instance was not found we'll try with the name that was passed in.
11062 # This will only work if it was an FQDN, though.
11064 if not instance_name:
11066 instance_name = self.op.instance_name
11068 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11069 exportlist = self.rpc.call_export_list(locked_nodes)
11071 for node in exportlist:
11072 msg = exportlist[node].fail_msg
11073 if msg:
11074 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11075 continue
11076 if instance_name in exportlist[node].payload:
11078 result = self.rpc.call_export_remove(node, instance_name)
11079 msg = result.fail_msg
11080 if msg:
11081 logging.error("Could not remove export for instance %s"
11082 " on node %s: %s", instance_name, node, msg)
11084 if fqdn_warn and not found:
11085 feedback_fn("Export not found. If trying to remove an export belonging"
11086 " to a deleted instance please use its Fully Qualified"
11090 class LUGroupAdd(LogicalUnit):
11091 """Logical unit for creating node groups.
11094 HPATH = "group-add"
11095 HTYPE = constants.HTYPE_GROUP
11098 def ExpandNames(self):
11099 # We need the new group's UUID here so that we can create and acquire the
11100 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11101 # that it should not check whether the UUID exists in the configuration.
11102 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11103 self.needed_locks = {}
11104 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11106 def CheckPrereq(self):
11107 """Check prerequisites.
11109 This checks that the given group name is not an existing node group
11113 try:
11114 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11115 except errors.OpPrereqError:
11116 pass
11117 else:
11118 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11119 " node group (UUID: %s)" %
11120 (self.op.group_name, existing_uuid),
11121 errors.ECODE_EXISTS)
11123 if self.op.ndparams:
11124 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11126 if self.op.hv_state:
11127 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
11128 else:
11129 self.new_hv_state = None
11131 if self.op.disk_state:
11132 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
11133 else:
11134 self.new_disk_state = None
11136 if self.op.diskparams:
11137 for templ in constants.DISK_TEMPLATES:
11138 if templ in self.op.diskparams:
11139 utils.ForceDictType(self.op.diskparams[templ],
11140 constants.DISK_DT_TYPES)
11141 self.new_diskparams = self.op.diskparams
11142 try:
11143 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
11144 except errors.OpPrereqError, err:
11145 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
11146 errors.ECODE_INVAL)
11147 else:
11148 self.new_diskparams = {}
11150 if self.op.ipolicy:
11151 cluster = self.cfg.GetClusterInfo()
11152 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
11153 try:
11154 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
11155 except errors.ConfigurationError, err:
11156 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
11157 errors.ECODE_INVAL)
11159 def BuildHooksEnv(self):
11160 """Build hooks env.
11164 "GROUP_NAME": self.op.group_name,
11167 def BuildHooksNodes(self):
11168 """Build hooks nodes.
11171 mn = self.cfg.GetMasterNode()
11172 return ([mn], [mn])
11174 def Exec(self, feedback_fn):
11175 """Add the node group to the cluster.
11178 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11179 uuid=self.group_uuid,
11180 alloc_policy=self.op.alloc_policy,
11181 ndparams=self.op.ndparams,
11182 diskparams=self.new_diskparams,
11183 ipolicy=self.op.ipolicy,
11184 hv_state_static=self.new_hv_state,
11185 disk_state_static=self.new_disk_state)
11187 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11188 del self.remove_locks[locking.LEVEL_NODEGROUP]
11191 class LUGroupAssignNodes(NoHooksLU):
11192 """Logical unit for assigning nodes to groups.
11197 def ExpandNames(self):
11198 # These raise errors.OpPrereqError on their own:
11199 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11200 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11202 # We want to lock all the affected nodes and groups. We have readily
11203 # available the list of nodes, and the *destination* group. To gather the
11204 # list of "source" groups, we need to fetch node information later on.
11205 self.needed_locks = {
11206 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11207 locking.LEVEL_NODE: self.op.nodes,
11210 def DeclareLocks(self, level):
11211 if level == locking.LEVEL_NODEGROUP:
11212 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11214 # Try to get all affected nodes' groups without having the group or node
11215 # lock yet. Needs verification later in the code flow.
11216 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11218 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11220 def CheckPrereq(self):
11221 """Check prerequisites.
11224 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11225 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11226 frozenset(self.op.nodes))
11228 expected_locks = (set([self.group_uuid]) |
11229 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11230 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11231 if actual_locks != expected_locks:
11232 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11233 " current groups are '%s', used to be '%s'" %
11234 (utils.CommaJoin(expected_locks),
11235 utils.CommaJoin(actual_locks)))
11237 self.node_data = self.cfg.GetAllNodesInfo()
11238 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11239 instance_data = self.cfg.GetAllInstancesInfo()
11241 if self.group is None:
11242 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11243 (self.op.group_name, self.group_uuid))
11245 (new_splits, previous_splits) = \
11246 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11247 for node in self.op.nodes],
11248 self.node_data, instance_data)
11250 if new_splits:
11251 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11253 if not self.op.force:
11254 raise errors.OpExecError("The following instances get split by this"
11255 " change and --force was not given: %s" %
11256 fmt_new_splits)
11257 else:
11258 self.LogWarning("This operation will split the following instances: %s",
11259 fmt_new_splits)
11261 if previous_splits:
11262 self.LogWarning("In addition, these already-split instances continue"
11263 " to be split across groups: %s",
11264 utils.CommaJoin(utils.NiceSort(previous_splits)))
11266 def Exec(self, feedback_fn):
11267 """Assign nodes to a new group.
11270 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
11272 self.cfg.AssignGroupNodes(mods)
11274 @staticmethod
11275 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11276 """Check for split instances after a node assignment.
11278 This method considers a series of node assignments as an atomic operation,
11279 and returns information about split instances after applying the set of
11282 In particular, it returns information about newly split instances, and
11283 instances that were already split, and remain so after the change.
11285 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11286 considered.
11288 @type changes: list of (node_name, new_group_uuid) pairs.
11289 @param changes: list of node assignments to consider.
11290 @param node_data: a dict with data for all nodes
11291 @param instance_data: a dict with all instances to consider
11292 @rtype: a two-tuple
11293 @return: a list of instances that were previously okay and become split as a
11294 consequence of this change, and a list of instances that were previously
11295 split and that this change does not fix.
11298 changed_nodes = dict((node, group) for node, group in changes
11299 if node_data[node].group != group)
11301 all_split_instances = set()
11302 previously_split_instances = set()
11304 def InstanceNodes(instance):
11305 return [instance.primary_node] + list(instance.secondary_nodes)
11307 for inst in instance_data.values():
11308 if inst.disk_template not in constants.DTS_INT_MIRROR:
11309 continue
11311 instance_nodes = InstanceNodes(inst)
11313 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11314 previously_split_instances.add(inst.name)
11316 if len(set(changed_nodes.get(node, node_data[node].group)
11317 for node in instance_nodes)) > 1:
11318 all_split_instances.add(inst.name)
11320 return (list(all_split_instances - previously_split_instances),
11321 list(previously_split_instances & all_split_instances))
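  # Illustrative sketch (editor's addition): with a hypothetical DRBD instance
  # "inst1" mirrored on node1/node2, both currently in group "uuid-a", moving
  # node2 to "uuid-b" makes it newly split:
  #
  #   changes = [("node2", "uuid-b")]
  #   # changed_nodes == {"node2": "uuid-b"}; inst1's nodes now span
  #   # {"uuid-a", "uuid-b"}, so it lands in the first returned list.
  #   # An instance that already spanned two groups before the change and
  #   # still does afterwards lands in the second list instead.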
11324 class _GroupQuery(_QueryBase):
11325 FIELDS = query.GROUP_FIELDS
11327 def ExpandNames(self, lu):
11328 lu.needed_locks = {}
11330 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11331 self._cluster = lu.cfg.GetClusterInfo()
11332 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11334 if not self.names:
11335 self.wanted = [name_to_uuid[name]
11336 for name in utils.NiceSort(name_to_uuid.keys())]
11337 else:
11338 # Accept the passed names as either group names or UUIDs.
11339 missing = []
11340 self.wanted = []
11341 all_uuid = frozenset(self._all_groups.keys())
11343 for name in self.names:
11344 if name in all_uuid:
11345 self.wanted.append(name)
11346 elif name in name_to_uuid:
11347 self.wanted.append(name_to_uuid[name])
11348 else:
11349 missing.append(name)
11351 if missing:
11352 raise errors.OpPrereqError("Some groups do not exist: %s" %
11353 utils.CommaJoin(missing),
11354 errors.ECODE_NOENT)
11356 def DeclareLocks(self, lu, level):
11357 pass
11359 def _GetQueryData(self, lu):
11360 """Computes the list of node groups and their attributes.
11363 do_nodes = query.GQ_NODE in self.requested_data
11364 do_instances = query.GQ_INST in self.requested_data
11366 group_to_nodes = None
11367 group_to_instances = None
11369 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11370 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11371 # latter GetAllInstancesInfo() is not enough, for we have to go through
11372 # instance->node. Hence, we will need to process nodes even if we only need
11373 # instance information.
11374 if do_nodes or do_instances:
11375 all_nodes = lu.cfg.GetAllNodesInfo()
11376 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11377 node_to_group = {}
11379 for node in all_nodes.values():
11380 if node.group in group_to_nodes:
11381 group_to_nodes[node.group].append(node.name)
11382 node_to_group[node.name] = node.group
11384 if do_instances:
11385 all_instances = lu.cfg.GetAllInstancesInfo()
11386 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11388 for instance in all_instances.values():
11389 node = instance.primary_node
11390 if node in node_to_group:
11391 group_to_instances[node_to_group[node]].append(instance.name)
11393 if not do_nodes:
11394 # Do not pass on node information if it was not requested.
11395 group_to_nodes = None
11397 return query.GroupQueryData(self._cluster,
11398 [self._all_groups[uuid]
11399 for uuid in self.wanted],
11400 group_to_nodes, group_to_instances,
11401 query.GQ_DISKPARAMS in self.requested_data)
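# Illustrative sketch (editor's addition): the mappings built above, for a
# hypothetical two-group cluster:
#
#   group_to_nodes     = {"uuid-a": ["node1", "node2"], "uuid-b": ["node3"]}
#   node_to_group      = {"node1": "uuid-a", "node2": "uuid-a",
#                         "node3": "uuid-b"}
#   group_to_instances = {"uuid-a": ["inst1"], "uuid-b": []}
#
# Instances are attributed to the group of their primary node, which is why
# nodes must be processed even when only GQ_INST was requested.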
11404 class LUGroupQuery(NoHooksLU):
11405 """Logical unit for querying node groups.
11410 def CheckArguments(self):
11411 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11412 self.op.output_fields, False)
11414 def ExpandNames(self):
11415 self.gq.ExpandNames(self)
11417 def DeclareLocks(self, level):
11418 self.gq.DeclareLocks(self, level)
11420 def Exec(self, feedback_fn):
11421 return self.gq.OldStyleQuery(self)
11424 class LUGroupSetParams(LogicalUnit):
11425 """Modifies the parameters of a node group.
11428 HPATH = "group-modify"
11429 HTYPE = constants.HTYPE_GROUP
11432 def CheckArguments(self):
11435 self.op.diskparams,
11436 self.op.alloc_policy,
11438 self.op.disk_state,
11442 if all_changes.count(None) == len(all_changes):
11443 raise errors.OpPrereqError("Please pass at least one modification",
11444 errors.ECODE_INVAL)
11446 def ExpandNames(self):
11447 # This raises errors.OpPrereqError on its own:
11448 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11450 self.needed_locks = {
11451 locking.LEVEL_INSTANCE: [],
11452 locking.LEVEL_NODEGROUP: [self.group_uuid],
11455 self.share_locks[locking.LEVEL_INSTANCE] = 1
11457 def DeclareLocks(self, level):
11458 if level == locking.LEVEL_INSTANCE:
11459 assert not self.needed_locks[locking.LEVEL_INSTANCE]
11461 # Lock instances optimistically, needs verification once group lock has
11463 self.needed_locks[locking.LEVEL_INSTANCE] = \
11464 self.cfg.GetNodeGroupInstances(self.group_uuid)
11467 def _UpdateAndVerifyDiskParams(old, new):
11468 """Updates and verifies disk parameters.
11471 new_params = _GetUpdatedParams(old, new)
11472 utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
11475 def CheckPrereq(self):
11476 """Check prerequisites.
11479 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11481 # Check if locked instances are still correct
11482 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
11484 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11485 cluster = self.cfg.GetClusterInfo()
11487 if self.group is None:
11488 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11489 (self.op.group_name, self.group_uuid))
11491 if self.op.ndparams:
11492 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11493 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
11494 self.new_ndparams = new_ndparams
11496 if self.op.diskparams:
11497 diskparams = self.group.diskparams
11498 uavdp = self._UpdateAndVerifyDiskParams
11499 # For each disk template, update and verify the values of its subdict
11500 new_diskparams = dict((dt,
11501 uavdp(diskparams.get(dt, {}),
11502 self.op.diskparams[dt]))
11503 for dt in constants.DISK_TEMPLATES
11504 if dt in self.op.diskparams)
11505 # Now that all subdicts of diskparams are ready, merge the actual
11506 # dict with all the updated subdicts
11507 self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
11508 try:
11509 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
11510 except errors.OpPrereqError, err:
11511 raise errors.OpPrereqError("While verifying diskparams options: %s" % err,
11512 errors.ECODE_INVAL)
11514 if self.op.hv_state:
11515 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
11516 self.group.hv_state_static)
11518 if self.op.disk_state:
11519 self.new_disk_state = \
11520 _MergeAndVerifyDiskState(self.op.disk_state,
11521 self.group.disk_state_static)
11523 if self.op.ipolicy:
11524 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
11528 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
11529 inst_filter = lambda inst: inst.name in owned_instances
11530 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
11531 gmi = ganeti.masterd.instance
11532 violations = \
11533 _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
11534 self.group),
11535 new_ipolicy, instances, self.cfg)
11537 if violations:
11538 self.LogWarning("After the ipolicy change the following instances"
11539 " violate them: %s",
11540 utils.CommaJoin(violations))
11542 def BuildHooksEnv(self):
11543 """Build hooks env.
11547 "GROUP_NAME": self.op.group_name,
11548 "NEW_ALLOC_POLICY": self.op.alloc_policy,
11551 def BuildHooksNodes(self):
11552 """Build hooks nodes.
11555 mn = self.cfg.GetMasterNode()
11556 return ([mn], [mn])
11558 def Exec(self, feedback_fn):
11559 """Modifies the node group.
11564 if self.op.ndparams:
11565 self.group.ndparams = self.new_ndparams
11566 result.append(("ndparams", str(self.group.ndparams)))
11568 if self.op.diskparams:
11569 self.group.diskparams = self.new_diskparams
11570 result.append(("diskparams", str(self.group.diskparams)))
11572 if self.op.alloc_policy:
11573 self.group.alloc_policy = self.op.alloc_policy
11575 if self.op.hv_state:
11576 self.group.hv_state_static = self.new_hv_state
11578 if self.op.disk_state:
11579 self.group.disk_state_static = self.new_disk_state
11581 if self.op.ipolicy:
11582 self.group.ipolicy = self.new_ipolicy
11584 self.cfg.Update(self.group, feedback_fn)
11588 class LUGroupRemove(LogicalUnit):
11589 HPATH = "group-remove"
11590 HTYPE = constants.HTYPE_GROUP
11593 def ExpandNames(self):
11594 # This raises errors.OpPrereqError on its own:
11595 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11596 self.needed_locks = {
11597 locking.LEVEL_NODEGROUP: [self.group_uuid],
11600 def CheckPrereq(self):
11601 """Check prerequisites.
11603 This checks that the given group name exists as a node group, that it is
11604 empty (i.e., contains no nodes), and that it is not the last group of the
11605 cluster.
11608 # Verify that the group is empty.
11609 group_nodes = [node.name
11610 for node in self.cfg.GetAllNodesInfo().values()
11611 if node.group == self.group_uuid]
11613 if group_nodes:
11614 raise errors.OpPrereqError("Group '%s' not empty, has the following"
11615 " nodes: %s" %
11616 (self.op.group_name,
11617 utils.CommaJoin(utils.NiceSort(group_nodes))),
11618 errors.ECODE_STATE)
11620 # Verify the cluster would not be left group-less.
11621 if len(self.cfg.GetNodeGroupList()) == 1:
11622 raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
11623 " removed" % self.op.group_name,
11624 errors.ECODE_STATE)
11626 def BuildHooksEnv(self):
11627 """Build hooks env.
11631 "GROUP_NAME": self.op.group_name,
11634 def BuildHooksNodes(self):
11635 """Build hooks nodes.
11638 mn = self.cfg.GetMasterNode()
11639 return ([mn], [mn])
11641 def Exec(self, feedback_fn):
11642 """Remove the node group.
11645 try:
11646 self.cfg.RemoveNodeGroup(self.group_uuid)
11647 except errors.ConfigurationError:
11648 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11649 (self.op.group_name, self.group_uuid))
11651 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11654 class LUGroupRename(LogicalUnit):
11655 HPATH = "group-rename"
11656 HTYPE = constants.HTYPE_GROUP
11659 def ExpandNames(self):
11660 # This raises errors.OpPrereqError on its own:
11661 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11663 self.needed_locks = {
11664 locking.LEVEL_NODEGROUP: [self.group_uuid],
11667 def CheckPrereq(self):
11668 """Check prerequisites.
11670 Ensures requested new name is not yet used.
11673 try:
11674 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11675 except errors.OpPrereqError:
11676 pass
11677 else:
11678 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11679 " node group (UUID: %s)" %
11680 (self.op.new_name, new_name_uuid),
11681 errors.ECODE_EXISTS)
11683 def BuildHooksEnv(self):
11684 """Build hooks env.
11688 "OLD_NAME": self.op.group_name,
11689 "NEW_NAME": self.op.new_name,
11692 def BuildHooksNodes(self):
11693 """Build hooks nodes.
11696 mn = self.cfg.GetMasterNode()
11698 all_nodes = self.cfg.GetAllNodesInfo()
11699 all_nodes.pop(mn, None)
11701 run_nodes = [mn]
11702 run_nodes.extend(node.name for node in all_nodes.values()
11703 if node.group == self.group_uuid)
11705 return (run_nodes, run_nodes)
11707 def Exec(self, feedback_fn):
11708 """Rename the node group.
11711 group = self.cfg.GetNodeGroup(self.group_uuid)
11713 if group is None:
11714 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11715 (self.op.group_name, self.group_uuid))
11717 group.name = self.op.new_name
11718 self.cfg.Update(group, feedback_fn)
11720 return self.op.new_name
11723 class LUGroupEvacuate(LogicalUnit):
11724 HPATH = "group-evacuate"
11725 HTYPE = constants.HTYPE_GROUP
11728 def ExpandNames(self):
11729 # This raises errors.OpPrereqError on its own:
11730 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11732 if self.op.target_groups:
11733 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11734 self.op.target_groups)
11735 else:
11736 self.req_target_uuids = []
11738 if self.group_uuid in self.req_target_uuids:
11739 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
11740 " as a target group (targets are %s)" %
11741 (self.group_uuid,
11742 utils.CommaJoin(self.req_target_uuids)),
11743 errors.ECODE_INVAL)
11745 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11747 self.share_locks = _ShareAll()
11748 self.needed_locks = {
11749 locking.LEVEL_INSTANCE: [],
11750 locking.LEVEL_NODEGROUP: [],
11751 locking.LEVEL_NODE: [],
11754 def DeclareLocks(self, level):
11755 if level == locking.LEVEL_INSTANCE:
11756 assert not self.needed_locks[locking.LEVEL_INSTANCE]
11758 # Lock instances optimistically, needs verification once node and group
11759 # locks have been acquired
11760 self.needed_locks[locking.LEVEL_INSTANCE] = \
11761 self.cfg.GetNodeGroupInstances(self.group_uuid)
11763 elif level == locking.LEVEL_NODEGROUP:
11764 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11766 if self.req_target_uuids:
11767 lock_groups = set([self.group_uuid] + self.req_target_uuids)
11769 # Lock all groups used by instances optimistically; this requires going
11770 # via the node before it's locked, requiring verification later on
11771 lock_groups.update(group_uuid
11772 for instance_name in
11773 self.owned_locks(locking.LEVEL_INSTANCE)
11775 self.cfg.GetInstanceNodeGroups(instance_name))
11776 else:
11777 # No target groups, need to lock all of them
11778 lock_groups = locking.ALL_SET
11780 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11782 elif level == locking.LEVEL_NODE:
11783 # This will only lock the nodes in the group to be evacuated which
11784 # contain actual instances
11785 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11786 self._LockInstancesNodes()
11788 # Lock all nodes in group to be evacuated and target groups
11789 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11790 assert self.group_uuid in owned_groups
11791 member_nodes = [node_name
11792 for group in owned_groups
11793 for node_name in self.cfg.GetNodeGroup(group).members]
11794 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11796 def CheckPrereq(self):
11797 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11798 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11799 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11801 assert owned_groups.issuperset(self.req_target_uuids)
11802 assert self.group_uuid in owned_groups
11804 # Check if locked instances are still correct
11805 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
11807 # Get instance information
11808 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
11810 # Check if node groups for locked instances are still correct
11811 _CheckInstancesNodeGroups(self.cfg, self.instances,
11812 owned_groups, owned_nodes, self.group_uuid)
11814 if self.req_target_uuids:
11815 # User requested specific target groups
11816 self.target_uuids = self.req_target_uuids
11817 else:
11818 # All groups except the one to be evacuated are potential targets
11819 self.target_uuids = [group_uuid for group_uuid in owned_groups
11820 if group_uuid != self.group_uuid]
11822 if not self.target_uuids:
11823 raise errors.OpPrereqError("There are no possible target groups",
11824 errors.ECODE_INVAL)
11826 def BuildHooksEnv(self):
11827 """Build hooks env.
11831 "GROUP_NAME": self.op.group_name,
11832 "TARGET_GROUPS": " ".join(self.target_uuids),
11835 def BuildHooksNodes(self):
11836 """Build hooks nodes.
11839 mn = self.cfg.GetMasterNode()
11841 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
11843 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
11845 return (run_nodes, run_nodes)
11847 def Exec(self, feedback_fn):
11848 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11850 assert self.group_uuid not in self.target_uuids
11852 req = iallocator.IAReqGroupChange(instances=instances,
11853 target_groups=self.target_uuids)
11854 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
11856 ial.Run(self.op.iallocator)
11858 if not ial.success:
11859 raise errors.OpPrereqError("Can't compute group evacuation using"
11860 " iallocator '%s': %s" %
11861 (self.op.iallocator, ial.info),
11862 errors.ECODE_NORES)
11864 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11866 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
11867 len(jobs), self.op.group_name)
11869 return ResultWithJobs(jobs)
11872 class LURestrictedCommand(NoHooksLU):
11873 """Logical unit for executing restricted commands.
11878 def ExpandNames(self):
11880 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11882 self.needed_locks = {
11883 locking.LEVEL_NODE: self.op.nodes,
11885 self.share_locks = {
11886 locking.LEVEL_NODE: not self.op.use_locking,
11889 def CheckPrereq(self):
11890 """Check prerequisites.
11894 def Exec(self, feedback_fn):
11895 """Execute restricted command and return output.
11898 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11900 # Check if correct locks are held
11901 assert set(self.op.nodes).issubset(owned_nodes)
11903 rpcres = self.rpc.call_restricted_command(self.op.nodes, self.op.command)
11905 result = []
11907 for node_name in self.op.nodes:
11908 nres = rpcres[node_name]
11909 if nres.fail_msg:
11910 msg = ("Command '%s' on node '%s' failed: %s" %
11911 (self.op.command, node_name, nres.fail_msg))
11912 result.append((False, msg))
11913 else:
11914 result.append((True, nres.payload))
11916 return result
11919 #: Query type implementations
11920 _QUERY_IMPL = {
11921 constants.QR_CLUSTER: _ClusterQuery,
11922 constants.QR_INSTANCE: _InstanceQuery,
11923 constants.QR_NODE: _NodeQuery,
11924 constants.QR_GROUP: _GroupQuery,
11925 constants.QR_NETWORK: _NetworkQuery,
11926 constants.QR_OS: _OsQuery,
11927 constants.QR_EXTSTORAGE: _ExtStorageQuery,
11928 constants.QR_EXPORT: _ExportQuery,
11929 }
11931 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
11934 def _GetQueryImplementation(name):
11935 """Returns the implemtnation for a query type.
11937 @param name: Query type, must be one of L{constants.QR_VIA_OP}
11940 try:
11941 return _QUERY_IMPL[name]
11942 except KeyError:
11943 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
11944 errors.ECODE_INVAL)
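# Illustrative sketch (editor's addition): resolving a query resource to its
# implementation and running an old-style query; "lu" stands for any logical
# unit providing cfg/rpc access, and the field names are hypothetical.
#
#   impl = _GetQueryImplementation(constants.QR_GROUP)      # -> _GroupQuery
#   gq = impl(qlang.MakeSimpleFilter("name", ["group1"]), ["name"], False)
#   gq.ExpandNames(lu)
#   data = gq.OldStyleQuery(lu)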
11947 def _CheckForConflictingIp(lu, ip, node):
11948 """In case of conflicting IP address raise error.
11951 @param ip: IP address
11953 @param node: node name
11956 (conf_net, _) = lu.cfg.CheckIPInNodeGroup(ip, node)
11957 if conf_net is not None:
11958 raise errors.OpPrereqError(("The requested IP address (%s) belongs to"
11959 " network %s, but the target NIC does not." %
11960 (ip, conf_net)),
11961 errors.ECODE_STATE)
11963 return (None, None)
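# Illustrative sketch (editor's addition): _CheckForConflictingIp is meant for
# the "assign this NIC an unmanaged IP" path.  Roughly:
#
#   (conf_net, _) = lu.cfg.CheckIPInNodeGroup("192.0.2.10", "node1")
#   # conf_net is None      -> the address is not part of any connected
#   #                          network, nothing to object to
#   # conf_net is not None  -> OpPrereqError: the NIC should be attached to
#   #                          that network instead of using a raw IP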