4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
#: Size of DRBD meta block device
DRBD_META_SIZE = 128

# States of instances: lists of admin states used by checks below
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
82 """Data container for LU results with jobs.
84 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
85 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
86 contained in the C{jobs} attribute and include the job IDs in the opcode
90 def __init__(self, jobs, **kwargs):
91 """Initializes this class.
93 Additional return values can be specified as keyword arguments.
95 @type jobs: list of lists of L{opcode.OpCode}
96 @param jobs: A list of lists of opcode objects
103 class LogicalUnit(object):
104 """Logical Unit base class.
106 Subclasses must follow these rules:
107 - implement ExpandNames
108 - implement CheckPrereq (except when tasklets are used)
109 - implement Exec (except when tasklets are used)
110 - implement BuildHooksEnv
111 - implement BuildHooksNodes
112 - redefine HPATH and HTYPE
113 - optionally redefine their run requirements:
114 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
116 Note that all commands require root permissions.
118 @ivar dry_run_result: the value (if any) that will be returned to the caller
119 in dry-run mode (signalled by opcode dry_run parameter)
126 def __init__(self, processor, op, context, rpc_runner):
127 """Constructor for LogicalUnit.
129 This needs to be overridden in derived classes in order to check op
133 self.proc = processor
135 self.cfg = context.cfg
136 self.glm = context.glm
138 self.owned_locks = context.glm.list_owned
139 self.context = context
140 self.rpc = rpc_runner
141 # Dicts used to declare locking needs to mcpu
142 self.needed_locks = None
143 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
145 self.remove_locks = {}
146 # Used to force good behavior when calling helper functions
147 self.recalculate_locks = {}
149 self.Log = processor.Log # pylint: disable=C0103
150 self.LogWarning = processor.LogWarning # pylint: disable=C0103
151 self.LogInfo = processor.LogInfo # pylint: disable=C0103
152 self.LogStep = processor.LogStep # pylint: disable=C0103
153 # support for dry-run
154 self.dry_run_result = None
155 # support for generic debug attribute
156 if (not hasattr(self.op, "debug_level") or
157 not isinstance(self.op.debug_level, int)):
158 self.op.debug_level = 0
163 # Validate opcode parameters and set defaults
164 self.op.Validate(True)
166 self.CheckArguments()
168 def CheckArguments(self):
169 """Check syntactic validity for the opcode arguments.
171 This method is for doing a simple syntactic check and ensure
172 validity of opcode parameters, without any cluster-related
173 checks. While the same can be accomplished in ExpandNames and/or
174 CheckPrereq, doing these separate is better because:
176 - ExpandNames is left as as purely a lock-related function
177 - CheckPrereq is run after we have acquired locks (and possible
180 The function is allowed to change the self.op attribute so that
181 later methods can no longer worry about missing parameters.
186 def ExpandNames(self):
187 """Expand names for this LU.
189 This method is called before starting to execute the opcode, and it should
190 update all the parameters of the opcode to their canonical form (e.g. a
191 short node name must be fully expanded after this method has successfully
192 completed). This way locking, hooks, logging, etc. can work correctly.
194 LUs which implement this method must also populate the self.needed_locks
195 member, as a dict with lock levels as keys, and a list of needed lock names
198 - use an empty dict if you don't need any lock
199 - if you don't need any lock at a particular level omit that level
200 - don't put anything for the BGL level
201 - if you want all locks at a level use locking.ALL_SET as a value
203 If you need to share locks (rather than acquire them exclusively) at one
204 level you can modify self.share_locks, setting a true value (usually 1) for
205 that level. By default locks are not shared.
207 This function can also define a list of tasklets, which then will be
208 executed in order instead of the usual LU-level CheckPrereq and Exec
209 functions, if those are not defined by the LU.
213 # Acquire all nodes and one instance
214 self.needed_locks = {
215 locking.LEVEL_NODE: locking.ALL_SET,
216 locking.LEVEL_INSTANCE: ['instance1.example.com'],
218 # Acquire just two nodes
219 self.needed_locks = {
220 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
223 self.needed_locks = {} # No, you can't leave it to the default value None
226 # The implementation of this method is mandatory only if the new LU is
227 # concurrent, so that old LUs don't need to be changed all at the same
230 self.needed_locks = {} # Exclusive LUs don't need locks.
232 raise NotImplementedError
234 def DeclareLocks(self, level):
235 """Declare LU locking needs for a level
237 While most LUs can just declare their locking needs at ExpandNames time,
238 sometimes there's the need to calculate some locks after having acquired
239 the ones before. This function is called just before acquiring locks at a
240 particular level, but after acquiring the ones at lower levels, and permits
241 such calculations. It can be used to modify self.needed_locks, and by
242 default it does nothing.
244 This function is only called if you have something already set in
245 self.needed_locks for the level.
247 @param level: Locking level which is going to be locked
248 @type level: member of ganeti.locking.LEVELS
252 def CheckPrereq(self):
253 """Check prerequisites for this LU.
255 This method should check that the prerequisites for the execution
256 of this LU are fulfilled. It can do internode communication, but
257 it should be idempotent - no cluster or system changes are
260 The method should raise errors.OpPrereqError in case something is
261 not fulfilled. Its return value is ignored.
263 This method should also update all the parameters of the opcode to
264 their canonical form if it hasn't been done by ExpandNames before.
267 if self.tasklets is not None:
268 for (idx, tl) in enumerate(self.tasklets):
269 logging.debug("Checking prerequisites for tasklet %s/%s",
270 idx + 1, len(self.tasklets))
275 def Exec(self, feedback_fn):
278 This method should implement the actual work. It should raise
279 errors.OpExecError for failures that are somewhat dealt with in
283 if self.tasklets is not None:
284 for (idx, tl) in enumerate(self.tasklets):
285 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
288 raise NotImplementedError
290 def BuildHooksEnv(self):
291 """Build hooks environment for this LU.
294 @return: Dictionary containing the environment that will be used for
295 running the hooks for this LU. The keys of the dict must not be prefixed
296 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
297 will extend the environment with additional variables. If no environment
298 should be defined, an empty dictionary should be returned (not C{None}).
299 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
303 raise NotImplementedError
305 def BuildHooksNodes(self):
306 """Build list of nodes to run LU's hooks.
308 @rtype: tuple; (list, list)
309 @return: Tuple containing a list of node names on which the hook
310 should run before the execution and a list of node names on which the
311 hook should run after the execution. No nodes should be returned as an
312 empty list (and not None).
313 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
317 raise NotImplementedError
319 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
320 """Notify the LU about the results of its hooks.
322 This method is called every time a hooks phase is executed, and notifies
323 the Logical Unit about the hooks' result. The LU can then use it to alter
324 its result based on the hooks. By default the method does nothing and the
325 previous result is passed back unchanged but any LU can define it if it
326 wants to use the local cluster hook-scripts somehow.
328 @param phase: one of L{constants.HOOKS_PHASE_POST} or
329 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
330 @param hook_results: the results of the multi-node hooks rpc call
331 @param feedback_fn: function used send feedback back to the caller
332 @param lu_result: the previous Exec result this LU had, or None
334 @return: the new Exec result, based on the previous result
338 # API must be kept, thus we ignore the unused argument and could
339 # be a function warnings
340 # pylint: disable=W0613,R0201
343 def _ExpandAndLockInstance(self):
344 """Helper function to expand and lock an instance.
346 Many LUs that work on an instance take its name in self.op.instance_name
347 and need to expand it and then declare the expanded name for locking. This
348 function does it, and then updates self.op.instance_name to the expanded
349 name. It also initializes needed_locks as a dict, if this hasn't been done
353 if self.needed_locks is None:
354 self.needed_locks = {}
356 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
357 "_ExpandAndLockInstance called with instance-level locks set"
358 self.op.instance_name = _ExpandInstanceName(self.cfg,
359 self.op.instance_name)
360 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
362 def _LockInstancesNodes(self, primary_only=False,
363 level=locking.LEVEL_NODE):
364 """Helper function to declare instances' nodes for locking.
366 This function should be called after locking one or more instances to lock
367 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
368 with all primary or secondary nodes for instances already locked and
369 present in self.needed_locks[locking.LEVEL_INSTANCE].
371 It should be called from DeclareLocks, and for safety only works if
372 self.recalculate_locks[locking.LEVEL_NODE] is set.
374 In the future it may grow parameters to just lock some instance's nodes, or
375 to just lock primaries or secondary nodes, if needed.
377 If should be called in DeclareLocks in a way similar to::
379 if level == locking.LEVEL_NODE:
380 self._LockInstancesNodes()
382 @type primary_only: boolean
383 @param primary_only: only lock primary nodes of locked instances
384 @param level: Which lock level to use for locking nodes
387 assert level in self.recalculate_locks, \
388 "_LockInstancesNodes helper function called with no nodes to recalculate"
390 # TODO: check if we're really been called with the instance locks held
392 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
393 # future we might want to have different behaviors depending on the value
394 # of self.recalculate_locks[locking.LEVEL_NODE]
396 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
397 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
398 wanted_nodes.append(instance.primary_node)
400 wanted_nodes.extend(instance.secondary_nodes)
402 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
403 self.needed_locks[level] = wanted_nodes
404 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
405 self.needed_locks[level].extend(wanted_nodes)
407 raise errors.ProgrammerError("Unknown recalculation mode")
409 del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  # No hooks path/type: the hooks runner is never invoked for these LUs
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
438 """Tasklet base class.
440 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
441 they can mix legacy code with tasklets. Locking needs to be done in the LU,
442 tasklets know nothing about locks.
444 Subclasses must follow these rules:
445 - Implement CheckPrereq
449 def __init__(self, lu):
456 def CheckPrereq(self):
457 """Check prerequisites for this tasklets.
459 This method should check whether the prerequisites for the execution of
460 this tasklet are fulfilled. It can do internode communication, but it
461 should be idempotent - no cluster or system changes are allowed.
463 The method should raise errors.OpPrereqError in case something is not
464 fulfilled. Its return value is ignored.
466 This method should also update all parameters to their canonical form if it
467 hasn't been done before.
472 def Exec(self, feedback_fn):
473 """Execute the tasklet.
475 This method should implement the actual work. It should raise
476 errors.OpExecError for failures that are somewhat dealt with in code, or
480 raise NotImplementedError
484 """Base for query utility classes.
487 #: Attribute holding field definitions
490 def __init__(self, qfilter, fields, use_locking):
491 """Initializes this class.
494 self.use_locking = use_locking
496 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
498 self.requested_data = self.query.RequestedData()
499 self.names = self.query.RequestedNames()
501 # Sort only if no names were requested
502 self.sort_by_name = not self.names
504 self.do_locking = None
507 def _GetNames(self, lu, all_names, lock_level):
508 """Helper function to determine names asked for in the query.
512 names = lu.owned_locks(lock_level)
516 if self.wanted == locking.ALL_SET:
517 assert not self.names
518 # caller didn't specify names, so ordering is not important
519 return utils.NiceSort(names)
521 # caller specified names and we must keep the same order
523 assert not self.do_locking or lu.glm.is_owned(lock_level)
525 missing = set(self.wanted).difference(names)
527 raise errors.OpExecError("Some items were removed before retrieving"
528 " their data: %s" % missing)
530 # Return expanded names
533 def ExpandNames(self, lu):
534 """Expand names for this query.
536 See L{LogicalUnit.ExpandNames}.
539 raise NotImplementedError()
541 def DeclareLocks(self, lu, level):
542 """Declare locks for this query.
544 See L{LogicalUnit.DeclareLocks}.
547 raise NotImplementedError()
549 def _GetQueryData(self, lu):
550 """Collects all data for this query.
552 @return: Query data object
555 raise NotImplementedError()
557 def NewStyleQuery(self, lu):
558 """Collect data and execute query.
561 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
562 sort_by_name=self.sort_by_name)
564 def OldStyleQuery(self, lu):
565 """Collect data and execute query.
568 return self.query.OldStyleQuery(self._GetQueryData(lu),
569 sort_by_name=self.sort_by_name)
573 """Returns a dict declaring all lock levels shared.
576 return dict.fromkeys(locking.LEVELS, 1)
def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  # data is (bootid, per-vg info list, per-hypervisor info list); the legacy
  # format only carries the first volume group and hypervisor
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @rtype: frozenset
  @return: The instance's current node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  # no names given: return every known node, nicely sorted
  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    # no names given: return every known instance, nicely sorted
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      # reset request: drop the key; it may not be present in the old dict
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
727 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
728 """Return the new version of a instance policy.
730 @param group_policy: whether this policy applies to a group and thus
731 we should support removal of policy entries
734 use_none = use_default = group_policy
735 ipolicy = copy.deepcopy(old_ipolicy)
736 for key, value in new_ipolicy.items():
737 if key not in constants.IPOLICY_ALL_KEYS:
738 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
740 if key in constants.IPOLICY_ISPECS:
741 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
742 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
744 use_default=use_default)
746 if not value or value == [constants.VALUE_DEFAULT]:
750 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
751 " on the cluster'" % key,
754 if key in constants.IPOLICY_PARAMETERS:
755 # FIXME: we assume all such values are float
757 ipolicy[key] = float(value)
758 except (TypeError, ValueError), err:
759 raise errors.OpPrereqError("Invalid value for attribute"
760 " '%s': '%s', error: %s" %
761 (key, value, err), errors.ECODE_INVAL)
763 # FIXME: we assume all others are lists; this should be redone
765 ipolicy[key] = list(value)
767 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
768 except errors.ConfigurationError, err:
769 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    # merge one sub-dict and enforce its value types
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None
def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None
839 def _ReleaseLocks(lu, level, names=None, keep=None):
840 """Releases locks owned by an LU.
842 @type lu: L{LogicalUnit}
843 @param level: Lock level
844 @type names: list or None
845 @param names: Names of locks to release
846 @type keep: list or None
847 @param keep: Names of locks to retain
850 assert not (keep is not None and names is not None), \
851 "Only one of the 'names' and the 'keep' parameters can be given"
853 if names is not None:
854 should_release = names.__contains__
856 should_release = lambda name: name not in keep
858 should_release = None
860 owned = lu.owned_locks(level)
862 # Not owning any lock at this level, do nothing
869 # Determine which locks to release
871 if should_release(name):
876 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
878 # Release just some locks
879 lu.glm.release(level, names=release)
881 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
884 lu.glm.release(level)
886 assert not lu.glm.is_owned(level), "No locks should be owned"
889 def _MapInstanceDisksToNodes(instances):
890 """Creates a map from (node, volume) to instance name.
892 @type instances: list of L{objects.Instance}
893 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
896 return dict(((node, vol), inst.name)
897 for inst in instances
898 for (node, vols) in inst.MapLVsByNode().items()
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  Hook failures are logged as warnings and never propagated: post-hooks
  are best-effort by design.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the fields requested by the caller
  @raise errors.OpPrereqError: if any selected field is unknown

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: the admin states the instance must be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    # the instance must not be running either; verify with its primary node
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)
def _ComputeMinMaxSpec(name, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  # unset/auto values are never range-checked
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

  # keep only the non-None results, i.e. the actual violation messages
  return filter(None,
                (_compute_fn(name, ipolicy, value)
                 for (name, value) in test_settings))
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)
def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # Staying in the same group can never introduce a policy violation
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param lu: the logical unit on whose behalf we execute
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    # Either downgrade the violation to a warning or abort the operation
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violates the new ipolicy but
      did not before

  """
  # Violators of the new policy minus violators of the old one: only the
  # instances which the policy *change* breaks (the previous operand order
  # computed the opposite set, contradicting the docstring)
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name
def _ExpandNodeName(cfg, name):
  """Resolve a (possibly shortened) node name via L{_ExpandItemName}."""
  resolver = cfg.ExpandNodeName
  return _ExpandItemName(resolver, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Resolve a (possibly shortened) instance name via L{_ExpandItemName}."""
  resolver = cfg.ExpandInstanceName
  return _ExpandItemName(resolver, name, "Instance")
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      # hooks get an empty string rather than None for an unset IP
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    # mode/link come from the cluster-filled nic parameters
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  @param lu: the logical unit on whose behalf we execute
  @param exceptions: node names excluded from candidate promotion

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    # promoted nodes must be re-added to the cluster context
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: list of L{objects.Instance}
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  # only bridged NICs actually need a bridge on the target node
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    # the OS declares no variants, so none may be passed
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
def _GetNodeInstancesInner(cfg, fn):
  """Return all configured instances that satisfy the predicate C{fn}."""
  all_instances = cfg.GetAllInstancesInfo().values()
  return [inst for inst in all_instances if fn(inst)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  # all other storage types take no extra arguments
  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Return the indices of an instance's disks that are faulty on a node.

  @param cfg: cluster configuration, used to set the disk IDs
  @param rpc_runner: RPC runner to query the mirror status with
  @param instance: the instance whose disks are checked
  @param node_name: the node on which to check the disks
  @param prereq: passed to C{Raise}; whether RPC failures are prereq errors
  @return: list of indices of disks whose local disk status is faulty

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # post-init hooks run only on the master node
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # no nodes: the cluster is going away
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      # best-effort: log but do not abort the destruction
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
1705 def _VerifyCertificate(filename):
1706 """Verifies a certificate for L{LUClusterVerifyConfig}.
1708 @type filename: string
1709 @param filename: Path to PEM file
1713 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1714 utils.ReadFile(filename))
1715 except Exception, err: # pylint: disable=W0703
1716 return (LUClusterVerifyConfig.ETYPE_ERROR,
1717 "Failed to load X509 certificate %s: %s" % (filename, err))
1720 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1721 constants.SSL_CERT_EXPIRATION_ERROR)
1724 fnamemsg = "While verifying %s: %s" % (filename, msg)
1729 return (None, fnamemsg)
1730 elif errcode == utils.CERT_WARNING:
1731 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1732 elif errcode == utils.CERT_ERROR:
1733 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1735 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      # verify only the requested group, no global config check
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # only the config-verify opcode lacks skip_checks
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1873 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1874 """Verifies the cluster config.
1879 def _VerifyHVP(self, hvp_data):
1880 """Verifies locally the syntax of the hypervisor parameters.
1883 for item, hv_name, hv_params in hvp_data:
1884 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1887 hv_class = hypervisor.GetHypervisor(hv_name)
1888 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1889 hv_class.CheckParameterSyntax(hv_params)
1890 except errors.GenericError, err:
1891 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1893 def ExpandNames(self):
1894 # Information can be safely retrieved as the BGL is acquired in exclusive
1896 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1897 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1898 self.all_node_info = self.cfg.GetAllNodesInfo()
1899 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1900 self.needed_locks = {}
1902 def Exec(self, feedback_fn):
1903 """Verify integrity of cluster, performing various test on nodes.
1907 self._feedback_fn = feedback_fn
1909 feedback_fn("* Verifying cluster config")
1911 for msg in self.cfg.VerifyConfig():
1912 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1914 feedback_fn("* Verifying cluster certificate files")
1916 for cert_filename in constants.ALL_CERT_FILES:
1917 (errcode, msg) = _VerifyCertificate(cert_filename)
1918 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1920 feedback_fn("* Verifying hypervisor parameters")
1922 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1923 self.all_inst_info.values()))
1925 feedback_fn("* Verifying all nodes belong to an existing group")
1927 # We do this verification here because, should this bogus circumstance
1928 # occur, it would never be caught by VerifyGroup, which only acts on
1929 # nodes/instances reachable from existing node groups.
1931 dangling_nodes = set(node.name for node in self.all_node_info.values()
1932 if node.group not in self.all_group_info)
1934 dangling_instances = {}
1935 no_node_instances = []
1937 for inst in self.all_inst_info.values():
1938 if inst.primary_node in dangling_nodes:
1939 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1940 elif inst.primary_node not in self.all_node_info:
1941 no_node_instances.append(inst.name)
1946 utils.CommaJoin(dangling_instances.get(node.name,
1948 for node in dangling_nodes]
1950 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1952 "the following nodes (and their instances) belong to a non"
1953 " existing group: %s", utils.CommaJoin(pretty_dangling))
1955 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1957 "the following instances have a non-existing primary-node:"
1958 " %s", utils.CommaJoin(no_node_instances))
1963 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1964 """Verifies the status of a node group.
1967 HPATH = "cluster-verify"
1968 HTYPE = constants.HTYPE_CLUSTER
1971 _HOOKS_INDENT_RE = re.compile("^", re.M)
1973 class NodeImage(object):
1974 """A class representing the logical and physical status of a node.
1977 @ivar name: the node name to which this object refers
1978 @ivar volumes: a structure as returned from
1979 L{ganeti.backend.GetVolumeList} (runtime)
1980 @ivar instances: a list of running instances (runtime)
1981 @ivar pinst: list of configured primary instances (config)
1982 @ivar sinst: list of configured secondary instances (config)
1983 @ivar sbp: dictionary of {primary-node: list of instances} for all
1984 instances for which this node is secondary (config)
1985 @ivar mfree: free memory, as reported by hypervisor (runtime)
1986 @ivar dfree: free disk, as reported by the node (runtime)
1987 @ivar offline: the offline status (config)
1988 @type rpc_fail: boolean
1989 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1990 not whether the individual keys were correct) (runtime)
1991 @type lvm_fail: boolean
1992 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1993 @type hyp_fail: boolean
1994 @ivar hyp_fail: whether the RPC call didn't return the instance list
1995 @type ghost: boolean
1996 @ivar ghost: whether this is a known node or not (config)
1997 @type os_fail: boolean
1998 @ivar os_fail: whether the RPC call didn't return valid OS data
2000 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2001 @type vm_capable: boolean
2002 @ivar vm_capable: whether the node can host instances
2005 def __init__(self, offline=False, name=None, vm_capable=True):
2014 self.offline = offline
2015 self.vm_capable = vm_capable
2016 self.rpc_fail = False
2017 self.lvm_fail = False
2018 self.hyp_fail = False
2020 self.os_fail = False
2023 def ExpandNames(self):
2024 # This raises errors.OpPrereqError on its own:
2025 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2027 # Get instances in node group; this is unsafe and needs verification later
2028 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2030 self.needed_locks = {
2031 locking.LEVEL_INSTANCE: inst_names,
2032 locking.LEVEL_NODEGROUP: [self.group_uuid],
2033 locking.LEVEL_NODE: [],
2036 self.share_locks = _ShareAll()
2038 def DeclareLocks(self, level):
2039 if level == locking.LEVEL_NODE:
2040 # Get members of node group; this is unsafe and needs verification later
2041 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2043 all_inst_info = self.cfg.GetAllInstancesInfo()
2045 # In Exec(), we warn about mirrored instances that have primary and
2046 # secondary living in separate node groups. To fully verify that
2047 # volumes for these instances are healthy, we will need to do an
2048 # extra call to their secondaries. We ensure here those nodes will
2050 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2051 # Important: access only the instances whose lock is owned
2052 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2053 nodes.update(all_inst_info[inst].secondary_nodes)
2055 self.needed_locks[locking.LEVEL_NODE] = nodes
2057 def CheckPrereq(self):
2058 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2059 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2061 group_nodes = set(self.group_info.members)
2062 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2065 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2067 unlocked_instances = \
2068 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2071 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2072 utils.CommaJoin(unlocked_nodes))
2074 if unlocked_instances:
2075 raise errors.OpPrereqError("Missing lock for instances: %s" %
2076 utils.CommaJoin(unlocked_instances))
2078 self.all_node_info = self.cfg.GetAllNodesInfo()
2079 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2081 self.my_node_names = utils.NiceSort(group_nodes)
2082 self.my_inst_names = utils.NiceSort(group_instances)
2084 self.my_node_info = dict((name, self.all_node_info[name])
2085 for name in self.my_node_names)
2087 self.my_inst_info = dict((name, self.all_inst_info[name])
2088 for name in self.my_inst_names)
2090 # We detect here the nodes that will need the extra RPC calls for verifying
2091 # split LV volumes; they should be locked.
2092 extra_lv_nodes = set()
2094 for inst in self.my_inst_info.values():
2095 if inst.disk_template in constants.DTS_INT_MIRROR:
2096 group = self.my_node_info[inst.primary_node].group
2097 for nname in inst.secondary_nodes:
2098 if self.all_node_info[nname].group != group:
2099 extra_lv_nodes.add(nname)
2101 unlocked_lv_nodes = \
2102 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2104 if unlocked_lv_nodes:
2105 raise errors.OpPrereqError("these nodes could be locked: %s" %
2106 utils.CommaJoin(unlocked_lv_nodes))
2107 self.extra_lv_nodes = list(extra_lv_nodes)
2109 def _VerifyNode(self, ninfo, nresult):
2110 """Perform some basic validation on data returned from a node.
2112 - check the result data structure is well formed and has all the
2114 - check ganeti version
2116 @type ninfo: L{objects.Node}
2117 @param ninfo: the node to check
2118 @param nresult: the results from the node
2120 @return: whether overall this call was successful (and we can expect
2121 reasonable values in the respose)
2125 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2127 # main result, nresult should be a non-empty dict
2128 test = not nresult or not isinstance(nresult, dict)
2129 _ErrorIf(test, constants.CV_ENODERPC, node,
2130 "unable to verify node: no data returned")
2134 # compares ganeti version
2135 local_version = constants.PROTOCOL_VERSION
2136 remote_version = nresult.get("version", None)
2137 test = not (remote_version and
2138 isinstance(remote_version, (list, tuple)) and
2139 len(remote_version) == 2)
2140 _ErrorIf(test, constants.CV_ENODERPC, node,
2141 "connection to node returned invalid data")
2145 test = local_version != remote_version[0]
2146 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2147 "incompatible protocol versions: master %s,"
2148 " node %s", local_version, remote_version[0])
2152 # node seems compatible, we can actually try to look into its results
2154 # full package version
2155 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2156 constants.CV_ENODEVERSION, node,
2157 "software version mismatch: master %s, node %s",
2158 constants.RELEASE_VERSION, remote_version[1],
2159 code=self.ETYPE_WARNING)
2161 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2162 if ninfo.vm_capable and isinstance(hyp_result, dict):
2163 for hv_name, hv_result in hyp_result.iteritems():
2164 test = hv_result is not None
2165 _ErrorIf(test, constants.CV_ENODEHV, node,
2166 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2168 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2169 if ninfo.vm_capable and isinstance(hvp_result, list):
2170 for item, hv_name, hv_result in hvp_result:
2171 _ErrorIf(True, constants.CV_ENODEHV, node,
2172 "hypervisor %s parameter verify failure (source %s): %s",
2173 hv_name, item, hv_result)
2175 test = nresult.get(constants.NV_NODESETUP,
2176 ["Missing NODESETUP results"])
2177 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
# Checks that the node's reported clock is within NODE_MAX_CLOCK_SKEW of the
# master's [RPC-start, RPC-end] window; reports CV_ENODETIME otherwise.
# NOTE(review): embedded numbering jumps (2196->2198, 2206->2210), so lines
# such as the `try:`, `node = ninfo.name`, the no-skew branch setting
# ntime_diff = None, and the final format argument are missing from this
# extract — confirm against the full source before editing.
2182 def _VerifyNodeTime(self, ninfo, nresult,
2183 nvinfo_starttime, nvinfo_endtime):
2184 """Check the node time.
2186 @type ninfo: L{objects.Node}
2187 @param ninfo: the node to check
2188 @param nresult: the remote results for the node
2189 @param nvinfo_starttime: the start time of the RPC call
2190 @param nvinfo_endtime: the end time of the RPC call
2194 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2196 ntime = nresult.get(constants.NV_TIME, None)
2198 ntime_merged = utils.MergeTime(ntime)
2199 except (ValueError, TypeError):
2200 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
# Skew is measured against the RPC window, not a single instant, because the
# remote call itself takes time.
2203 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2204 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2205 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2206 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2210 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2211 "Node time diverges by at least %s from master node time",
# Validates the node's LVM state: VG presence/size and PV name sanity.
# NOTE(review): gaps in embedded numbering (2220->2227, 2230->2232) — the
# docstring closer, `node = ninfo.name`, `test = vglist is None` and early
# returns appear to be missing from this extract.
2214 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2215 """Check the node LVM results.
2217 @type ninfo: L{objects.Node}
2218 @param ninfo: the node to check
2219 @param nresult: the remote results for the node
2220 @param vg_name: the configured VG name
2227 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2229 # checks vg existence and size > 20G
2230 vglist = nresult.get(constants.NV_VGLIST, None)
2232 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
# CheckVolumeGroupSize returns an error string (truthy) on failure, None on
# success, so vgstatus doubles as both the test and the message.
2234 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2235 constants.MIN_VG_SIZE)
2236 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2239 pvlist = nresult.get(constants.NV_PVLIST, None)
2240 test = pvlist is None
2241 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2243 # check that ':' is not present in PV names, since it's a
2244 # special character for lvcreate (denotes the range of PEs to
2246 for _, pvname, owner_vg in pvlist:
2247 test = ":" in pvname
2248 _ErrorIf(test, constants.CV_ENODELVM, node,
2249 "Invalid character ':' in PV '%s' of VG '%s'",
# Reports bridges that were expected on the node but missing, per the
# NV_BRIDGES RPC result (a list of missing bridge names).
# NOTE(review): numbering gaps (2258->2265, 2270->2272) indicate omitted
# lines (docstring closer, `node = ninfo.name`, early return on bad data).
2252 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2253 """Check the node bridges.
2255 @type ninfo: L{objects.Node}
2256 @param ninfo: the node to check
2257 @param nresult: the remote results for the node
2258 @param bridges: the expected list of bridges
2265 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2267 missing = nresult.get(constants.NV_BRIDGES, None)
2268 test = not isinstance(missing, list)
2269 _ErrorIf(test, constants.CV_ENODENET, node,
2270 "did not return valid bridge information")
2272 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2273 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
# Reports user scripts that the node found missing or non-executable
# (NV_USERSCRIPTS carries the list of broken script paths).
# NOTE(review): numbering gaps (2280->2285, 2287->2289) — `node = ninfo.name`
# and intermediate lines are missing from this extract.
2275 def _VerifyNodeUserScripts(self, ninfo, nresult):
2276 """Check the results of user scripts presence and executability on the node
2278 @type ninfo: L{objects.Node}
2279 @param ninfo: the node to check
2280 @param nresult: the remote results for the node
2285 test = not constants.NV_USERSCRIPTS in nresult
2286 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2287 "did not return user scripts information")
2289 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2291 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2292 "user scripts not present or not executable: %s" %
2293 utils.CommaJoin(sorted(broken_scripts)))
# Checks the three connectivity results a node returns: SSH reachability of
# peer nodes (NV_NODELIST), TCP reachability (NV_NODENETTEST), and master-IP
# reachability (NV_MASTERIP). Each failing peer produces its own error.
# NOTE(review): numbering gaps (2300->2304, 2320->2322) — `node = ninfo.name`
# and the `for anode in nlist:` loop header are missing from this extract.
2295 def _VerifyNodeNetwork(self, ninfo, nresult):
2296 """Check the node network connectivity results.
2298 @type ninfo: L{objects.Node}
2299 @param ninfo: the node to check
2300 @param nresult: the remote results for the node
2304 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2306 test = constants.NV_NODELIST not in nresult
2307 _ErrorIf(test, constants.CV_ENODESSH, node,
2308 "node hasn't returned node ssh connectivity data")
# A non-empty NV_NODELIST dict maps peer name -> failure message.
2310 if nresult[constants.NV_NODELIST]:
2311 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2312 _ErrorIf(True, constants.CV_ENODESSH, node,
2313 "ssh communication with node '%s': %s", a_node, a_msg)
2315 test = constants.NV_NODENETTEST not in nresult
2316 _ErrorIf(test, constants.CV_ENODENET, node,
2317 "node hasn't returned node tcp connectivity data")
2319 if nresult[constants.NV_NODENETTEST]:
2320 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2322 _ErrorIf(True, constants.CV_ENODENET, node,
2323 "tcp communication with node '%s': %s",
2324 anode, nresult[constants.NV_NODENETTEST][anode])
2326 test = constants.NV_MASTERIP not in nresult
2327 _ErrorIf(test, constants.CV_ENODENET, node,
2328 "node hasn't returned node master IP reachability data")
# The master node itself failing to reach the master IP usually means the IP
# is not configured; other nodes just can't reach it.
2330 if not nresult[constants.NV_MASTERIP]:
2331 if node == self.master_node:
2332 msg = "the master node cannot reach the master IP (not configured?)"
2334 msg = "cannot reach the master IP"
2335 _ErrorIf(True, constants.CV_ENODENET, node, msg)
# Verifies a single instance: policy compliance, presence of its LVs on each
# involved node, running state on the primary, and per-disk status.
# NOTE(review): the signature's trailing parameter line (diskstatus) and
# several statements are missing from this extract (numbering gaps such as
# 2337->2339, 2369->2372) — confirm against the full source.
2337 def _VerifyInstance(self, instance, instanceconfig, node_image,
2339 """Verify an instance.
2341 This function checks to see if the required block devices are
2342 available on the instance's node.
2345 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2346 node_current = instanceconfig.primary_node
2348 node_vol_should = {}
2349 instanceconfig.MapLVsByNode(node_vol_should)
# Instance-policy violations are reported per instance against the group's
# effective ipolicy.
2351 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2352 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2353 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2355 for node in node_vol_should:
2356 n_img = node_image[node]
2357 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2358 # ignore missing volumes on offline or broken nodes
2360 for volume in node_vol_should[node]:
2361 test = volume not in n_img.volumes
2362 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2363 "volume %s missing on node %s", volume, node)
2365 if instanceconfig.admin_state == constants.ADMINST_UP:
2366 pri_img = node_image[node_current]
2367 test = instance not in pri_img.instances and not pri_img.offline
2368 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2369 "instance not running on its primary node %s",
# Flatten the per-node disk status dict into (node, success, status, idx)
# tuples for uniform per-disk reporting below.
2372 diskdata = [(nname, success, status, idx)
2373 for (nname, disks) in diskstatus.items()
2374 for idx, (success, status) in enumerate(disks)]
2376 for nname, success, bdev_status, idx in diskdata:
2377 # the 'ghost node' construction in Exec() ensures that we have a
2379 snode = node_image[nname]
2380 bad_snode = snode.ghost or snode.offline
2381 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2382 not success and not bad_snode,
2383 constants.CV_EINSTANCEFAULTYDISK, instance,
2384 "couldn't retrieve status for disk/%s on %s: %s",
2385 idx, nname, bdev_status)
2386 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2387 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2388 constants.CV_EINSTANCEFAULTYDISK, instance,
2389 "disk/%s on %s is faulty", idx, nname)
# Reports volumes present on healthy nodes that are neither expected by any
# instance (node_vol_should) nor matched by the reserved-names FieldSet.
2391 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2392 """Verify if there are any unknown volumes in the cluster.
2394 The .os, .swap and backup volumes are ignored. All other volumes are
2395 reported as unknown.
2397 @type reserved: L{ganeti.utils.FieldSet}
2398 @param reserved: a FieldSet of reserved volume names
2401 for node, n_img in node_image.items():
2402 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2403 # skip non-healthy nodes
2405 for volume in n_img.volumes:
2406 test = ((node not in node_vol_should or
2407 volume not in node_vol_should[node]) and
2408 not reserved.Matches(volume))
2409 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2410 "volume %s is unknown", volume)
# N+1 check: for every node, verify it has enough free memory to start all
# auto-balanced instances for which it is the secondary, per failing primary.
# NOTE(review): numbering gaps (2436->2438) — the `needed_mem = 0` reset per
# prinode appears to be among the lines omitted from this extract.
2412 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2413 """Verify N+1 Memory Resilience.
2415 Check that if one single node dies we can still start all the
2416 instances it was primary for.
2419 cluster_info = self.cfg.GetClusterInfo()
2420 for node, n_img in node_image.items():
2421 # This code checks that every node which is now listed as
2422 # secondary has enough memory to host all instances it is
2423 # supposed to should a single other node in the cluster fail.
2424 # FIXME: not ready for failover to an arbitrary node
2425 # FIXME: does not support file-backed instances
2426 # WARNING: we currently take into account down instances as well
2427 # as up ones, considering that even if they're down someone
2428 # might want to start them even in the event of a node failure.
2430 # we're skipping offline nodes from the N+1 warning, since
2431 # most likely we don't have good memory infromation from them;
2432 # we already list instances living on such nodes, and that's
2435 #TODO(dynmem): also consider ballooning out other instances
2436 for prinode, instances in n_img.sbp.items():
2438 for instance in instances:
2439 bep = cluster_info.FillBE(instance_cfg[instance])
2440 if bep[constants.BE_AUTO_BALANCE]:
2441 needed_mem += bep[constants.BE_MINMEM]
2442 test = n_img.mfree < needed_mem
2443 self._ErrorIf(test, constants.CV_ENODEN1, node,
2444 "not enough memory to accomodate instance failovers"
2445 " should node %s fail (%dMiB needed, %dMiB available)",
2446 prinode, needed_mem, n_img.mfree)
# Cross-node file-consistency check: builds the expected node set per file
# category (all / optional / master-candidate / vm-capable), then compares
# the checksums each node returned for NV_FILELIST.
# NOTE(review): this appears to be a classmethod (first param `cls`); the
# decorator line and several statements (e.g. the files_all entry in
# files2nodefn, `continue`s, the `unexpected` errorif condition line) are
# outside this extract — confirm before editing.
2449 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2450 (files_all, files_opt, files_mc, files_vm)):
2451 """Verifies file checksums collected from all nodes.
2453 @param errorif: Callback for reporting errors
2454 @param nodeinfo: List of L{objects.Node} objects
2455 @param master_node: Name of master node
2456 @param all_nvinfo: RPC results
2459 # Define functions determining which nodes to consider for a file
2462 (files_mc, lambda node: (node.master_candidate or
2463 node.name == master_node)),
2464 (files_vm, lambda node: node.vm_capable),
2467 # Build mapping from filename to list of nodes which should have the file
2469 for (files, fn) in files2nodefn:
2471 filenodes = nodeinfo
2473 filenodes = filter(fn, nodeinfo)
2474 nodefiles.update((filename,
2475 frozenset(map(operator.attrgetter("name"), filenodes)))
2476 for filename in files)
2478 assert set(nodefiles) == (files_all | files_mc | files_vm)
2480 fileinfo = dict((filename, {}) for filename in nodefiles)
2481 ignore_nodes = set()
2483 for node in nodeinfo:
2485 ignore_nodes.add(node.name)
2488 nresult = all_nvinfo[node.name]
2490 if nresult.fail_msg or not nresult.payload:
2493 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2495 test = not (node_files and isinstance(node_files, dict))
2496 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2497 "Node did not return file checksum data")
2499 ignore_nodes.add(node.name)
2502 # Build per-checksum mapping from filename to nodes having it
2503 for (filename, checksum) in node_files.items():
2504 assert filename in nodefiles
2505 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2507 for (filename, checksums) in fileinfo.items():
2508 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2510 # Nodes having the file
2511 with_file = frozenset(node_name
2512 for nodes in fileinfo[filename].values()
2513 for node_name in nodes) - ignore_nodes
2515 expected_nodes = nodefiles[filename] - ignore_nodes
2517 # Nodes missing file
2518 missing_file = expected_nodes - with_file
# Optional files must exist on all of their expected nodes or none of them.
2520 if filename in files_opt:
2522 errorif(missing_file and missing_file != expected_nodes,
2523 constants.CV_ECLUSTERFILECHECK, None,
2524 "File %s is optional, but it must exist on all or no"
2525 " nodes (not found on %s)",
2526 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2528 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2529 "File %s is missing from node(s) %s", filename,
2530 utils.CommaJoin(utils.NiceSort(missing_file)))
2532 # Warn if a node has a file it shouldn't
2533 unexpected = with_file - expected_nodes
2535 constants.CV_ECLUSTERFILECHECK, None,
2536 "File %s should not exist on node(s) %s",
2537 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2539 # See if there are multiple versions of the file
2540 test = len(checksums) > 1
2542 variants = ["variant %s on %s" %
2543 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2544 for (idx, (checksum, nodes)) in
2545 enumerate(sorted(checksums.items()))]
2549 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2550 "File %s found with %s different checksums (%s)",
2551 filename, len(checksums), "; ".join(variants))
# Verifies the node's DRBD state: the usermode helper matches the configured
# one, and the minors in use on the node agree with the cluster's DRBD map.
# NOTE(review): signature is truncated (drbd_map parameter line missing) and
# numbering gaps (2573->2575, 2583->2585) hide lines such as the early
# return, `node_drbd = {}` and the `else:` of the ghost-instance branch.
2553 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2555 """Verifies and the node DRBD status.
2557 @type ninfo: L{objects.Node}
2558 @param ninfo: the node to check
2559 @param nresult: the remote results for the node
2560 @param instanceinfo: the dict of instances
2561 @param drbd_helper: the configured DRBD usermode helper
2562 @param drbd_map: the DRBD map as returned by
2563 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2567 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2570 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2571 test = (helper_result == None)
2572 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2573 "no drbd usermode helper returned")
2575 status, payload = helper_result
2577 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2578 "drbd usermode helper check unsuccessful: %s", payload)
2579 test = status and (payload != drbd_helper)
2580 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2581 "wrong drbd usermode helper: %s", payload)
2583 # compute the DRBD minors
2585 for minor, instance in drbd_map[node].items():
2586 test = instance not in instanceinfo
2587 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2588 "ghost instance '%s' in temporary DRBD map", instance)
2589 # ghost instance should not be running, but otherwise we
2590 # don't give double warnings (both ghost instance and
2591 # unallocated minor in use)
2593 node_drbd[minor] = (instance, False)
2595 instance = instanceinfo[instance]
2596 node_drbd[minor] = (instance.name,
2597 instance.admin_state == constants.ADMINST_UP)
2599 # and now check them
2600 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2601 test = not isinstance(used_minors, (tuple, list))
2602 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2603 "cannot parse drbd status file: %s", str(used_minors))
2605 # we cannot check drbd status
# Cross-check in both directions: expected minors must be active, and active
# minors must be allocated in the map.
2608 for minor, (iname, must_exist) in node_drbd.items():
2609 test = minor not in used_minors and must_exist
2610 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2611 "drbd minor %d of instance %s is not active", minor, iname)
2612 for minor in used_minors:
2613 test = minor not in node_drbd
2614 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2615 "unallocated drbd minor %d is in use", minor)
# Parses the NV_OSLIST RPC payload (7-field entries) into nimg.oslist, a
# dict mapping OS name -> list of (path, status, diagnose, variants,
# parameters, api_versions) tuples.
# NOTE(review): numbering gaps (2635->2644, 2647->2650) — the os_dict
# initialization, nimg.os_fail handling and the `os_dict[name] = []` branch
# are outside this extract.
2617 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2618 """Builds the node OS structures.
2620 @type ninfo: L{objects.Node}
2621 @param ninfo: the node to check
2622 @param nresult: the remote results for the node
2623 @param nimg: the node image object
2627 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2629 remote_os = nresult.get(constants.NV_OSLIST, None)
2630 test = (not isinstance(remote_os, list) or
2631 not compat.all(isinstance(v, list) and len(v) == 7
2632 for v in remote_os))
2634 _ErrorIf(test, constants.CV_ENODEOS, node,
2635 "node hasn't returned valid OS data")
2644 for (name, os_path, status, diagnose,
2645 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2647 if name not in os_dict:
2650 # parameters is a list of lists instead of list of tuples due to
2651 # JSON lacking a real tuple type, fix it:
2652 parameters = [tuple(v) for v in parameters]
2653 os_dict[name].append((os_path, status, diagnose,
2654 set(variants), set(parameters), set(api_ver)))
2656 nimg.oslist = os_dict
# Compares this node's OS list against a reference node's: per-OS validity,
# duplicate entries, extra OSes, and API/variant/parameter differences.
# NOTE(review): numbering gaps (2684->2688, 2689->2691) — the continue after
# "Extra OS" and the invalid-base-OS skip are outside this extract.
2658 def _VerifyNodeOS(self, ninfo, nimg, base):
2659 """Verifies the node OS list.
2661 @type ninfo: L{objects.Node}
2662 @param ninfo: the node to check
2663 @param nimg: the node image object
2664 @param base: the 'template' node we match against (e.g. from the master)
2668 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2670 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2672 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2673 for os_name, os_data in nimg.oslist.items():
2674 assert os_data, "Empty OS status for OS %s?!" % os_name
2675 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2676 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2677 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2678 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2679 "OS '%s' has multiple entries (first one shadows the rest): %s",
2680 os_name, utils.CommaJoin([v[0] for v in os_data]))
2681 # comparisons with the 'base' image
2682 test = os_name not in base.oslist
2683 _ErrorIf(test, constants.CV_ENODEOS, node,
2684 "Extra OS %s not present on reference node (%s)",
2688 assert base.oslist[os_name], "Base node has empty OS status?"
2689 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2691 # base OS is invalid, skipping
2693 for kind, a, b in [("API version", f_api, b_api),
2694 ("variants list", f_var, b_var),
2695 ("parameters", beautify_params(f_param),
2696 beautify_params(b_param))]:
2697 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2698 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2699 kind, os_name, base.name,
2700 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2702 # check any missing OSes
2703 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2704 _ErrorIf(missing, constants.CV_ENODEOS, node,
2705 "OSes present on reference node %s but missing on this node: %s",
2706 base.name, utils.CommaJoin(missing))
# Checks out-of-band helper path problems on master/master-candidate nodes;
# each NV_OOB_PATHS entry is a failure message (truthy -> error).
2708 def _VerifyOob(self, ninfo, nresult):
2709 """Verifies out of band functionality of a node.
2711 @type ninfo: L{objects.Node}
2712 @param ninfo: the node to check
2713 @param nresult: the remote results for the node
2717 # We just have to verify the paths on master and/or master candidates
2718 # as the oob helper is invoked on the master
2719 if ((ninfo.master_candidate or ninfo.master_capable) and
2720 constants.NV_OOB_PATHS in nresult):
2721 for path_result in nresult[constants.NV_OOB_PATHS]:
2722 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
# Populates nimg.volumes from NV_LVLIST; a string payload signals an LVM
# error, a non-dict payload a failed RPC. lvm_fail is set pessimistically
# first and only cleared on success.
# NOTE(review): numbering gaps (2741->2744, 2749->2751) — the vg_name guard
# and the `else:` before the success branch are outside this extract.
2724 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2725 """Verifies and updates the node volume data.
2727 This function will update a L{NodeImage}'s internal structures
2728 with data from the remote call.
2730 @type ninfo: L{objects.Node}
2731 @param ninfo: the node to check
2732 @param nresult: the remote results for the node
2733 @param nimg: the node image object
2734 @param vg_name: the configured VG name
2738 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2740 nimg.lvm_fail = True
2741 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2744 elif isinstance(lvdata, basestring):
2745 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2746 utils.SafeEncode(lvdata))
2747 elif not isinstance(lvdata, dict):
2748 _ErrorIf(True, constants.CV_ENODELVM, node,
2749 "rpc call to node failed (lvlist)")
2751 nimg.volumes = lvdata
2752 nimg.lvm_fail = False
# Stores the node's instance list (NV_INSTANCELIST) in nimg.instances, or
# marks the hypervisor RPC as failed (nimg.hyp_fail) if the payload is not a
# list.
# NOTE(review): numbering gap (2771->2773) — the `if test:`/`else:` framing
# around hyp_fail/instances is outside this extract.
2754 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2755 """Verifies and updates the node instance list.
2757 If the listing was successful, then updates this node's instance
2758 list. Otherwise, it marks the RPC call as failed for the instance
2761 @type ninfo: L{objects.Node}
2762 @param ninfo: the node to check
2763 @param nresult: the remote results for the node
2764 @param nimg: the node image object
2767 idata = nresult.get(constants.NV_INSTANCELIST, None)
2768 test = not isinstance(idata, list)
2769 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2770 "rpc call to node failed (instancelist): %s",
2771 utils.SafeEncode(str(idata)))
2773 nimg.hyp_fail = True
2775 nimg.instances = idata
# Fills nimg.mfree (free memory from NV_HVINFO) and nimg.dfree (free space
# of the configured VG from NV_VGLIST), reporting RPC/LVM errors on the way.
# NOTE(review): numbering gaps (2794->2797, 2808->2811) hide the `try:`
# lines wrapping the int() conversions in this extract.
2777 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2778 """Verifies and computes a node information map
2780 @type ninfo: L{objects.Node}
2781 @param ninfo: the node to check
2782 @param nresult: the remote results for the node
2783 @param nimg: the node image object
2784 @param vg_name: the configured VG name
2788 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2790 # try to read free memory (from the hypervisor)
2791 hv_info = nresult.get(constants.NV_HVINFO, None)
2792 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2793 _ErrorIf(test, constants.CV_ENODEHV, node,
2794 "rpc call to node failed (hvinfo)")
2797 nimg.mfree = int(hv_info["memory_free"])
2798 except (ValueError, TypeError):
2799 _ErrorIf(True, constants.CV_ENODERPC, node,
2800 "node returned invalid nodeinfo, check hypervisor")
2802 # FIXME: devise a free space model for file based instances as well
2803 if vg_name is not None:
2804 test = (constants.NV_VGLIST not in nresult or
2805 vg_name not in nresult[constants.NV_VGLIST])
2806 _ErrorIf(test, constants.CV_ENODELVM, node,
2807 "node didn't return data for the volume group '%s'"
2808 " - it is either missing or broken", vg_name)
2811 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2812 except (ValueError, TypeError):
2813 _ErrorIf(True, constants.CV_ENODERPC, node,
2814 "node returned invalid LVM info, check LVM status")
# Collects per-disk mirror status for all instances on the given nodes via
# one multi-node blockdev_getmirrorstatus RPC, producing
# instdisk[instance][node] = [(success, payload), ...]. Diskless instances
# get empty entries; offline/failed nodes get (False, reason) placeholders.
# NOTE(review): numbering gaps (2845->2848, 2873->2876, 2888->2891) —
# several control-flow lines (offline check, msg = nres.fail_msg, data
# append on valid entries) are outside this extract.
2816 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2817 """Gets per-disk status information for all instances.
2819 @type nodelist: list of strings
2820 @param nodelist: Node names
2821 @type node_image: dict of (name, L{objects.Node})
2822 @param node_image: Node objects
2823 @type instanceinfo: dict of (name, L{objects.Instance})
2824 @param instanceinfo: Instance objects
2825 @rtype: {instance: {node: [(succes, payload)]}}
2826 @return: a dictionary of per-instance dictionaries with nodes as
2827 keys and disk information as values; the disk information is a
2828 list of tuples (success, payload)
2831 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2834 node_disks_devonly = {}
2835 diskless_instances = set()
2836 diskless = constants.DT_DISKLESS
2838 for nname in nodelist:
2839 node_instances = list(itertools.chain(node_image[nname].pinst,
2840 node_image[nname].sinst))
2841 diskless_instances.update(inst for inst in node_instances
2842 if instanceinfo[inst].disk_template == diskless)
2843 disks = [(inst, disk)
2844 for inst in node_instances
2845 for disk in instanceinfo[inst].disks]
2848 # No need to collect data
2851 node_disks[nname] = disks
2853 # Creating copies as SetDiskID below will modify the objects and that can
2854 # lead to incorrect data returned from nodes
2855 devonly = [dev.Copy() for (_, dev) in disks]
2858 self.cfg.SetDiskID(dev, nname)
2860 node_disks_devonly[nname] = devonly
2862 assert len(node_disks) == len(node_disks_devonly)
2864 # Collect data from all nodes with disks
2865 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2868 assert len(result) == len(node_disks)
2872 for (nname, nres) in result.items():
2873 disks = node_disks[nname]
2876 # No data from this node
2877 data = len(disks) * [(False, "node offline")]
2880 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2881 "while getting disk information: %s", msg)
2883 # No data from this node
2884 data = len(disks) * [(False, msg)]
2887 for idx, i in enumerate(nres.payload):
2888 if isinstance(i, (tuple, list)) and len(i) == 2:
2891 logging.warning("Invalid result from node %s, entry %d: %s",
2893 data.append((False, "Invalid result from the remote node"))
2895 for ((inst, _), status) in zip(disks, data):
2896 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2898 # Add empty entries for diskless instances.
2899 for inst in diskless_instances:
2900 assert inst not in instdisk
# Sanity check the shape of the result structure before returning it.
2903 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2904 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2905 compat.all(isinstance(s, (tuple, list)) and
2906 len(s) == 2 for s in statuses)
2907 for inst, nnames in instdisk.items()
2908 for nname, statuses in nnames.items())
2909 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
# Builds one endless (itertools.cycle) iterator of candidate SSH-check host
# names per foreign node group (nodes outside group_uuid), grouped and
# sorted by group for deterministic selection.
# NOTE(review): presumably a @staticmethod (no self/cls) — the decorator and
# part of the filter condition (numbering gap 2919->2921) are outside this
# extract.
2914 def _SshNodeSelector(group_uuid, all_nodes):
2915 """Create endless iterators for all potential SSH check hosts.
2918 nodes = [node for node in all_nodes
2919 if (node.group != group_uuid and
2921 keyfunc = operator.attrgetter("group")
2923 return map(itertools.cycle,
2924 [sorted(map(operator.attrgetter("name"), names))
2925 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
# Maps each online node of the group to a sorted list of remote targets: one
# node drawn (round-robin via iterator.next()) from every other group.
2929 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2930 """Choose which nodes should talk to which other nodes.
2932 We will make nodes contact all nodes in their group, and one node from
2935 @warning: This algorithm has a known issue if one node group is much
2936 smaller than others (e.g. just one node). In such a case all other
2937 nodes will talk to the single node.
2940 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2941 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2943 return (online_nodes,
2944 dict((name, sorted([i.next() for i in sel]))
2945 for name in online_nodes))
# Hook environment for cluster-verify: cluster tags plus per-node tag
# variables (NODE_TAGS_<name>) for every node in this group.
# NOTE(review): numbering gaps (2951->2955, 2959->2963) — the env dict
# opening and the return statement are outside this extract.
2947 def BuildHooksEnv(self):
2950 Cluster-Verify hooks just ran in the post phase and their failure makes
2951 the output be logged in the verify output and the verification to fail.
2955 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2958 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2959 for node in self.my_node_info.values())
# Hooks run on no pre-nodes and on all nodes of this group post-phase.
2963 def BuildHooksNodes(self):
2964 """Build hooks nodes.
2967 return ([], self.my_node_names)
2969 def Exec(self, feedback_fn):
2970 """Verify integrity of the node group, performing various test on nodes.
2973 # This method has too many local variables. pylint: disable=R0914
2974 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2976 if not self.my_node_names:
2978 feedback_fn("* Empty node group, skipping verification")
2982 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2983 verbose = self.op.verbose
2984 self._feedback_fn = feedback_fn
2986 vg_name = self.cfg.GetVGName()
2987 drbd_helper = self.cfg.GetDRBDHelper()
2988 cluster = self.cfg.GetClusterInfo()
2989 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2990 hypervisors = cluster.enabled_hypervisors
2991 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2993 i_non_redundant = [] # Non redundant instances
2994 i_non_a_balanced = [] # Non auto-balanced instances
2995 i_offline = 0 # Count of offline instances
2996 n_offline = 0 # Count of offline nodes
2997 n_drained = 0 # Count of nodes being drained
2998 node_vol_should = {}
3000 # FIXME: verify OS list
3003 filemap = _ComputeAncillaryFiles(cluster, False)
3005 # do local checksums
3006 master_node = self.master_node = self.cfg.GetMasterNode()
3007 master_ip = self.cfg.GetMasterIP()
3009 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3012 if self.cfg.GetUseExternalMipScript():
3013 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3015 node_verify_param = {
3016 constants.NV_FILELIST:
3017 utils.UniqueSequence(filename
3018 for files in filemap
3019 for filename in files),
3020 constants.NV_NODELIST:
3021 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3022 self.all_node_info.values()),
3023 constants.NV_HYPERVISOR: hypervisors,
3024 constants.NV_HVPARAMS:
3025 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3026 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3027 for node in node_data_list
3028 if not node.offline],
3029 constants.NV_INSTANCELIST: hypervisors,
3030 constants.NV_VERSION: None,
3031 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3032 constants.NV_NODESETUP: None,
3033 constants.NV_TIME: None,
3034 constants.NV_MASTERIP: (master_node, master_ip),
3035 constants.NV_OSLIST: None,
3036 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3037 constants.NV_USERSCRIPTS: user_scripts,
3040 if vg_name is not None:
3041 node_verify_param[constants.NV_VGLIST] = None
3042 node_verify_param[constants.NV_LVLIST] = vg_name
3043 node_verify_param[constants.NV_PVLIST] = [vg_name]
3044 node_verify_param[constants.NV_DRBDLIST] = None
3047 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3050 # FIXME: this needs to be changed per node-group, not cluster-wide
3052 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3053 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3054 bridges.add(default_nicpp[constants.NIC_LINK])
3055 for instance in self.my_inst_info.values():
3056 for nic in instance.nics:
3057 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3058 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3059 bridges.add(full_nic[constants.NIC_LINK])
3062 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3064 # Build our expected cluster state
3065 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3067 vm_capable=node.vm_capable))
3068 for node in node_data_list)
3072 for node in self.all_node_info.values():
3073 path = _SupportsOob(self.cfg, node)
3074 if path and path not in oob_paths:
3075 oob_paths.append(path)
3078 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3080 for instance in self.my_inst_names:
3081 inst_config = self.my_inst_info[instance]
3083 for nname in inst_config.all_nodes:
3084 if nname not in node_image:
3085 gnode = self.NodeImage(name=nname)
3086 gnode.ghost = (nname not in self.all_node_info)
3087 node_image[nname] = gnode
3089 inst_config.MapLVsByNode(node_vol_should)
3091 pnode = inst_config.primary_node
3092 node_image[pnode].pinst.append(instance)
3094 for snode in inst_config.secondary_nodes:
3095 nimg = node_image[snode]
3096 nimg.sinst.append(instance)
3097 if pnode not in nimg.sbp:
3098 nimg.sbp[pnode] = []
3099 nimg.sbp[pnode].append(instance)
3101 # At this point, we have the in-memory data structures complete,
3102 # except for the runtime information, which we'll gather next
3104 # Due to the way our RPC system works, exact response times cannot be
3105 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3106 # time before and after executing the request, we can at least have a time
3108 nvinfo_starttime = time.time()
3109 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3111 self.cfg.GetClusterName())
3112 nvinfo_endtime = time.time()
3114 if self.extra_lv_nodes and vg_name is not None:
3116 self.rpc.call_node_verify(self.extra_lv_nodes,
3117 {constants.NV_LVLIST: vg_name},
3118 self.cfg.GetClusterName())
3120 extra_lv_nvinfo = {}
3122 all_drbd_map = self.cfg.ComputeDRBDMap()
3124 feedback_fn("* Gathering disk information (%s nodes)" %
3125 len(self.my_node_names))
3126 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3129 feedback_fn("* Verifying configuration file consistency")
3131 # If not all nodes are being checked, we need to make sure the master node
3132 # and a non-checked vm_capable node are in the list.
3133 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3135 vf_nvinfo = all_nvinfo.copy()
3136 vf_node_info = list(self.my_node_info.values())
3137 additional_nodes = []
3138 if master_node not in self.my_node_info:
3139 additional_nodes.append(master_node)
3140 vf_node_info.append(self.all_node_info[master_node])
3141 # Add the first vm_capable node we find which is not included
3142 for node in absent_nodes:
3143 nodeinfo = self.all_node_info[node]
3144 if nodeinfo.vm_capable and not nodeinfo.offline:
3145 additional_nodes.append(node)
3146 vf_node_info.append(self.all_node_info[node])
3148 key = constants.NV_FILELIST
3149 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3150 {key: node_verify_param[key]},
3151 self.cfg.GetClusterName()))
3153 vf_nvinfo = all_nvinfo
3154 vf_node_info = self.my_node_info.values()
3156 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3158 feedback_fn("* Verifying node status")
3162 for node_i in node_data_list:
3164 nimg = node_image[node]
3168 feedback_fn("* Skipping offline node %s" % (node,))
3172 if node == master_node:
3174 elif node_i.master_candidate:
3175 ntype = "master candidate"
3176 elif node_i.drained:
3182 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3184 msg = all_nvinfo[node].fail_msg
3185 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3188 nimg.rpc_fail = True
3191 nresult = all_nvinfo[node].payload
3193 nimg.call_ok = self._VerifyNode(node_i, nresult)
3194 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3195 self._VerifyNodeNetwork(node_i, nresult)
3196 self._VerifyNodeUserScripts(node_i, nresult)
3197 self._VerifyOob(node_i, nresult)
3200 self._VerifyNodeLVM(node_i, nresult, vg_name)
3201 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3204 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3205 self._UpdateNodeInstances(node_i, nresult, nimg)
3206 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3207 self._UpdateNodeOS(node_i, nresult, nimg)
3209 if not nimg.os_fail:
3210 if refos_img is None:
3212 self._VerifyNodeOS(node_i, nimg, refos_img)
3213 self._VerifyNodeBridges(node_i, nresult, bridges)
3215 # Check whether all running instancies are primary for the node. (This
3216 # can no longer be done from _VerifyInstance below, since some of the
3217 # wrong instances could be from other node groups.)
3218 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3220 for inst in non_primary_inst:
3221 # FIXME: investigate best way to handle offline insts
3222 if inst.admin_state == constants.ADMINST_OFFLINE:
3224 feedback_fn("* Skipping offline instance %s" % inst.name)
3227 test = inst in self.all_inst_info
3228 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3229 "instance should not run on node %s", node_i.name)
3230 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3231 "node is running unknown instance %s", inst)
3233 for node, result in extra_lv_nvinfo.items():
3234 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3235 node_image[node], vg_name)
3237 feedback_fn("* Verifying instance status")
3238 for instance in self.my_inst_names:
3240 feedback_fn("* Verifying instance %s" % instance)
3241 inst_config = self.my_inst_info[instance]
3242 self._VerifyInstance(instance, inst_config, node_image,
3244 inst_nodes_offline = []
3246 pnode = inst_config.primary_node
3247 pnode_img = node_image[pnode]
3248 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3249 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3250 " primary node failed", instance)
3252 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3254 constants.CV_EINSTANCEBADNODE, instance,
3255 "instance is marked as running and lives on offline node %s",
3256 inst_config.primary_node)
3258 # If the instance is non-redundant we cannot survive losing its primary
3259 # node, so we are not N+1 compliant. On the other hand we have no disk
3260 # templates with more than one secondary so that situation is not well
3262 # FIXME: does not support file-backed instances
3263 if not inst_config.secondary_nodes:
3264 i_non_redundant.append(instance)
3266 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3267 constants.CV_EINSTANCELAYOUT,
3268 instance, "instance has multiple secondary nodes: %s",
3269 utils.CommaJoin(inst_config.secondary_nodes),
3270 code=self.ETYPE_WARNING)
3272 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3273 pnode = inst_config.primary_node
3274 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3275 instance_groups = {}
3277 for node in instance_nodes:
3278 instance_groups.setdefault(self.all_node_info[node].group,
3282 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3283 # Sort so that we always list the primary node first.
3284 for group, nodes in sorted(instance_groups.items(),
3285 key=lambda (_, nodes): pnode in nodes,
3288 self._ErrorIf(len(instance_groups) > 1,
3289 constants.CV_EINSTANCESPLITGROUPS,
3290 instance, "instance has primary and secondary nodes in"
3291 " different groups: %s", utils.CommaJoin(pretty_list),
3292 code=self.ETYPE_WARNING)
3294 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3295 i_non_a_balanced.append(instance)
3297 for snode in inst_config.secondary_nodes:
3298 s_img = node_image[snode]
3299 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3300 snode, "instance %s, connection to secondary node failed",
3304 inst_nodes_offline.append(snode)
3306 # warn that the instance lives on offline nodes
3307 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3308 "instance has offline secondary node(s) %s",
3309 utils.CommaJoin(inst_nodes_offline))
3310 # ... or ghost/non-vm_capable nodes
3311 for node in inst_config.all_nodes:
3312 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3313 instance, "instance lives on ghost node %s", node)
3314 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3315 instance, "instance lives on non-vm_capable node %s", node)
3317 feedback_fn("* Verifying orphan volumes")
3318 reserved = utils.FieldSet(*cluster.reserved_lvs)
3320 # We will get spurious "unknown volume" warnings if any node of this group
3321 # is secondary for an instance whose primary is in another group. To avoid
3322 # them, we find these instances and add their volumes to node_vol_should.
3323 for inst in self.all_inst_info.values():
3324 for secondary in inst.secondary_nodes:
3325 if (secondary in self.my_node_info
3326 and inst.name not in self.my_inst_info):
3327 inst.MapLVsByNode(node_vol_should)
3330 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3332 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3333 feedback_fn("* Verifying N+1 Memory redundancy")
3334 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3336 feedback_fn("* Other Notes")
3338 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3339 % len(i_non_redundant))
3341 if i_non_a_balanced:
3342 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3343 % len(i_non_a_balanced))
3346 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3349 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3352 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
# NOTE(review): extraction is lossy — gaps in the embedded line numbers
# (e.g. 3358, 3368-3370, 3374-3375, 3380, 3383, 3389-3390, 3395) mean the
# visible statements are NOT contiguous; do not assume adjacency.
3356 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3357 """Analyze the post-hooks' result
3359 This method analyses the hook result, handles it, and sends some
3360 nicely-formatted feedback back to the user.
3362 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3363 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3364 @param hooks_results: the results of the multi-node hooks rpc call
3365 @param feedback_fn: function used send feedback back to the caller
3366 @param lu_result: previous Exec result
3367 @return: the new Exec result, based on the previous result
3371 # We only really run POST phase hooks, only for non-empty groups,
3372 # and are only interested in their results
# Empty-group guard: the body of this branch (lines 3374-3375) is missing
# from the extraction; presumably it returns early — TODO confirm.
3373 if not self.my_node_names:
3376 elif phase == constants.HOOKS_PHASE_POST:
3377 # Used to change hooks' output to proper indentation
3378 feedback_fn("* Hooks Results")
3379 assert hooks_results, "invalid result from hooks"
# Per-node loop: flag RPC failures (unless the node is offline), then
# inspect each script's result in the node's payload.
3381 for node_name in hooks_results:
3382 res = hooks_results[node_name]
# `msg` is bound on a missing line (3383); presumably res.fail_msg — verify.
3384 test = msg and not res.offline
3385 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3386 "Communication failure in hooks execution: %s", msg)
3387 if res.offline or msg:
3388 # No need to investigate payload if node is offline or gave
# (continuation and the skip statement, lines 3389-3390, are elided)
# Each payload entry is (script, hook-result-code, output); a HKR_FAIL
# code is reported as a CV_ENODEHOOKS error with re-indented output.
3391 for script, hkr, output in res.payload:
3392 test = hkr == constants.HKR_FAIL
3393 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3394 "Script %s failed, output:", script)
# _HOOKS_INDENT_RE re-indents the script output before echoing it.
3396 output = self._HOOKS_INDENT_RE.sub(" ", output)
3397 feedback_fn("%s" % output)
# Cluster-wide disk verification: fans out one OpGroupVerifyDisks job per
# node group rather than doing the work inline.
3403 class LUClusterVerifyDisks(NoHooksLU):
3404 """Verifies the cluster disks status.
# All locks are shared (_ShareAll) because this LU only reads the node-group
# list; the real verification happens in the per-group jobs it submits.
# NOTE(review): the closing brace of needed_locks (line ~3413) is missing
# from this extraction.
3409 def ExpandNames(self):
3410 self.share_locks = _ShareAll()
3411 self.needed_locks = {
3412 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3415 def Exec(self, feedback_fn):
3416 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3418 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3419 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3420 for group in group_names])
# Per-group disk verification: checks that the LVs backing running
# instances in one node group are actually online on their nodes.
# NOTE(review): extraction is lossy — several lines (e.g. 3432, 3443, 3448,
# 3451, 3456-3457, 3521-3522, 3527-3528) are missing; statements shown are
# not necessarily adjacent.
3423 class LUGroupVerifyDisks(NoHooksLU):
3424 """Verifies the status of all disks in a node group.
3429 def ExpandNames(self):
3430 # Raises errors.OpPrereqError on its own if group can't be found
3431 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3433 self.share_locks = _ShareAll()
3434 self.needed_locks = {
3435 locking.LEVEL_INSTANCE: [],
3436 locking.LEVEL_NODEGROUP: [],
3437 locking.LEVEL_NODE: [],
# Locks are acquired top-down (instance -> nodegroup -> node); optimistic
# acquisitions are re-validated in CheckPrereq below.
3440 def DeclareLocks(self, level):
3441 if level == locking.LEVEL_INSTANCE:
3442 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3444 # Lock instances optimistically, needs verification once node and group
3445 # locks have been acquired
3446 self.needed_locks[locking.LEVEL_INSTANCE] = \
3447 self.cfg.GetNodeGroupInstances(self.group_uuid)
3449 elif level == locking.LEVEL_NODEGROUP:
3450 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3452 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3453 set([self.group_uuid] +
3454 # Lock all groups used by instances optimistically; this requires
3455 # going via the node before it's locked, requiring verification
3458 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3459 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3461 elif level == locking.LEVEL_NODE:
3462 # This will only lock the nodes in the group to be verified which contain
3464 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3465 self._LockInstancesNodes()
3467 # Lock all nodes in group to be verified
3468 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3469 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3470 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
# Re-validates the optimistic locks: the locked instances must still belong
# to this group and their nodes must all be covered by our node locks.
3472 def CheckPrereq(self):
3473 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3474 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3475 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3477 assert self.group_uuid in owned_groups
3479 # Check if locked instances are still correct
3480 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3482 # Get instance information
3483 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3485 # Check if node groups for locked instances are still correct
3486 for (instance_name, inst) in self.instances.items():
3487 assert owned_nodes.issuperset(inst.all_nodes), \
3488 "Instance %s's nodes changed while we kept the lock" % instance_name
3490 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3493 assert self.group_uuid in inst_groups, \
3494 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3496 def Exec(self, feedback_fn):
3497 """Verify integrity of cluster disks.
3499 @rtype: tuple of three items
3500 @return: a tuple of (dict of node-to-node_error, list of instances
3501 which need activate-disks, dict of instance: (node, volume) for
# Build the expected (node, lv_name) -> instance map for running instances
# only, then query actual LVs from the vm-capable locked nodes.
3506 res_instances = set()
3509 nv_dict = _MapInstanceDisksToNodes([inst
3510 for inst in self.instances.values()
3511 if inst.admin_state == constants.ADMINST_UP])
3514 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3515 set(self.cfg.GetVmCapableNodeList()))
3517 node_lvs = self.rpc.call_lv_list(nodes, [])
3519 for (node, node_res) in node_lvs.items():
# Offline nodes are skipped; RPC failures are recorded per node, not fatal.
3520 if node_res.offline:
3523 msg = node_res.fail_msg
3525 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3526 res_nodes[node] = msg
# LV payload entries are (?, ?, lv_online); an offline LV belonging to a
# tracked instance means that instance needs activate-disks.
3529 for lv_name, (_, _, lv_online) in node_res.payload.items():
3530 inst = nv_dict.pop((node, lv_name), None)
3531 if not (lv_online or inst is None):
3532 res_instances.add(inst)
3534 # any leftover items in nv_dict are missing LVs, let's arrange the data
# iteritems(): this file is Python 2 code.
3536 for key, inst in nv_dict.iteritems():
3537 res_missing.setdefault(inst, []).append(list(key))
3539 return (res_nodes, list(res_instances), res_missing)
# Repairs recorded disk sizes in the cluster config by querying the actual
# block-device sizes from the primary nodes and updating mismatches.
# NOTE(review): extraction is lossy — `else:`/`continue` lines and dict
# closers (e.g. 3554, 3556, 3561, 3565, 3596, 3628, 3631, 3634, 3639-3640,
# 3642, 3645, 3649-3650, 3655) are missing; statements shown are not
# necessarily adjacent.
3542 class LUClusterRepairDiskSizes(NoHooksLU):
3543 """Verifies the cluster disks sizes.
# With an explicit instance list we lock just those instances (exclusive)
# and their primary nodes; otherwise everything, with node-res shared.
3548 def ExpandNames(self):
3549 if self.op.instances:
3550 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3551 self.needed_locks = {
3552 locking.LEVEL_NODE_RES: [],
3553 locking.LEVEL_INSTANCE: self.wanted_names,
3555 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3557 self.wanted_names = None
3558 self.needed_locks = {
3559 locking.LEVEL_NODE_RES: locking.ALL_SET,
3560 locking.LEVEL_INSTANCE: locking.ALL_SET,
3562 self.share_locks = {
3563 locking.LEVEL_NODE_RES: 1,
3564 locking.LEVEL_INSTANCE: 0,
3567 def DeclareLocks(self, level):
3568 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
# Only the primary nodes matter: sizes are queried there.
3569 self._LockInstancesNodes(primary_only=True, level=level)
3571 def CheckPrereq(self):
3572 """Check prerequisites.
3574 This only checks the optional instance list against the existing names.
3577 if self.wanted_names is None:
3578 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
# map(): Python 2 — returns a list here, not an iterator.
3580 self.wanted_instances = \
3581 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3583 def _EnsureChildSizes(self, disk):
3584 """Ensure children of the disk have the needed disk size.
3586 This is valid mainly for DRBD8 and fixes an issue where the
3587 children have smaller disk size.
3589 @param disk: an L{ganeti.objects.Disk} object
# Returns True when a child size was adjusted (so the caller knows to
# persist the config); recurses into the data child only.
3592 if disk.dev_type == constants.LD_DRBD8:
3593 assert disk.children, "Empty children for DRBD8?"
3594 fchild = disk.children[0]
3595 mismatch = fchild.size < disk.size
3597 self.LogInfo("Child disk has size %d, parent %d, fixing",
3598 fchild.size, disk.size)
3599 fchild.size = disk.size
3601 # and we recurse on this child only, not on the metadev
3602 return self._EnsureChildSizes(fchild) or mismatch
3606 def Exec(self, feedback_fn):
3607 """Verify the size of cluster disks.
3610 # TODO: check child disks too
3611 # TODO: check differences in size between primary/secondary nodes
# Group (instance, disk-index, disk) triples by primary node so each node
# is queried once via a single blockdev_getsize RPC.
3613 for instance in self.wanted_instances:
3614 pnode = instance.primary_node
3615 if pnode not in per_node_disks:
3616 per_node_disks[pnode] = []
3617 for idx, disk in enumerate(instance.disks):
3618 per_node_disks[pnode].append((instance, idx, disk))
3620 assert not (frozenset(per_node_disks.keys()) -
3621 self.owned_locks(locking.LEVEL_NODE_RES)), \
3622 "Not owning correct locks"
3624 assert not self.owned_locks(locking.LEVEL_NODE)
3626 for node, dskl in per_node_disks.items():
# Copies are sent over RPC so SetDiskID does not mutate the config objects.
3627 newl = [v[2].Copy() for v in dskl]
3629 self.cfg.SetDiskID(dsk, node)
3630 result = self.rpc.call_blockdev_getsize(node, newl)
# Node-level failures are warnings, not errors: the repair is best-effort.
3632 self.LogWarning("Failure in blockdev_getsize call to node"
3633 " %s, ignoring", node)
3635 if len(result.payload) != len(dskl):
3636 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3637 " result.payload=%s", node, len(dskl), result.payload)
3638 self.LogWarning("Invalid result from node %s, ignoring node results",
3641 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3643 self.LogWarning("Disk %d of instance %s did not return size"
3644 " information, ignoring", idx, instance.name)
# `long` is Python-2-only; sizes may exceed the native int range.
3646 if not isinstance(size, (int, long)):
3647 self.LogWarning("Disk %d of instance %s did not return valid"
3648 " size information, ignoring", idx, instance.name)
3651 if size != disk.size:
3652 self.LogInfo("Disk %d of instance %s has mismatched size,"
3653 " correcting: recorded %d, actual %d", idx,
3654 instance.name, disk.size, size)
3656 self.cfg.Update(instance, feedback_fn)
3657 changed.append((instance.name, idx, size))
3658 if self._EnsureChildSizes(disk):
3659 self.cfg.Update(instance, feedback_fn)
3660 changed.append((instance.name, idx, disk.size))
# Renames the cluster (name and/or master IP): validates the new name,
# tears down the master IP, rewrites config and known-hosts, restarts the
# master IP on the new address.
# NOTE(review): extraction is lossy — e.g. the docstring closers and the
# line binding `new_ip` inside Exec (~3714, presumably `new_ip = self.ip`)
# are missing; verify against the full file.
3664 class LUClusterRename(LogicalUnit):
3665 """Rename the cluster.
3668 HPATH = "cluster-rename"
3669 HTYPE = constants.HTYPE_CLUSTER
3671 def BuildHooksEnv(self):
3676 "OP_TARGET": self.cfg.GetClusterName(),
3677 "NEW_NAME": self.op.name,
3680 def BuildHooksNodes(self):
3681 """Build hooks nodes.
# Hooks run on the master (pre) and on all nodes (post).
3684 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3686 def CheckPrereq(self):
3687 """Verify that the passed name is a valid one.
3690 hostname = netutils.GetHostname(name=self.op.name,
3691 family=self.cfg.GetPrimaryIPFamily())
3693 new_name = hostname.name
3694 self.ip = new_ip = hostname.ip
3695 old_name = self.cfg.GetClusterName()
3696 old_ip = self.cfg.GetMasterIP()
# Reject a no-op rename, and refuse an IP that already answers on the
# noded port (would collide with a live host).
3697 if new_name == old_name and new_ip == old_ip:
3698 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3699 " cluster has changed",
3701 if new_ip != old_ip:
3702 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3703 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3704 " reachable on the network" %
3705 new_ip, errors.ECODE_NOTUNIQUE)
# Normalize the opcode name to the resolved FQDN for Exec/hooks.
3707 self.op.name = new_name
3709 def Exec(self, feedback_fn):
3710 """Rename the cluster.
3713 clustername = self.op.name
3716 # shutdown the master IP
3717 master_params = self.cfg.GetMasterNetworkParameters()
3718 ems = self.cfg.GetUseExternalMipScript()
3719 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
# Deactivation failure is fatal: we must not change config while the old
# master IP is still up.
3721 result.Raise("Could not disable the master role")
3724 cluster = self.cfg.GetClusterInfo()
3725 cluster.cluster_name = clustername
3726 cluster.master_ip = new_ip
3727 self.cfg.Update(cluster, feedback_fn)
3729 # update the known hosts file
3730 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3731 node_list = self.cfg.GetOnlineNodeList()
# The master already has the fresh file; distribute to the others only.
3733 node_list.remove(master_params.name)
3736 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3738 master_params.ip = new_ip
3739 result = self.rpc.call_node_activate_master_ip(master_params.name,
# Reactivation failure is only a warning: the rename itself succeeded.
3741 msg = result.fail_msg
3743 self.LogWarning("Could not re-enable the master role on"
3744 " the master, please restart manually: %s", msg)
# NOTE(review): extraction is lossy — the `try:` line (~3760) and the
# error-argument continuation lines (~3764, 3767) are missing here.
3749 def _ValidateNetmask(cfg, netmask):
3750 """Checks if a netmask is valid.
3752 @type cfg: L{config.ConfigWriter}
3753 @param cfg: The cluster configuration
3755 @param netmask: the netmask to be verified
3756 @raise errors.OpPrereqError: if the validation fails
# Resolve the IP class (IPv4/IPv6) for the cluster's primary family; an
# unknown family surfaces as ProgrammerError and is converted to a
# user-facing OpPrereqError.
3759 ip_family = cfg.GetPrimaryIPFamily()
3761 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3762 except errors.ProgrammerError:
3763 raise errors.OpPrereqError("Invalid primary ip family: %s." %
# The netmask is validated as a CIDR prefix length for that family.
3765 if not ipcls.ValidateNetmask(netmask):
3766 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
# Implements `gnt-cluster modify`: validates the requested cluster-level
# parameter changes (CheckArguments/CheckPrereq) and applies them (Exec).
# NOTE(review): extraction is lossy — dozens of interior lines are missing
# (gaps in the embedded numbering: `else:`/`try:`/`continue` lines, dict
# closers, argument continuations); statements shown are not necessarily
# adjacent. Review against the full file before acting on any comment.
3770 class LUClusterSetParams(LogicalUnit):
3771 """Change the parameters of the cluster.
3774 HPATH = "cluster-modify"
3775 HTYPE = constants.HTYPE_CLUSTER
# Cheap, local-only syntax checks; anything needing cluster state or RPC
# is deferred to CheckPrereq.
3778 def CheckArguments(self):
3782 if self.op.uid_pool:
3783 uidpool.CheckUidPool(self.op.uid_pool)
3785 if self.op.add_uids:
3786 uidpool.CheckUidPool(self.op.add_uids)
3788 if self.op.remove_uids:
3789 uidpool.CheckUidPool(self.op.remove_uids)
3791 if self.op.master_netmask is not None:
3792 _ValidateNetmask(self.cfg, self.op.master_netmask)
3794 if self.op.diskparams:
3795 for dt_params in self.op.diskparams.values():
3796 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3798 def ExpandNames(self):
3799 # FIXME: in the future maybe other cluster params won't require checking on
3800 # all nodes to be modified.
3801 self.needed_locks = {
3802 locking.LEVEL_NODE: locking.ALL_SET,
3803 locking.LEVEL_INSTANCE: locking.ALL_SET,
3804 locking.LEVEL_NODEGROUP: locking.ALL_SET,
# All locks shared: validation only reads node/instance/group state.
3806 self.share_locks = {
3807 locking.LEVEL_NODE: 1,
3808 locking.LEVEL_INSTANCE: 1,
3809 locking.LEVEL_NODEGROUP: 1,
3812 def BuildHooksEnv(self):
3817 "OP_TARGET": self.cfg.GetClusterName(),
3818 "NEW_VG_NAME": self.op.vg_name,
3821 def BuildHooksNodes(self):
3822 """Build hooks nodes.
3825 mn = self.cfg.GetMasterNode()
3828 def CheckPrereq(self):
3829 """Check prerequisites.
3831 This checks whether the given params don't conflict and
3832 if the given volume group is valid.
# Disabling LVM (empty vg_name) or the DRBD helper is refused while any
# disk of the corresponding type exists.
3835 if self.op.vg_name is not None and not self.op.vg_name:
3836 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3837 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3838 " instances exist", errors.ECODE_INVAL)
3840 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3841 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3842 raise errors.OpPrereqError("Cannot disable drbd helper while"
3843 " drbd-based instances exist",
3846 node_list = self.owned_locks(locking.LEVEL_NODE)
3848 # if vg_name not None, checks given volume group on all nodes
# Unreachable nodes are skipped with a warning; a reachable node with a
# bad/undersized VG is a hard error.
3850 vglist = self.rpc.call_vg_list(node_list)
3851 for node in node_list:
3852 msg = vglist[node].fail_msg
3854 # ignoring down node
3855 self.LogWarning("Error while gathering data on node %s"
3856 " (ignoring node): %s", node, msg)
3858 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3860 constants.MIN_VG_SIZE)
3862 raise errors.OpPrereqError("Error on node '%s': %s" %
3863 (node, vgstatus), errors.ECODE_ENVIRON)
3865 if self.op.drbd_helper:
3866 # checks given drbd helper on all nodes
# Every online node must report exactly the requested helper path.
3867 helpers = self.rpc.call_drbd_helper(node_list)
3868 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3870 self.LogInfo("Not checking drbd helper on offline node %s", node)
3872 msg = helpers[node].fail_msg
3874 raise errors.OpPrereqError("Error checking drbd helper on node"
3875 " '%s': %s" % (node, msg),
3876 errors.ECODE_ENVIRON)
3877 node_helper = helpers[node].payload
3878 if node_helper != self.op.drbd_helper:
3879 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3880 (node, node_helper), errors.ECODE_ENVIRON)
3882 self.cluster = cluster = self.cfg.GetClusterInfo()
3883 # validate params changes
# For each parameter family, build the prospective "new_*" merged dict on
# self; Exec later commits these wholesale.
3884 if self.op.beparams:
3885 objects.UpgradeBeParams(self.op.beparams)
3886 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3887 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3889 if self.op.ndparams:
3890 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3891 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3893 # TODO: we need a more general way to handle resetting
3894 # cluster-level parameters to default values
3895 if self.new_ndparams["oob_program"] == "":
3896 self.new_ndparams["oob_program"] = \
3897 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3899 if self.op.hv_state:
3900 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3901 self.cluster.hv_state_static)
3902 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3903 for hv, values in new_hv_state.items())
3905 if self.op.disk_state:
3906 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3907 self.cluster.disk_state_static)
3908 self.new_disk_state = \
3909 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3910 for name, values in svalues.items()))
3911 for storage, svalues in new_disk_state.items())
# Instance-policy changes are checked per node group: instances that would
# violate the new policy are reported as a warning, not an error.
3914 self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3917 all_instances = self.cfg.GetAllInstancesInfo().values()
3919 for group in self.cfg.GetAllNodeGroupsInfo().values():
3920 instances = frozenset([inst for inst in all_instances
3921 if compat.any(node in group.members
3922 for node in inst.all_nodes)])
3923 new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3924 new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3926 new_ipolicy, instances)
3928 violations.update(new)
3931 self.LogWarning("After the ipolicy change the following instances"
3932 " violate them: %s",
3933 utils.CommaJoin(violations))
3935 if self.op.nicparams:
3936 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3937 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3938 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3941 # check all instances for consistency
# New NIC defaults are dry-run-filled into every instance NIC; all
# problems are accumulated and reported together below.
3942 for instance in self.cfg.GetAllInstancesInfo().values():
3943 for nic_idx, nic in enumerate(instance.nics):
3944 params_copy = copy.deepcopy(nic.nicparams)
3945 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3947 # check parameter syntax
3949 objects.NIC.CheckParameterSyntax(params_filled)
# `except X, err` is Python-2-only syntax (would be `as err` in py3).
3950 except errors.ConfigurationError, err:
3951 nic_errors.append("Instance %s, nic/%d: %s" %
3952 (instance.name, nic_idx, err))
3954 # if we're moving instances to routed, check that they have an ip
3955 target_mode = params_filled[constants.NIC_MODE]
3956 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3957 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3958 " address" % (instance.name, nic_idx))
3960 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3961 "\n".join(nic_errors))
3963 # hypervisor list/parameters
3964 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3965 if self.op.hvparams:
3966 for hv_name, hv_dict in self.op.hvparams.items():
3967 if hv_name not in self.new_hvparams:
3968 self.new_hvparams[hv_name] = hv_dict
3970 self.new_hvparams[hv_name].update(hv_dict)
3972 # disk template parameters
3973 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3974 if self.op.diskparams:
3975 for dt_name, dt_params in self.op.diskparams.items():
# BUG(review): this condition tests membership in self.op.diskparams, the
# very dict being iterated, so it is always False and the update branch
# below always runs (KeyError for a template absent from the cluster
# defaults). It should read "dt_name not in self.new_diskparams", matching
# the hvparams merge above. Confirm against upstream before fixing.
3976 if dt_name not in self.op.diskparams:
3977 self.new_diskparams[dt_name] = dt_params
3979 self.new_diskparams[dt_name].update(dt_params)
3981 # os hypervisor parameters
3982 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3984 for os_name, hvs in self.op.os_hvp.items():
3985 if os_name not in self.new_os_hvp:
3986 self.new_os_hvp[os_name] = hvs
3988 for hv_name, hv_dict in hvs.items():
3989 if hv_name not in self.new_os_hvp[os_name]:
3990 self.new_os_hvp[os_name][hv_name] = hv_dict
3992 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3995 self.new_osp = objects.FillDict(cluster.osparams, {})
3996 if self.op.osparams:
3997 for os_name, osp in self.op.osparams.items():
3998 if os_name not in self.new_osp:
3999 self.new_osp[os_name] = {}
4001 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4004 if not self.new_osp[os_name]:
4005 # we removed all parameters
4006 del self.new_osp[os_name]
4008 # check the parameter validity (remote check)
4009 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4010 os_name, self.new_osp[os_name])
4012 # changes to the hypervisor list
4013 if self.op.enabled_hypervisors is not None:
4014 self.hv_list = self.op.enabled_hypervisors
4015 for hv in self.hv_list:
4016 # if the hypervisor doesn't already exist in the cluster
4017 # hvparams, we initialize it to empty, and then (in both
4018 # cases) we make sure to fill the defaults, as we might not
4019 # have a complete defaults list if the hypervisor wasn't
4021 if hv not in new_hvp:
4023 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4024 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4026 self.hv_list = cluster.enabled_hypervisors
4028 if self.op.hvparams or self.op.enabled_hypervisors is not None:
4029 # either the enabled list has changed, or the parameters have, validate
4030 for hv_name, hv_params in self.new_hvparams.items():
4031 if ((self.op.hvparams and hv_name in self.op.hvparams) or
4032 (self.op.enabled_hypervisors and
4033 hv_name in self.op.enabled_hypervisors)):
4034 # either this is a new hypervisor, or its parameters have changed
4035 hv_class = hypervisor.GetHypervisor(hv_name)
4036 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4037 hv_class.CheckParameterSyntax(hv_params)
4038 _CheckHVParams(self, node_list, hv_name, hv_params)
4041 # no need to check any newly-enabled hypervisors, since the
4042 # defaults have already been checked in the above code-block
4043 for os_name, os_hvp in self.new_os_hvp.items():
4044 for hv_name, hv_params in os_hvp.items():
4045 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4046 # we need to fill in the new os_hvp on top of the actual hv_p
4047 cluster_defaults = self.new_hvparams.get(hv_name, {})
4048 new_osp = objects.FillDict(cluster_defaults, hv_params)
4049 hv_class = hypervisor.GetHypervisor(hv_name)
4050 hv_class.CheckParameterSyntax(new_osp)
4051 _CheckHVParams(self, node_list, hv_name, new_osp)
4053 if self.op.default_iallocator:
4054 alloc_script = utils.FindFile(self.op.default_iallocator,
4055 constants.IALLOCATOR_SEARCH_PATH,
4057 if alloc_script is None:
4058 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4059 " specified" % self.op.default_iallocator,
4062 def Exec(self, feedback_fn):
4063 """Change the parameters of the cluster.
# Commit phase: copies the pre-validated "new_*" values from CheckPrereq
# into the cluster object, then persists with cfg.Update near the end.
4066 if self.op.vg_name is not None:
4067 new_volume = self.op.vg_name
4070 if new_volume != self.cfg.GetVGName():
4071 self.cfg.SetVGName(new_volume)
4073 feedback_fn("Cluster LVM configuration already in desired"
4074 " state, not changing")
4075 if self.op.drbd_helper is not None:
4076 new_helper = self.op.drbd_helper
4079 if new_helper != self.cfg.GetDRBDHelper():
4080 self.cfg.SetDRBDHelper(new_helper)
4082 feedback_fn("Cluster DRBD helper already in desired state,"
4084 if self.op.hvparams:
4085 self.cluster.hvparams = self.new_hvparams
4087 self.cluster.os_hvp = self.new_os_hvp
4088 if self.op.enabled_hypervisors is not None:
4089 self.cluster.hvparams = self.new_hvparams
4090 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4091 if self.op.beparams:
4092 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4093 if self.op.nicparams:
4094 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4096 self.cluster.ipolicy = self.new_ipolicy
4097 if self.op.osparams:
4098 self.cluster.osparams = self.new_osp
4099 if self.op.ndparams:
4100 self.cluster.ndparams = self.new_ndparams
4101 if self.op.diskparams:
4102 self.cluster.diskparams = self.new_diskparams
4103 if self.op.hv_state:
4104 self.cluster.hv_state_static = self.new_hv_state
4105 if self.op.disk_state:
4106 self.cluster.disk_state_static = self.new_disk_state
4108 if self.op.candidate_pool_size is not None:
4109 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4110 # we need to update the pool size here, otherwise the save will fail
4111 _AdjustCandidatePool(self, [])
4113 if self.op.maintain_node_health is not None:
4114 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4115 feedback_fn("Note: CONFD was disabled at build time, node health"
4116 " maintenance is not useful (still enabling it)")
4117 self.cluster.maintain_node_health = self.op.maintain_node_health
4119 if self.op.prealloc_wipe_disks is not None:
4120 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4122 if self.op.add_uids is not None:
4123 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4125 if self.op.remove_uids is not None:
4126 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4128 if self.op.uid_pool is not None:
4129 self.cluster.uid_pool = self.op.uid_pool
4131 if self.op.default_iallocator is not None:
4132 self.cluster.default_iallocator = self.op.default_iallocator
4134 if self.op.reserved_lvs is not None:
4135 self.cluster.reserved_lvs = self.op.reserved_lvs
4137 if self.op.use_external_mip_script is not None:
4138 self.cluster.use_external_mip_script = self.op.use_external_mip_script
# Applies DDM_ADD/DDM_REMOVE modifications to an OS-name list attribute of
# the cluster (hidden_os / blacklisted_os); duplicates and misses are
# reported via feedback_fn rather than raised.
4140 def helper_os(aname, mods, desc):
4142 lst = getattr(self.cluster, aname)
4143 for key, val in mods:
4144 if key == constants.DDM_ADD:
4146 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4149 elif key == constants.DDM_REMOVE:
4153 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4155 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4157 if self.op.hidden_os:
4158 helper_os("hidden_os", self.op.hidden_os, "hidden")
4160 if self.op.blacklisted_os:
4161 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
# Changing the master netdev: take the master IP down on the old device
# first; it is brought back up (on the new device) after cfg.Update below.
4163 if self.op.master_netdev:
4164 master_params = self.cfg.GetMasterNetworkParameters()
4165 ems = self.cfg.GetUseExternalMipScript()
4166 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4167 self.cluster.master_netdev)
4168 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4170 result.Raise("Could not disable the master ip")
4171 feedback_fn("Changing master_netdev from %s to %s" %
4172 (master_params.netdev, self.op.master_netdev))
4173 self.cluster.master_netdev = self.op.master_netdev
4175 if self.op.master_netmask:
4176 master_params = self.cfg.GetMasterNetworkParameters()
4177 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4178 result = self.rpc.call_node_change_master_netmask(master_params.name,
4179 master_params.netmask,
4180 self.op.master_netmask,
4182 master_params.netdev)
4184 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4187 self.cluster.master_netmask = self.op.master_netmask
4189 self.cfg.Update(self.cluster, feedback_fn)
4191 if self.op.master_netdev:
4192 master_params = self.cfg.GetMasterNetworkParameters()
4193 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4194 self.op.master_netdev)
4195 ems = self.cfg.GetUseExternalMipScript()
4196 result = self.rpc.call_node_activate_master_ip(master_params.name,
# Failure to re-activate is a warning only; the config change is already
# committed at this point.
4199 self.LogWarning("Could not re-enable the master ip on"
4200 " the master, please restart manually: %s",
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  Uploads the local file C{fname} to all given nodes via RPC; any
  per-node failure is reported as a warning instead of aborting, since
  file distribution is best-effort.

  @param lu: the calling logical unit (provides C{rpc} and C{proc})
  @param nodes: list of node names to upload to
  @type fname: string
  @param fname: path of the file to upload; silently skipped if it does
      not exist locally

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object, used for enabled hypervisors and
      the C{modify_etc_hosts} flag
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  @return: tuple of (all-node files, optional files, master-candidate
      files, vm-capable-node files)

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # All nodes are touched, but only in shared mode, since we only
    # read/overwrite replicated files
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # Updating the config triggers redistribution of config/ssconf;
    # ancillary files are handled explicitly afterwards
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  Polls the primary node for mirror status of the given disks until they
  are fully synchronised (or, with C{oneshot}, returns after one poll).

  @param lu: the calling logical unit
  @param instance: the instance whose disks to wait for
  @param disks: optional subset of the instance's disks to check
  @type oneshot: boolean
  @param oneshot: whether to poll only once instead of until sync
  @rtype: boolean
  @return: True if the disks are not degraded

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)

  return not cumul_degraded
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  @rtype: boolean
  @return: True if the device (and all its children) is consistent

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    # recurse into children with the same primary flag
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      # Power-off/cycle must never hit the master implicitly; point the
      # user at the OOB helper if the master does support it
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError as err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.items())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)
class LUQueryFields(NoHooksLU):
  """Query for the list of fields available for a resource kind.

  Unlike L{LUQuery} this does not touch the configuration at all; it
  only reports the field definitions of the query implementation, so
  no locks are needed.

  """
  # pylint: disable=W0142
  REQ_BGL = False  # NOTE(review): restored; elided in this extract

  def CheckArguments(self):
    # Only the implementation class is needed; it is not instantiated.
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    # No locks: field metadata is static.
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5352 class LUNodeModifyStorage(NoHooksLU):
# LU that applies field changes to a single storage unit (e.g. an LVM
# volume) on one node through the storage_modify RPC.
5353 """Logical unit for modifying a storage volume on a node.
# CheckArguments: expand/normalize the node name and make sure every
# requested change is allowed for this storage type.
5358 def CheckArguments(self):
5359 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5361 storage_type = self.op.storage_type
# NOTE(review): the try/except (or similar guard) around this lookup is
# elided in this extract; MODIFIABLE_STORAGE_FIELDS maps storage type to
# the set of modifiable field names -- confirm against full source.
5364 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5366 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5367 " modified" % storage_type,
# Reject any requested field outside the modifiable set for this type.
5370 diff = set(self.op.changes.keys()) - modifiable
5372 raise errors.OpPrereqError("The following fields can not be modified for"
5373 " storage units of type '%s': %r" %
5374 (storage_type, list(diff)),
# Only the target node needs to be locked.
5377 def ExpandNames(self):
5378 self.needed_locks = {
5379 locking.LEVEL_NODE: self.op.node_name,
# Exec: run the modification on the node and raise OpExecError (via
# result.Raise) if the RPC reports failure.
5382 def Exec(self, feedback_fn):
5383 """Applies the requested changes to the storage unit on the node.
5386 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5387 result = self.rpc.call_storage_modify(self.op.node_name,
5388 self.op.storage_type, st_args,
5389 self.op.name, self.op.changes)
5390 result.Raise("Failed to modify storage unit '%s' on %s" %
5391 (self.op.name, self.op.node_name))
5394 class LUNodeAdd(LogicalUnit):
# LU handling both "add node" and "re-add node" (self.op.readd); re-add
# keeps the node's group and validates its IP configuration is unchanged.
5395 """Logical unit for adding node to the cluster.
5399 HTYPE = constants.HTYPE_NODE
# Flags copied verbatim between opcode and node object (see Exec).
5400 _NFLAGS = ["master_capable", "vm_capable"]
# CheckArguments: normalize the node name via DNS and reject nonsensical
# readd combinations (readding the master, readd with a group).
5402 def CheckArguments(self):
5403 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5404 # validate/normalize the node name
5405 self.hostname = netutils.GetHostname(name=self.op.node_name,
5406 family=self.primary_ip_family)
5407 self.op.node_name = self.hostname.name
5409 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5410 raise errors.OpPrereqError("Cannot readd the master node",
5413 if self.op.readd and self.op.group:
5414 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5415 " being readded", errors.ECODE_INVAL)
# Hook environment describing the node being added.
# NOTE(review): the surrounding "return { ... }" (or env assignment) is
# elided in this extract.
5417 def BuildHooksEnv(self):
5420 This will run on all nodes before, and on all nodes + the new node after.
5424 "OP_TARGET": self.op.node_name,
5425 "NODE_NAME": self.op.node_name,
5426 "NODE_PIP": self.op.primary_ip,
5427 "NODE_SIP": self.op.secondary_ip,
5428 "MASTER_CAPABLE": str(self.op.master_capable),
5429 "VM_CAPABLE": str(self.op.vm_capable),
# Pre-hooks run on all existing nodes; post-hooks additionally include
# the node that was just added.
5432 def BuildHooksNodes(self):
5433 """Build hooks nodes.
5436 # Exclude added node
5437 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5438 post_nodes = pre_nodes + [self.op.node_name, ]
5440 return (pre_nodes, post_nodes)
# CheckPrereq: validate IPs, uniqueness, homing (single vs dual) and
# reachability, then build self.new_node for Exec.
5442 def CheckPrereq(self):
5443 """Check prerequisites.
5446 - the new node is not already in the config
5448 - its parameters (single/dual homed) matches the cluster
5450 Any errors are signaled by raising errors.OpPrereqError.
5454 hostname = self.hostname
5455 node = hostname.name
5456 primary_ip = self.op.primary_ip = hostname.ip
5457 if self.op.secondary_ip is None:
5458 if self.primary_ip_family == netutils.IP6Address.family:
5459 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5460 " IPv4 address must be given as secondary",
# Default: single-homed -- secondary IP equals the primary IP.
5462 self.op.secondary_ip = primary_ip
5464 secondary_ip = self.op.secondary_ip
5465 if not netutils.IP4Address.IsValid(secondary_ip):
5466 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5467 " address" % secondary_ip, errors.ECODE_INVAL)
# NOTE(review): "cfg" is presumably a local alias for self.cfg assigned
# on an elided line above -- confirm against full source.
5469 node_list = cfg.GetNodeList()
5470 if not self.op.readd and node in node_list:
5471 raise errors.OpPrereqError("Node %s is already in the configuration" %
5472 node, errors.ECODE_EXISTS)
5473 elif self.op.readd and node not in node_list:
5474 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5477 self.changed_primary_ip = False
# For readd the IPs must match the stored configuration (except a
# changed primary IP, which is remembered and applied in Exec); for a
# fresh add the IPs must not collide with any existing node.
5479 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5480 if self.op.readd and node == existing_node_name:
5481 if existing_node.secondary_ip != secondary_ip:
5482 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5483 " address configuration as before",
5485 if existing_node.primary_ip != primary_ip:
5486 self.changed_primary_ip = True
5490 if (existing_node.primary_ip == primary_ip or
5491 existing_node.secondary_ip == primary_ip or
5492 existing_node.primary_ip == secondary_ip or
5493 existing_node.secondary_ip == secondary_ip):
5494 raise errors.OpPrereqError("New node ip address(es) conflict with"
5495 " existing node %s" % existing_node.name,
5496 errors.ECODE_NOTUNIQUE)
5498 # After this 'if' block, None is no longer a valid value for the
5499 # _capable op attributes
# On readd, unspecified _NFLAGS default to the stored node's values...
5501 old_node = self.cfg.GetNodeInfo(node)
5502 assert old_node is not None, "Can't retrieve locked node %s" % node
5503 for attr in self._NFLAGS:
5504 if getattr(self.op, attr) is None:
5505 setattr(self.op, attr, getattr(old_node, attr))
# ...while on a fresh add they default to True.  NOTE(review): the
# if readd/else split around these two loops is elided in this extract.
5507 for attr in self._NFLAGS:
5508 if getattr(self.op, attr) is None:
5509 setattr(self.op, attr, True)
5511 if self.op.readd and not self.op.vm_capable:
5512 pri, sec = cfg.GetNodeInstances(node)
5514 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5515 " flag set to false, but it already holds"
5516 " instances" % node,
5519 # check that the type of the node (single versus dual homed) is the
5520 # same as for the master
5521 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5522 master_singlehomed = myself.secondary_ip == myself.primary_ip
5523 newbie_singlehomed = secondary_ip == primary_ip
5524 if master_singlehomed != newbie_singlehomed:
5525 if master_singlehomed:
5526 raise errors.OpPrereqError("The master has no secondary ip but the"
5527 " new node has one",
5530 raise errors.OpPrereqError("The master has a secondary ip but the"
5531 " new node doesn't have one",
5534 # checks reachability
5535 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5536 raise errors.OpPrereqError("Node not reachable by ping",
5537 errors.ECODE_ENVIRON)
5539 if not newbie_singlehomed:
5540 # check reachability from my secondary ip to newbie's secondary ip
5541 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5542 source=myself.secondary_ip):
5543 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5544 " based ping to node daemon port",
5545 errors.ECODE_ENVIRON)
# Decide whether the node becomes a master candidate (self-promotion
# depends on current candidate pool state).
5552 if self.op.master_capable:
5553 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5555 self.master_candidate = False
# Readd reuses the stored node object; a fresh add builds a new one in
# the requested (or default) node group.
5558 self.new_node = old_node
5560 node_group = cfg.LookupNodeGroup(self.op.group)
5561 self.new_node = objects.Node(name=node,
5562 primary_ip=primary_ip,
5563 secondary_ip=secondary_ip,
5564 master_candidate=self.master_candidate,
5565 offline=False, drained=False,
5568 if self.op.ndparams:
5569 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5571 if self.op.hv_state:
5572 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5574 if self.op.disk_state:
5575 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
# Exec: configure the node object, verify connectivity and versions,
# update /etc/hosts, run node verification, then register the node
# (ReaddNode or AddNode) and redistribute ancillary files.
5577 def Exec(self, feedback_fn):
5578 """Adds the new node to the cluster.
5581 new_node = self.new_node
5582 node = new_node.name
5584 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5587 # We adding a new node so we assume it's powered
5588 new_node.powered = True
5590 # for re-adds, reset the offline/drained/master-candidate flags;
5591 # we need to reset here, otherwise offline would prevent RPC calls
5592 # later in the procedure; this also means that if the re-add
5593 # fails, we are left with a non-offlined, broken node
# NOTE(review): the "if self.op.readd:" guard for this reset is elided
# in this extract.
5595 new_node.drained = new_node.offline = False # pylint: disable=W0201
5596 self.LogInfo("Readding a node, the offline/drained flags were reset")
5597 # if we demote the node, we do cleanup later in the procedure
5598 new_node.master_candidate = self.master_candidate
5599 if self.changed_primary_ip:
5600 new_node.primary_ip = self.op.primary_ip
5602 # copy the master/vm_capable flags
5603 for attr in self._NFLAGS:
5604 setattr(new_node, attr, getattr(self.op, attr))
5606 # notify the user about any possible mc promotion
5607 if new_node.master_candidate:
5608 self.LogInfo("Node will be a master candidate")
5610 if self.op.ndparams:
5611 new_node.ndparams = self.op.ndparams
5613 new_node.ndparams = {}
5615 if self.op.hv_state:
5616 new_node.hv_state_static = self.new_hv_state
5618 if self.op.disk_state:
5619 new_node.disk_state_static = self.new_disk_state
5621 # check connectivity
5622 result = self.rpc.call_version([node])[node]
5623 result.Raise("Can't get version information from node %s" % node)
5624 if constants.PROTOCOL_VERSION == result.payload:
5625 logging.info("Communication to node %s fine, sw version %s match",
5626 node, result.payload)
5628 raise errors.OpExecError("Version mismatch master version %s,"
5629 " node version %s" %
5630 (constants.PROTOCOL_VERSION, result.payload))
5632 # Add node to our /etc/hosts, and add key to known_hosts
5633 if self.cfg.GetClusterInfo().modify_etc_hosts:
5634 master_node = self.cfg.GetMasterNode()
5635 result = self.rpc.call_etc_hosts_modify(master_node,
5636 constants.ETC_HOSTS_ADD,
5639 result.Raise("Can't update hosts file with new host data")
5641 if new_node.secondary_ip != new_node.primary_ip:
5642 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
# Verify SSH/hostname setup of the new node as seen from the master.
5645 node_verify_list = [self.cfg.GetMasterNode()]
5646 node_verify_param = {
5647 constants.NV_NODELIST: ([node], {}),
5648 # TODO: do a node-net-test as well?
5651 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5652 self.cfg.GetClusterName())
5653 for verifier in node_verify_list:
5654 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5655 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5657 for failed in nl_payload:
5658 feedback_fn("ssh/hostname verification failed"
5659 " (checking from %s): %s" %
5660 (verifier, nl_payload[failed]))
5661 raise errors.OpExecError("ssh/hostname verification failed")
# Readd path: re-register and persist; demote-from-MC cleanup removes
# stale master files on the node if it is no longer a candidate.
5664 _RedistributeAncillaryFiles(self)
5665 self.context.ReaddNode(new_node)
5666 # make sure we redistribute the config
5667 self.cfg.Update(new_node, feedback_fn)
5668 # and make sure the new node will not have old files around
5669 if not new_node.master_candidate:
5670 result = self.rpc.call_node_demote_from_mc(new_node.name)
5671 msg = result.fail_msg
5673 self.LogWarning("Node failed to demote itself from master"
5674 " candidate status: %s" % msg)
# Fresh-add path (the if readd/else split is elided in this extract).
5676 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5677 additional_vm=self.op.vm_capable)
5678 self.context.AddNode(new_node, self.proc.GetECId())
5681 class LUNodeSetParams(LogicalUnit):
# LU changing node flags (master_candidate/drained/offline), capability
# flags, secondary IP, ndparams and hv/disk state.  Node "roles" are an
# internal encoding of the three mutually-exclusive flag states.
5682 """Modifies the parameters of a node.
5684 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5685 to the node role (as _ROLE_*)
5686 @cvar _R2F: a dictionary from node role to tuples of flags
5687 @cvar _FLAGS: a list of attribute names corresponding to the flags
5690 HPATH = "node-modify"
5691 HTYPE = constants.HTYPE_NODE
5693 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
# NOTE(review): the "_F2R = {" opening line is elided in this extract.
5695 (True, False, False): _ROLE_CANDIDATE,
5696 (False, True, False): _ROLE_DRAINED,
5697 (False, False, True): _ROLE_OFFLINE,
5698 (False, False, False): _ROLE_REGULAR,
5700 _R2F = dict((v, k) for k, v in _F2R.items())
5701 _FLAGS = ["master_candidate", "drained", "offline"]
# CheckArguments: require at least one modification, forbid setting more
# than one exclusive state, and compute locking strategy flags.
5703 def CheckArguments(self):
5704 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5705 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5706 self.op.master_capable, self.op.vm_capable,
5707 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5709 if all_mods.count(None) == len(all_mods):
5710 raise errors.OpPrereqError("Please pass at least one modification",
5712 if all_mods.count(True) > 1:
5713 raise errors.OpPrereqError("Can't set the node into more than one"
5714 " state at the same time",
5717 # Boolean value that tells us whether we might be demoting from MC
5718 self.might_demote = (self.op.master_candidate == False or
5719 self.op.offline == True or
5720 self.op.drained == True or
5721 self.op.master_capable == False)
5723 if self.op.secondary_ip:
5724 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5725 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5726 " address" % self.op.secondary_ip,
# lock_all: demotion with auto-promote may need to promote other nodes.
5729 self.lock_all = self.op.auto_promote and self.might_demote
5730 self.lock_instances = self.op.secondary_ip is not None
# Instances affected by a secondary-IP change: internally mirrored
# (e.g. DRBD) instances that use this node.
5732 def _InstanceFilter(self, instance):
5733 """Filter for getting affected instances.
5736 return (instance.disk_template in constants.DTS_INT_MIRROR and
5737 self.op.node_name in instance.all_nodes)
# ExpandNames: all nodes when lock_all, otherwise just the target node.
# NOTE(review): the if/else around the two needed_locks assignments is
# elided in this extract.
5739 def ExpandNames(self):
5741 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5743 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5745 # Since modifying a node can have severe effects on currently running
5746 # operations the resource lock is at least acquired in shared mode
5747 self.needed_locks[locking.LEVEL_NODE_RES] = \
5748 self.needed_locks[locking.LEVEL_NODE]
5750 # Get node resource and instance locks in shared mode; they are not used
5751 # for anything but read-only access
5752 self.share_locks[locking.LEVEL_NODE_RES] = 1
5753 self.share_locks[locking.LEVEL_INSTANCE] = 1
5755 if self.lock_instances:
5756 self.needed_locks[locking.LEVEL_INSTANCE] = \
5757 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
# Hook environment with the requested flag changes.
5759 def BuildHooksEnv(self):
5762 This runs on the master node.
5766 "OP_TARGET": self.op.node_name,
5767 "MASTER_CANDIDATE": str(self.op.master_candidate),
5768 "OFFLINE": str(self.op.offline),
5769 "DRAINED": str(self.op.drained),
5770 "MASTER_CAPABLE": str(self.op.master_capable),
5771 "VM_CAPABLE": str(self.op.vm_capable),
5774 def BuildHooksNodes(self):
5775 """Build hooks nodes.
5778 nl = [self.cfg.GetMasterNode(), self.op.node_name]
# CheckPrereq: validate the change set against the current node state
# and compute old_role/new_role for Exec.
5781 def CheckPrereq(self):
5782 """Check prerequisites.
5784 This only checks the instance list against the existing names.
5787 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5789 if self.lock_instances:
5790 affected_instances = \
5791 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5793 # Verify instance locks
5794 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5795 wanted_instances = frozenset(affected_instances.keys())
5796 if wanted_instances - owned_instances:
5797 raise errors.OpPrereqError("Instances affected by changing node %s's"
5798 " secondary IP address have changed since"
5799 " locks were acquired, wanted '%s', have"
5800 " '%s'; retry the operation" %
5802 utils.CommaJoin(wanted_instances),
5803 utils.CommaJoin(owned_instances)),
5806 affected_instances = None
5808 if (self.op.master_candidate is not None or
5809 self.op.drained is not None or
5810 self.op.offline is not None):
5811 # we can't change the master's node flags
5812 if self.op.node_name == self.cfg.GetMasterNode():
5813 raise errors.OpPrereqError("The master role can be changed"
5814 " only via master-failover",
5817 if self.op.master_candidate and not node.master_capable:
5818 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5819 " it a master candidate" % node.name,
5822 if self.op.vm_capable == False:
5823 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5825 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5826 " the vm_capable flag" % node.name,
5829 if node.master_candidate and self.might_demote and not self.lock_all:
5830 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5831 # check if after removing the current node, we're missing master
5833 (mc_remaining, mc_should, _) = \
5834 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5835 if mc_remaining < mc_should:
5836 raise errors.OpPrereqError("Not enough master candidates, please"
5837 " pass auto promote option to allow"
5838 " promotion", errors.ECODE_STATE)
5840 self.old_flags = old_flags = (node.master_candidate,
5841 node.drained, node.offline)
5842 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5843 self.old_role = old_role = self._F2R[old_flags]
5845 # Check for ineffective changes
5846 for attr in self._FLAGS:
5847 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5848 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5849 setattr(self.op, attr, None)
5851 # Past this point, any flag change to False means a transition
5852 # away from the respective state, as only real changes are kept
5854 # TODO: We might query the real power state if it supports OOB
5855 if _SupportsOob(self.cfg, node):
5856 if self.op.offline is False and not (node.powered or
5857 self.op.powered == True):
5858 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5859 " offline status can be reset") %
5861 elif self.op.powered is not None:
5862 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5863 " as it does not support out-of-band"
5864 " handling") % self.op.node_name)
5866 # If we're being deofflined/drained, we'll MC ourself if needed
5867 if (self.op.drained == False or self.op.offline == False or
5868 (self.op.master_capable and not node.master_capable)):
5869 if _DecideSelfPromotion(self):
5870 self.op.master_candidate = True
5871 self.LogInfo("Auto-promoting node to master candidate")
5873 # If we're no longer master capable, we'll demote ourselves from MC
5874 if self.op.master_capable == False and node.master_candidate:
5875 self.LogInfo("Demoting from master candidate")
5876 self.op.master_candidate = False
# Compute the new role from the (possibly adjusted) requested flags.
5879 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5880 if self.op.master_candidate:
5881 new_role = self._ROLE_CANDIDATE
5882 elif self.op.drained:
5883 new_role = self._ROLE_DRAINED
5884 elif self.op.offline:
5885 new_role = self._ROLE_OFFLINE
5886 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5887 # False is still in new flags, which means we're un-setting (the
5889 new_role = self._ROLE_REGULAR
5890 else: # no new flags, nothing, keep old role
5893 self.new_role = new_role
5895 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5896 # Trying to transition out of offline status
5897 # TODO: Use standard RPC runner, but make sure it works when the node is
5898 # still marked offline
5899 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5901 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5902 " to report its version: %s" %
5903 (node.name, result.fail_msg),
5906 self.LogWarning("Transitioning node from offline to online state"
5907 " without using re-add. Please make sure the node"
5910 if self.op.secondary_ip:
5911 # Ok even without locking, because this can't be changed by any LU
5912 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5913 master_singlehomed = master.secondary_ip == master.primary_ip
5914 if master_singlehomed and self.op.secondary_ip:
5915 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5916 " homed cluster", errors.ECODE_INVAL)
5918 assert not (frozenset(affected_instances) -
5919 self.owned_locks(locking.LEVEL_INSTANCE))
# NOTE(review): the "if node.offline:" branch selector is elided in
# this extract -- offline nodes with affected instances are rejected.
5922 if affected_instances:
5923 raise errors.OpPrereqError("Cannot change secondary IP address:"
5924 " offline node has instances (%s)"
5925 " configured to use it" %
5926 utils.CommaJoin(affected_instances.keys()))
5928 # On online nodes, check that no instances are running, and that
5929 # the node has the new ip and we can reach it.
5930 for instance in affected_instances.values():
5931 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5932 msg="cannot change secondary ip")
5934 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5935 if master.name != node.name:
5936 # check reachability from master secondary ip to new secondary ip
5937 if not netutils.TcpPing(self.op.secondary_ip,
5938 constants.DEFAULT_NODED_PORT,
5939 source=master.secondary_ip):
5940 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5941 " based ping to node daemon port",
5942 errors.ECODE_ENVIRON)
5944 if self.op.ndparams:
5945 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5946 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5947 self.new_ndparams = new_ndparams
5949 if self.op.hv_state:
5950 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5951 self.node.hv_state_static)
5953 if self.op.disk_state:
5954 self.new_disk_state = \
5955 _MergeAndVerifyDiskState(self.op.disk_state,
5956 self.node.disk_state_static)
# Exec: apply the validated changes to the node object, adjust the
# candidate pool if needed, persist the config and return the list of
# (name, value) changes.  NOTE(review): the "node = self.node" and
# "result = []" assignments are elided in this extract.
5958 def Exec(self, feedback_fn):
5963 old_role = self.old_role
5964 new_role = self.new_role
5968 if self.op.ndparams:
5969 node.ndparams = self.new_ndparams
5971 if self.op.powered is not None:
5972 node.powered = self.op.powered
5974 if self.op.hv_state:
5975 node.hv_state_static = self.new_hv_state
5977 if self.op.disk_state:
5978 node.disk_state_static = self.new_disk_state
5980 for attr in ["master_capable", "vm_capable"]:
5981 val = getattr(self.op, attr)
5983 setattr(node, attr, val)
5984 result.append((attr, str(val)))
5986 if new_role != old_role:
5987 # Tell the node to demote itself, if no longer MC and not offline
5988 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5989 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5991 self.LogWarning("Node failed to demote itself: %s", msg)
5993 new_flags = self._R2F[new_role]
5994 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5996 result.append((desc, str(nf)))
5997 (node.master_candidate, node.drained, node.offline) = new_flags
5999 # we locked all nodes, we adjust the CP before updating this node
6001 _AdjustCandidatePool(self, [node.name])
6003 if self.op.secondary_ip:
6004 node.secondary_ip = self.op.secondary_ip
6005 result.append(("secondary_ip", self.op.secondary_ip))
6007 # this will trigger configuration file update, if needed
6008 self.cfg.Update(node, feedback_fn)
6010 # this will trigger job queue propagation or cleanup if the mc
6012 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6013 self.context.ReaddNode(node)
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  Schedules a hard reboot of the node through the node daemon.  As a
  safety net the master node can only be powercycled with ``force``.

  """
  REQ_BGL = False  # NOTE(review): restored; elided in this extract

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # Refuse to powercycle the master unless explicitly forced.
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots the node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
6050 class LUClusterQuery(NoHooksLU):
# LU returning a dictionary of cluster-wide configuration values
# (versions, parameter dicts, feature flags).  Read-only, no locks.
6051 """Query cluster configuration.
6056 def ExpandNames(self):
6057 self.needed_locks = {}
# Exec: build and return the result dictionary from the cluster config.
6059 def Exec(self, feedback_fn):
6060 """Return cluster config.
6063 cluster = self.cfg.GetClusterInfo()
# NOTE(review): the "os_hvp = {}" initialization is elided in this
# extract.
6066 # Filter just for enabled hypervisors
6067 for os_name, hv_dict in cluster.os_hvp.items():
6068 os_hvp[os_name] = {}
6069 for hv_name, hv_params in hv_dict.items():
6070 if hv_name in cluster.enabled_hypervisors:
6071 os_hvp[os_name][hv_name] = hv_params
6073 # Convert ip_family to ip_version
6074 primary_ip_version = constants.IP4_VERSION
6075 if cluster.primary_ip_family == netutils.IP6Address.family:
6076 primary_ip_version = constants.IP6_VERSION
# NOTE(review): the "result = {" opener, its closing brace and the
# final "return result" are elided in this extract.
6079 "software_version": constants.RELEASE_VERSION,
6080 "protocol_version": constants.PROTOCOL_VERSION,
6081 "config_version": constants.CONFIG_VERSION,
6082 "os_api_version": max(constants.OS_API_VERSIONS),
6083 "export_version": constants.EXPORT_VERSION,
6084 "architecture": (platform.architecture()[0], platform.machine()),
6085 "name": cluster.cluster_name,
6086 "master": cluster.master_node,
6087 "default_hypervisor": cluster.primary_hypervisor,
6088 "enabled_hypervisors": cluster.enabled_hypervisors,
# hvparams restricted to enabled hypervisors only.
6089 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6090 for hypervisor_name in cluster.enabled_hypervisors]),
6092 "beparams": cluster.beparams,
6093 "osparams": cluster.osparams,
6094 "ipolicy": cluster.ipolicy,
6095 "nicparams": cluster.nicparams,
6096 "ndparams": cluster.ndparams,
6097 "candidate_pool_size": cluster.candidate_pool_size,
6098 "master_netdev": cluster.master_netdev,
6099 "master_netmask": cluster.master_netmask,
6100 "use_external_mip_script": cluster.use_external_mip_script,
6101 "volume_group_name": cluster.volume_group_name,
6102 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6103 "file_storage_dir": cluster.file_storage_dir,
6104 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6105 "maintain_node_health": cluster.maintain_node_health,
6106 "ctime": cluster.ctime,
6107 "mtime": cluster.mtime,
6108 "uuid": cluster.uuid,
6109 "tags": list(cluster.GetTags()),
6110 "uid_pool": cluster.uid_pool,
6111 "default_iallocator": cluster.default_iallocator,
6112 "reserved_lvs": cluster.reserved_lvs,
6113 "primary_ip_version": primary_ip_version,
6114 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6115 "hidden_os": cluster.hidden_os,
6116 "blacklisted_os": cluster.blacklisted_os,
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  Old-style query for a small fixed set of cluster values; all fields
  are static, so no locks are needed.

  """
  REQ_BGL = False  # NOTE(review): restored; elided in this extract
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    @return: list of values, one per requested output field, in request
        order

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        # Job queue drain state is signalled via a flag file on disk.
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  Assembles the instance's block devices on all of its nodes and
  returns the resulting device mapping.

  """
  REQ_BGL = False  # NOTE(review): restored; elided in this extract

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Node locks are computed later from the instance's node list.
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: list of (node, instance-visible name, node-visible path)
        tuples as returned by L{_AssembleInstanceDisks}

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
6200 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6202 """Prepare the block devices for an instance.
6204 This sets up the block devices on all nodes.
6206 @type lu: L{LogicalUnit}
6207 @param lu: the logical unit on whose behalf we execute
6208 @type instance: L{objects.Instance}
6209 @param instance: the instance for whose disks we assemble
6210 @type disks: list of L{objects.Disk} or None
6211 @param disks: which disks to assemble (or all, if None)
6212 @type ignore_secondaries: boolean
6213 @param ignore_secondaries: if true, errors on secondary nodes
6214 won't result in an error return from the function
6215 @type ignore_size: boolean
6216 @param ignore_size: if true, the current known size of the disk
6217 will not be used during the disk activation, useful for cases
6218 when the size is wrong
6219 @return: False if the operation failed, otherwise a list of
6220 (host, instance_visible_name, node_visible_name)
6221 with the mapping from node devices to instance devices
# NOTE(review): the initializations of the accumulators (device_info
# list, disks_ok flag) are elided in this extract.
6226 iname = instance.name
6227 disks = _ExpandCheckDisks(instance, disks)
6229 # With the two passes mechanism we try to reduce the window of
6230 # opportunity for the race condition of switching DRBD to primary
6231 # before handshaking occured, but we do not eliminate it
6233 # The proper fix would be to wait (with some limits) until the
6234 # connection has been made and drbd transitions from WFConnection
6235 # into any other network-connected state (Connected, SyncTarget,
6238 # 1st pass, assemble on all nodes in secondary mode
6239 for idx, inst_disk in enumerate(disks):
6240 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
# Drop the recorded size so activation works even when it is wrong.
# NOTE(review): the "if ignore_size:" guard is elided in this extract.
6242 node_disk = node_disk.Copy()
6243 node_disk.UnsetSize()
6244 lu.cfg.SetDiskID(node_disk, node)
6245 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6246 msg = result.fail_msg
# NOTE(review): the "if msg:" guard around the warning/flag update is
# elided in this extract.
6248 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6249 " (is_primary=False, pass=1): %s",
6250 inst_disk.iv_name, node, msg)
# Secondary-node failures only count when not explicitly ignored.
6251 if not ignore_secondaries:
6254 # FIXME: race condition on drbd migration to primary
6256 # 2nd pass, do only the primary node
6257 for idx, inst_disk in enumerate(disks):
6260 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
# Skip non-primary nodes in this pass (the "continue" is elided here).
6261 if node != instance.primary_node:
6264 node_disk = node_disk.Copy()
6265 node_disk.UnsetSize()
6266 lu.cfg.SetDiskID(node_disk, node)
6267 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6268 msg = result.fail_msg
6270 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6271 " (is_primary=True, pass=2): %s",
6272 inst_disk.iv_name, node, msg)
# On success, remember the device path reported by the primary node.
6275 dev_path = result.payload
6277 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6279 # leave the disks configured for the primary node
6280 # this is a workaround that would be fixed better by
6281 # improving the logical/physical id handling
# NOTE(review): the "for disk in disks:" loop header is elided here.
6283 lu.cfg.SetDiskID(disk, instance.primary_node)
6285 return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  Assembles all of the instance's disks; on failure the already
  assembled devices are shut down again before raising, so no
  half-activated state is left behind.

  @raise errors.OpExecError: if the disks cannot be assembled

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    # Roll back whatever was assembled before failing.
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  With ``force`` the disks are shut down unconditionally; otherwise the
  instance must be down first (checked by the safe variant).

  """
  REQ_BGL = False  # NOTE(review): restored; elided in this extract

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Node locks are computed later from the instance's node list.
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """State-checked variant of L{_ShutdownInstanceDisks}.

  Refuses to touch the block devices unless the instance is
  administratively down, then delegates the actual shutdown.

  @param lu: the calling logical unit
  @param instance: the instance whose disks should be shut down
  @param disks: optional subset of disks to act on (C{None} means all)

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  # NOTE(review): elided extract -- the "disks is None" guard that
  # selects this default-return is not visible here.
  return instance.disks
  if not set(disks).issubset(instance.disks):
    # NOTE(review): the continuation/closing lines of this raise
    # (message tail and error code) are elided.
    raise errors.ProgrammerError("Can only act on disks belonging to the"
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  treated as failures -- TODO confirm, the docstring continuation is
  elided in this extract.

  @param disks: optional subset of disks to shut down (None means all)
  @param ignore_primary: whether errors on the primary node affect the
      overall result

  """
  disks = _ExpandCheckDisks(instance, disks)

  # NOTE(review): the outer "for disk in disks:" loop header and the
  # success-accumulator initialisation are elided from this view.
  for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
    lu.cfg.SetDiskID(top_disk, node)
    result = lu.rpc.call_blockdev_shutdown(node, top_disk)
    msg = result.fail_msg
    # NOTE(review): the warning below is presumably guarded by "if msg:"
    # in the full source -- the guard line is elided.
    lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                  disk.iv_name, node, msg)
    # NOTE(review): the body of this condition (marking the overall
    # result failed) and the function's return are elided.
    if ((node == instance.primary_node and not ignore_primary) or
        (node != instance.primary_node and not result.offline)):
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: C{int}
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  # Payload layout: (bootid, vg info, hypervisor info) -- we only need
  # the per-hypervisor stats here.
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    # NOTE(review): the error-code argument of this raise and the
    # trailing "return free_mem" are elided in this extract.
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks that the given nodes have enough free disk in every VG.

  Thin convenience wrapper: applies L{_CheckNodesFreeDiskOnVG} once per
  requested volume group.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, needed in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, needed)
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    # Payload layout: (bootid, vg info, hypervisor info); only the VG
    # statistics matter here.
    (_, (vg_info, ), _) = info.payload
    vg_free = vg_info.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      # NOTE(review): the error-code argument of this raise is elided in
      # this extract.
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to query for CPU information
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    # Payload layout: (bootid, vg info, hypervisor info); the CPU count
    # comes from the hypervisor statistics.
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      # NOTE(review): the error-code argument of this raise is elided in
      # this extract.
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- class attributes (e.g. REQ_BGL) and
  # several guard/else/return lines below are missing from this view.

  def CheckArguments(self):
    # Normalize and type-check any explicitly provided backend parameters.
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the surrounding "env = {...}" dict literal and the
    # trailing "return env" are elided here.
      "FORCE": self.op.force,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the trailing "return (nl, nl)" is elided here.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

    if self.op.hvparams or self.op.beparams:
      self.proc.LogWarning("Overridden parameters are ignored")

    # NOTE(review): an "else:" branch marker presumably precedes the
    # online-node checks below -- it is elided in this extract.
    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    bep.update(self.op.beparams)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    # NOTE(review): an argument line of this RPC call (presumably the
    # instance name) is elided.
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")

    # NOTE(review): an "else:" branch marker is elided here.
    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    # NOTE(review): the "result =" assignment prefix and one argument
    # line (presumably self.op.beparams) appear elided in this call.
    self.rpc.call_instance_start(node_current,
                                 (instance, self.op.hvparams,
                                 self.op.startup_paused)
    msg = result.fail_msg
    # NOTE(review): an "if msg:" guard presumably protects the cleanup
    # and raise below -- it is elided.
    _ShutdownInstanceDisks(self, instance)
    raise errors.OpExecError("Could not start instance: %s" % msg)
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- some class attributes and guard
  # lines are missing from this view.

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    # NOTE(review): the surrounding "env = {...}" dict literal and the
    # trailing "return env" are elided here.
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the trailing "return (nl, nl)" is elided here.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    # NOTE(review): an argument line of this RPC call (presumably the
    # instance name) is elided.
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    # Soft/hard reboots are delegated to the hypervisor on the node.
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      # NOTE(review): an argument line (presumably reboot_type) is
      # elided from this call.
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    # NOTE(review): an "else:" marker (full reboot path) is elided here.
    if instance_running:
      result = self.rpc.call_instance_shutdown(node_current, instance,
                                               self.op.shutdown_timeout)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
    # NOTE(review): an "else:" marker and the LogInfo argument line
    # (the instance name) are elided here.
    self.LogInfo("Instance %s was already stopped, starting now",
    _StartInstanceDisks(self, instance, ignore_secondaries)
    result = self.rpc.call_instance_start(node_current,
                                          (instance, None, None), False)
    msg = result.fail_msg
    # NOTE(review): an "if msg:" guard is elided before this cleanup.
    _ShutdownInstanceDisks(self, instance)
    raise errors.OpExecError("Could not start instance for"
                             " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- some class attributes and guard
  # lines are missing from this view.

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    # NOTE(review): the trailing "return env" is elided here.

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the trailing "return (nl, nl)" is elided here.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    # NOTE(review): an "else:" marker is elided before this check.
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    # NOTE(review): an "else:" marker is elided before the RPC below.
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
    msg = result.fail_msg
    # NOTE(review): an "if msg:" guard is elided before this warning.
    self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- some class attributes and guard
  # lines are missing from this view.

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the trailing "return (nl, nl)" is elided here.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      # NOTE(review): the error-code argument of this raise is elided.
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification for the explicitly requested OS
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    # NOTE(review): an "else:" marker is elided before this fallback.
    instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    # NOTE(review): the "else: self.os_inst = {}" branch is elided.

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    # NOTE(review): a try/finally wrapper around the OS-create RPC
    # (ensuring the disk shutdown below always runs) appears elided.
    feedback_fn("Running the instance OS create scripts...")
    # FIXME: pass debug option from opcode to backend
    result = self.rpc.call_instance_os_add(inst.primary_node,
                                           (inst, self.os_inst), True,
                                           self.op.debug_level)
    result.Raise("Could not install OS for instance %s on node %s" %
                 (inst.name, inst.primary_node))
    _ShutdownInstanceDisks(self, inst)
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- collection closers, guard lines and
  # else-branches are missing throughout this class; confirm against the
  # full source before relying on the control flow shown here.

  # Disk parameters that may be changed while recreating
  _MODIFYABLE = frozenset([
    constants.IDISK_SIZE,
    constants.IDISK_MODE,

  # New or changed disk parameters may have different semantics
  assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
    constants.IDISK_ADOPT,

    # TODO: Implement support changing VG while recreating
    constants.IDISK_METAVG,

  def CheckArguments(self):
    if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
      # Normalize and convert deprecated list of disk indices
      self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]

    duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
    # NOTE(review): an "if duplicates:" guard is elided before this raise.
    raise errors.OpPrereqError("Some disks have been specified more than"
                               " once: %s" % utils.CommaJoin(duplicates),

    for (idx, params) in self.op.disks:
      utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
      unsupported = frozenset(params.keys()) - self._MODIFYABLE
      # NOTE(review): an "if unsupported:" guard is elided here.
      raise errors.OpPrereqError("Parameters for disk %s try to change"
                                 " unmodifyable parameter(s): %s" %
                                 (idx, utils.CommaJoin(unsupported)),

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    # NOTE(review): an "if self.op.nodes: / else:" split around the next
    # four assignments is elided.
    self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
    self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the trailing "return (nl, nl)" is elided here.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    # NOTE(review): an "if self.op.nodes:" guard is elided before the
    # node-count validation below.
    if len(self.op.nodes) != len(instance.all_nodes):
      # NOTE(review): the error-code argument of this raise is elided.
      raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                 " %d replacement nodes were specified" %
                                 (instance.name, len(instance.all_nodes),
                                  len(self.op.nodes)),
    assert instance.disk_template != constants.DT_DRBD8 or \
      len(self.op.nodes) == 2
    assert instance.disk_template != constants.DT_PLAIN or \
      len(self.op.nodes) == 1
    primary_node = self.op.nodes[0]
    # NOTE(review): an "else:" marker is elided before this fallback.
    primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)

    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state -- TODO confirm, continuation elided
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    # NOTE(review): an "if self.op.disks: / else:" split around the next
    # two assignments is elided.
    self.disks = dict(self.op.disks)
    self.disks = dict((idx, {}) for idx in range(len(instance.disks)))

    maxidx = max(self.disks.keys())
    if maxidx >= len(instance.disks):
      # NOTE(review): the error-code argument of this raise is elided.
      raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,

    if (self.op.nodes and
        sorted(self.disks.keys()) != range(len(instance.disks))):
      # NOTE(review): the error-code argument of this raise is elided.
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    # NOTE(review): the "to_skip = []" initialisation appears elided.
    mods = [] # keeps track of needed changes

    for idx, disk in enumerate(instance.disks):
      # NOTE(review): a try/except KeyError around this lookup (skipping
      # disks not selected for recreation) appears elided.
      changes = self.disks[idx]
      # Disk should not be recreated

      # update secondaries for disks, if needed
      if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
        # need to update the nodes and minors
        assert len(self.op.nodes) == 2
        assert len(disk.logical_id) == 6 # otherwise disk internals

        (_, _, old_port, _, _, old_secret) = disk.logical_id
        new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
        new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                  new_minors[0], new_minors[1], old_secret)
        assert len(disk.logical_id) == len(new_id)
      # NOTE(review): the "else: new_id = None" branch is elided.

      mods.append((idx, new_id, changes))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id, changes in mods:
      disk = instance.disks[idx]
      if new_id is not None:
        assert disk.dev_type == constants.LD_DRBD8
        disk.logical_id = new_id
      # NOTE(review): an "if changes:" guard appears elided here.
      disk.Update(size=changes.get(constants.IDISK_SIZE, None),
                  mode=changes.get(constants.IDISK_MODE, None))

    # change primary node, if needed
    # NOTE(review): an "if self.op.nodes:" guard appears elided here.
    instance.primary_node = self.op.nodes[0]
    self.LogWarning("Changing the instance's nodes, you will have to"
                    " remove any disks left on the older nodes manually")

    self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- some guard lines and error-code
  # arguments are missing from this view.

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      # NOTE(review): the error-code argument of this raise is elided.
      raise errors.OpPrereqError("IP address check requires a name check",

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    # NOTE(review): the trailing "return env" is elided here.

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the trailing "return (nl, nl)" is elided here.
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname.name != new_name:
        # NOTE(review): the LogInfo argument line is elided.
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        # NOTE(review): the error-code argument of this raise is elided.
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (hostname.name, self.op.new_name),
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    # NOTE(review): a try/finally around the rename RPC (ensuring disk
    # shutdown) and a trailing "return inst.name" appear elided.
    result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                               old_name, self.op.debug_level)
    msg = result.fail_msg
    # NOTE(review): an "if msg:" guard is elided before this warning.
    msg = ("Could not run OS rename script for instance %s on node %s"
           " (but the instance has been renamed in Ganeti): %s" %
           (inst.name, inst.primary_node, msg))
    self.proc.LogWarning(msg)
    _ShutdownInstanceDisks(self, inst)
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- some class attributes and guard
  # lines are missing from this view.

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    # NOTE(review): the trailing "return env" is elided here.

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    # NOTE(review): an "if msg:" guard is elided before this branch.
    if self.op.ignore_failures:
      feedback_fn("Warning: can't shutdown instance: %s" % msg)
    # NOTE(review): an "else:" marker and a message-continuation line
    # are elided around this raise.
    raise errors.OpExecError("Could not shutdown instance %s on"
                             (instance.name, instance.primary_node, msg))

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  Removes the instance's block devices, drops it from the cluster
  configuration, and schedules removal of its lock.

  @param lu: the logical unit on whose behalf we act
  @param feedback_fn: callback used to report warnings to the user
  @param instance: the instance object to remove
  @param ignore_failures: whether disk-removal failures are downgraded
      to a warning instead of aborting

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Schedule removal of the (now stale) instance lock
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  # NOTE(review): elided extract -- class attributes (e.g. REQ_BGL) may
  # be missing from this view.

  def CheckArguments(self):
    # Build the query helper from the opcode's name filter, requested
    # output fields and locking flag.
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    # Delegate to the query helper's old-style (list-of-rows) interface.
    return self.iq.OldStyleQuery(self)
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): elided extract -- some class attributes, keyword
  # arguments and else-branches are missing from this view.

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    # NOTE(review): additional TLMigrateInstance keyword arguments
    # (e.g. cleanup/failover flags) appear elided from this call.
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout,
                                       ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        # NOTE(review): an "else:" marker is elided here.
        self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                 self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      # NOTE(review): an "else:" marker is elided here.
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    # NOTE(review): the "env = {" opener and closing "}" around the
    # following entries are elided.
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    # NOTE(review): an "else:" marker is elided here.
    env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))
    # NOTE(review): the trailing "return env" is elided here.

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.  The actual work is delegated to the
  L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    # Node locks are computed later (DeclareLocks); start empty and let
    # the lock manager recalculate from the instance's nodes
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    # FIX: the second pair used LEVEL_NODE again, so LEVEL_NODE_RES was
    # never initialized even though DeclareLocks copies node locks into
    # it; use LEVEL_NODE_RES as in LUInstanceFailover.ExpandNames
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        failover=False,
                        fallback=self.op.allow_failover,
                        allow_runtime_changes=self.op.allow_runtime_changes,
                        ignore_ipolicy=self.op.ignore_ipolicy)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        # Externally mirrored instances can move to any node (or the
        # one explicitly requested)
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      # internal mirror (DRBD): the roles of the two nodes swap
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      # FIX: was None; use an empty string for consistency with
      # LUInstanceFailover.BuildHooksEnv (hooks env values are strings)
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  Shuts the instance down on its current node, copies the disk data to
  the target node, then (if it was up) starts it there.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    # append the primary node later; the target node lock is already held
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only plain LVM and file-based disks can be copied this way
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)
    ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
                                     self.cfg.GetNodeGroup(node.group))
    _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
                            ignore=self.op.ignore_ipolicy)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existance
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        # delete the disks we have created so far on the target node
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        # roll back: drop the partially-copied target disks
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  Submits one OpInstanceMigrate job per primary instance of the node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migration instances
    allow_runtime_changes = self.op.allow_runtime_changes
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node,
                                 allow_runtime_changes=allow_runtime_changes,
                                 ignore_ipolicy=self.op.ignore_ipolicy)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
7779 class TLMigrateInstance(Tasklet):
7780 """Tasklet class for instance migration.
7783 @ivar live: whether the migration will be done live or non-live;
7784 this variable is initalized only after CheckPrereq has run
7785 @type cleanup: boolean
7786 @ivar cleanup: Wheater we cleanup from a failed migration
7787 @type iallocator: string
7788 @ivar iallocator: The iallocator used to determine target_node
7789 @type target_node: string
7790 @ivar target_node: If given, the target_node to reallocate the instance to
7791 @type failover: boolean
7792 @ivar failover: Whether operation results in failover or migration
7793 @type fallback: boolean
7794 @ivar fallback: Whether fallback to failover is allowed if migration not
7796 @type ignore_consistency: boolean
7797 @ivar ignore_consistency: Wheter we should ignore consistency between source
7799 @type shutdown_timeout: int
7800 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7801 @type ignore_ipolicy: bool
7802 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7807 _MIGRATION_POLL_INTERVAL = 1 # seconds
7808 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               allow_runtime_changes=True,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
               ignore_ipolicy=False):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters (see the class docstring for their meaning)
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later, in CheckPrereq
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout
    self.ignore_ipolicy = ignore_ipolicy
    self.allow_runtime_changes = allow_runtime_changes
7832 def CheckPrereq(self):
7833 """Check prerequisites.
7835 This checks that the instance is in the cluster.
7838 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7839 instance = self.cfg.GetInstanceInfo(instance_name)
7840 assert instance is not None
7841 self.instance = instance
7842 cluster = self.cfg.GetClusterInfo()
7844 if (not self.cleanup and
7845 not instance.admin_state == constants.ADMINST_UP and
7846 not self.failover and self.fallback):
7847 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7848 " switching to failover")
7849 self.failover = True
7851 if instance.disk_template not in constants.DTS_MIRRORED:
7856 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7857 " %s" % (instance.disk_template, text),
7860 if instance.disk_template in constants.DTS_EXT_MIRROR:
7861 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7863 if self.lu.op.iallocator:
7864 self._RunAllocator()
7866 # We set set self.target_node as it is required by
7868 self.target_node = self.lu.op.target_node
7870 # Check that the target node is correct in terms of instance policy
7871 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7872 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7873 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7874 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7875 ignore=self.ignore_ipolicy)
7877 # self.target_node is already populated, either directly or by the
7879 target_node = self.target_node
7880 if self.target_node == instance.primary_node:
7881 raise errors.OpPrereqError("Cannot migrate instance %s"
7882 " to its primary (%s)" %
7883 (instance.name, instance.primary_node))
7885 if len(self.lu.tasklets) == 1:
7886 # It is safe to release locks only when we're the only tasklet
7888 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7889 keep=[instance.primary_node, self.target_node])
7892 secondary_nodes = instance.secondary_nodes
7893 if not secondary_nodes:
7894 raise errors.ConfigurationError("No secondary node but using"
7895 " %s disk template" %
7896 instance.disk_template)
7897 target_node = secondary_nodes[0]
7898 if self.lu.op.iallocator or (self.lu.op.target_node and
7899 self.lu.op.target_node != target_node):
7901 text = "failed over"
7904 raise errors.OpPrereqError("Instances with disk template %s cannot"
7905 " be %s to arbitrary nodes"
7906 " (neither an iallocator nor a target"
7907 " node can be passed)" %
7908 (instance.disk_template, text),
7910 nodeinfo = self.cfg.GetNodeInfo(target_node)
7911 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7912 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7913 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7914 ignore=self.ignore_ipolicy)
7916 i_be = cluster.FillBE(instance)
7918 # check memory requirements on the secondary node
7919 if (not self.cleanup and
7920 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7921 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7922 "migrating instance %s" %
7924 i_be[constants.BE_MINMEM],
7925 instance.hypervisor)
7927 self.lu.LogInfo("Not checking memory on the secondary node as"
7928 " instance will not be started")
7930 # check if failover must be forced instead of migration
7931 if (not self.cleanup and not self.failover and
7932 i_be[constants.BE_ALWAYS_FAILOVER]):
7934 self.lu.LogInfo("Instance configured to always failover; fallback"
7936 self.failover = True
7938 raise errors.OpPrereqError("This instance has been configured to"
7939 " always failover, please allow failover",
7942 # check bridge existance
7943 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7945 if not self.cleanup:
7946 _CheckNodeNotDrained(self.lu, target_node)
7947 if not self.failover:
7948 result = self.rpc.call_instance_migratable(instance.primary_node,
7950 if result.fail_msg and self.fallback:
7951 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7953 self.failover = True
7955 result.Raise("Can't migrate, please use failover",
7956 prereq=True, ecode=errors.ECODE_STATE)
7958 assert not (self.failover and self.cleanup)
7960 if not self.failover:
7961 if self.lu.op.live is not None and self.lu.op.mode is not None:
7962 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7963 " parameters are accepted",
7965 if self.lu.op.live is not None:
7967 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7969 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7970 # reset the 'live' parameter to None so that repeated
7971 # invocations of CheckPrereq do not raise an exception
7972 self.lu.op.live = None
7973 elif self.lu.op.mode is None:
7974 # read the default value from the hypervisor
7975 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7976 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7978 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7980 # Failover is never live
7983 if not (self.failover or self.cleanup):
7984 remote_info = self.rpc.call_instance_info(instance.primary_node,
7986 instance.hypervisor)
7987 remote_info.Raise("Error checking instance on node %s" %
7988 instance.primary_node)
7989 instance_running = bool(remote_info.payload)
7990 if instance_running:
7991 self.current_mem = int(remote_info.payload["memory"])
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    Sets C{self.target_node} from the iallocator result.

    """
    # FIXME: add a self.ignore_ipolicy option
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))
  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        # payload is (done, sync-percent) per node
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)
  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    @param node: the node whose DRBD devices should be closed
        (demoted to the secondary role)

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)
  def _GoStandalone(self):
    """Disconnect from the network.

    Puts the instance's DRBD devices on all nodes into standalone mode.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)
  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    @type multimaster: boolean
    @param multimaster: whether to reconnect the disks in dual-master
        (needed during live migration) or single-master mode

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)
  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore here errors, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")
  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    No-op for externally mirrored disk templates, since there is
    nothing to revert there.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      # best-effort: warn the admin instead of masking the original failure
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))
  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    Finalizes the (failed) migration on both the target and the source
    node, logging — but not raising — errors so that disk status can
    still be reverted afterwards.

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                                 instance,
                                                                 migration_info,
                                                                 False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we stil have to try to revert the
      # disk status, even if this step failed.

    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
        instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    source_node, abort_msg)
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, [self.instance.hypervisor])
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    (_, _, (src_info, )) = nodeinfo[source_node].payload
    (_, _, (dst_info, )) = nodeinfo[target_node].payload

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % idx)

    if self.current_mem > self.tgt_free_mem:
      # not enough memory on the target: balloon the instance down (if
      # runtime changes are allowed) before migrating
      if not self.allow_runtime_changes:
        raise errors.OpExecError("Memory ballooning not allowed and not enough"
                                 " free memory to fit instance %s on target"
                                 " node %s (have %dMB, need %dMB)" %
                                 (instance.name, target_node,
                                  self.tgt_free_mem, self.current_mem))
      self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
      rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
                                                     instance,
                                                     self.tgt_free_mem)
      rpcres.Raise("Cannot modify instance runtime memory")

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      # give periodic progress feedback while the transfer is running
      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    # If the instance's disk template is `rbd' and there was a successful
    # migration, unmap the device from the source node.
    if self.instance.disk_template == constants.DT_RBD:
      disks = _ExpandCheckDisks(instance, instance.disks)
      self.feedback_fn("* unmapping instance's disks from %s" % source_node)
      for disk in disks:
        result = self.rpc.call_blockdev_shutdown(source_node, disk)
        msg = result.fail_msg
        if msg:
          # best-effort: migration already succeeded, only warn the admin
          logging.error("Migration was successful, but couldn't unmap the"
                        " block device %s on source node %s: %s",
                        disk.iv_name, source_node, msg)
          logging.error("You need to unmap the device %s manually on %s",
                        disk.iv_name, source_node)

    self.feedback_fn("* done")
  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* checking disk consistency between source and target")
      for (idx, dev) in enumerate(instance.disks):
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, idx, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % idx)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.
    self.all_nodes = [self.source_node, self.target_node]
    # Map of node name -> secondary IP for all nodes involved in the move
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
      feedback_fn("Migrating instance %s" % self.instance.name)
        return self._ExecCleanup()
        return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create,
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
  for child in device.children:
    _CreateBlockDev(lu, node, instance, child, force_create,
  if not force_create:
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8528 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8529 """Create a single block device on a given node.
8531 This will not recurse over children of the device, so they must be
8534 @param lu: the lu on whose behalf we execute
8535 @param node: the node on which to create the device
8536 @type instance: L{objects.Instance}
8537 @param instance: the instance which owns the device
8538 @type device: L{objects.Disk}
8539 @param device: the device to create
8540 @param info: the extra 'metadata' we should attach to the device
8541 (this will be represented as a LVM tag)
8542 @type force_open: boolean
8543 @param force_open: this parameter will be passes to the
8544 L{backend.BlockdevCreate} function where it specifies
8545 whether we run on primary or not, and it affects both
8546 the child assembly and the device own Open() execution
8549 lu.cfg.SetDiskID(device, node)
8550 result = lu.rpc.call_blockdev_create(node, device, device.size,
8551 instance.name, force_open, info)
8552 result.Raise("Can't create block device %s on"
8553 " node %s for instance %s" % (device, node, instance.name))
8554 if device.physical_id is None:
8555 device.physical_id = result.payload
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
def _ComputeLDParams(disk_template, disk_params):
  """Computes Logical Disk parameters from Disk Template parameters.

  @type disk_template: string
  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
  @type disk_params: dict
  @param disk_params: disk template parameters; dict(template_name -> parameters
  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
    contains the LD parameters of the node. The tree is flattened in-order.

  """
  if disk_template not in constants.DISK_TEMPLATES:
    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
  dt_params = disk_params[disk_template]
  if disk_template == constants.DT_DRBD8:
    # Parameters of the DRBD device itself (filled over the LD defaults)
      constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
      constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
      constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
      constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
      constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
      constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
      constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
      constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
      constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
      constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
      constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
      constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
    result.append(drbd_params)
    # Parameters of the LV holding the DRBD data
      constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
    result.append(data_params)
    # Parameters of the LV holding the DRBD metadata
      constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
    result.append(meta_params)
  elif (disk_template == constants.DT_FILE or
        disk_template == constants.DT_SHARED_FILE):
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
  elif disk_template == constants.DT_PLAIN:
      constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
    result.append(params)
  elif disk_template == constants.DT_BLOCK:
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
  elif disk_template == constants.DT_RBD:
      constants.LDP_POOL: dt_params[constants.RBD_POOL]
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
    result.append(params)
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor, drbd_params, data_params,
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  # The two backing LVs: data (instance-sized) and metadata (fixed size)
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params=drbd_params)
#: Map of disk template -> prefix inserted into generated disk names
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",
#: Map of disk template -> logical disk (LD) device type it is built on
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
def _GenerateDiskTemplate(lu, template_name, instance_name, primary_node,
    secondary_nodes, disk_info, file_storage_dir, file_driver, base_index,
    feedback_fn, disk_params,
    _req_file_storage=opcodes.RequireFileStorage,
    _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements
  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  ld_params = _ComputeLDParams(template_name, disk_params)
  if template_name == constants.DT_DISKLESS:
  elif template_name == constants.DT_DRBD8:
    drbd_params, data_params, meta_params = ld_params
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)
    # Each disk gets a "<prefix>_data" and a "<prefix>_meta" LV name
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1],
                                      drbd_params, data_params, meta_params)
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
      raise errors.ProgrammerError("Wrong template configuration")
    if template_name == constants.DT_FILE:
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()
    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])
    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]
    # Each branch below defines how a disk's logical_id is built
    if template_name == constants.DT_PLAIN:
      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])
    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
        lambda _, disk_index, disk: (file_driver,
                                     "%s/disk%d" % (file_storage_dir,
    elif template_name == constants.DT_BLOCK:
        lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                       disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index, disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
                                params=ld_params[0]))
8791 def _GetInstanceInfoText(instance):
8792 """Compute that text that should be added to the disk's metadata.
8795 return "originstname+%s" % instance.name
8798 def _CalcEta(time_taken, written, total_size):
8799 """Calculates the ETA based on size written and total size.
8801 @param time_taken: The time taken so far
8802 @param written: amount written so far
8803 @param total_size: The total size of data to be written
8804 @return: The remaining time in seconds
8807 avg_time = time_taken / float(written)
8808 return (total_size - written) * avg_time
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @return: the success of the wipe

  """
  node = instance.primary_node
  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)
  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
  for idx, success in enumerate(result.payload):
      logging.warn("pause-sync of instance %s for disks %d failed",
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)
      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
      start_time = time.time()
      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        # Report progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
    logging.info("Resume sync of instance %s disks", instance.name)
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
    for idx, success in enumerate(result.payload):
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  # File-based templates need their storage directory created up-front
  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))
  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)
    for node in all_nodes:
      # Creation is only forced (and devices opened) on the primary node
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)
  for (idx, device) in enumerate(instance.disks):
      edata = [(target_node, device)]
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, msg)
    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)
  # File-based instances also have their storage directory removed
  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
      # NOTE(review): the second lookup uses the literal key
      # constants.IDISK_VG instead of the disk's own VG name
      # (disk[constants.IDISK_VG]), so per-VG sizes never accumulate
      # across disks -- confirm and fix upstream.
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
  # Required free disk space as a function of disk and swap space
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)
  return req_size_dict[disk_template]
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    constants.DT_RBD: 0,
  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)
  return req_size_dict[disk_template]
9034 def _FilterVmNodes(lu, nodenames):
9035 """Filters out non-vm_capable nodes from a list.
9037 @type lu: L{LogicalUnit}
9038 @param lu: the logical unit for which we check
9039 @type nodenames: list
9040 @param nodenames: the list of nodes on which we should check
9042 @return: the list of vm-capable nodes
9045 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9046 return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstract the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  cluster = lu.cfg.GetClusterInfo()
  # Fill the opcode-level parameters on top of the cluster defaults
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)
    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    if self.op.disk_template in constants.DTS_MUST_ADOPT:
      raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                 " but no 'adopt' parameter given" %
                                 self.op.disk_template,
    self.adopt_disks = has_adopt
    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
      self.check_ip = None
    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)
    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()
    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
        self.op.snode = None
    self._cds = _GetClusterDomainSecret()
    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True
      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")
    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
      self.source_x509_ca = cert
      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}
    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidential modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path
      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name
      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MAXMEM],
                     hypervisor=self.op.hypervisor,
    ial.Run(self.op.iallocator)
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    # The first returned node becomes the primary, the optional second
    # one the secondary (mirror) node
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
      "ADD_MODE": self.op.mode,
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images
    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      hypervisor_name=self.op.hypervisor,
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Hooks run on the master plus all nodes of the new instance
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT
    src_node = self.op.src_node
    src_path = self.op.src_path
    # Without an explicit source node, search all locked nodes for the export
    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      for node in exp_list:
        if exp_list[node].fail_msg:
        if src_path in exp_list[node].payload:
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)
    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)
    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
9455 def _ReadExportParams(self, einfo):
9456 """Use export parameters as defaults.
9458 In case the opcode doesn't specify (as in override) some instance
9459 parameters, then try to use them from the export information, if
9463 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9465 if self.op.disk_template is None:
9466 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9467 self.op.disk_template = einfo.get(constants.INISECT_INS,
9469 if self.op.disk_template not in constants.DISK_TEMPLATES:
9470 raise errors.OpPrereqError("Disk template specified in configuration"
9471 " file is not one of the allowed values:"
9472 " %s" % " ".join(constants.DISK_TEMPLATES))
9474 raise errors.OpPrereqError("No disk template specified and the export"
9475 " is missing the disk_template information",
9478 if not self.op.disks:
9480 # TODO: import the disk iv_name too
9481 for idx in range(constants.MAX_DISKS):
9482 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9483 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9484 disks.append({constants.IDISK_SIZE: disk_sz})
9485 self.op.disks = disks
9486 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9487 raise errors.OpPrereqError("No disk info specified and the export"
9488 " is missing the disk information",
9491 if not self.op.nics:
9493 for idx in range(constants.MAX_NICS):
9494 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9496 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9497 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9504 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9505 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9507 if (self.op.hypervisor is None and
9508 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9509 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9511 if einfo.has_section(constants.INISECT_HYP):
9512 # use the export parameters but do not override the ones
9513 # specified by the user
9514 for name, value in einfo.items(constants.INISECT_HYP):
9515 if name not in self.op.hvparams:
9516 self.op.hvparams[name] = value
9518 if einfo.has_section(constants.INISECT_BEP):
9519 # use the parameters, without overriding
9520 for name, value in einfo.items(constants.INISECT_BEP):
9521 if name not in self.op.beparams:
9522 self.op.beparams[name] = value
9523 # Compatibility for the old "memory" be param
9524 if name == constants.BE_MEMORY:
9525 if constants.BE_MAXMEM not in self.op.beparams:
9526 self.op.beparams[constants.BE_MAXMEM] = value
9527 if constants.BE_MINMEM not in self.op.beparams:
9528 self.op.beparams[constants.BE_MINMEM] = value
9530 # try to read the parameters old style, from the main section
9531 for name in constants.BES_PARAMETERS:
9532 if (name not in self.op.beparams and
9533 einfo.has_option(constants.INISECT_INS, name)):
9534 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9536 if einfo.has_section(constants.INISECT_OSP):
9537 # use the parameters, without overriding
9538 for name, value in einfo.items(constants.INISECT_OSP):
9539 if name not in self.op.osparams:
9540 self.op.osparams[name] = value
9542 def _RevertToDefaults(self, cluster):
9543 """Revert the instance parameters to the default values.
9547 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9548 for name in self.op.hvparams.keys():
9549 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9550 del self.op.hvparams[name]
9552 be_defs = cluster.SimpleFillBE({})
9553 for name in self.op.beparams.keys():
9554 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9555 del self.op.beparams[name]
9557 nic_defs = cluster.SimpleFillNIC({})
9558 for nic in self.op.nics:
9559 for name in constants.NICS_PARAMETERS:
9560 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9563 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9564 for name in self.op.osparams.keys():
9565 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9566 del self.op.osparams[name]
9568 def _CalculateFileStorageDir(self):
9569 """Calculate final instance file storage dir.
9572 # file storage dir calculation/check
9573 self.instance_file_storage_dir = None
9574 if self.op.disk_template in constants.DTS_FILEBASED:
9575 # build the full file storage dir path
9578 if self.op.disk_template == constants.DT_SHARED_FILE:
9579 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9581 get_fsd_fn = self.cfg.GetFileStorageDir
9583 cfg_storagedir = get_fsd_fn()
9584 if not cfg_storagedir:
9585 raise errors.OpPrereqError("Cluster file storage dir not defined")
9586 joinargs.append(cfg_storagedir)
9588 if self.op.file_storage_dir is not None:
9589 joinargs.append(self.op.file_storage_dir)
9591 joinargs.append(self.op.instance_name)
9593 # pylint: disable=W0142
9594 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9596 def CheckPrereq(self): # pylint: disable=R0914
9597 """Check prerequisites.
9600 self._CalculateFileStorageDir()
9602 if self.op.mode == constants.INSTANCE_IMPORT:
9603 export_info = self._ReadExportInfo()
9604 self._ReadExportParams(export_info)
9606 if (not self.cfg.GetVGName() and
9607 self.op.disk_template not in constants.DTS_NOT_LVM):
9608 raise errors.OpPrereqError("Cluster does not support lvm-based"
9609 " instances", errors.ECODE_STATE)
9611 if (self.op.hypervisor is None or
9612 self.op.hypervisor == constants.VALUE_AUTO):
9613 self.op.hypervisor = self.cfg.GetHypervisorType()
9615 cluster = self.cfg.GetClusterInfo()
9616 enabled_hvs = cluster.enabled_hypervisors
9617 if self.op.hypervisor not in enabled_hvs:
9618 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9619 " cluster (%s)" % (self.op.hypervisor,
9620 ",".join(enabled_hvs)),
9623 # Check tag validity
9624 for tag in self.op.tags:
9625 objects.TaggableObject.ValidateTag(tag)
9627 # check hypervisor parameter syntax (locally)
9628 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9629 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9631 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9632 hv_type.CheckParameterSyntax(filled_hvp)
9633 self.hv_full = filled_hvp
9634 # check that we don't specify global parameters on an instance
9635 _CheckGlobalHvParams(self.op.hvparams)
9637 # fill and remember the beparams dict
9638 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9639 for param, value in self.op.beparams.iteritems():
9640 if value == constants.VALUE_AUTO:
9641 self.op.beparams[param] = default_beparams[param]
9642 objects.UpgradeBeParams(self.op.beparams)
9643 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9644 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9646 # build os parameters
9647 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9649 # now that hvp/bep are in final format, let's reset to defaults,
9651 if self.op.identify_defaults:
9652 self._RevertToDefaults(cluster)
9656 for idx, nic in enumerate(self.op.nics):
9657 nic_mode_req = nic.get(constants.INIC_MODE, None)
9658 nic_mode = nic_mode_req
9659 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9660 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9662 # in routed mode, for the first nic, the default ip is 'auto'
9663 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9664 default_ip_mode = constants.VALUE_AUTO
9666 default_ip_mode = constants.VALUE_NONE
9668 # ip validity checks
9669 ip = nic.get(constants.INIC_IP, default_ip_mode)
9670 if ip is None or ip.lower() == constants.VALUE_NONE:
9672 elif ip.lower() == constants.VALUE_AUTO:
9673 if not self.op.name_check:
9674 raise errors.OpPrereqError("IP address set to auto but name checks"
9675 " have been skipped",
9677 nic_ip = self.hostname1.ip
9679 if not netutils.IPAddress.IsValid(ip):
9680 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9684 # TODO: check the ip address for uniqueness
9685 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9686 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9689 # MAC address verification
9690 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9691 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9692 mac = utils.NormalizeAndValidateMac(mac)
9695 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9696 except errors.ReservationError:
9697 raise errors.OpPrereqError("MAC address %s already in use"
9698 " in cluster" % mac,
9699 errors.ECODE_NOTUNIQUE)
9701 # Build nic parameters
9702 link = nic.get(constants.INIC_LINK, None)
9703 if link == constants.VALUE_AUTO:
9704 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9707 nicparams[constants.NIC_MODE] = nic_mode
9709 nicparams[constants.NIC_LINK] = link
9711 check_params = cluster.SimpleFillNIC(nicparams)
9712 objects.NIC.CheckParameterSyntax(check_params)
9713 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9715 # disk checks/pre-build
9716 default_vg = self.cfg.GetVGName()
9718 for disk in self.op.disks:
9719 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9720 if mode not in constants.DISK_ACCESS_SET:
9721 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9722 mode, errors.ECODE_INVAL)
9723 size = disk.get(constants.IDISK_SIZE, None)
9725 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9728 except (TypeError, ValueError):
9729 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9732 data_vg = disk.get(constants.IDISK_VG, default_vg)
9734 constants.IDISK_SIZE: size,
9735 constants.IDISK_MODE: mode,
9736 constants.IDISK_VG: data_vg,
9738 if constants.IDISK_METAVG in disk:
9739 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9740 if constants.IDISK_ADOPT in disk:
9741 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9742 self.disks.append(new_disk)
9744 if self.op.mode == constants.INSTANCE_IMPORT:
9746 for idx in range(len(self.disks)):
9747 option = "disk%d_dump" % idx
9748 if export_info.has_option(constants.INISECT_INS, option):
9749 # FIXME: are the old os-es, disk sizes, etc. useful?
9750 export_name = export_info.get(constants.INISECT_INS, option)
9751 image = utils.PathJoin(self.op.src_path, export_name)
9752 disk_images.append(image)
9754 disk_images.append(False)
9756 self.src_images = disk_images
9758 old_name = export_info.get(constants.INISECT_INS, "name")
9759 if self.op.instance_name == old_name:
9760 for idx, nic in enumerate(self.nics):
9761 if nic.mac == constants.VALUE_AUTO:
9762 nic_mac_ini = "nic%d_mac" % idx
9763 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9765 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9767 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9768 if self.op.ip_check:
9769 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9770 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9771 (self.check_ip, self.op.instance_name),
9772 errors.ECODE_NOTUNIQUE)
9774 #### mac address generation
9775 # By generating here the mac address both the allocator and the hooks get
9776 # the real final mac address rather than the 'auto' or 'generate' value.
9777 # There is a race condition between the generation and the instance object
9778 # creation, which means that we know the mac is valid now, but we're not
9779 # sure it will be when we actually add the instance. If things go bad
9780 # adding the instance will abort because of a duplicate mac, and the
9781 # creation job will fail.
9782 for nic in self.nics:
9783 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9784 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9788 if self.op.iallocator is not None:
9789 self._RunAllocator()
9791 # Release all unneeded node locks
9792 _ReleaseLocks(self, locking.LEVEL_NODE,
9793 keep=filter(None, [self.op.pnode, self.op.snode,
9795 _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9796 keep=filter(None, [self.op.pnode, self.op.snode,
9799 #### node related checks
9801 # check primary node
9802 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9803 assert self.pnode is not None, \
9804 "Cannot retrieve locked node %s" % self.op.pnode
9806 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9807 pnode.name, errors.ECODE_STATE)
9809 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9810 pnode.name, errors.ECODE_STATE)
9811 if not pnode.vm_capable:
9812 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9813 " '%s'" % pnode.name, errors.ECODE_STATE)
9815 self.secondaries = []
9817 # mirror node verification
9818 if self.op.disk_template in constants.DTS_INT_MIRROR:
9819 if self.op.snode == pnode.name:
9820 raise errors.OpPrereqError("The secondary node cannot be the"
9821 " primary node", errors.ECODE_INVAL)
9822 _CheckNodeOnline(self, self.op.snode)
9823 _CheckNodeNotDrained(self, self.op.snode)
9824 _CheckNodeVmCapable(self, self.op.snode)
9825 self.secondaries.append(self.op.snode)
9827 snode = self.cfg.GetNodeInfo(self.op.snode)
9828 if pnode.group != snode.group:
9829 self.LogWarning("The primary and secondary nodes are in two"
9830 " different node groups; the disk parameters"
9831 " from the first disk's node group will be"
9834 nodenames = [pnode.name] + self.secondaries
9836 # Verify instance specs
9838 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9839 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9840 constants.ISPEC_DISK_COUNT: len(self.disks),
9841 constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9842 constants.ISPEC_NIC_COUNT: len(self.nics),
9845 group_info = self.cfg.GetNodeGroup(pnode.group)
9846 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9847 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9848 if not self.op.ignore_ipolicy and res:
9849 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9850 " policy: %s") % (pnode.group,
9851 utils.CommaJoin(res)),
9854 # disk parameters (not customizable at instance or node level)
9855 # just use the primary node parameters, ignoring the secondary.
9856 self.diskparams = group_info.diskparams
9858 if not self.adopt_disks:
9859 if self.op.disk_template == constants.DT_RBD:
9860 # _CheckRADOSFreeSpace() is just a placeholder.
9861 # Any function that checks prerequisites can be placed here.
9862 # Check if there is enough space on the RADOS cluster.
9863 _CheckRADOSFreeSpace()
9865 # Check lv size requirements, if not adopting
9866 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9867 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9869 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9870 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9871 disk[constants.IDISK_ADOPT])
9872 for disk in self.disks])
9873 if len(all_lvs) != len(self.disks):
9874 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9876 for lv_name in all_lvs:
9878 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9879 # to ReserveLV uses the same syntax
9880 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9881 except errors.ReservationError:
9882 raise errors.OpPrereqError("LV named %s used by another instance" %
9883 lv_name, errors.ECODE_NOTUNIQUE)
9885 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9886 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9888 node_lvs = self.rpc.call_lv_list([pnode.name],
9889 vg_names.payload.keys())[pnode.name]
9890 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9891 node_lvs = node_lvs.payload
9893 delta = all_lvs.difference(node_lvs.keys())
9895 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9896 utils.CommaJoin(delta),
9898 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9900 raise errors.OpPrereqError("Online logical volumes found, cannot"
9901 " adopt: %s" % utils.CommaJoin(online_lvs),
9903 # update the size of disk based on what is found
9904 for dsk in self.disks:
9905 dsk[constants.IDISK_SIZE] = \
9906 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9907 dsk[constants.IDISK_ADOPT])][0]))
9909 elif self.op.disk_template == constants.DT_BLOCK:
9910 # Normalize and de-duplicate device paths
9911 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9912 for disk in self.disks])
9913 if len(all_disks) != len(self.disks):
9914 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9916 baddisks = [d for d in all_disks
9917 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9919 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9920 " cannot be adopted" %
9921 (", ".join(baddisks),
9922 constants.ADOPTABLE_BLOCKDEV_ROOT),
9925 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9926 list(all_disks))[pnode.name]
9927 node_disks.Raise("Cannot get block device information from node %s" %
9929 node_disks = node_disks.payload
9930 delta = all_disks.difference(node_disks.keys())
9932 raise errors.OpPrereqError("Missing block device(s): %s" %
9933 utils.CommaJoin(delta),
9935 for dsk in self.disks:
9936 dsk[constants.IDISK_SIZE] = \
9937 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9939 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9941 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9942 # check OS parameters (remotely)
9943 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9945 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9947 # memory check on primary node
9948 #TODO(dynmem): use MINMEM for checking
9950 _CheckNodeFreeMemory(self, self.pnode.name,
9951 "creating instance %s" % self.op.instance_name,
9952 self.be_full[constants.BE_MAXMEM],
9955 self.dry_run_result = list(nodenames)
9957 def Exec(self, feedback_fn):
9958 """Create and add the instance to the cluster.
9961 instance = self.op.instance_name
9962 pnode_name = self.pnode.name
9964 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9965 self.owned_locks(locking.LEVEL_NODE)), \
9966 "Node locks differ from node resource locks"
9968 ht_kind = self.op.hypervisor
9969 if ht_kind in constants.HTS_REQ_PORT:
9970 network_port = self.cfg.AllocatePort()
9974 disks = _GenerateDiskTemplate(self,
9975 self.op.disk_template,
9976 instance, pnode_name,
9979 self.instance_file_storage_dir,
9980 self.op.file_driver,
9985 iobj = objects.Instance(name=instance, os=self.op.os_type,
9986 primary_node=pnode_name,
9987 nics=self.nics, disks=disks,
9988 disk_template=self.op.disk_template,
9989 admin_state=constants.ADMINST_DOWN,
9990 network_port=network_port,
9991 beparams=self.op.beparams,
9992 hvparams=self.op.hvparams,
9993 hypervisor=self.op.hypervisor,
9994 osparams=self.op.osparams,
9998 for tag in self.op.tags:
10001 if self.adopt_disks:
10002 if self.op.disk_template == constants.DT_PLAIN:
10003 # rename LVs to the newly-generated names; we need to construct
10004 # 'fake' LV disks with the old data, plus the new unique_id
10005 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10007 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10008 rename_to.append(t_dsk.logical_id)
10009 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10010 self.cfg.SetDiskID(t_dsk, pnode_name)
10011 result = self.rpc.call_blockdev_rename(pnode_name,
10012 zip(tmp_disks, rename_to))
10013 result.Raise("Failed to rename adoped LVs")
10015 feedback_fn("* creating instance disks...")
10017 _CreateDisks(self, iobj)
10018 except errors.OpExecError:
10019 self.LogWarning("Device creation failed, reverting...")
10021 _RemoveDisks(self, iobj)
10023 self.cfg.ReleaseDRBDMinors(instance)
10026 feedback_fn("adding instance %s to cluster config" % instance)
10028 self.cfg.AddInstance(iobj, self.proc.GetECId())
10030 # Declare that we don't want to remove the instance lock anymore, as we've
10031 # added the instance to the config
10032 del self.remove_locks[locking.LEVEL_INSTANCE]
10034 if self.op.mode == constants.INSTANCE_IMPORT:
10035 # Release unused nodes
10036 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10038 # Release all nodes
10039 _ReleaseLocks(self, locking.LEVEL_NODE)
10042 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10043 feedback_fn("* wiping instance disks...")
10045 _WipeDisks(self, iobj)
10046 except errors.OpExecError, err:
10047 logging.exception("Wiping disks failed")
10048 self.LogWarning("Wiping instance disks failed (%s)", err)
10052 # Something is already wrong with the disks, don't do anything else
10054 elif self.op.wait_for_sync:
10055 disk_abort = not _WaitForSync(self, iobj)
10056 elif iobj.disk_template in constants.DTS_INT_MIRROR:
10057 # make sure the disks are not degraded (still sync-ing is ok)
10058 feedback_fn("* checking mirrors status")
10059 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10064 _RemoveDisks(self, iobj)
10065 self.cfg.RemoveInstance(iobj.name)
10066 # Make sure the instance lock gets removed
10067 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10068 raise errors.OpExecError("There are some degraded disks for"
10071 # Release all node resource locks
10072 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10074 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10075 if self.op.mode == constants.INSTANCE_CREATE:
10076 if not self.op.no_install:
10077 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10078 not self.op.wait_for_sync)
10080 feedback_fn("* pausing disk sync to install instance OS")
10081 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10083 for idx, success in enumerate(result.payload):
10085 logging.warn("pause-sync of instance %s for disk %d failed",
10088 feedback_fn("* running the instance OS create scripts...")
10089 # FIXME: pass debug option from opcode to backend
10091 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10092 self.op.debug_level)
10094 feedback_fn("* resuming disk sync")
10095 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10097 for idx, success in enumerate(result.payload):
10099 logging.warn("resume-sync of instance %s for disk %d failed",
10102 os_add_result.Raise("Could not add os for instance %s"
10103 " on node %s" % (instance, pnode_name))
10105 elif self.op.mode == constants.INSTANCE_IMPORT:
10106 feedback_fn("* running the instance OS import scripts...")
10110 for idx, image in enumerate(self.src_images):
10114 # FIXME: pass debug option from opcode to backend
10115 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10116 constants.IEIO_FILE, (image, ),
10117 constants.IEIO_SCRIPT,
10118 (iobj.disks[idx], idx),
10120 transfers.append(dt)
10123 masterd.instance.TransferInstanceData(self, feedback_fn,
10124 self.op.src_node, pnode_name,
10125 self.pnode.secondary_ip,
10127 if not compat.all(import_result):
10128 self.LogWarning("Some disks for instance %s on node %s were not"
10129 " imported successfully" % (instance, pnode_name))
10131 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10132 feedback_fn("* preparing remote import...")
10133 # The source cluster will stop the instance before attempting to make a
10134 # connection. In some cases stopping an instance can take a long time,
10135 # hence the shutdown timeout is added to the connection timeout.
10136 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10137 self.op.source_shutdown_timeout)
10138 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10140 assert iobj.primary_node == self.pnode.name
10142 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10143 self.source_x509_ca,
10144 self._cds, timeouts)
10145 if not compat.all(disk_results):
10146 # TODO: Should the instance still be started, even if some disks
10147 # failed to import (valid for local imports, too)?
10148 self.LogWarning("Some disks for instance %s on node %s were not"
10149 " imported successfully" % (instance, pnode_name))
10151 # Run rename script on newly imported instance
10152 assert iobj.name == instance
10153 feedback_fn("Running rename script for %s" % instance)
10154 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10155 self.source_instance_name,
10156 self.op.debug_level)
10157 if result.fail_msg:
10158 self.LogWarning("Failed to run rename script for %s on node"
10159 " %s: %s" % (instance, pnode_name, result.fail_msg))
10162 # also checked in the prereq part
10163 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10166 assert not self.owned_locks(locking.LEVEL_NODE_RES)
10169 iobj.admin_state = constants.ADMINST_UP
10170 self.cfg.Update(iobj, feedback_fn)
10171 logging.info("Starting instance %s on node %s", instance, pnode_name)
10172 feedback_fn("* starting instance...")
10173 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10175 result.Raise("Could not start instance")
10177 return list(iobj.all_nodes)
10180 def _CheckRADOSFreeSpace():
10181 """Compute disk size requirements inside the RADOS cluster.
10184 # For the RADOS cluster we assume there is always enough space.
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # all locks are shared: we only read cluster/instance state
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      # derive a user-visible state from the configured admin state
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
      else:
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict
  @return: the console information, as a dictionary

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # argument validation is shared with the tasklet
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)
10379 class TLReplaceDisks(Tasklet):
10380 """Replaces disks for an instance.
10382 Note: Locking is not within the scope of this class.
10385 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10386 disks, delay_iallocator, early_release, ignore_ipolicy):
10387 """Initializes this class.
10390 Tasklet.__init__(self, lu)
10393 self.instance_name = instance_name
10395 self.iallocator_name = iallocator_name
10396 self.remote_node = remote_node
10398 self.delay_iallocator = delay_iallocator
10399 self.early_release = early_release
10400 self.ignore_ipolicy = ignore_ipolicy
10403 self.instance = None
10404 self.new_node = None
10405 self.target_node = None
10406 self.other_node = None
10407 self.remote_node_info = None
10408 self.node_secondary_ip = None
10411 def CheckArguments(mode, remote_node, iallocator):
10412 """Helper function for users of this class.
10415 # check for valid parameter combination
10416 if mode == constants.REPLACE_DISK_CHG:
10417 if remote_node is None and iallocator is None:
10418 raise errors.OpPrereqError("When changing the secondary either an"
10419 " iallocator script must be used or the"
10420 " new node given", errors.ECODE_INVAL)
10422 if remote_node is not None and iallocator is not None:
10423 raise errors.OpPrereqError("Give either the iallocator or the new"
10424 " secondary, not both", errors.ECODE_INVAL)
10426 elif remote_node is not None or iallocator is not None:
10427 # Not replacing the secondary
10428 raise errors.OpPrereqError("The iallocator and new node options can"
10429 " only be used when changing the"
10430 " secondary node", errors.ECODE_INVAL)
10433 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10434 """Compute a new secondary node using an IAllocator.
10437 ial = IAllocator(lu.cfg, lu.rpc,
10438 mode=constants.IALLOCATOR_MODE_RELOC,
10439 name=instance_name,
10440 relocate_from=list(relocate_from))
10442 ial.Run(iallocator_name)
10444 if not ial.success:
10445 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10446 " %s" % (iallocator_name, ial.info),
10447 errors.ECODE_NORES)
10449 if len(ial.result) != ial.required_nodes:
10450 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10451 " of nodes (%s), required %s" %
10453 len(ial.result), ial.required_nodes),
10454 errors.ECODE_FAULT)
10456 remote_node_name = ial.result[0]
10458 lu.LogInfo("Selected new secondary for instance '%s': %s",
10459 instance_name, remote_node_name)
10461 return remote_node_name
10463 def _FindFaultyDisks(self, node_name):
10464 """Wrapper for L{_FindFaultyInstanceDisks}.
10467 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10470 def _CheckDisksActivated(self, instance):
10471 """Checks if the instance disks are activated.
10473 @param instance: The instance to check disks
10474 @return: True if they are activated, False otherwise
10477 nodes = instance.all_nodes
10479 for idx, dev in enumerate(instance.disks):
10481 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10482 self.cfg.SetDiskID(dev, node)
10484 result = self.rpc.call_blockdev_find(node, dev)
10488 elif result.fail_msg or not result.payload:
10493 def CheckPrereq(self):
10494 """Check prerequisites.
10496 This checks that the instance is in the cluster.
10499 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10500 assert instance is not None, \
10501 "Cannot retrieve locked instance %s" % self.instance_name
10503 if instance.disk_template != constants.DT_DRBD8:
10504 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10505 " instances", errors.ECODE_INVAL)
10507 if len(instance.secondary_nodes) != 1:
10508 raise errors.OpPrereqError("The instance has a strange layout,"
10509 " expected one secondary but found %d" %
10510 len(instance.secondary_nodes),
10511 errors.ECODE_FAULT)
10513 if not self.delay_iallocator:
10514 self._CheckPrereq2()
10516 def _CheckPrereq2(self):
10517 """Check prerequisites, second part.
10519 This function should always be part of CheckPrereq. It was separated and is
10520 now called from Exec because during node evacuation iallocator was only
10521 called with an unmodified cluster model, not taking planned changes into
10525 instance = self.instance
10526 secondary_node = instance.secondary_nodes[0]
10528 if self.iallocator_name is None:
10529 remote_node = self.remote_node
10531 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10532 instance.name, instance.secondary_nodes)
10534 if remote_node is None:
10535 self.remote_node_info = None
10537 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10538 "Remote node '%s' is not locked" % remote_node
10540 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10541 assert self.remote_node_info is not None, \
10542 "Cannot retrieve locked node %s" % remote_node
10544 if remote_node == self.instance.primary_node:
10545 raise errors.OpPrereqError("The specified node is the primary node of"
10546 " the instance", errors.ECODE_INVAL)
10548 if remote_node == secondary_node:
10549 raise errors.OpPrereqError("The specified node is already the"
10550 " secondary node of the instance",
10551 errors.ECODE_INVAL)
10553 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10554 constants.REPLACE_DISK_CHG):
10555 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10556 errors.ECODE_INVAL)
10558 if self.mode == constants.REPLACE_DISK_AUTO:
10559 if not self._CheckDisksActivated(instance):
10560 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10561 " first" % self.instance_name,
10562 errors.ECODE_STATE)
10563 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10564 faulty_secondary = self._FindFaultyDisks(secondary_node)
10566 if faulty_primary and faulty_secondary:
10567 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10568 " one node and can not be repaired"
10569 " automatically" % self.instance_name,
10570 errors.ECODE_STATE)
10573 self.disks = faulty_primary
10574 self.target_node = instance.primary_node
10575 self.other_node = secondary_node
10576 check_nodes = [self.target_node, self.other_node]
10577 elif faulty_secondary:
10578 self.disks = faulty_secondary
10579 self.target_node = secondary_node
10580 self.other_node = instance.primary_node
10581 check_nodes = [self.target_node, self.other_node]
10587 # Non-automatic modes
10588 if self.mode == constants.REPLACE_DISK_PRI:
10589 self.target_node = instance.primary_node
10590 self.other_node = secondary_node
10591 check_nodes = [self.target_node, self.other_node]
10593 elif self.mode == constants.REPLACE_DISK_SEC:
10594 self.target_node = secondary_node
10595 self.other_node = instance.primary_node
10596 check_nodes = [self.target_node, self.other_node]
10598 elif self.mode == constants.REPLACE_DISK_CHG:
10599 self.new_node = remote_node
10600 self.other_node = instance.primary_node
10601 self.target_node = secondary_node
10602 check_nodes = [self.new_node, self.other_node]
10604 _CheckNodeNotDrained(self.lu, remote_node)
10605 _CheckNodeVmCapable(self.lu, remote_node)
10607 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10608 assert old_node_info is not None
10609 if old_node_info.offline and not self.early_release:
10610 # doesn't make sense to delay the release
10611 self.early_release = True
10612 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10613 " early-release mode", secondary_node)
10616 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10619 # If not specified all disks should be replaced
10621 self.disks = range(len(self.instance.disks))
10623 # TODO: This is ugly, but right now we can't distinguish between internal
10624 # submitted opcode and external one. We should fix that.
10625 if self.remote_node_info:
10626 # We change the node, lets verify it still meets instance policy
10627 new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10628 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10630 _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10631 ignore=self.ignore_ipolicy)
10633 # TODO: compute disk parameters
10634 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10635 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10636 if primary_node_info.group != secondary_node_info.group:
10637 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10638 " different node groups; the disk parameters of the"
10639 " primary node's group will be applied.")
10641 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10643 for node in check_nodes:
10644 _CheckNodeOnline(self.lu, node)
10646 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10649 if node_name is not None)
10651 # Release unneeded node and node resource locks
10652 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10653 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10655 # Release any owned node group
10656 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10657 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10659 # Check whether disks are valid
10660 for disk_idx in self.disks:
10661 instance.FindDisk(disk_idx)
10663 # Get secondary node IP addresses
10664 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10665 in self.cfg.GetMultiNodeInfo(touched_nodes))
10667 def Exec(self, feedback_fn):
10668 """Execute disk replacement.
10670 This dispatches the disk replacement to the appropriate handler.
10673 if self.delay_iallocator:
10674 self._CheckPrereq2()
10677 # Verify owned locks before starting operation
10678 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10679 assert set(owned_nodes) == set(self.node_secondary_ip), \
10680 ("Incorrect node locks, owning %s, expected %s" %
10681 (owned_nodes, self.node_secondary_ip.keys()))
10682 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10683 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10685 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10686 assert list(owned_instances) == [self.instance_name], \
10687 "Instance '%s' not locked" % self.instance_name
10689 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10690 "Should not own any node group lock at this point"
10693 feedback_fn("No disks need replacement")
10696 feedback_fn("Replacing disk(s) %s for %s" %
10697 (utils.CommaJoin(self.disks), self.instance.name))
10699 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10701 # Activate the instance disks if we're replacing them on a down instance
10703 _StartInstanceDisks(self.lu, self.instance, True)
10706 # Should we replace the secondary node?
10707 if self.new_node is not None:
10708 fn = self._ExecDrbd8Secondary
10710 fn = self._ExecDrbd8DiskOnly
10712 result = fn(feedback_fn)
10714 # Deactivate the instance disks if we're replacing them on a
10717 _SafeShutdownInstanceDisks(self.lu, self.instance)
10719 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10722 # Verify owned locks
10723 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10724 nodes = frozenset(self.node_secondary_ip)
10725 assert ((self.early_release and not owned_nodes) or
10726 (not self.early_release and not (set(owned_nodes) - nodes))), \
10727 ("Not owning the correct locks, early_release=%s, owned=%r,"
10728 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10732 def _CheckVolumeGroup(self, nodes):
10733 self.lu.LogInfo("Checking volume groups")
10735 vgname = self.cfg.GetVGName()
10737 # Make sure volume group exists on all involved nodes
10738 results = self.rpc.call_vg_list(nodes)
10740 raise errors.OpExecError("Can't list volume groups on the nodes")
10743 res = results[node]
10744 res.Raise("Error checking node %s" % node)
10745 if vgname not in res.payload:
10746 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10749 def _CheckDisksExistence(self, nodes):
10750 # Check disk existence
10751 for idx, dev in enumerate(self.instance.disks):
10752 if idx not in self.disks:
10756 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10757 self.cfg.SetDiskID(dev, node)
10759 result = self.rpc.call_blockdev_find(node, dev)
10761 msg = result.fail_msg
10762 if msg or not result.payload:
10764 msg = "disk not found"
10765 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10768 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10769 for idx, dev in enumerate(self.instance.disks):
10770 if idx not in self.disks:
10773 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10776 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10778 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10779 " replace disks for instance %s" %
10780 (node_name, self.instance.name))
10782 def _CreateNewStorage(self, node_name):
10783 """Create new storage on the primary or secondary node.
10785 This is only used for same-node replaces, not for changing the
10786 secondary node, hence we don't want to modify the existing disk.
10791 for idx, dev in enumerate(self.instance.disks):
10792 if idx not in self.disks:
10795 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10797 self.cfg.SetDiskID(dev, node_name)
10799 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10800 names = _GenerateUniqueNames(self.lu, lv_names)
10802 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10804 vg_data = dev.children[0].logical_id[0]
10805 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10806 logical_id=(vg_data, names[0]), params=data_p)
10807 vg_meta = dev.children[1].logical_id[0]
10808 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10809 logical_id=(vg_meta, names[1]), params=meta_p)
10811 new_lvs = [lv_data, lv_meta]
10812 old_lvs = [child.Copy() for child in dev.children]
10813 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10815 # we pass force_create=True to force the LVM creation
10816 for new_lv in new_lvs:
10817 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10818 _GetInstanceInfoText(self.instance), False)
10822 def _CheckDevices(self, node_name, iv_names):
10823 for name, (dev, _, _) in iv_names.iteritems():
10824 self.cfg.SetDiskID(dev, node_name)
10826 result = self.rpc.call_blockdev_find(node_name, dev)
10828 msg = result.fail_msg
10829 if msg or not result.payload:
10831 msg = "disk not found"
10832 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10835 if result.payload.is_degraded:
10836 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10838 def _RemoveOldStorage(self, node_name, iv_names):
10839 for name, (_, old_lvs, _) in iv_names.iteritems():
10840 self.lu.LogInfo("Remove logical volumes for %s" % name)
10843 self.cfg.SetDiskID(lv, node_name)
10845 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10847 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10848 hint="remove unused LVs manually")
10850 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10851 """Replace a disk on the primary or secondary for DRBD 8.
10853 The algorithm for replace is quite complicated:
10855 1. for each disk to be replaced:
10857 1. create new LVs on the target node with unique names
10858 1. detach old LVs from the drbd device
10859 1. rename old LVs to name_replaced.<time_t>
10860 1. rename new LVs to old LVs
10861 1. attach the new LVs (with the old names now) to the drbd device
10863 1. wait for sync across all devices
10865 1. for each modified disk:
10867 1. remove old LVs (which have the name name_replaces.<time_t>)
10869 Failures are not very well handled.
10874 # Step: check device activation
10875 self.lu.LogStep(1, steps_total, "Check device existence")
10876 self._CheckDisksExistence([self.other_node, self.target_node])
10877 self._CheckVolumeGroup([self.target_node, self.other_node])
10879 # Step: check other node consistency
10880 self.lu.LogStep(2, steps_total, "Check peer consistency")
10881 self._CheckDisksConsistency(self.other_node,
10882 self.other_node == self.instance.primary_node,
10885 # Step: create new storage
10886 self.lu.LogStep(3, steps_total, "Allocate new storage")
10887 iv_names = self._CreateNewStorage(self.target_node)
10889 # Step: for each lv, detach+rename*2+attach
10890 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10891 for dev, old_lvs, new_lvs in iv_names.itervalues():
10892 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10894 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10896 result.Raise("Can't detach drbd from local storage on node"
10897 " %s for device %s" % (self.target_node, dev.iv_name))
10899 #cfg.Update(instance)
10901 # ok, we created the new LVs, so now we know we have the needed
10902 # storage; as such, we proceed on the target node to rename
10903 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10904 # using the assumption that logical_id == physical_id (which in
10905 # turn is the unique_id on that node)
10907 # FIXME(iustin): use a better name for the replaced LVs
10908 temp_suffix = int(time.time())
10909 ren_fn = lambda d, suff: (d.physical_id[0],
10910 d.physical_id[1] + "_replaced-%s" % suff)
10912 # Build the rename list based on what LVs exist on the node
10913 rename_old_to_new = []
10914 for to_ren in old_lvs:
10915 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10916 if not result.fail_msg and result.payload:
10918 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10920 self.lu.LogInfo("Renaming the old LVs on the target node")
10921 result = self.rpc.call_blockdev_rename(self.target_node,
10923 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10925 # Now we rename the new LVs to the old LVs
10926 self.lu.LogInfo("Renaming the new LVs on the target node")
10927 rename_new_to_old = [(new, old.physical_id)
10928 for old, new in zip(old_lvs, new_lvs)]
10929 result = self.rpc.call_blockdev_rename(self.target_node,
10931 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10933 # Intermediate steps of in memory modifications
10934 for old, new in zip(old_lvs, new_lvs):
10935 new.logical_id = old.logical_id
10936 self.cfg.SetDiskID(new, self.target_node)
10938 # We need to modify old_lvs so that removal later removes the
10939 # right LVs, not the newly added ones; note that old_lvs is a
10941 for disk in old_lvs:
10942 disk.logical_id = ren_fn(disk, temp_suffix)
10943 self.cfg.SetDiskID(disk, self.target_node)
10945 # Now that the new lvs have the old name, we can add them to the device
10946 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10947 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10949 msg = result.fail_msg
10951 for new_lv in new_lvs:
10952 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10955 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10956 hint=("cleanup manually the unused logical"
10958 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10960 cstep = itertools.count(5)
10962 if self.early_release:
10963 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10964 self._RemoveOldStorage(self.target_node, iv_names)
10965 # TODO: Check if releasing locks early still makes sense
10966 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10968 # Release all resource locks except those used by the instance
10969 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10970 keep=self.node_secondary_ip.keys())
10972 # Release all node locks while waiting for sync
10973 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10975 # TODO: Can the instance lock be downgraded here? Take the optional disk
10976 # shutdown in the caller into consideration.
10979 # This can fail as the old devices are degraded and _WaitForSync
10980 # does a combined result over all disks, so we don't check its return value
10981 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10982 _WaitForSync(self.lu, self.instance)
10984 # Check all devices manually
10985 self._CheckDevices(self.instance.primary_node, iv_names)
10987 # Step: remove old storage
10988 if not self.early_release:
10989 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10990 self._RemoveOldStorage(self.target_node, iv_names)
10992 def _ExecDrbd8Secondary(self, feedback_fn):
10993 """Replace the secondary node for DRBD 8.
10995 The algorithm for replace is quite complicated:
10996 - for all disks of the instance:
10997 - create new LVs on the new node with same names
10998 - shutdown the drbd device on the old secondary
10999 - disconnect the drbd network on the primary
11000 - create the drbd device on the new secondary
11001 - network attach the drbd on the primary, using an artifice:
11002 the drbd code for Attach() will connect to the network if it
11003 finds a device which is connected to the good local disks but
11004 not network enabled
11005 - wait for sync across all devices
11006 - remove all disks from the old secondary
11008 Failures are not very well handled.
11013 pnode = self.instance.primary_node
11015 # Step: check device activation
11016 self.lu.LogStep(1, steps_total, "Check device existence")
11017 self._CheckDisksExistence([self.instance.primary_node])
11018 self._CheckVolumeGroup([self.instance.primary_node])
11020 # Step: check other node consistency
11021 self.lu.LogStep(2, steps_total, "Check peer consistency")
11022 self._CheckDisksConsistency(self.instance.primary_node, True, True)
11024 # Step: create new storage
11025 self.lu.LogStep(3, steps_total, "Allocate new storage")
11026 for idx, dev in enumerate(self.instance.disks):
11027 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11028 (self.new_node, idx))
11029 # we pass force_create=True to force LVM creation
11030 for new_lv in dev.children:
11031 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11032 _GetInstanceInfoText(self.instance), False)
11034 # Step 4: dbrd minors and drbd setups changes
11035 # after this, we must manually remove the drbd minors on both the
11036 # error and the success paths
11037 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11038 minors = self.cfg.AllocateDRBDMinor([self.new_node
11039 for dev in self.instance.disks],
11040 self.instance.name)
11041 logging.debug("Allocated minors %r", minors)
11044 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11045 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11046 (self.new_node, idx))
11047 # create new devices on new_node; note that we create two IDs:
11048 # one without port, so the drbd will be activated without
11049 # networking information on the new node at this stage, and one
11050 # with network, for the latter activation in step 4
11051 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11052 if self.instance.primary_node == o_node1:
11055 assert self.instance.primary_node == o_node2, "Three-node instance?"
11058 new_alone_id = (self.instance.primary_node, self.new_node, None,
11059 p_minor, new_minor, o_secret)
11060 new_net_id = (self.instance.primary_node, self.new_node, o_port,
11061 p_minor, new_minor, o_secret)
11063 iv_names[idx] = (dev, dev.children, new_net_id)
11064 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11066 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11067 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11068 logical_id=new_alone_id,
11069 children=dev.children,
11071 params=drbd_params)
11073 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11074 _GetInstanceInfoText(self.instance), False)
11075 except errors.GenericError:
11076 self.cfg.ReleaseDRBDMinors(self.instance.name)
11079 # We have new devices, shutdown the drbd on the old secondary
11080 for idx, dev in enumerate(self.instance.disks):
11081 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11082 self.cfg.SetDiskID(dev, self.target_node)
11083 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11085 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11086 "node: %s" % (idx, msg),
11087 hint=("Please cleanup this device manually as"
11088 " soon as possible"))
11090 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11091 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11092 self.instance.disks)[pnode]
11094 msg = result.fail_msg
11096 # detaches didn't succeed (unlikely)
11097 self.cfg.ReleaseDRBDMinors(self.instance.name)
11098 raise errors.OpExecError("Can't detach the disks from the network on"
11099 " old node: %s" % (msg,))
11101 # if we managed to detach at least one, we update all the disks of
11102 # the instance to point to the new secondary
11103 self.lu.LogInfo("Updating instance configuration")
11104 for dev, _, new_logical_id in iv_names.itervalues():
11105 dev.logical_id = new_logical_id
11106 self.cfg.SetDiskID(dev, self.instance.primary_node)
11108 self.cfg.Update(self.instance, feedback_fn)
11110 # Release all node locks (the configuration has been updated)
11111 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11113 # and now perform the drbd attach
11114 self.lu.LogInfo("Attaching primary drbds to new secondary"
11115 " (standalone => connected)")
11116 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11118 self.node_secondary_ip,
11119 self.instance.disks,
11120 self.instance.name,
11122 for to_node, to_result in result.items():
11123 msg = to_result.fail_msg
11125 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11127 hint=("please do a gnt-instance info to see the"
11128 " status of disks"))
11130 cstep = itertools.count(5)
11132 if self.early_release:
11133 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11134 self._RemoveOldStorage(self.target_node, iv_names)
11135 # TODO: Check if releasing locks early still makes sense
11136 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11138 # Release all resource locks except those used by the instance
11139 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11140 keep=self.node_secondary_ip.keys())
11142 # TODO: Can the instance lock be downgraded here? Take the optional disk
11143 # shutdown in the caller into consideration.
11146 # This can fail as the old devices are degraded and _WaitForSync
11147 # does a combined result over all disks, so we don't check its return value
11148 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11149 _WaitForSync(self.lu, self.instance)
11151 # Check all devices manually
11152 self._CheckDevices(self.instance.primary_node, iv_names)
11154 # Step: remove old storage
11155 if not self.early_release:
11156 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11157 self._RemoveOldStorage(self.target_node, iv_names)
11160 class LURepairNodeStorage(NoHooksLU):
11161 """Repairs the volume group on a node.
11166 def CheckArguments(self):
11167 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11169 storage_type = self.op.storage_type
11171 if (constants.SO_FIX_CONSISTENCY not in
11172 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11173 raise errors.OpPrereqError("Storage units of type '%s' can not be"
11174 " repaired" % storage_type,
11175 errors.ECODE_INVAL)
11177 def ExpandNames(self):
11178 self.needed_locks = {
11179 locking.LEVEL_NODE: [self.op.node_name],
11182 def _CheckFaultyDisks(self, instance, node_name):
11183 """Ensure faulty disks abort the opcode or at least warn."""
11185 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11187 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11188 " node '%s'" % (instance.name, node_name),
11189 errors.ECODE_STATE)
11190 except errors.OpPrereqError, err:
11191 if self.op.ignore_consistency:
11192 self.proc.LogWarning(str(err.args[0]))
11196 def CheckPrereq(self):
11197 """Check prerequisites.
11200 # Check whether any instance on this node has faulty disks
11201 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11202 if inst.admin_state != constants.ADMINST_UP:
11204 check_nodes = set(inst.all_nodes)
11205 check_nodes.discard(self.op.node_name)
11206 for inst_node_name in check_nodes:
11207 self._CheckFaultyDisks(inst, inst_node_name)
11209 def Exec(self, feedback_fn):
11210 feedback_fn("Repairing storage unit '%s' on %s ..." %
11211 (self.op.name, self.op.node_name))
11213 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11214 result = self.rpc.call_storage_execute(self.op.node_name,
11215 self.op.storage_type, st_args,
11217 constants.SO_FIX_CONSISTENCY)
11218 result.Raise("Failed to repair storage unit '%s' on %s" %
11219 (self.op.name, self.op.node_name))
11222 class LUNodeEvacuate(NoHooksLU):
11223 """Evacuates instances off a list of nodes.
11228 _MODE2IALLOCATOR = {
11229 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11230 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11231 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11233 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11234 assert (frozenset(_MODE2IALLOCATOR.values()) ==
11235 constants.IALLOCATOR_NEVAC_MODES)
11237 def CheckArguments(self):
11238 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11240 def ExpandNames(self):
11241 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11243 if self.op.remote_node is not None:
11244 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11245 assert self.op.remote_node
11247 if self.op.remote_node == self.op.node_name:
11248 raise errors.OpPrereqError("Can not use evacuated node as a new"
11249 " secondary node", errors.ECODE_INVAL)
11251 if self.op.mode != constants.NODE_EVAC_SEC:
11252 raise errors.OpPrereqError("Without the use of an iallocator only"
11253 " secondary instances can be evacuated",
11254 errors.ECODE_INVAL)
11257 self.share_locks = _ShareAll()
11258 self.needed_locks = {
11259 locking.LEVEL_INSTANCE: [],
11260 locking.LEVEL_NODEGROUP: [],
11261 locking.LEVEL_NODE: [],
11264 # Determine nodes (via group) optimistically, needs verification once locks
11265 # have been acquired
11266 self.lock_nodes = self._DetermineNodes()
11268 def _DetermineNodes(self):
11269 """Gets the list of nodes to operate on.
11272 if self.op.remote_node is None:
11273 # Iallocator will choose any node(s) in the same group
11274 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11276 group_nodes = frozenset([self.op.remote_node])
11278 # Determine nodes to be locked
11279 return set([self.op.node_name]) | group_nodes
  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    @return: instances on the evacuated node, filtered by evacuation mode
      (primary-only or secondary-only; "all" is rejected, see below)

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # instead of a per-instance request; until then "all" cannot be supported
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)
  def DeclareLocks(self, level):
    """Fills in the lock sets computed optimistically in ExpandNames.

    Instance and node-group locks are derived from the current configuration
    and re-verified in L{CheckPrereq} after acquisition.

    """
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11326 def CheckPrereq(self):
11328 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11329 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11330 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11332 need_nodes = self._DetermineNodes()
11334 if not owned_nodes.issuperset(need_nodes):
11335 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11336 " locks were acquired, current nodes are"
11337 " are '%s', used to be '%s'; retry the"
11339 (self.op.node_name,
11340 utils.CommaJoin(need_nodes),
11341 utils.CommaJoin(owned_nodes)),
11342 errors.ECODE_STATE)
11344 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11345 if owned_groups != wanted_groups:
11346 raise errors.OpExecError("Node groups changed since locks were acquired,"
11347 " current groups are '%s', used to be '%s';"
11348 " retry the operation" %
11349 (utils.CommaJoin(wanted_groups),
11350 utils.CommaJoin(owned_groups)))
11352 # Determine affected instances
11353 self.instances = self._DetermineInstances()
11354 self.instance_names = [i.name for i in self.instances]
11356 if set(self.instance_names) != owned_instances:
11357 raise errors.OpExecError("Instances on node '%s' changed since locks"
11358 " were acquired, current instances are '%s',"
11359 " used to be '%s'; retry the operation" %
11360 (self.op.node_name,
11361 utils.CommaJoin(self.instance_names),
11362 utils.CommaJoin(owned_instances)))
11364 if self.instance_names:
11365 self.LogInfo("Evacuating instances from node '%s': %s",
11367 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11369 self.LogInfo("No instances to evacuate from node '%s'",
11372 if self.op.remote_node is not None:
11373 for i in self.instances:
11374 if i.primary_node == self.op.remote_node:
11375 raise errors.OpPrereqError("Node %s is the primary node of"
11376 " instance %s, cannot use it as"
11378 (self.op.remote_node, i.name),
11379 errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Builds and returns the evacuation jobs.

    @rtype: L{ResultWithJobs}
    @return: one job (list of opcodes) per evacuated instance

    """
    # Exactly one of iallocator/remote_node was selected in CheckArguments
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      # Fixed target node: replace the secondary of every affected instance
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  @type early_release: bool
  @param early_release: Value to set on the opcode
  @type op: L{opcodes.OpCode}
  @param op: Opcode to modify; opcodes without the attribute are left alone
  @return: the (possibly modified) opcode

  """
  try:
    op.early_release = early_release
  except AttributeError:
    # Only opcodes lacking the slot may end up here; replace-disks always
    # supports early_release
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op
def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  @type use_nodes: bool
  @param use_nodes: if true, format the node list; otherwise return the group

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  # Deserialize each job's opcodes and propagate the early_release flag
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    # NOTE(review): file-based templates have no VG to check — confirm the
    # exact template tuple against upstream (extraction elided a line here)
    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.op.amount, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

    # TODO: Rewrite code to work properly
    # DRBD goes into sync mode for a short amount of time after executing the
    # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
    # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.
    time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      # Shut the freshly-grown disk down again if the instance isn't running
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
      map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    Returns C{None} for static queries, missing/offline nodes or when the
    remote node reported no device; otherwise a tuple of device fields.

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Recurses into the device's children.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
def PrepareContainerMods(mods, private_fn):
  """Prepares a list of container modifications by adding a private data field.

  @type mods: list of tuples; (operation, index, parameters)
  @param mods: List of modifications
  @type private_fn: callable or None
  @param private_fn: Callable for constructing a private data field for a
    modification; C{None} stores C{None} as the private field
  @rtype: list of tuples; (operation, index, parameters, private data)

  """
  if private_fn is None:
    make_private = lambda: None
  else:
    make_private = private_fn

  # Extend each 3-tuple with a freshly built private data object
  return [(op, idx, params, make_private())
          for (op, idx, params) in mods]
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks: an optional list of (name, value) pairs
# NOTE(review): the tail of this expression was lost in extraction; the
# second item type below is reconstructed — confirm against upstream
_TApplyContModsCbChanges = \
  ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
    ht.TNonEmptyString,
    ht.TAny,
    ])))
def ApplyContainerMods(kind, container, chgdesc, mods,
                       create_fn, modify_fn, remove_fn):
  """Applies descriptions in C{mods} to C{container}.

  @type kind: string
  @param kind: One-word item description
  @type container: list
  @param container: Container to modify
  @type chgdesc: None or list
  @param chgdesc: List of applied changes
  @type mods: list
  @param mods: Modifications as returned by L{PrepareContainerMods}
  @type create_fn: callable
  @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
    receives absolute item index, parameters and private data object as added
    by L{PrepareContainerMods}, returns tuple containing new item and changes
    as list
  @type modify_fn: callable
  @param modify_fn: Callback for modifying an existing item
    (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
    and private data object as added by L{PrepareContainerMods}, returns
    changes as list
  @type remove_fn: callable
  @param remove_fn: Callback on removing item; receives absolute item index,
    item and private data object as added by L{PrepareContainerMods}

  """
  for (op, idx, params, private) in mods:
    # Translate the user-supplied index into an absolute position; -1 means
    # "last item" (for add: append)
    if idx == -1:
      absidx = len(container) - 1
    elif idx < 0:
      raise IndexError("Not accepting negative indices other than -1")
    elif idx > len(container):
      raise IndexError("Got %s index %s, but there are only %s" %
                       (kind, idx, len(container)))
    else:
      absidx = idx

    changes = None

    if op == constants.DDM_ADD:
      # Calculate where item will be added
      if idx == -1:
        addidx = len(container)
      else:
        addidx = idx

      if create_fn is None:
        item = params
      else:
        (item, changes) = create_fn(addidx, params, private)

      if idx == -1:
        container.append(item)
      else:
        assert idx <= len(container)
        # list.insert does so before the specified index
        container.insert(idx, item)
    else:
      # Retrieve existing item
      try:
        item = container[absidx]
      except IndexError:
        raise IndexError("Invalid %s index %s" % (kind, idx))

      if op == constants.DDM_REMOVE:
        assert not params

        if remove_fn is not None:
          remove_fn(absidx, item, private)

        changes = [("%s/%s" % (kind, absidx), "remove")]

        assert container[absidx] == item
        del container[absidx]
      elif op == constants.DDM_MODIFY:
        if modify_fn is not None:
          changes = modify_fn(absidx, item, params, private)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)

    assert _TApplyContModsCbChanges(changes)

    if not (chgdesc is None or changes is None):
      chgdesc.extend(changes)
11897 def _UpdateIvNames(base_index, disks):
11898 """Updates the C{iv_name} attribute of disks.
11900 @type disks: list of L{objects.Disk}
11903 for (idx, disk) in enumerate(disks):
11904 disk.iv_name = "disk/%s" % (base_index + idx, )
class _InstNicModPrivate:
  """Data structure for network interface modifications.

  Used by L{LUInstanceSetParams}.

  """
  def __init__(self):
    # Filled in by L{LUInstanceSetParams._PrepareNicModification}
    self.params = None
    self.filled = None
11918 class LUInstanceSetParams(LogicalUnit):
11919 """Modifies an instances's parameters.
11922 HPATH = "instance-modify"
11923 HTYPE = constants.HTYPE_INSTANCE
  @staticmethod
  def _UpgradeDiskNicMods(kind, mods, verify_fn):
    """Converts legacy 2-tuple disk/NIC modifications to the 3-tuple format.

    Legacy entries are C{(op, params)}; the upgraded format is
    C{(op, index, params)}.  At most one add/remove is allowed per request
    in the legacy format.

    """
    assert ht.TList(mods)
    assert not mods or len(mods[0]) in (2, 3)

    if mods and len(mods[0]) == 2:
      result = []

      addremove = 0
      for op, params in mods:
        if op in (constants.DDM_ADD, constants.DDM_REMOVE):
          result.append((op, -1, params))
          addremove += 1

          if addremove > 1:
            raise errors.OpPrereqError("Only one %s add or remove operation is"
                                       " supported at a time" % kind,
                                       errors.ECODE_INVAL)
        else:
          # Legacy "modify": op is actually the index
          result.append((constants.DDM_MODIFY, op, params))

      assert verify_fn(result)
    else:
      result = mods

    return result
  @staticmethod
  def _CheckMods(kind, mods, key_types, item_fn):
    """Ensures requested disk/NIC modifications are valid.

    @param kind: One-word item description ("disk" or "NIC")
    @param mods: Modifications in (op, index, params) format
    @param key_types: Allowed parameter types for L{utils.ForceDictType}
    @param item_fn: Per-item verification callback

    """
    for (op, _, params) in mods:
      assert ht.TDict(params)

      utils.ForceDictType(params, key_types)

      if op == constants.DDM_REMOVE:
        if params:
          raise errors.OpPrereqError("No settings should be passed when"
                                     " removing a %s" % kind,
                                     errors.ECODE_INVAL)
      elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
        item_fn(op, params)
      else:
        raise errors.ProgrammerError("Unhandled operation '%s'" % op)
  @staticmethod
  def _VerifyDiskModification(op, params):
    """Verifies a disk modification.

    Normalizes the access mode and size for additions; rejects size changes
    on modifications (those must go through grow-disk).

    """
    if op == constants.DDM_ADD:
      mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                   errors.ECODE_INVAL)

      size = params.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Required disk parameter '%s' missing" %
                                   constants.IDISK_SIZE, errors.ECODE_INVAL)

      try:
        size = int(size)
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
                                   errors.ECODE_INVAL)

      params[constants.IDISK_SIZE] = size

    elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
      raise errors.OpPrereqError("Disk size change not possible, use"
                                 " grow-disk", errors.ECODE_INVAL)
  @staticmethod
  def _VerifyNicModification(op, params):
    """Verifies a network interface modification.

    Normalizes IP/bridge/link "none" sentinels and validates/normalizes
    the MAC address.

    """
    if op in (constants.DDM_ADD, constants.DDM_MODIFY):
      ip = params.get(constants.INIC_IP, None)
      if ip is None:
        pass
      elif ip.lower() == constants.VALUE_NONE:
        params[constants.INIC_IP] = None
      elif not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)

      # "bridge" is the legacy alias for "link"; accept only one of them
      bridge = params.get("bridge", None)
      link = params.get(constants.INIC_LINK, None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and bridge.lower() == constants.VALUE_NONE:
        params["bridge"] = None
      elif link and link.lower() == constants.VALUE_NONE:
        params[constants.INIC_LINK] = None

      if op == constants.DDM_ADD:
        macaddr = params.get(constants.INIC_MAC, None)
        if macaddr is None:
          params[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in params:
        macaddr = params[constants.INIC_MAC]
        if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          macaddr = utils.NormalizeAndValidateMac(macaddr)

        if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing NIC",
                                     errors.ECODE_INVAL)
  def CheckArguments(self):
    """Syntactic validation of the requested modifications.

    Rejects empty requests, upgrades legacy disk/NIC formats and verifies
    each modification.

    """
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.offline is not None or self.op.runtime_mem):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    self.op.disks = \
      self._UpgradeDiskNicMods("disk", self.op.disks,
        opcodes.OpInstanceSetParams.TestDiskModifications)
    self.op.nics = \
      self._UpgradeDiskNicMods("NIC", self.op.nics,
        opcodes.OpInstanceSetParams.TestNicModifications)

    # Check disk modifications
    self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
                    self._VerifyDiskModification)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # Check NIC modifications
    self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
                    self._VerifyNicModification)
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  def DeclareLocks(self, level):
    # TODO: Acquire group lock in shared mode (disk parameters)
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        # Disk conversion to a mirrored template also locks the new secondary
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = {}
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self._new_nics is not None:
      nics = []

      for nic in self._new_nics:
        nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        nics.append((nic.ip, nic.mac, mode, link))

      args["nics"] = nics

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    if self.op.runtime_mem:
      env["RUNTIME_MEMORY"] = self.op.runtime_mem

    return env
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
  def _PrepareNicModification(self, params, private, old_ip, old_params,
                              cluster, pnode):
    """Validates a NIC add/modify request and prepares its new parameters.

    Stores the computed parameter dicts on C{private} for later application;
    may generate or reserve a MAC address as a side effect.

    @return: (None, None) — no user-visible changes are reported here

    """
    update_params_dict = dict([(key, params[key])
                               for key in constants.NICS_PARAMETERS
                               if key in params])

    # "bridge" is a legacy alias for the link parameter
    if "bridge" in params:
      update_params_dict[constants.NIC_LINK] = params["bridge"]

    new_params = _GetUpdatedParams(old_params, update_params_dict)
    utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)

    new_filled_params = cluster.SimpleFillNIC(new_params)
    objects.NIC.CheckParameterSyntax(new_filled_params)

    new_mode = new_filled_params[constants.NIC_MODE]
    if new_mode == constants.NIC_MODE_BRIDGED:
      bridge = new_filled_params[constants.NIC_LINK]
      msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
      if msg:
        msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
        if self.op.force:
          self.warn.append(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)

    elif new_mode == constants.NIC_MODE_ROUTED:
      ip = params.get(constants.INIC_IP, old_ip)
      if ip is None:
        raise errors.OpPrereqError("Cannot set the NIC IP address to None"
                                   " on a routed NIC", errors.ECODE_INVAL)

    if constants.INIC_MAC in params:
      mac = params[constants.INIC_MAC]
      if mac is None:
        raise errors.OpPrereqError("Cannot unset the NIC MAC address",
                                   errors.ECODE_INVAL)
      elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        # otherwise generate the MAC address
        params[constants.INIC_MAC] = \
          self.cfg.GenerateMAC(self.proc.GetECId())
      else:
        # or validate/reserve the current one
        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address '%s' already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

    private.params = new_params
    private.filled = new_filled_params

    return (None, None)
12194 def CheckPrereq(self):
12195 """Check prerequisites.
12197 This only checks the instance list against the existing names.
12200 # checking the new params on the primary/secondary nodes
12202 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12203 cluster = self.cluster = self.cfg.GetClusterInfo()
12204 assert self.instance is not None, \
12205 "Cannot retrieve locked instance %s" % self.op.instance_name
12206 pnode = instance.primary_node
12207 nodelist = list(instance.all_nodes)
12208 pnode_info = self.cfg.GetNodeInfo(pnode)
12209 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12211 # Prepare disk/NIC modifications
12212 self.diskmod = PrepareContainerMods(self.op.disks, None)
12213 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12216 if self.op.os_name and not self.op.force:
12217 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12218 self.op.force_variant)
12219 instance_os = self.op.os_name
12221 instance_os = instance.os
12223 assert not (self.op.disk_template and self.op.disks), \
12224 "Can't modify disk template and apply disk changes at the same time"
12226 if self.op.disk_template:
12227 if instance.disk_template == self.op.disk_template:
12228 raise errors.OpPrereqError("Instance already has disk template %s" %
12229 instance.disk_template, errors.ECODE_INVAL)
12231 if (instance.disk_template,
12232 self.op.disk_template) not in self._DISK_CONVERSIONS:
12233 raise errors.OpPrereqError("Unsupported disk template conversion from"
12234 " %s to %s" % (instance.disk_template,
12235 self.op.disk_template),
12236 errors.ECODE_INVAL)
12237 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12238 msg="cannot change disk template")
12239 if self.op.disk_template in constants.DTS_INT_MIRROR:
12240 if self.op.remote_node == pnode:
12241 raise errors.OpPrereqError("Given new secondary node %s is the same"
12242 " as the primary node of the instance" %
12243 self.op.remote_node, errors.ECODE_STATE)
12244 _CheckNodeOnline(self, self.op.remote_node)
12245 _CheckNodeNotDrained(self, self.op.remote_node)
12246 # FIXME: here we assume that the old instance type is DT_PLAIN
12247 assert instance.disk_template == constants.DT_PLAIN
12248 disks = [{constants.IDISK_SIZE: d.size,
12249 constants.IDISK_VG: d.logical_id[0]}
12250 for d in instance.disks]
12251 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12252 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12254 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12255 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12256 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12257 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12258 ignore=self.op.ignore_ipolicy)
12259 if pnode_info.group != snode_info.group:
12260 self.LogWarning("The primary and secondary nodes are in two"
12261 " different node groups; the disk parameters"
12262 " from the first disk's node group will be"
12265 # hvparams processing
12266 if self.op.hvparams:
12267 hv_type = instance.hypervisor
12268 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12269 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12270 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12273 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12274 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12275 self.hv_proposed = self.hv_new = hv_new # the new actual values
12276 self.hv_inst = i_hvdict # the new dict (without defaults)
12278 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12280 self.hv_new = self.hv_inst = {}
12282 # beparams processing
12283 if self.op.beparams:
12284 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12286 objects.UpgradeBeParams(i_bedict)
12287 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12288 be_new = cluster.SimpleFillBE(i_bedict)
12289 self.be_proposed = self.be_new = be_new # the new actual values
12290 self.be_inst = i_bedict # the new dict (without defaults)
12292 self.be_new = self.be_inst = {}
12293 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12294 be_old = cluster.FillBE(instance)
12296 # CPU param validation -- checking every time a paramtere is
12297 # changed to cover all cases where either CPU mask or vcpus have
12299 if (constants.BE_VCPUS in self.be_proposed and
12300 constants.HV_CPU_MASK in self.hv_proposed):
12302 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12303 # Verify mask is consistent with number of vCPUs. Can skip this
12304 # test if only 1 entry in the CPU mask, which means same mask
12305 # is applied to all vCPUs.
12306 if (len(cpu_list) > 1 and
12307 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12308 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12310 (self.be_proposed[constants.BE_VCPUS],
12311 self.hv_proposed[constants.HV_CPU_MASK]),
12312 errors.ECODE_INVAL)
12314 # Only perform this test if a new CPU mask is given
12315 if constants.HV_CPU_MASK in self.hv_new:
12316 # Calculate the largest CPU number requested
12317 max_requested_cpu = max(map(max, cpu_list))
12318 # Check that all of the instance's nodes have enough physical CPUs to
12319 # satisfy the requested CPU mask
12320 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12321 max_requested_cpu + 1, instance.hypervisor)
12323 # osparams processing
12324 if self.op.osparams:
12325 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12326 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12327 self.os_inst = i_osdict # the new dict (without defaults)
12333 #TODO(dynmem): do the appropriate check involving MINMEM
12334 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12335 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12336 mem_check_list = [pnode]
12337 if be_new[constants.BE_AUTO_BALANCE]:
12338 # either we changed auto_balance to yes or it was from before
12339 mem_check_list.extend(instance.secondary_nodes)
12340 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12341 instance.hypervisor)
12342 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12343 [instance.hypervisor])
12344 pninfo = nodeinfo[pnode]
12345 msg = pninfo.fail_msg
12347 # Assume the primary node is unreachable and go ahead
12348 self.warn.append("Can't get info from primary node %s: %s" %
12351 (_, _, (pnhvinfo, )) = pninfo.payload
12352 if not isinstance(pnhvinfo.get("memory_free", None), int):
12353 self.warn.append("Node data from primary node %s doesn't contain"
12354 " free memory information" % pnode)
12355 elif instance_info.fail_msg:
12356 self.warn.append("Can't get instance runtime information: %s" %
12357 instance_info.fail_msg)
12359 if instance_info.payload:
12360 current_mem = int(instance_info.payload["memory"])
12362 # Assume instance not running
12363 # (there is a slight race condition here, but it's not very
12364 # probable, and we have no other way to check)
12365 # TODO: Describe race condition
12367 #TODO(dynmem): do the appropriate check involving MINMEM
12368 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12369 pnhvinfo["memory_free"])
12371 raise errors.OpPrereqError("This change will prevent the instance"
12372 " from starting, due to %d MB of memory"
12373 " missing on its primary node" %
12375 errors.ECODE_NORES)
12377 if be_new[constants.BE_AUTO_BALANCE]:
12378 for node, nres in nodeinfo.items():
12379 if node not in instance.secondary_nodes:
12381 nres.Raise("Can't get info from secondary node %s" % node,
12382 prereq=True, ecode=errors.ECODE_STATE)
12383 (_, _, (nhvinfo, )) = nres.payload
12384 if not isinstance(nhvinfo.get("memory_free", None), int):
12385 raise errors.OpPrereqError("Secondary node %s didn't return free"
12386 " memory information" % node,
12387 errors.ECODE_STATE)
12388 #TODO(dynmem): do the appropriate check involving MINMEM
12389 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12390 raise errors.OpPrereqError("This change will prevent the instance"
12391 " from failover to its secondary node"
12392 " %s, due to not enough memory" % node,
12393 errors.ECODE_STATE)
12395 if self.op.runtime_mem:
12396 remote_info = self.rpc.call_instance_info(instance.primary_node,
12398 instance.hypervisor)
12399 remote_info.Raise("Error checking node %s" % instance.primary_node)
12400 if not remote_info.payload: # not running already
12401 raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12402 errors.ECODE_STATE)
12404 current_memory = remote_info.payload["memory"]
12405 if (not self.op.force and
12406 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12407 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12408 raise errors.OpPrereqError("Instance %s must have memory between %d"
12409 " and %d MB of memory unless --force is"
12410 " given" % (instance.name,
12411 self.be_proposed[constants.BE_MINMEM],
12412 self.be_proposed[constants.BE_MAXMEM]),
12413 errors.ECODE_INVAL)
12415 if self.op.runtime_mem > current_memory:
12416 _CheckNodeFreeMemory(self, instance.primary_node,
12417 "ballooning memory for instance %s" %
12419 self.op.memory - current_memory,
12420 instance.hypervisor)
12422 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12423 raise errors.OpPrereqError("Disk operations not supported for"
12424 " diskless instances",
12425 errors.ECODE_INVAL)
12427 def _PrepareNicCreate(_, params, private):
12428 return self._PrepareNicModification(params, private, None, {},
12431 def _PrepareNicMod(_, nic, params, private):
12432 return self._PrepareNicModification(params, private, nic.ip,
12433 nic.nicparams, cluster, pnode)
12435 # Verify NIC changes (operating on copy)
12436 nics = instance.nics[:]
12437 ApplyContainerMods("NIC", nics, None, self.nicmod,
12438 _PrepareNicCreate, _PrepareNicMod, None)
12439 if len(nics) > constants.MAX_NICS:
12440 raise errors.OpPrereqError("Instance has too many network interfaces"
12441 " (%d), cannot add more" % constants.MAX_NICS,
12442 errors.ECODE_STATE)
12444 # Verify disk changes (operating on a copy)
12445 disks = instance.disks[:]
12446 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12447 if len(disks) > constants.MAX_DISKS:
12448 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12449 " more" % constants.MAX_DISKS,
12450 errors.ECODE_STATE)
12452 if self.op.offline is not None:
12453 if self.op.offline:
12454 msg = "can't change to offline"
12456 msg = "can't change to online"
12457 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12459 # Pre-compute NIC changes (necessary to use result in hooks)
12460 self._nic_chgdesc = []
12462 # Operate on copies as this is still in prereq
12463 nics = [nic.Copy() for nic in instance.nics]
12464 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12465 self._CreateNewNic, self._ApplyNicMods, None)
12466 self._new_nics = nics
12468 self._new_nics = None
12470 def _ConvertPlainToDrbd(self, feedback_fn):
12471 """Converts an instance from plain to drbd.
# Overall flow (visible here): build a DRBD disk set mirroring the existing
# plain LVs, create the missing data/meta volumes on both nodes, rename the
# original LVs to become the DRBD data children, assemble the DRBD devices,
# update the cluster config, release node locks, then wait for sync.
# NOTE(review): this listing has gaps (missing original lines, e.g. the
# trailing arguments of _GenerateDiskTemplate) — verify against the full file.
12474 feedback_fn("Converting template to drbd")
12475 instance = self.instance
12476 pnode = instance.primary_node
# The target secondary node comes from the opcode (set during prereq checks).
12477 snode = self.op.remote_node
12479 assert instance.disk_template == constants.DT_PLAIN
12481 # create a fake disk info for _GenerateDiskTemplate
# For plain disks, logical_id[0] is the volume group name.
12482 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12483 constants.IDISK_VG: d.logical_id[0]}
12484 for d in instance.disks]
12485 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12486 instance.name, pnode, [snode],
12487 disk_info, None, None, 0, feedback_fn,
12489 info = _GetInstanceInfoText(instance)
12490 feedback_fn("Creating aditional volumes...")
12491 # first, create the missing data and meta devices
12492 for disk in new_disks:
12493 # unfortunately this is... not too nice
# On the primary only the meta device (children[1]) is new — the data LV
# already exists and is renamed below; the secondary needs all children.
12494 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12496 for child in disk.children:
12497 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12498 # at this stage, all new LVs have been created, we can rename the
12500 feedback_fn("Renaming original volumes...")
# Map each original LV to the logical_id of the new DRBD data child.
12501 rename_list = [(o, n.children[0].logical_id)
12502 for (o, n) in zip(instance.disks, new_disks)]
12503 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12504 result.Raise("Failed to rename original LVs")
12506 feedback_fn("Initializing DRBD devices...")
12507 # all child devices are in place, we can now create the DRBD devices
12508 for disk in new_disks:
12509 for node in [pnode, snode]:
# f_create is True only on the primary node here.
12510 f_create = node == pnode
12511 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12513 # at this point, the instance has been modified
12514 instance.disk_template = constants.DT_DRBD8
12515 instance.disks = new_disks
12516 self.cfg.Update(instance, feedback_fn)
12518 # Release node locks while waiting for sync
12519 _ReleaseLocks(self, locking.LEVEL_NODE)
12521 # disks are created, waiting for sync
12522 disk_abort = not _WaitForSync(self, instance,
12523 oneshot=not self.op.wait_for_sync)
# Raised when sync reported degraded disks (guard condition not visible in
# this listing — presumably `if disk_abort:`).
12525 raise errors.OpExecError("There are some degraded disks for"
12526 " this instance, please cleanup manually")
12528 # Node resource locks will be released by caller
12530 def _ConvertDrbdToPlain(self, feedback_fn):
12531 """Converts an instance from drbd to plain.
# Flow: keep only each DRBD disk's data child as the new plain disk, update
# the config first, then best-effort remove the now-unneeded volumes
# (everything on the secondary, the meta LVs on the primary) and return the
# DRBD TCP ports to the cluster pool.
12534 instance = self.instance
# Only single-secondary DRBD8 instances can be converted.
12536 assert len(instance.secondary_nodes) == 1
12537 assert instance.disk_template == constants.DT_DRBD8
12539 pnode = instance.primary_node
12540 snode = instance.secondary_nodes[0]
12541 feedback_fn("Converting template to plain")
12543 old_disks = instance.disks
# children[0] is the DRBD data LV, which becomes the plain disk.
12544 new_disks = [d.children[0] for d in old_disks]
12546 # copy over size and mode
12547 for parent, child in zip(old_disks, new_disks):
12548 child.size = parent.size
12549 child.mode = parent.mode
12551 # update instance structure
12552 instance.disks = new_disks
12553 instance.disk_template = constants.DT_PLAIN
12554 self.cfg.Update(instance, feedback_fn)
12556 # Release locks in case removing disks takes a while
12557 _ReleaseLocks(self, locking.LEVEL_NODE)
12559 feedback_fn("Removing volumes on the secondary node...")
12560 for disk in old_disks:
12561 self.cfg.SetDiskID(disk, snode)
12562 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
# Removal failures are only warned about; conversion proceeds regardless.
12564 self.LogWarning("Could not remove block device %s on node %s,"
12565 " continuing anyway: %s", disk.iv_name, snode, msg)
12567 feedback_fn("Removing unneeded volumes on the primary node...")
12568 for idx, disk in enumerate(old_disks):
# children[1] is the DRBD metadata LV, no longer needed for plain disks.
12569 meta = disk.children[1]
12570 self.cfg.SetDiskID(meta, pnode)
12571 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12573 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12574 " continuing anyway: %s", idx, pnode, msg)
12576 # this is a DRBD disk, return its port to the pool
12577 for disk in old_disks:
# logical_id[2] holds the DRBD TCP/UDP port for this disk.
12578 tcp_port = disk.logical_id[2]
12579 self.cfg.AddTcpUdpPort(tcp_port)
12581 # Node resource locks will be released by caller
12583 def _CreateNewDisk(self, idx, params, _):
12584 """Creates a new disk.
# Callback for ApplyContainerMods ("add" operation): generates a disk object
# matching the instance's current template and creates the backing block
# devices on all instance nodes. Returns change-description tuples (the
# return statement itself falls in a listing gap here).
12587 instance = self.instance
12590 if instance.disk_template in constants.DTS_FILEBASED:
# For file-based templates, reuse driver and directory of the first disk.
12591 (file_driver, file_path) = instance.disks[0].logical_id
12592 file_path = os.path.dirname(file_path)
12594 file_driver = file_path = None
12597 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12598 instance.primary_node, instance.secondary_nodes,
12599 [params], file_path, file_driver, idx,
12600 self.Log, self.diskparams)[0]
12602 info = _GetInstanceInfoText(instance)
12604 logging.info("Creating volume %s for instance %s",
12605 disk.iv_name, instance.name)
12606 # Note: this needs to be kept in sync with _CreateDisks
12608 for node in instance.all_nodes:
# Force-create only on the primary node.
12609 f_create = (node == instance.primary_node)
12611 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
# Creation failure on one node is logged as a warning, not fatal.
12612 except errors.OpExecError, err:
12613 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12614 disk.iv_name, disk, node, err)
12617 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12621 def _ModifyDisk(idx, disk, params, _):
12622 """Modifies a disk.
# ApplyContainerMods "modify" callback: only the access mode can be changed;
# returns a change-description tuple (return line falls in a listing gap).
12625 disk.mode = params[constants.IDISK_MODE]
12628 ("disk.mode/%d" % idx, disk.mode),
12631 def _RemoveDisk(self, idx, root, _):
# ApplyContainerMods "remove" callback: best-effort removal of the disk's
# block devices on every node in its device tree; for DRBD disks the TCP
# port is returned to the cluster pool afterwards.
12635 for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12636 self.cfg.SetDiskID(disk, node)
12637 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
# Failures only warn — disk removal is not rolled back.
12639 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12640 " continuing anyway", idx, node, msg)
12642 # if this is a DRBD disk, return its port to the pool
12643 if root.dev_type in constants.LDS_DRBD:
12644 self.cfg.AddTcpUdpPort(root.logical_id[2])
12647 def _CreateNewNic(idx, params, private):
12648 """Creates data structure for a new network interface.
# ApplyContainerMods "add" callback for NICs: builds the NIC object from the
# validated params (MAC/IP from params, nicparams precomputed in `private`
# during prereq) plus a change-description entry for hooks/feedback.
12651 mac = params[constants.INIC_MAC]
12652 ip = params.get(constants.INIC_IP, None)
12653 nicparams = private.params
12655 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
# private.filled carries the fully-defaulted nicparams for display.
12657 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12658 (mac, ip, private.filled[constants.NIC_MODE],
12659 private.filled[constants.NIC_LINK])),
12663 def _ApplyNicMods(idx, nic, params, private):
12664 """Modifies a network interface.
# ApplyContainerMods "modify" callback for NICs: applies MAC/IP changes
# directly to the NIC object, installs the precomputed nicparams, and
# records a change-description entry per modified key.
12669 for key in [constants.INIC_MAC, constants.INIC_IP]:
# Guard (`if key in params:`) presumably sits in the listing gap above.
12671 changes.append(("nic.%s/%d" % (key, idx), params[key]))
12672 setattr(nic, key, params[key])
12675 nic.nicparams = private.params
12677 for (key, val) in params.items():
12678 changes.append(("nic.%s/%d" % (key, idx), val))
12682 def Exec(self, feedback_fn):
12683 """Modifies an instance.
12685 All parameters take effect only at the next restart of the instance.
# Applies all changes validated in CheckPrereq: runtime memory ballooning,
# disk add/modify/remove, disk template conversion, NIC changes, hv/be/os
# parameters, OS name and admin (online/offline) state; returns a list of
# (name, new-value) change descriptions (built into `result`, whose
# initialization falls in a listing gap).
12688 # Process here the warnings from CheckPrereq, as we don't have a
12689 # feedback_fn there.
12690 # TODO: Replace with self.LogWarning
12691 for warn in self.warn:
12692 feedback_fn("WARNING: %s" % warn)
# Node resource locks are held iff a disk template conversion was requested.
12694 assert ((self.op.disk_template is None) ^
12695 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12696 "Not owning any node resource locks"
12699 instance = self.instance
12702 if self.op.runtime_mem:
12703 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12705 self.op.runtime_mem)
12706 rpcres.Raise("Cannot modify instance runtime memory")
12707 result.append(("runtime_memory", self.op.runtime_mem))
12709 # Apply disk changes
12710 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12711 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
# Renumber disk iv_names after container modifications.
12712 _UpdateIvNames(0, instance.disks)
12714 if self.op.disk_template:
12716 check_nodes = set(instance.all_nodes)
12717 if self.op.remote_node:
12718 check_nodes.add(self.op.remote_node)
12719 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12720 owned = self.owned_locks(level)
12721 assert not (check_nodes - owned), \
12722 ("Not owning the correct locks, owning %r, expected at least %r" %
12723 (owned, check_nodes))
# Disks must be shut down before the template conversion can run.
12725 r_shut = _ShutdownInstanceDisks(self, instance)
12727 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12728 " proceed with disk template conversion")
12729 mode = (instance.disk_template, self.op.disk_template)
# Dispatch to _ConvertPlainToDrbd / _ConvertDrbdToPlain via the class-level
# _DISK_CONVERSIONS table; a try/except around this presumably sits in the
# listing gap (ReleaseDRBDMinors looks like its cleanup path).
12731 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12733 self.cfg.ReleaseDRBDMinors(instance.name)
12735 result.append(("disk_template", self.op.disk_template))
12737 assert instance.disk_template == self.op.disk_template, \
12738 ("Expected disk template '%s', found '%s'" %
12739 (self.op.disk_template, instance.disk_template))
12741 # Release node and resource locks if there are any (they might already have
12742 # been released during disk conversion)
12743 _ReleaseLocks(self, locking.LEVEL_NODE)
12744 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12746 # Apply NIC changes
12747 if self._new_nics is not None:
# NICs and their change descriptions were precomputed in CheckPrereq.
12748 instance.nics = self._new_nics
12749 result.extend(self._nic_chgdesc)
12752 if self.op.hvparams:
12753 instance.hvparams = self.hv_inst
12754 for key, val in self.op.hvparams.iteritems():
12755 result.append(("hv/%s" % key, val))
12758 if self.op.beparams:
12759 instance.beparams = self.be_inst
12760 for key, val in self.op.beparams.iteritems():
12761 result.append(("be/%s" % key, val))
12764 if self.op.os_name:
12765 instance.os = self.op.os_name
12768 if self.op.osparams:
12769 instance.osparams = self.os_inst
12770 for key, val in self.op.osparams.iteritems():
12771 result.append(("os/%s" % key, val))
# offline is tri-state: None = no change, True = offline, False = down.
12773 if self.op.offline is None:
12776 elif self.op.offline:
12777 # Mark instance as offline
12778 self.cfg.MarkInstanceOffline(instance.name)
12779 result.append(("admin_state", constants.ADMINST_OFFLINE))
12781 # Mark instance as online, but stopped
12782 self.cfg.MarkInstanceDown(instance.name)
12783 result.append(("admin_state", constants.ADMINST_DOWN))
12785 self.cfg.Update(instance, feedback_fn)
12787 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12788 self.owned_locks(locking.LEVEL_NODE)), \
12789 "All node locks should have been released by now"
# Maps (current_template, requested_template) to the conversion method;
# looked up by Exec when self.op.disk_template is set. Only plain<->drbd8
# conversions are supported.
12793 _DISK_CONVERSIONS = {
12794 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12795 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12799 class LUInstanceChangeGroup(LogicalUnit):
# Logical unit that moves an instance to (one of) the given target node
# group(s), using an iallocator to compute the necessary relocation jobs.
# NOTE(review): this listing has gaps (missing original lines, e.g. some
# `else:` lines and dict/docstring closers) — verify against the full file.
12800 HPATH = "instance-change-group"
12801 HTYPE = constants.HTYPE_INSTANCE
12804 def ExpandNames(self):
# All locks are shared; group/node locks are declared lazily in DeclareLocks.
12805 self.share_locks = _ShareAll()
12806 self.needed_locks = {
12807 locking.LEVEL_NODEGROUP: [],
12808 locking.LEVEL_NODE: [],
12811 self._ExpandAndLockInstance()
12813 if self.op.target_groups:
# Resolve user-supplied group names to UUIDs early.
12814 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12815 self.op.target_groups)
12817 self.req_target_uuids = None
12819 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12821 def DeclareLocks(self, level):
12822 if level == locking.LEVEL_NODEGROUP:
12823 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12825 if self.req_target_uuids:
12826 lock_groups = set(self.req_target_uuids)
12828 # Lock all groups used by instance optimistically; this requires going
12829 # via the node before it's locked, requiring verification later on
12830 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12831 lock_groups.update(instance_groups)
12833 # No target groups, need to lock all of them
12834 lock_groups = locking.ALL_SET
12836 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12838 elif level == locking.LEVEL_NODE:
12839 if self.req_target_uuids:
12840 # Lock all nodes used by instances
12841 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12842 self._LockInstancesNodes()
12844 # Lock all nodes in all potential target groups
12845 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12846 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12847 member_nodes = [node_name
12848 for group in lock_groups
12849 for node_name in self.cfg.GetNodeGroup(group).members]
12850 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12852 # Lock all nodes as all groups are potential targets
12853 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12855 def CheckPrereq(self):
# Verifies locks still cover the instance's nodes/groups (they were taken
# optimistically) and computes self.target_uuids for the allocator run.
12856 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12857 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12858 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12860 assert (self.req_target_uuids is None or
12861 owned_groups.issuperset(self.req_target_uuids))
12862 assert owned_instances == set([self.op.instance_name])
12864 # Get instance information
12865 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12867 # Check if node groups for locked instance are still correct
12868 assert owned_nodes.issuperset(self.instance.all_nodes), \
12869 ("Instance %s's nodes changed while we kept the lock" %
12870 self.op.instance_name)
12872 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12875 if self.req_target_uuids:
12876 # User requested specific target groups
12877 self.target_uuids = self.req_target_uuids
12879 # All groups except those used by the instance are potential targets
12880 self.target_uuids = owned_groups - inst_groups
# A target group the instance already uses is a user error.
12882 conflicting_groups = self.target_uuids & inst_groups
12883 if conflicting_groups:
12884 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12885 " used by the instance '%s'" %
12886 (utils.CommaJoin(conflicting_groups),
12887 self.op.instance_name),
12888 errors.ECODE_INVAL)
12890 if not self.target_uuids:
12891 raise errors.OpPrereqError("There are no possible target groups",
12892 errors.ECODE_INVAL)
12894 def BuildHooksEnv(self):
12895 """Build hooks env.
12898 assert self.target_uuids
12901 "TARGET_GROUPS": " ".join(self.target_uuids),
12904 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12908 def BuildHooksNodes(self):
12909 """Build hooks nodes.
# Hooks run on the master node only (both pre and post lists).
12912 mn = self.cfg.GetMasterNode()
12913 return ([mn], [mn])
12915 def Exec(self, feedback_fn):
# Runs the iallocator in CHG_GROUP mode and returns the resulting jobs.
12916 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12918 assert instances == [self.op.instance_name], "Instance not locked"
12920 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12921 instances=instances, target_groups=list(self.target_uuids))
12923 ial.Run(self.op.iallocator)
12925 if not ial.success:
12926 raise errors.OpPrereqError("Can't compute solution for changing group of"
12927 " instance '%s' using iallocator '%s': %s" %
12928 (self.op.instance_name, self.op.iallocator,
12930 errors.ECODE_NORES)
12932 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12934 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12935 " instance '%s'", len(jobs), self.op.instance_name)
# The jobs are submitted by the job-queue machinery, not executed inline.
12937 return ResultWithJobs(jobs)
12940 class LUBackupQuery(NoHooksLU):
12941 """Query the exports list
12946 def ExpandNames(self):
12947 self.needed_locks = {}
# Node locks are shared — this LU only reads export lists.
12948 self.share_locks[locking.LEVEL_NODE] = 1
# An empty node list means "query every node in the cluster".
12949 if not self.op.nodes:
12950 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12952 self.needed_locks[locking.LEVEL_NODE] = \
12953 _GetWantedNodes(self, self.op.nodes)
12955 def Exec(self, feedback_fn):
12956 """Compute the list of all the exported system images.
12959 @return: a dictionary with the structure node->(export-list)
12960 where export-list is a list of the instances exported on
12964 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12965 rpcresult = self.rpc.call_export_list(self.nodes)
12967 for node in rpcresult:
# A failed RPC is reported as False rather than aborting the query.
12968 if rpcresult[node].fail_msg:
12969 result[node] = False
12971 result[node] = rpcresult[node].payload
12976 class LUBackupPrepare(NoHooksLU):
12977 """Prepares an instance for an export and returns useful information.
12982 def ExpandNames(self):
12983 self._ExpandAndLockInstance()
12985 def CheckPrereq(self):
12986 """Check prerequisites.
12989 instance_name = self.op.instance_name
12991 self.instance = self.cfg.GetInstanceInfo(instance_name)
12992 assert self.instance is not None, \
12993 "Cannot retrieve locked instance %s" % self.op.instance_name
12994 _CheckNodeOnline(self, self.instance.primary_node)
# Cluster domain secret, used to sign the handshake/key-name/CA below.
12996 self._cds = _GetClusterDomainSecret()
12998 def Exec(self, feedback_fn):
12999 """Prepares an instance for an export.
# For remote exports: creates an X509 key/cert on the primary node and
# returns the handshake, HMAC-signed key name and signed CA that the
# destination cluster needs (return dict partly in a listing gap).
13002 instance = self.instance
13004 if self.op.mode == constants.EXPORT_MODE_REMOTE:
# Salt for the HMAC over the X509 key name.
13005 salt = utils.GenerateSecret(8)
13007 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13008 result = self.rpc.call_x509_cert_create(instance.primary_node,
13009 constants.RIE_CERT_VALIDITY)
13010 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13012 (name, cert_pem) = result.payload
13014 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13018 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13019 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13021 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13027 class LUBackupExport(LogicalUnit):
13028 """Export an instance to an image in the cluster.
# Supports two modes: EXPORT_MODE_LOCAL (snapshot to another node of this
# cluster) and EXPORT_MODE_REMOTE (encrypted export to a foreign cluster
# using X509 certificates prepared by LUBackupPrepare).
# NOTE(review): this listing has gaps (missing original lines such as
# `try:`/`else:` lines and closing brackets) — verify against the full file.
13031 HPATH = "instance-export"
13032 HTYPE = constants.HTYPE_INSTANCE
13035 def CheckArguments(self):
13036 """Check the arguments.
# Remote exports additionally require the X509 key name and destination CA.
13039 self.x509_key_name = self.op.x509_key_name
13040 self.dest_x509_ca_pem = self.op.destination_x509_ca
13042 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13043 if not self.x509_key_name:
13044 raise errors.OpPrereqError("Missing X509 key name for encryption",
13045 errors.ECODE_INVAL)
13047 if not self.dest_x509_ca_pem:
13048 raise errors.OpPrereqError("Missing destination X509 CA",
13049 errors.ECODE_INVAL)
13051 def ExpandNames(self):
13052 self._ExpandAndLockInstance()
13054 # Lock all nodes for local exports
13055 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13056 # FIXME: lock only instance primary and destination node
13058 # Sad but true, for now we have do lock all nodes, as we don't know where
13059 # the previous export might be, and in this LU we search for it and
13060 # remove it from its current node. In the future we could fix this by:
13061 # - making a tasklet to search (share-lock all), then create the
13062 # new one, then one to remove, after
13063 # - removing the removal operation altogether
13064 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13066 def DeclareLocks(self, level):
13067 """Last minute lock declaration."""
13068 # All nodes are locked anyway, so nothing to do here.
13070 def BuildHooksEnv(self):
13071 """Build hooks env.
13073 This will run on the master, primary node and target node.
13077 "EXPORT_MODE": self.op.mode,
13078 "EXPORT_NODE": self.op.target_node,
13079 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13080 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13081 # TODO: Generic function for boolean env variables
13082 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13085 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13089 def BuildHooksNodes(self):
13090 """Build hooks nodes.
13093 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
# For local exports, also run hooks on the destination node.
13095 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13096 nl.append(self.op.target_node)
13100 def CheckPrereq(self):
13101 """Check prerequisites.
13103 This checks that the instance and node names are valid.
13106 instance_name = self.op.instance_name
13108 self.instance = self.cfg.GetInstanceInfo(instance_name)
13109 assert self.instance is not None, \
13110 "Cannot retrieve locked instance %s" % self.op.instance_name
13111 _CheckNodeOnline(self, self.instance.primary_node)
# Removing a running instance without shutting it down first is refused.
13113 if (self.op.remove_instance and
13114 self.instance.admin_state == constants.ADMINST_UP and
13115 not self.op.shutdown):
13116 raise errors.OpPrereqError("Can not remove instance without shutting it"
13119 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13120 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13121 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13122 assert self.dst_node is not None
13124 _CheckNodeOnline(self, self.dst_node.name)
13125 _CheckNodeNotDrained(self, self.dst_node.name)
13128 self.dest_disk_info = None
13129 self.dest_x509_ca = None
13131 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13132 self.dst_node = None
# In remote mode, op.target_node carries per-disk destination info.
13134 if len(self.op.target_node) != len(self.instance.disks):
13135 raise errors.OpPrereqError(("Received destination information for %s"
13136 " disks, but instance %s has %s disks") %
13137 (len(self.op.target_node), instance_name,
13138 len(self.instance.disks)),
13139 errors.ECODE_INVAL)
13141 cds = _GetClusterDomainSecret()
13143 # Check X509 key name
13145 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13146 except (TypeError, ValueError), err:
13147 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13149 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13150 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13151 errors.ECODE_INVAL)
13153 # Load and verify CA
13155 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13156 except OpenSSL.crypto.Error, err:
13157 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13158 (err, ), errors.ECODE_INVAL)
13160 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13161 if errcode is not None:
13162 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13163 (msg, ), errors.ECODE_INVAL)
13165 self.dest_x509_ca = cert
13167 # Verify target information
13169 for idx, disk_data in enumerate(self.op.target_node):
13171 (host, port, magic) = \
13172 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13173 except errors.GenericError, err:
13174 raise errors.OpPrereqError("Target info for disk %s: %s" %
13175 (idx, err), errors.ECODE_INVAL)
13177 disk_info.append((host, port, magic))
13179 assert len(disk_info) == len(self.op.target_node)
13180 self.dest_disk_info = disk_info
13183 raise errors.ProgrammerError("Unhandled export mode %r" %
13186 # instance disk type verification
13187 # TODO: Implement export support for file-based disks
13188 for disk in self.instance.disks:
13189 if disk.dev_type == constants.LD_FILE:
13190 raise errors.OpPrereqError("Export not supported for instances with"
13191 " file-based disks", errors.ECODE_INVAL)
13193 def _CleanupExports(self, feedback_fn):
13194 """Removes exports of current instance from all other nodes.
13196 If an instance in a cluster with nodes A..D was exported to node C, its
13197 exports will be removed from the nodes A, B and D.
13200 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13202 nodelist = self.cfg.GetNodeList()
13203 nodelist.remove(self.dst_node.name)
13205 # on one-node clusters nodelist will be empty after the removal
13206 # if we proceed the backup would be removed because OpBackupQuery
13207 # substitutes an empty list with the full cluster node list.
13208 iname = self.instance.name
13210 feedback_fn("Removing old exports for instance %s" % iname)
13211 exportlist = self.rpc.call_export_list(nodelist)
13212 for node in exportlist:
# Nodes whose export list cannot be queried are skipped.
13213 if exportlist[node].fail_msg:
13215 if iname in exportlist[node].payload:
13216 msg = self.rpc.call_export_remove(node, iname).fail_msg
13218 self.LogWarning("Could not remove older export for instance %s"
13219 " on node %s: %s", iname, node, msg)
13221 def Exec(self, feedback_fn):
13222 """Export an instance to an image in the cluster.
# Flow: optionally shut down the instance, activate disks if needed,
# snapshot, restart the instance if it was running, perform the local or
# remote export, deactivate disks, then optionally remove the instance and
# (local mode) clean up stale exports on other nodes.
13225 assert self.op.mode in constants.EXPORT_MODES
13227 instance = self.instance
13228 src_node = instance.primary_node
13230 if self.op.shutdown:
13231 # shutdown the instance, but not the disks
13232 feedback_fn("Shutting down instance %s" % instance.name)
13233 result = self.rpc.call_instance_shutdown(src_node, instance,
13234 self.op.shutdown_timeout)
13235 # TODO: Maybe ignore failures if ignore_remove_failures is set
13236 result.Raise("Could not shutdown instance %s on"
13237 " node %s" % (instance.name, src_node))
13239 # set the disks ID correctly since call_instance_start needs the
13240 # correct drbd minor to create the symlinks
13241 for disk in instance.disks:
13242 self.cfg.SetDiskID(disk, src_node)
# Disks must be activated first if the instance was administratively down.
13244 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13247 # Activate the instance disks if we'exporting a stopped instance
13248 feedback_fn("Activating disks for %s" % instance.name)
13249 _StartInstanceDisks(self, instance, None)
13252 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13255 helper.CreateSnapshots()
# Restart the instance only if it was running before the shutdown and is
# not about to be removed.
13257 if (self.op.shutdown and
13258 instance.admin_state == constants.ADMINST_UP and
13259 not self.op.remove_instance):
13260 assert not activate_disks
13261 feedback_fn("Starting instance %s" % instance.name)
13262 result = self.rpc.call_instance_start(src_node,
13263 (instance, None, None), False)
13264 msg = result.fail_msg
13266 feedback_fn("Failed to start instance: %s" % msg)
13267 _ShutdownInstanceDisks(self, instance)
13268 raise errors.OpExecError("Could not start instance: %s" % msg)
13270 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13271 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13272 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13273 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13274 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13276 (key_name, _, _) = self.x509_key_name
13279 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13282 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13283 key_name, dest_ca_pem,
13288 # Check for backwards compatibility
13289 assert len(dresults) == len(instance.disks)
13290 assert compat.all(isinstance(i, bool) for i in dresults), \
13291 "Not all results are boolean: %r" % dresults
13295 feedback_fn("Deactivating disks for %s" % instance.name)
13296 _ShutdownInstanceDisks(self, instance)
# Collect per-disk and finalization failures into one error message.
13298 if not (compat.all(dresults) and fin_resu):
13301 failures.append("export finalization")
13302 if not compat.all(dresults):
13303 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13305 failures.append("disk export: disk(s) %s" % fdsk)
13307 raise errors.OpExecError("Export failed, errors in %s" %
13308 utils.CommaJoin(failures))
13310 # At this point, the export was successful, we can cleanup/finish
13312 # Remove instance if requested
13313 if self.op.remove_instance:
13314 feedback_fn("Removing instance %s" % instance.name)
13315 _RemoveInstance(self, feedback_fn, instance,
13316 self.op.ignore_remove_failures)
13318 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13319 self._CleanupExports(feedback_fn)
13321 return fin_resu, dresults
13324 class LUBackupRemove(NoHooksLU):
13325 """Remove exports related to the named instance.
13330 def ExpandNames(self):
13331 self.needed_locks = {}
13332 # We need all nodes to be locked in order for RemoveExport to work, but we
13333 # don't need to lock the instance itself, as nothing will happen to it (and
13334 # we can remove exports also for a removed instance)
13335 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13337 def Exec(self, feedback_fn):
13338 """Remove any export.
13341 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13342 # If the instance was not found we'll try with the name that was passed in.
13343 # This will only work if it was an FQDN, though.
13345 if not instance_name:
13347 instance_name = self.op.instance_name
13349 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13350 exportlist = self.rpc.call_export_list(locked_nodes)
13352 for node in exportlist:
13353 msg = exportlist[node].fail_msg
13355 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13357 if instance_name in exportlist[node].payload:
13359 result = self.rpc.call_export_remove(node, instance_name)
13360 msg = result.fail_msg
13362 logging.error("Could not remove export for instance %s"
13363 " on node %s: %s", instance_name, node, msg)
13365 if fqdn_warn and not found:
13366 feedback_fn("Export not found. If trying to remove an export belonging"
13367 " to a deleted instance please use its Fully Qualified"
13371 class LUGroupAdd(LogicalUnit):
13372 """Logical unit for creating node groups.
13375 HPATH = "group-add"
13376 HTYPE = constants.HTYPE_GROUP
13379 def ExpandNames(self):
13380 # We need the new group's UUID here so that we can create and acquire the
13381 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13382 # that it should not check whether the UUID exists in the configuration.
13383 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13384 self.needed_locks = {}
13385 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13387 def CheckPrereq(self):
13388 """Check prerequisites.
13390 This checks that the given group name is not an existing node group
13395 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13396 except errors.OpPrereqError:
13399 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13400 " node group (UUID: %s)" %
13401 (self.op.group_name, existing_uuid),
13402 errors.ECODE_EXISTS)
13404 if self.op.ndparams:
13405 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13407 if self.op.hv_state:
13408 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13410 self.new_hv_state = None
13412 if self.op.disk_state:
13413 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13415 self.new_disk_state = None
13417 if self.op.diskparams:
13418 for templ in constants.DISK_TEMPLATES:
13419 if templ not in self.op.diskparams:
13420 self.op.diskparams[templ] = {}
13421 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13423 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13425 if self.op.ipolicy:
13426 cluster = self.cfg.GetClusterInfo()
13427 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13429 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13430 except errors.ConfigurationError, err:
13431 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13432 errors.ECODE_INVAL)
13434 def BuildHooksEnv(self):
13435 """Build hooks env.
13439 "GROUP_NAME": self.op.group_name,
13442 def BuildHooksNodes(self):
13443 """Build hooks nodes.
13446 mn = self.cfg.GetMasterNode()
13447 return ([mn], [mn])
13449 def Exec(self, feedback_fn):
13450 """Add the node group to the cluster.
13453 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13454 uuid=self.group_uuid,
13455 alloc_policy=self.op.alloc_policy,
13456 ndparams=self.op.ndparams,
13457 diskparams=self.op.diskparams,
13458 ipolicy=self.op.ipolicy,
13459 hv_state_static=self.new_hv_state,
13460 disk_state_static=self.new_disk_state)
13462 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13463 del self.remove_locks[locking.LEVEL_NODEGROUP]
13466 class LUGroupAssignNodes(NoHooksLU):
13467 """Logical unit for assigning nodes to groups.
13472 def ExpandNames(self):
13473 # These raise errors.OpPrereqError on their own:
13474 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13475 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13477 # We want to lock all the affected nodes and groups. We have readily
13478 # available the list of nodes, and the *destination* group. To gather the
13479 # list of "source" groups, we need to fetch node information later on.
13480 self.needed_locks = {
13481 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13482 locking.LEVEL_NODE: self.op.nodes,
13485 def DeclareLocks(self, level):
13486 if level == locking.LEVEL_NODEGROUP:
13487 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13489 # Try to get all affected nodes' groups without having the group or node
13490 # lock yet. Needs verification later in the code flow.
13491 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13493 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13495 def CheckPrereq(self):
13496 """Check prerequisites.
13499 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13500 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13501 frozenset(self.op.nodes))
13503 expected_locks = (set([self.group_uuid]) |
13504 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13505 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13506 if actual_locks != expected_locks:
13507 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13508 " current groups are '%s', used to be '%s'" %
13509 (utils.CommaJoin(expected_locks),
13510 utils.CommaJoin(actual_locks)))
13512 self.node_data = self.cfg.GetAllNodesInfo()
13513 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13514 instance_data = self.cfg.GetAllInstancesInfo()
13516 if self.group is None:
13517 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13518 (self.op.group_name, self.group_uuid))
13520 (new_splits, previous_splits) = \
13521 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13522 for node in self.op.nodes],
13523 self.node_data, instance_data)
13526 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13528 if not self.op.force:
13529 raise errors.OpExecError("The following instances get split by this"
13530 " change and --force was not given: %s" %
13533 self.LogWarning("This operation will split the following instances: %s",
13536 if previous_splits:
13537 self.LogWarning("In addition, these already-split instances continue"
13538 " to be split across groups: %s",
13539 utils.CommaJoin(utils.NiceSort(previous_splits)))
13541 def Exec(self, feedback_fn):
13542 """Assign nodes to a new group.
13545 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13547 self.cfg.AssignGroupNodes(mods)
13550 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13551 """Check for split instances after a node assignment.
13553 This method considers a series of node assignments as an atomic operation,
13554 and returns information about split instances after applying the set of
13557 In particular, it returns information about newly split instances, and
13558 instances that were already split, and remain so after the change.
13560 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13563 @type changes: list of (node_name, new_group_uuid) pairs.
13564 @param changes: list of node assignments to consider.
13565 @param node_data: a dict with data for all nodes
13566 @param instance_data: a dict with all instances to consider
13567 @rtype: a two-tuple
13568 @return: a list of instances that were previously okay and result split as a
13569 consequence of this change, and a list of instances that were previously
13570 split and this change does not fix.
13573 changed_nodes = dict((node, group) for node, group in changes
13574 if node_data[node].group != group)
13576 all_split_instances = set()
13577 previously_split_instances = set()
13579 def InstanceNodes(instance):
13580 return [instance.primary_node] + list(instance.secondary_nodes)
13582 for inst in instance_data.values():
13583 if inst.disk_template not in constants.DTS_INT_MIRROR:
13586 instance_nodes = InstanceNodes(inst)
13588 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13589 previously_split_instances.add(inst.name)
13591 if len(set(changed_nodes.get(node, node_data[node].group)
13592 for node in instance_nodes)) > 1:
13593 all_split_instances.add(inst.name)
13595 return (list(all_split_instances - previously_split_instances),
13596 list(previously_split_instances & all_split_instances))
13599 class _GroupQuery(_QueryBase):
13600 FIELDS = query.GROUP_FIELDS
13602 def ExpandNames(self, lu):
13603 lu.needed_locks = {}
13605 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13606 self._cluster = lu.cfg.GetClusterInfo()
13607 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13610 self.wanted = [name_to_uuid[name]
13611 for name in utils.NiceSort(name_to_uuid.keys())]
13613 # Accept names to be either names or UUIDs.
13616 all_uuid = frozenset(self._all_groups.keys())
13618 for name in self.names:
13619 if name in all_uuid:
13620 self.wanted.append(name)
13621 elif name in name_to_uuid:
13622 self.wanted.append(name_to_uuid[name])
13624 missing.append(name)
13627 raise errors.OpPrereqError("Some groups do not exist: %s" %
13628 utils.CommaJoin(missing),
13629 errors.ECODE_NOENT)
13631 def DeclareLocks(self, lu, level):
13634 def _GetQueryData(self, lu):
13635 """Computes the list of node groups and their attributes.
13638 do_nodes = query.GQ_NODE in self.requested_data
13639 do_instances = query.GQ_INST in self.requested_data
13641 group_to_nodes = None
13642 group_to_instances = None
13644 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13645 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13646 # latter GetAllInstancesInfo() is not enough, for we have to go through
13647 # instance->node. Hence, we will need to process nodes even if we only need
13648 # instance information.
13649 if do_nodes or do_instances:
13650 all_nodes = lu.cfg.GetAllNodesInfo()
13651 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13654 for node in all_nodes.values():
13655 if node.group in group_to_nodes:
13656 group_to_nodes[node.group].append(node.name)
13657 node_to_group[node.name] = node.group
13660 all_instances = lu.cfg.GetAllInstancesInfo()
13661 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13663 for instance in all_instances.values():
13664 node = instance.primary_node
13665 if node in node_to_group:
13666 group_to_instances[node_to_group[node]].append(instance.name)
13669 # Do not pass on node information if it was not requested.
13670 group_to_nodes = None
13672 return query.GroupQueryData(self._cluster,
13673 [self._all_groups[uuid]
13674 for uuid in self.wanted],
13675 group_to_nodes, group_to_instances)
13678 class LUGroupQuery(NoHooksLU):
13679 """Logical unit for querying node groups.
  def CheckArguments(self):
    """Build the query helper from the opcode's name filter and field list."""
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)
  def ExpandNames(self):
    """Delegate name expansion and lock setup to the query helper."""
    self.gq.ExpandNames(self)
  def DeclareLocks(self, level):
    """Delegate per-level lock declaration to the query helper."""
    self.gq.DeclareLocks(self, level)
  def Exec(self, feedback_fn):
    """Run the group query and return the results in old-style format."""
    return self.gq.OldStyleQuery(self)
13698 class LUGroupSetParams(LogicalUnit):
13699 """Modifies the parameters of a node group.
13702 HPATH = "group-modify"
13703 HTYPE = constants.HTYPE_GROUP
13706 def CheckArguments(self):
13709 self.op.diskparams,
13710 self.op.alloc_policy,
13712 self.op.disk_state,
13716 if all_changes.count(None) == len(all_changes):
13717 raise errors.OpPrereqError("Please pass at least one modification",
13718 errors.ECODE_INVAL)
13720 def ExpandNames(self):
13721 # This raises errors.OpPrereqError on its own:
13722 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13724 self.needed_locks = {
13725 locking.LEVEL_INSTANCE: [],
13726 locking.LEVEL_NODEGROUP: [self.group_uuid],
13729 self.share_locks[locking.LEVEL_INSTANCE] = 1
13731 def DeclareLocks(self, level):
13732 if level == locking.LEVEL_INSTANCE:
13733 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13735 # Lock instances optimistically, needs verification once group lock has
13737 self.needed_locks[locking.LEVEL_INSTANCE] = \
13738 self.cfg.GetNodeGroupInstances(self.group_uuid)
13740 def CheckPrereq(self):
13741 """Check prerequisites.
13744 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13746 # Check if locked instances are still correct
13747 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13749 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13750 cluster = self.cfg.GetClusterInfo()
13752 if self.group is None:
13753 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13754 (self.op.group_name, self.group_uuid))
13756 if self.op.ndparams:
13757 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13758 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13759 self.new_ndparams = new_ndparams
13761 if self.op.diskparams:
13762 self.new_diskparams = dict()
13763 for templ in constants.DISK_TEMPLATES:
13764 if templ not in self.op.diskparams:
13765 self.op.diskparams[templ] = {}
13766 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13767 self.op.diskparams[templ])
13768 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13769 self.new_diskparams[templ] = new_templ_params
13771 if self.op.hv_state:
13772 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13773 self.group.hv_state_static)
13775 if self.op.disk_state:
13776 self.new_disk_state = \
13777 _MergeAndVerifyDiskState(self.op.disk_state,
13778 self.group.disk_state_static)
13780 if self.op.ipolicy:
13781 self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13785 new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13786 inst_filter = lambda inst: inst.name in owned_instances
13787 instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13789 _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13791 new_ipolicy, instances)
13794 self.LogWarning("After the ipolicy change the following instances"
13795 " violate them: %s",
13796 utils.CommaJoin(violations))
13798 def BuildHooksEnv(self):
13799 """Build hooks env.
13803 "GROUP_NAME": self.op.group_name,
13804 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13807 def BuildHooksNodes(self):
13808 """Build hooks nodes.
13811 mn = self.cfg.GetMasterNode()
13812 return ([mn], [mn])
13814 def Exec(self, feedback_fn):
13815 """Modifies the node group.
13820 if self.op.ndparams:
13821 self.group.ndparams = self.new_ndparams
13822 result.append(("ndparams", str(self.group.ndparams)))
13824 if self.op.diskparams:
13825 self.group.diskparams = self.new_diskparams
13826 result.append(("diskparams", str(self.group.diskparams)))
13828 if self.op.alloc_policy:
13829 self.group.alloc_policy = self.op.alloc_policy
13831 if self.op.hv_state:
13832 self.group.hv_state_static = self.new_hv_state
13834 if self.op.disk_state:
13835 self.group.disk_state_static = self.new_disk_state
13837 if self.op.ipolicy:
13838 self.group.ipolicy = self.new_ipolicy
13840 self.cfg.Update(self.group, feedback_fn)
13844 class LUGroupRemove(LogicalUnit):
13845 HPATH = "group-remove"
13846 HTYPE = constants.HTYPE_GROUP
13849 def ExpandNames(self):
13850 # This will raises errors.OpPrereqError on its own:
13851 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13852 self.needed_locks = {
13853 locking.LEVEL_NODEGROUP: [self.group_uuid],
13856 def CheckPrereq(self):
13857 """Check prerequisites.
13859 This checks that the given group name exists as a node group, that is
13860 empty (i.e., contains no nodes), and that is not the last group of the
13864 # Verify that the group is empty.
13865 group_nodes = [node.name
13866 for node in self.cfg.GetAllNodesInfo().values()
13867 if node.group == self.group_uuid]
13870 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13872 (self.op.group_name,
13873 utils.CommaJoin(utils.NiceSort(group_nodes))),
13874 errors.ECODE_STATE)
13876 # Verify the cluster would not be left group-less.
13877 if len(self.cfg.GetNodeGroupList()) == 1:
13878 raise errors.OpPrereqError("Group '%s' is the only group,"
13879 " cannot be removed" %
13880 self.op.group_name,
13881 errors.ECODE_STATE)
13883 def BuildHooksEnv(self):
13884 """Build hooks env.
13888 "GROUP_NAME": self.op.group_name,
13891 def BuildHooksNodes(self):
13892 """Build hooks nodes.
13895 mn = self.cfg.GetMasterNode()
13896 return ([mn], [mn])
13898 def Exec(self, feedback_fn):
13899 """Remove the node group.
13903 self.cfg.RemoveNodeGroup(self.group_uuid)
13904 except errors.ConfigurationError:
13905 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13906 (self.op.group_name, self.group_uuid))
13908 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13911 class LUGroupRename(LogicalUnit):
13912 HPATH = "group-rename"
13913 HTYPE = constants.HTYPE_GROUP
13916 def ExpandNames(self):
13917 # This raises errors.OpPrereqError on its own:
13918 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13920 self.needed_locks = {
13921 locking.LEVEL_NODEGROUP: [self.group_uuid],
13924 def CheckPrereq(self):
13925 """Check prerequisites.
13927 Ensures requested new name is not yet used.
13931 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13932 except errors.OpPrereqError:
13935 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13936 " node group (UUID: %s)" %
13937 (self.op.new_name, new_name_uuid),
13938 errors.ECODE_EXISTS)
13940 def BuildHooksEnv(self):
13941 """Build hooks env.
13945 "OLD_NAME": self.op.group_name,
13946 "NEW_NAME": self.op.new_name,
13949 def BuildHooksNodes(self):
13950 """Build hooks nodes.
13953 mn = self.cfg.GetMasterNode()
13955 all_nodes = self.cfg.GetAllNodesInfo()
13956 all_nodes.pop(mn, None)
13959 run_nodes.extend(node.name for node in all_nodes.values()
13960 if node.group == self.group_uuid)
13962 return (run_nodes, run_nodes)
13964 def Exec(self, feedback_fn):
13965 """Rename the node group.
13968 group = self.cfg.GetNodeGroup(self.group_uuid)
13971 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13972 (self.op.group_name, self.group_uuid))
13974 group.name = self.op.new_name
13975 self.cfg.Update(group, feedback_fn)
13977 return self.op.new_name
13980 class LUGroupEvacuate(LogicalUnit):
13981 HPATH = "group-evacuate"
13982 HTYPE = constants.HTYPE_GROUP
13985 def ExpandNames(self):
13986 # This raises errors.OpPrereqError on its own:
13987 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13989 if self.op.target_groups:
13990 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13991 self.op.target_groups)
13993 self.req_target_uuids = []
13995 if self.group_uuid in self.req_target_uuids:
13996 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13997 " as a target group (targets are %s)" %
13999 utils.CommaJoin(self.req_target_uuids)),
14000 errors.ECODE_INVAL)
14002 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14004 self.share_locks = _ShareAll()
14005 self.needed_locks = {
14006 locking.LEVEL_INSTANCE: [],
14007 locking.LEVEL_NODEGROUP: [],
14008 locking.LEVEL_NODE: [],
14011 def DeclareLocks(self, level):
14012 if level == locking.LEVEL_INSTANCE:
14013 assert not self.needed_locks[locking.LEVEL_INSTANCE]
14015 # Lock instances optimistically, needs verification once node and group
14016 # locks have been acquired
14017 self.needed_locks[locking.LEVEL_INSTANCE] = \
14018 self.cfg.GetNodeGroupInstances(self.group_uuid)
14020 elif level == locking.LEVEL_NODEGROUP:
14021 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14023 if self.req_target_uuids:
14024 lock_groups = set([self.group_uuid] + self.req_target_uuids)
14026 # Lock all groups used by instances optimistically; this requires going
14027 # via the node before it's locked, requiring verification later on
14028 lock_groups.update(group_uuid
14029 for instance_name in
14030 self.owned_locks(locking.LEVEL_INSTANCE)
14032 self.cfg.GetInstanceNodeGroups(instance_name))
14034 # No target groups, need to lock all of them
14035 lock_groups = locking.ALL_SET
14037 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14039 elif level == locking.LEVEL_NODE:
14040 # This will only lock the nodes in the group to be evacuated which
14041 # contain actual instances
14042 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14043 self._LockInstancesNodes()
14045 # Lock all nodes in group to be evacuated and target groups
14046 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14047 assert self.group_uuid in owned_groups
14048 member_nodes = [node_name
14049 for group in owned_groups
14050 for node_name in self.cfg.GetNodeGroup(group).members]
14051 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14053 def CheckPrereq(self):
14054 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14055 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14056 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14058 assert owned_groups.issuperset(self.req_target_uuids)
14059 assert self.group_uuid in owned_groups
14061 # Check if locked instances are still correct
14062 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14064 # Get instance information
14065 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14067 # Check if node groups for locked instances are still correct
14068 for instance_name in owned_instances:
14069 inst = self.instances[instance_name]
14070 assert owned_nodes.issuperset(inst.all_nodes), \
14071 "Instance %s's nodes changed while we kept the lock" % instance_name
14073 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
14076 assert self.group_uuid in inst_groups, \
14077 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
14079 if self.req_target_uuids:
14080 # User requested specific target groups
14081 self.target_uuids = self.req_target_uuids
14083 # All groups except the one to be evacuated are potential targets
14084 self.target_uuids = [group_uuid for group_uuid in owned_groups
14085 if group_uuid != self.group_uuid]
14087 if not self.target_uuids:
14088 raise errors.OpPrereqError("There are no possible target groups",
14089 errors.ECODE_INVAL)
14091 def BuildHooksEnv(self):
14092 """Build hooks env.
14096 "GROUP_NAME": self.op.group_name,
14097 "TARGET_GROUPS": " ".join(self.target_uuids),
14100 def BuildHooksNodes(self):
14101 """Build hooks nodes.
14104 mn = self.cfg.GetMasterNode()
14106 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14108 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14110 return (run_nodes, run_nodes)
14112 def Exec(self, feedback_fn):
14113 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14115 assert self.group_uuid not in self.target_uuids
14117 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14118 instances=instances, target_groups=self.target_uuids)
14120 ial.Run(self.op.iallocator)
14122 if not ial.success:
14123 raise errors.OpPrereqError("Can't compute group evacuation using"
14124 " iallocator '%s': %s" %
14125 (self.op.iallocator, ial.info),
14126 errors.ECODE_NORES)
14128 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14130 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14131 len(jobs), self.op.group_name)
14133 return ResultWithJobs(jobs)
14136 class TagsLU(NoHooksLU): # pylint: disable=W0223
14137 """Generic tags LU.
14139 This is an abstract class which is the parent of all the other tags LUs.
14142 def ExpandNames(self):
14143 self.group_uuid = None
14144 self.needed_locks = {}
14145 if self.op.kind == constants.TAG_NODE:
14146 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14147 self.needed_locks[locking.LEVEL_NODE] = self.op.name
14148 elif self.op.kind == constants.TAG_INSTANCE:
14149 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14150 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
14151 elif self.op.kind == constants.TAG_NODEGROUP:
14152 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14154 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14155 # not possible to acquire the BGL based on opcode parameters)
14157 def CheckPrereq(self):
14158 """Check prerequisites.
14161 if self.op.kind == constants.TAG_CLUSTER:
14162 self.target = self.cfg.GetClusterInfo()
14163 elif self.op.kind == constants.TAG_NODE:
14164 self.target = self.cfg.GetNodeInfo(self.op.name)
14165 elif self.op.kind == constants.TAG_INSTANCE:
14166 self.target = self.cfg.GetInstanceInfo(self.op.name)
14167 elif self.op.kind == constants.TAG_NODEGROUP:
14168 self.target = self.cfg.GetNodeGroup(self.group_uuid)
14170 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14171 str(self.op.kind), errors.ECODE_INVAL)
14174 class LUTagsGet(TagsLU):
14175 """Returns the tags of a given object.
  def ExpandNames(self):
    """Expand names via the base class, then mark all locks as shared."""
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation: nothing is modified, so
    # every lock declared by the base class can be taken in shared mode
    self.share_locks = _ShareAll()
  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    # self.target was resolved by TagsLU.CheckPrereq; return a plain list
    # copy of its tags
    return list(self.target.GetTags())
14193 class LUTagsSearch(NoHooksLU):
14194 """Searches the tags for a given pattern.
  def ExpandNames(self):
    # Searching tags only reads from the configuration; no locks are needed
    self.needed_locks = {}
14202 def CheckPrereq(self):
14203 """Check prerequisites.
14205 This checks the pattern passed for validity by compiling it.
14209 self.re = re.compile(self.op.pattern)
14210 except re.error, err:
14211 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14212 (self.op.pattern, err), errors.ECODE_INVAL)
14214 def Exec(self, feedback_fn):
14215 """Returns the tag list.
14219 tgts = [("/cluster", cfg.GetClusterInfo())]
14220 ilist = cfg.GetAllInstancesInfo().values()
14221 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14222 nlist = cfg.GetAllNodesInfo().values()
14223 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14224 tgts.extend(("/nodegroup/%s" % n.name, n)
14225 for n in cfg.GetAllNodeGroupsInfo().values())
14227 for path, target in tgts:
14228 for tag in target.GetTags():
14229 if self.re.search(tag):
14230 results.append((path, tag))
14234 class LUTagsSet(TagsLU):
14235 """Sets a tag on a given object.
14240 def CheckPrereq(self):
14241 """Check prerequisites.
14243 This checks the type and length of the tag name and value.
14246 TagsLU.CheckPrereq(self)
14247 for tag in self.op.tags:
14248 objects.TaggableObject.ValidateTag(tag)
14250 def Exec(self, feedback_fn):
14255 for tag in self.op.tags:
14256 self.target.AddTag(tag)
14257 except errors.TagError, err:
14258 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14259 self.cfg.Update(self.target, feedback_fn)
14262 class LUTagsDel(TagsLU):
14263 """Delete a list of tags from a given object.
14268 def CheckPrereq(self):
14269 """Check prerequisites.
14271 This checks that we have the given tag.
14274 TagsLU.CheckPrereq(self)
14275 for tag in self.op.tags:
14276 objects.TaggableObject.ValidateTag(tag)
14277 del_tags = frozenset(self.op.tags)
14278 cur_tags = self.target.GetTags()
14280 diff_tags = del_tags - cur_tags
14282 diff_names = ("'%s'" % i for i in sorted(diff_tags))
14283 raise errors.OpPrereqError("Tag(s) %s not found" %
14284 (utils.CommaJoin(diff_names), ),
14285 errors.ECODE_NOENT)
14287 def Exec(self, feedback_fn):
14288 """Remove the tag from the object.
14291 for tag in self.op.tags:
14292 self.target.RemoveTag(tag)
14293 self.cfg.Update(self.target, feedback_fn)
14296 class LUTestDelay(NoHooksLU):
14297 """Sleep for a specified amount of time.
14299 This LU sleeps on the master and/or nodes for a specified amount of
14305 def ExpandNames(self):
14306 """Expand names and set required locks.
14308 This expands the node list, if any.
14311 self.needed_locks = {}
14312 if self.op.on_nodes:
14313 # _GetWantedNodes can be used here, but is not always appropriate to use
14314 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14315 # more information.
14316 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14317 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14319 def _TestDelay(self):
14320 """Do the actual sleep.
14323 if self.op.on_master:
14324 if not utils.TestDelay(self.op.duration):
14325 raise errors.OpExecError("Error during master delay test")
14326 if self.op.on_nodes:
14327 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14328 for node, node_result in result.items():
14329 node_result.Raise("Failure during rpc call to node %s" % node)
14331 def Exec(self, feedback_fn):
14332 """Execute the test delay opcode, with the wanted repetitions.
14335 if self.op.repeat == 0:
14338 top_value = self.op.repeat - 1
14339 for i in range(self.op.repeat):
14340 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
# Test LU exercising the job queue: it can notify an external test client at
# well-defined phases (ExpandNames / Exec / per-log-message) over a temporary
# Unix socket, so the client can observe job-queue behaviour.
# NOTE(review): gaps in the embedded numbering mark source lines elided from
# this excerpt (e.g. the @classmethod decorator before _NotifyUsingSocket and
# several try/finally lines); comments below are hedged accordingly.
14344 class LUTestJqueue(NoHooksLU):
14345 """Utility LU to test some aspects of the job queue.
# Timeouts (seconds) for the test client; deliberately shorter than the
# WaitForJobChange default so changed jobs are noticed (see comment below).
14350 # Must be lower than default timeout for WaitForJobChange to see whether it
14351 # notices changed jobs
14352 _CLIENT_CONNECT_TIMEOUT = 20.0
14353 _CLIENT_CONFIRM_TIMEOUT = 60.0
# Presumably a @classmethod (decorator elided): creates a Unix socket in a
# fresh temp dir, hands its path to `cb`, then blocks until the client
# connects and later closes the connection.
14356 def _NotifyUsingSocket(cls, cb, errcls):
14357 """Opens a Unix socket and waits for another program to connect.
14360 @param cb: Callback to send socket name to client
14361 @type errcls: class
14362 @param errcls: Exception class to use for errors
14365 # Using a temporary directory as there's no easy way to create temporary
14366 # sockets without writing a custom loop around tempfile.mktemp and
14368 tmpdir = tempfile.mkdtemp()
14370 tmpsock = utils.PathJoin(tmpdir, "sock")
14372 logging.debug("Creating temporary socket at %s", tmpsock)
14373 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
# (bind/listen on tmpsock happen in elided lines 14374-14377)
14378 # Send details to client
14381 # Wait for client to connect before continuing
14382 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14384 (conn, _) = sock.accept()
# Python 2 except syntax; a connect timeout surfaces as the caller-chosen
# exception class (OpPrereqError or OpExecError, see _Notify).
14385 except socket.error, err:
14386 raise errcls("Client didn't connect in time (%s)" % err)
14390 # Remove as soon as client is connected
# The socket file is unlinked with its directory here; the established
# connection stays usable.
14391 shutil.rmtree(tmpdir)
14393 # Wait for client to close
14396 # pylint: disable=E1101
14397 # Instance of '_socketobject' has no ... member
14398 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14400 except socket.error, err:
14401 raise errcls("Client failed to confirm notification (%s)" % err)
# Emits an ELOG_JQUEUE_TEST log entry carrying (sockname, test, arg);
# this is the callback passed to _NotifyUsingSocket via compat.partial.
14405 def _SendNotification(self, test, arg, sockname):
14406 """Sends a notification to the client.
14409 @param test: Test name
14410 @param arg: Test argument (depends on test)
14411 @type sockname: string
14412 @param sockname: Socket path
14415 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
# Chooses the failure exception by phase: prereq-phase problems raise
# OpPrereqError, exec-phase problems raise OpExecError.
14417 def _Notify(self, prereq, test, arg):
14418 """Notifies the client of a test.
14421 @param prereq: Whether this is a prereq-phase test
14423 @param test: Test name
14424 @param arg: Test argument (depends on test)
14428 errcls = errors.OpPrereqError
14430 errcls = errors.OpExecError
14432 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
# Counts invocations so ExpandNames/Exec can assert the LCM calling
# contract (CheckArguments before ExpandNames before Exec).
14436 def CheckArguments(self):
14437 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14438 self.expandnames_calls = 0
14440 def ExpandNames(self):
14441 checkargs_calls = getattr(self, "checkargs_calls", 0)
14442 if checkargs_calls < 1:
14443 raise errors.ProgrammerError("CheckArguments was not called")
14445 self.expandnames_calls += 1
14447 if self.op.notify_waitlock:
14448 self._Notify(True, constants.JQT_EXPANDNAMES, None)
14450 self.LogInfo("Expanding names")
14452 # Get lock on master node (just to get a lock, not for a particular reason)
14453 self.needed_locks = {
14454 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14457 def Exec(self, feedback_fn):
14458 if self.expandnames_calls < 1:
14459 raise errors.ProgrammerError("ExpandNames was not called")
14461 if self.op.notify_exec:
14462 self._Notify(False, constants.JQT_EXEC, None)
14464 self.LogInfo("Executing")
# Optionally stream test log messages to the client, announcing the count
# first (JQT_STARTMSG) and confirming how many were sent (JQT_LOGMSG).
14466 if self.op.log_messages:
14467 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14468 for idx, msg in enumerate(self.op.log_messages):
14469 self.LogInfo("Sending log message %s", idx + 1)
14470 feedback_fn(constants.JQT_MSGPREFIX + msg)
14471 # Report how many test messages have been sent
14472 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
# Deliberate failure path (guarded by an elided condition, presumably
# self.op.fail — TODO confirm) for testing job failure handling.
14475 raise errors.OpExecError("Opcode failure was requested")
# Framework driving an external "iallocator" script: gathers cluster state,
# serializes it plus a mode-specific request to JSON, runs the script on the
# master node via RPC, and validates/stores the parsed response.
# NOTE(review): gaps in the embedded numbering mark source lines elided from
# this excerpt (decorators, try/except framing, some dict literals); comments
# below only describe what the visible lines establish.
14480 class IAllocator(object):
14481 """IAllocator framework.
14483 An IAllocator instance has three sets of attributes:
14484 - cfg that is needed to query the cluster
14485 - input data (all members of the _KEYS class attribute are required)
14486 - four buffer attributes (in|out_data|text), that represent the
14487 input (to the external script) in text and data structure format,
14488 and the output from it, again in two formats
14489 - the result variables from the script (success, info, nodes) for
14493 # pylint: disable=R0902
14494 # lots of instance attributes
# Mode-specific kwargs are validated against _MODE_DATA's key list: unknown
# keys and missing keys both raise ProgrammerError.
14496 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14498 self.rpc = rpc_runner
14499 # init buffer variables
14500 self.in_text = self.out_text = self.in_data = self.out_data = None
14501 # init all input fields so that pylint is happy
14503 self.memory = self.disks = self.disk_template = None
14504 self.os = self.tags = self.nics = self.vcpus = None
14505 self.hypervisor = None
14506 self.relocate_from = None
14508 self.instances = None
14509 self.evac_mode = None
14510 self.target_groups = []
14512 self.required_nodes = None
14513 # init result fields
14514 self.success = self.info = self.result = None
# Look up (request-builder fn, key schema, result validator) for the mode;
# the KeyError branch (elided try/except) reports an unknown mode.
14517 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14519 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14520 " IAllocator" % self.mode)
14522 keyset = [n for (n, _) in keydata]
14525 if key not in keyset:
14526 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14527 " IAllocator" % key)
14528 setattr(self, key, kwargs[key])
14531 if key not in kwargs:
14532 raise errors.ProgrammerError("Missing input parameter '%s' to"
14533 " IAllocator" % key)
14534 self._BuildInputData(compat.partial(fn, self), keydata)
# Builds the mode-independent part of the input: cluster metadata, node
# groups, per-node static+dynamic data and per-instance data; stores the
# resulting dict in self.in_data.
14536 def _ComputeClusterData(self):
14537 """Compute the generic allocator input data.
14539 This is the data that is independent of the actual operation.
14543 cluster_info = cfg.GetClusterInfo()
14546 "version": constants.IALLOCATOR_VERSION,
14547 "cluster_name": cfg.GetClusterName(),
14548 "cluster_tags": list(cluster_info.GetTags()),
14549 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14550 "ipolicy": cluster_info.ipolicy,
14552 ninfo = cfg.GetAllNodesInfo()
14553 iinfo = cfg.GetAllInstancesInfo().values()
14554 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
# Only vm_capable nodes are queried via RPC.
14557 node_list = [n.name for n in ninfo.values() if n.vm_capable]
# Hypervisor selection depends on the request mode; for relocation it is
# taken from the instance being relocated.
14559 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14560 hypervisor_name = self.hypervisor
14561 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14562 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14564 hypervisor_name = cluster_info.primary_hypervisor
14566 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14569 self.rpc.call_all_instances_info(node_list,
14570 cluster_info.enabled_hypervisors)
14572 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14574 config_ndata = self._ComputeBasicNodeData(cfg, ninfo)
14575 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14576 i_list, config_ndata)
14577 assert len(data["nodes"]) == len(ninfo), \
14578 "Incomplete node data computed"
14580 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14582 self.in_data = data
# Presumably a @staticmethod (decorator elided): maps group UUID to the
# per-group dict the allocator protocol expects.
14585 def _ComputeNodeGroupData(cfg):
14586 """Compute node groups data.
14589 cluster = cfg.GetClusterInfo()
14590 ng = dict((guuid, {
14591 "name": gdata.name,
14592 "alloc_policy": gdata.alloc_policy,
14593 "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14595 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
# Static (config-only) per-node data; no RPC involved.
14600 def _ComputeBasicNodeData(cfg, node_cfg):
14601 """Compute global node data.
14604 @returns: a dict of name: (node dict, node config)
14607 # fill in static (config-based) values
14608 node_results = dict((ninfo.name, {
14609 "tags": list(ninfo.GetTags()),
14610 "primary_ip": ninfo.primary_ip,
14611 "secondary_ip": ninfo.secondary_ip,
14612 "offline": ninfo.offline,
14613 "drained": ninfo.drained,
14614 "master_candidate": ninfo.master_candidate,
14615 "group": ninfo.group,
14616 "master_capable": ninfo.master_capable,
14617 "vm_capable": ninfo.vm_capable,
14618 "ndparams": cfg.GetNdParams(ninfo),
14620 for ninfo in node_cfg.values())
14622 return node_results
# Merges live RPC data (memory/disk/cpu, running instances) into the static
# per-node dicts; offline/drained nodes keep static data only.
14625 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14627 """Compute global node data.
14629 @param node_results: the basic node structures as filled from the config
14632 #TODO(dynmem): compute the right data on MAX and MIN memory
14633 # make a copy of the current dict
14634 node_results = dict(node_results)
14635 for nname, nresult in node_data.items():
14636 assert nname in node_results, "Missing basic data for node %s" % nname
14637 ninfo = node_cfg[nname]
14639 if not (ninfo.offline or ninfo.drained):
14640 nresult.Raise("Can't get data for node %s" % nname)
14641 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14643 remote_info = _MakeLegacyNodeInfo(nresult.payload)
# Validate all required numeric attributes before using them.
14645 for attr in ["memory_total", "memory_free", "memory_dom0",
14646 "vg_size", "vg_free", "cpu_total"]:
14647 if attr not in remote_info:
14648 raise errors.OpExecError("Node '%s' didn't return attribute"
14649 " '%s'" % (nname, attr))
14650 if not isinstance(remote_info[attr], int):
14651 raise errors.OpExecError("Node '%s' returned invalid value"
14653 (nname, attr, remote_info[attr]))
14654 # compute memory used by primary instances
14655 i_p_mem = i_p_up_mem = 0
14656 for iinfo, beinfo in i_list:
14657 if iinfo.primary_node == nname:
14658 i_p_mem += beinfo[constants.BE_MAXMEM]
14659 if iinfo.name not in node_iinfo[nname].payload:
# For running instances, reserve the gap between configured maximum
# memory and currently-used memory out of the node's free memory.
14662 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14663 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14664 remote_info["memory_free"] -= max(0, i_mem_diff)
14666 if iinfo.admin_state == constants.ADMINST_UP:
14667 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14669 # compute memory used by instances
14671 "total_memory": remote_info["memory_total"],
14672 "reserved_memory": remote_info["memory_dom0"],
14673 "free_memory": remote_info["memory_free"],
14674 "total_disk": remote_info["vg_size"],
14675 "free_disk": remote_info["vg_free"],
14676 "total_cpus": remote_info["cpu_total"],
14677 "i_pri_memory": i_p_mem,
14678 "i_pri_up_memory": i_p_up_mem,
# Static values win over dynamic ones on key collision (update with the
# static dict last).
14680 pnr_dyn.update(node_results[nname])
14681 node_results[nname] = pnr_dyn
14683 return node_results
# Per-instance data keyed by instance name, including NIC and disk details.
14686 def _ComputeInstanceData(cluster_info, i_list):
14687 """Compute global instance data.
14691 for iinfo, beinfo in i_list:
14693 for nic in iinfo.nics:
14694 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14698 "mode": filled_params[constants.NIC_MODE],
14699 "link": filled_params[constants.NIC_LINK],
# Legacy "bridge" key kept for bridged NICs (older allocator scripts).
14701 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14702 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14703 nic_data.append(nic_dict)
14705 "tags": list(iinfo.GetTags()),
14706 "admin_state": iinfo.admin_state,
14707 "vcpus": beinfo[constants.BE_VCPUS],
14708 "memory": beinfo[constants.BE_MAXMEM],
14710 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14712 "disks": [{constants.IDISK_SIZE: dsk.size,
14713 constants.IDISK_MODE: dsk.mode}
14714 for dsk in iinfo.disks],
14715 "disk_template": iinfo.disk_template,
14716 "hypervisor": iinfo.hypervisor,
14718 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14720 instance_data[iinfo.name] = pir
14722 return instance_data
# Request builder for IALLOCATOR_MODE_ALLOC.
14724 def _AddNewInstance(self):
14725 """Add new instance data to allocator structure.
14727 This in combination with _AllocatorGetClusterData will create the
14728 correct structure needed as input for the allocator.
14730 The checks for the completeness of the opcode must have already been
14734 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
# Internally-mirrored templates (e.g. DRBD) need two nodes, all others one.
14736 if self.disk_template in constants.DTS_INT_MIRROR:
14737 self.required_nodes = 2
14739 self.required_nodes = 1
14743 "disk_template": self.disk_template,
14746 "vcpus": self.vcpus,
14747 "memory": self.memory,
14748 "disks": self.disks,
14749 "disk_space_total": disk_space,
14751 "required_nodes": self.required_nodes,
14752 "hypervisor": self.hypervisor,
# Request builder for IALLOCATOR_MODE_RELOC.
14757 def _AddRelocateInstance(self):
14758 """Add relocate instance data to allocator structure.
14760 This in combination with _IAllocatorGetClusterData will create the
14761 correct structure needed as input for the allocator.
14763 The checks for the completeness of the opcode must have already been
14767 instance = self.cfg.GetInstanceInfo(self.name)
14768 if instance is None:
14769 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14770 " IAllocator" % self.name)
14772 if instance.disk_template not in constants.DTS_MIRRORED:
14773 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14774 errors.ECODE_INVAL)
14776 if instance.disk_template in constants.DTS_INT_MIRROR and \
14777 len(instance.secondary_nodes) != 1:
14778 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14779 errors.ECODE_STATE)
14781 self.required_nodes = 1
14782 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14783 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14787 "disk_space_total": disk_space,
14788 "required_nodes": self.required_nodes,
14789 "relocate_from": self.relocate_from,
# Request builder for IALLOCATOR_MODE_NODE_EVAC.
14793 def _AddNodeEvacuate(self):
14794 """Get data for node-evacuate requests.
14798 "instances": self.instances,
14799 "evac_mode": self.evac_mode,
# Request builder for IALLOCATOR_MODE_CHG_GROUP.
14802 def _AddChangeGroup(self):
14803 """Get data for group-change requests.
14807 "instances": self.instances,
14808 "target_groups": self.target_groups,
# Assembles the full input: generic cluster data plus the mode-specific
# request built by `fn`, validated key-by-key against the `keydata` schema,
# then serialized to self.in_text.
14811 def _BuildInputData(self, fn, keydata):
14812 """Build input data structures.
14815 self._ComputeClusterData()
14818 request["type"] = self.mode
14819 for keyname, keytype in keydata:
14820 if keyname not in request:
14821 raise errors.ProgrammerError("Request parameter %s is missing" %
14823 val = request[keyname]
14824 if not keytype(val):
14825 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14826 " validation, value %s, expected"
14827 " type %s" % (keyname, val, keytype))
14828 self.in_data["request"] = request
14830 self.in_text = serializer.Dump(self.in_data)
# Class-level ht.* validators for allocator results (the _MODE_DATA table
# below maps each mode to its builder, key schema and result validator).
14832 _STRING_LIST = ht.TListOf(ht.TString)
14833 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14834 # pylint: disable=E1101
14835 # Class '...' has no 'OP_ID' member
14836 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14837 opcodes.OpInstanceMigrate.OP_ID,
14838 opcodes.OpInstanceReplaceDisks.OP_ID])
14842 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14843 ht.TItems([ht.TNonEmptyString,
14844 ht.TNonEmptyString,
14845 ht.TListOf(ht.TNonEmptyString),
14848 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14849 ht.TItems([ht.TNonEmptyString,
14852 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14853 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14856 constants.IALLOCATOR_MODE_ALLOC:
14859 ("name", ht.TString),
14860 ("memory", ht.TInt),
14861 ("disks", ht.TListOf(ht.TDict)),
14862 ("disk_template", ht.TString),
14863 ("os", ht.TString),
14864 ("tags", _STRING_LIST),
14865 ("nics", ht.TListOf(ht.TDict)),
14866 ("vcpus", ht.TInt),
14867 ("hypervisor", ht.TString),
14869 constants.IALLOCATOR_MODE_RELOC:
14870 (_AddRelocateInstance,
14871 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14873 constants.IALLOCATOR_MODE_NODE_EVAC:
14874 (_AddNodeEvacuate, [
14875 ("instances", _STRING_LIST),
14876 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14878 constants.IALLOCATOR_MODE_CHG_GROUP:
14879 (_AddChangeGroup, [
14880 ("instances", _STRING_LIST),
14881 ("target_groups", _STRING_LIST),
# Runs the named allocator script on the master node; `call_fn` is
# injectable for testing and defaults to the iallocator RPC.
14885 def Run(self, name, validate=True, call_fn=None):
14886 """Run an instance allocator and return the results.
14889 if call_fn is None:
14890 call_fn = self.rpc.call_iallocator_runner
14892 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14893 result.Raise("Failure while running the iallocator script")
14895 self.out_text = result.payload
# Validation is presumably gated on `validate` (elided condition) —
# LUTestAllocator calls Run(..., validate=False).
14897 self._ValidateResult()
# Parses self.out_text, checks the mandatory keys and the mode-specific
# result shape, and stores the parsed dict in self.out_data.
14899 def _ValidateResult(self):
14900 """Process the allocator results.
14902 This will process and if successful save the result in
14903 self.out_data and the other parameters.
14907 rdict = serializer.Load(self.out_text)
14908 except Exception, err:
14909 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14911 if not isinstance(rdict, dict):
14912 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14914 # TODO: remove backwards compatibility in later versions
14915 if "nodes" in rdict and "result" not in rdict:
14916 rdict["result"] = rdict["nodes"]
14919 for key in "success", "info", "result":
14920 if key not in rdict:
14921 raise errors.OpExecError("Can't parse iallocator results:"
14922 " missing key '%s'" % key)
14923 setattr(self, key, rdict[key])
14925 if not self._result_check(self.result):
# NOTE(review): errors.ECODE_INVAL is passed as an extra positional
# argument to OpExecError; unlike OpPrereqError, OpExecError does not
# define an errcode parameter — confirm and drop/convert upstream.
14926 raise errors.OpExecError("Iallocator returned invalid result,"
14927 " expected %s, got %s" %
14928 (self._result_check, self.result),
14929 errors.ECODE_INVAL)
# Relocation results must stay within the instance's original node groups.
14931 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14932 assert self.relocate_from is not None
14933 assert self.required_nodes == 1
14935 node2group = dict((name, ndata["group"])
14936 for (name, ndata) in self.in_data["nodes"].items())
14938 fn = compat.partial(self._NodesToGroups, node2group,
14939 self.in_data["nodegroups"])
14941 instance = self.cfg.GetInstanceInfo(self.name)
14942 request_groups = fn(self.relocate_from + [instance.primary_node])
14943 result_groups = fn(rdict["result"] + [instance.primary_node])
14945 if self.success and not set(result_groups).issubset(request_groups):
14946 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14947 " differ from original groups (%s)" %
14948 (utils.CommaJoin(result_groups),
14949 utils.CommaJoin(request_groups)))
14951 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14952 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14954 self.out_data = rdict
# Presumably a @staticmethod (decorator elided): resolves node names to a
# sorted list of unique group names, tolerating unknown nodes and unknown
# groups (falling back to the raw UUID).
14957 def _NodesToGroups(node2group, groups, nodes):
14958 """Returns a list of unique group names for a list of nodes.
14960 @type node2group: dict
14961 @param node2group: Map from node name to group UUID
14963 @param groups: Group information
14965 @param nodes: Node names
14972 group_uuid = node2group[node]
14974 # Ignore unknown node
14978 group = groups[group_uuid]
14980 # Can't find group, let's use UUID
14981 group_name = group_uuid
14983 group_name = group["name"]
14985 result.add(group_name)
14987 return sorted(result)
# Test LU wrapping the IAllocator framework: validates mode-specific opcode
# parameters, builds an IAllocator request and either returns the generated
# input text (direction "in") or runs the named allocator and returns its
# raw output (direction "out").
# NOTE(review): gaps in the embedded numbering mark source lines elided from
# this excerpt (e.g. some keyword arguments of the IAllocator calls and the
# final `return`); comments below are hedged accordingly.
14990 class LUTestAllocator(NoHooksLU):
14991 """Run allocator tests.
14993 This LU runs the allocator tests
14996 def CheckPrereq(self):
14997 """Check prerequisites.
14999 This checks the opcode parameters depending on the director and mode test.
# ALLOC mode: all instance-description attributes must be present and the
# name must not collide with an existing instance.
15002 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15003 for attr in ["memory", "disks", "disk_template",
15004 "os", "tags", "nics", "vcpus"]:
15005 if not hasattr(self.op, attr):
15006 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15007 attr, errors.ECODE_INVAL)
15008 iname = self.cfg.ExpandInstanceName(self.op.name)
15009 if iname is not None:
15010 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15011 iname, errors.ECODE_EXISTS)
15012 if not isinstance(self.op.nics, list):
15013 raise errors.OpPrereqError("Invalid parameter 'nics'",
15014 errors.ECODE_INVAL)
15015 if not isinstance(self.op.disks, list):
15016 raise errors.OpPrereqError("Invalid parameter 'disks'",
15017 errors.ECODE_INVAL)
# Each disk must be a dict with an int size and a valid access mode.
15018 for row in self.op.disks:
15019 if (not isinstance(row, dict) or
15020 constants.IDISK_SIZE not in row or
15021 not isinstance(row[constants.IDISK_SIZE], int) or
15022 constants.IDISK_MODE not in row or
15023 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15024 raise errors.OpPrereqError("Invalid contents of the 'disks'"
15025 " parameter", errors.ECODE_INVAL)
15026 if self.op.hypervisor is None:
15027 self.op.hypervisor = self.cfg.GetHypervisorType()
# RELOC mode: expand the instance name and default relocate_from to its
# current secondary nodes.
15028 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15029 fname = _ExpandInstanceName(self.cfg, self.op.name)
15030 self.op.name = fname
15031 self.relocate_from = \
15032 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15033 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15034 constants.IALLOCATOR_MODE_NODE_EVAC):
15035 if not self.op.instances:
15036 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15037 self.op.instances = _GetWantedInstances(self, self.op.instances)
15039 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15040 self.op.mode, errors.ECODE_INVAL)
# Direction "out" additionally needs the allocator script's name.
15042 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15043 if self.op.allocator is None:
15044 raise errors.OpPrereqError("Missing allocator name",
15045 errors.ECODE_INVAL)
15046 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15047 raise errors.OpPrereqError("Wrong allocator test '%s'" %
15048 self.op.direction, errors.ECODE_INVAL)
# Builds the mode-appropriate IAllocator and returns either its input text
# or (after running the script) its output text.
15050 def Exec(self, feedback_fn):
15051 """Run the allocator test.
15054 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15055 ial = IAllocator(self.cfg, self.rpc,
15058 memory=self.op.memory,
15059 disks=self.op.disks,
15060 disk_template=self.op.disk_template,
15064 vcpus=self.op.vcpus,
15065 hypervisor=self.op.hypervisor,
15067 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15068 ial = IAllocator(self.cfg, self.rpc,
15071 relocate_from=list(self.relocate_from),
15073 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15074 ial = IAllocator(self.cfg, self.rpc,
15076 instances=self.op.instances,
15077 target_groups=self.op.target_groups)
15078 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15079 ial = IAllocator(self.cfg, self.rpc,
15081 instances=self.op.instances,
15082 evac_mode=self.op.evac_mode)
# Unreachable if CheckPrereq ran, but kept as a programmer guard.
15084 raise errors.ProgrammerError("Uncatched mode %s in"
15085 " LUTestAllocator.Exec", self.op.mode)
15087 if self.op.direction == constants.IALLOCATOR_DIR_IN:
15088 result = ial.in_text
# validate=False: the test LU returns the raw script output unvalidated.
15090 ial.Run(self.op.allocator, validate=False)
15091 result = ial.out_text
# Module-level dispatch table from query resource constant to its query
# implementation class; the assert pins it to exactly the resources that are
# queryable via opcode (constants.QR_VIA_OP).
15095 #: Query type implementations
15097 constants.QR_INSTANCE: _InstanceQuery,
15098 constants.QR_NODE: _NodeQuery,
15099 constants.QR_GROUP: _GroupQuery,
15100 constants.QR_OS: _OsQuery,
15103 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
# Dispatch helper: look up the query implementation class for a resource
# name, converting an unknown name (elided KeyError handler) into an
# OpPrereqError for the caller.
15106 def _GetQueryImplementation(name):
15107 """Returns the implementation for a query type.
15109 @param name: Query type, must be one of L{constants.QR_VIA_OP}
15113 return _QUERY_IMPL[name]
15115 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15116 errors.ECODE_INVAL)