code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62 from ganeti import rpc
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  72 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  73 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  74
  75 #: Instance status in which an instance can be marked as offline/online
  76 CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  77   constants.ADMINST_OFFLINE,
  78   ]))
  79
  80
  81 class ResultWithJobs:
  82   """Data container for LU results with jobs.
  83
  84   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  85   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  86   contained in the C{jobs} attribute and include the job IDs in the opcode
  87   result.
  88
  89   """
  90   def __init__(self, jobs, **kwargs):
  91     """Initializes this class.
  92
  93     Additional return values can be specified as keyword arguments.
  94
  95     @type jobs: list of lists of L{opcode.OpCode}
  96     @param jobs: A list of lists of opcode objects
  97
  98     """
  99     self.jobs = jobs
 100     self.other = kwargs
 101
 102
 103 class LogicalUnit(object):
 104   """Logical Unit base class.
 105
 106   Subclasses must follow these rules:
 107     - implement ExpandNames
 108     - implement CheckPrereq (except when tasklets are used)
 109     - implement Exec (except when tasklets are used)
 110     - implement BuildHooksEnv
 111     - implement BuildHooksNodes
 112     - redefine HPATH and HTYPE
 113     - optionally redefine their run requirements:
 114         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 115
 116   Note that all commands require root permissions.
 117
 118   @ivar dry_run_result: the value (if any) that will be returned to the caller
 119       in dry-run mode (signalled by opcode dry_run parameter)
 120
 121   """
 122   HPATH = None
 123   HTYPE = None
 124   REQ_BGL = True
 125
 126   def __init__(self, processor, op, context, rpc_runner):
 127     """Constructor for LogicalUnit.
 128
 129     This needs to be overridden in derived classes in order to check op
 130     validity.
 131
 132     """
 133     self.proc = processor
 134     self.op = op
 135     self.cfg = context.cfg
 136     self.glm = context.glm
 137     # readability alias
 138     self.owned_locks = context.glm.list_owned
 139     self.context = context
 140     self.rpc = rpc_runner
 141     # Dicts used to declare locking needs to mcpu
 142     self.needed_locks = None
 143     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 144     self.add_locks = {}
 145     self.remove_locks = {}
 146     # Used to force good behavior when calling helper functions
 147     self.recalculate_locks = {}
 148     # logging
 149     self.Log = processor.Log # pylint: disable=C0103
 150     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 151     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 152     self.LogStep = processor.LogStep # pylint: disable=C0103
 153     # support for dry-run
 154     self.dry_run_result = None
 155     # support for generic debug attribute
 156     if (not hasattr(self.op, "debug_level") or
 157         not isinstance(self.op.debug_level, int)):
 158       self.op.debug_level = 0
 159
 160     # Tasklets
 161     self.tasklets = None
 162
 163     # Validate opcode parameters and set defaults
 164     self.op.Validate(True)
 165
 166     self.CheckArguments()
 167
 168   def CheckArguments(self):
 169     """Check syntactic validity for the opcode arguments.
 170
 171     This method is for doing a simple syntactic check and ensure
 172     validity of opcode parameters, without any cluster-related
 173     checks. While the same can be accomplished in ExpandNames and/or
 174     CheckPrereq, doing these separate is better because:
 175
 176       - ExpandNames is left as as purely a lock-related function
 177       - CheckPrereq is run after we have acquired locks (and possible
 178         waited for them)
 179
 180     The function is allowed to change the self.op attribute so that
 181     later methods can no longer worry about missing parameters.
 182
 183     """
 184     pass
 185
 186   def ExpandNames(self):
 187     """Expand names for this LU.
 188
 189     This method is called before starting to execute the opcode, and it should
 190     update all the parameters of the opcode to their canonical form (e.g. a
 191     short node name must be fully expanded after this method has successfully
 192     completed). This way locking, hooks, logging, etc. can work correctly.
 193
 194     LUs which implement this method must also populate the self.needed_locks
 195     member, as a dict with lock levels as keys, and a list of needed lock names
 196     as values. Rules:
 197
 198       - use an empty dict if you don't need any lock
 199       - if you don't need any lock at a particular level omit that level
 200       - don't put anything for the BGL level
 201       - if you want all locks at a level use locking.ALL_SET as a value
 202
 203     If you need to share locks (rather than acquire them exclusively) at one
 204     level you can modify self.share_locks, setting a true value (usually 1) for
 205     that level. By default locks are not shared.
 206
 207     This function can also define a list of tasklets, which then will be
 208     executed in order instead of the usual LU-level CheckPrereq and Exec
 209     functions, if those are not defined by the LU.
 210
 211     Examples::
 212
 213       # Acquire all nodes and one instance
 214       self.needed_locks = {
 215         locking.LEVEL_NODE: locking.ALL_SET,
 216         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 217       }
 218       # Acquire just two nodes
 219       self.needed_locks = {
 220         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 221       }
 222       # Acquire no locks
 223       self.needed_locks = {} # No, you can't leave it to the default value None
 224
 225     """
 226     # The implementation of this method is mandatory only if the new LU is
 227     # concurrent, so that old LUs don't need to be changed all at the same
 228     # time.
 229     if self.REQ_BGL:
 230       self.needed_locks = {} # Exclusive LUs don't need locks.
 231     else:
 232       raise NotImplementedError
 233
 234   def DeclareLocks(self, level):
 235     """Declare LU locking needs for a level
 236
 237     While most LUs can just declare their locking needs at ExpandNames time,
 238     sometimes there's the need to calculate some locks after having acquired
 239     the ones before. This function is called just before acquiring locks at a
 240     particular level, but after acquiring the ones at lower levels, and permits
 241     such calculations. It can be used to modify self.needed_locks, and by
 242     default it does nothing.
 243
 244     This function is only called if you have something already set in
 245     self.needed_locks for the level.
 246
 247     @param level: Locking level which is going to be locked
 248     @type level: member of ganeti.locking.LEVELS
 249
 250     """
 251
 252   def CheckPrereq(self):
 253     """Check prerequisites for this LU.
 254
 255     This method should check that the prerequisites for the execution
 256     of this LU are fulfilled. It can do internode communication, but
 257     it should be idempotent - no cluster or system changes are
 258     allowed.
 259
 260     The method should raise errors.OpPrereqError in case something is
 261     not fulfilled. Its return value is ignored.
 262
 263     This method should also update all the parameters of the opcode to
 264     their canonical form if it hasn't been done by ExpandNames before.
 265
 266     """
 267     if self.tasklets is not None:
 268       for (idx, tl) in enumerate(self.tasklets):
 269         logging.debug("Checking prerequisites for tasklet %s/%s",
 270                       idx + 1, len(self.tasklets))
 271         tl.CheckPrereq()
 272     else:
 273       pass
 274
 275   def Exec(self, feedback_fn):
 276     """Execute the LU.
 277
 278     This method should implement the actual work. It should raise
 279     errors.OpExecError for failures that are somewhat dealt with in
 280     code, or expected.
 281
 282     """
 283     if self.tasklets is not None:
 284       for (idx, tl) in enumerate(self.tasklets):
 285         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 286         tl.Exec(feedback_fn)
 287     else:
 288       raise NotImplementedError
 289
 290   def BuildHooksEnv(self):
 291     """Build hooks environment for this LU.
 292
 293     @rtype: dict
 294     @return: Dictionary containing the environment that will be used for
 295       running the hooks for this LU. The keys of the dict must not be prefixed
 296       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 297       will extend the environment with additional variables. If no environment
 298       should be defined, an empty dictionary should be returned (not C{None}).
 299     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 300       will not be called.
 301
 302     """
 303     raise NotImplementedError
 304
 305   def BuildHooksNodes(self):
 306     """Build list of nodes to run LU's hooks.
 307
 308     @rtype: tuple; (list, list)
 309     @return: Tuple containing a list of node names on which the hook
 310       should run before the execution and a list of node names on which the
 311       hook should run after the execution. No nodes should be returned as an
 312       empty list (and not None).
 313     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 314       will not be called.
 315
 316     """
 317     raise NotImplementedError
 318
 319   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 320     """Notify the LU about the results of its hooks.
 321
 322     This method is called every time a hooks phase is executed, and notifies
 323     the Logical Unit about the hooks' result. The LU can then use it to alter
 324     its result based on the hooks.  By default the method does nothing and the
 325     previous result is passed back unchanged but any LU can define it if it
 326     wants to use the local cluster hook-scripts somehow.
 327
 328     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 329         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 330     @param hook_results: the results of the multi-node hooks rpc call
 331     @param feedback_fn: function used send feedback back to the caller
 332     @param lu_result: the previous Exec result this LU had, or None
 333         in the PRE phase
 334     @return: the new Exec result, based on the previous result
 335         and hook results
 336
 337     """
 338     # API must be kept, thus we ignore the unused argument and could
 339     # be a function warnings
 340     # pylint: disable=W0613,R0201
 341     return lu_result
 342
 343   def _ExpandAndLockInstance(self):
 344     """Helper function to expand and lock an instance.
 345
 346     Many LUs that work on an instance take its name in self.op.instance_name
 347     and need to expand it and then declare the expanded name for locking. This
 348     function does it, and then updates self.op.instance_name to the expanded
 349     name. It also initializes needed_locks as a dict, if this hasn't been done
 350     before.
 351
 352     """
 353     if self.needed_locks is None:
 354       self.needed_locks = {}
 355     else:
 356       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 357         "_ExpandAndLockInstance called with instance-level locks set"
 358     self.op.instance_name = _ExpandInstanceName(self.cfg,
 359                                                 self.op.instance_name)
 360     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 361
 362   def _LockInstancesNodes(self, primary_only=False,
 363                           level=locking.LEVEL_NODE):
 364     """Helper function to declare instances' nodes for locking.
 365
 366     This function should be called after locking one or more instances to lock
 367     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 368     with all primary or secondary nodes for instances already locked and
 369     present in self.needed_locks[locking.LEVEL_INSTANCE].
 370
 371     It should be called from DeclareLocks, and for safety only works if
 372     self.recalculate_locks[locking.LEVEL_NODE] is set.
 373
 374     In the future it may grow parameters to just lock some instance's nodes, or
 375     to just lock primaries or secondary nodes, if needed.
 376
 377     If should be called in DeclareLocks in a way similar to::
 378
 379       if level == locking.LEVEL_NODE:
 380         self._LockInstancesNodes()
 381
 382     @type primary_only: boolean
 383     @param primary_only: only lock primary nodes of locked instances
 384     @param level: Which lock level to use for locking nodes
 385
 386     """
 387     assert level in self.recalculate_locks, \
 388       "_LockInstancesNodes helper function called with no nodes to recalculate"
 389
 390     # TODO: check if we're really been called with the instance locks held
 391
 392     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 393     # future we might want to have different behaviors depending on the value
 394     # of self.recalculate_locks[locking.LEVEL_NODE]
 395     wanted_nodes = []
 396     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 397     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 398       wanted_nodes.append(instance.primary_node)
 399       if not primary_only:
 400         wanted_nodes.extend(instance.secondary_nodes)
 401
 402     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 403       self.needed_locks[level] = wanted_nodes
 404     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 405       self.needed_locks[level].extend(wanted_nodes)
 406     else:
 407       raise errors.ProgrammerError("Unknown recalculation mode")
 408
 409     del self.recalculate_locks[level]
 410
 411
 412 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 413   """Simple LU which runs no hooks.
 414
 415   This LU is intended as a parent for other LogicalUnits which will
 416   run no hooks, in order to reduce duplicate code.
 417
 418   """
 419   HPATH = None
 420   HTYPE = None
 421
 422   def BuildHooksEnv(self):
 423     """Empty BuildHooksEnv for NoHooksLu.
 424
 425     This just raises an error.
 426
 427     """
 428     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 429
 430   def BuildHooksNodes(self):
 431     """Empty BuildHooksNodes for NoHooksLU.
 432
 433     """
 434     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 435
 436
 437 class Tasklet:
 438   """Tasklet base class.
 439
 440   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 441   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 442   tasklets know nothing about locks.
 443
 444   Subclasses must follow these rules:
 445     - Implement CheckPrereq
 446     - Implement Exec
 447
 448   """
 449   def __init__(self, lu):
 450     self.lu = lu
 451
 452     # Shortcuts
 453     self.cfg = lu.cfg
 454     self.rpc = lu.rpc
 455
 456   def CheckPrereq(self):
 457     """Check prerequisites for this tasklets.
 458
 459     This method should check whether the prerequisites for the execution of
 460     this tasklet are fulfilled. It can do internode communication, but it
 461     should be idempotent - no cluster or system changes are allowed.
 462
 463     The method should raise errors.OpPrereqError in case something is not
 464     fulfilled. Its return value is ignored.
 465
 466     This method should also update all parameters to their canonical form if it
 467     hasn't been done before.
 468
 469     """
 470     pass
 471
 472   def Exec(self, feedback_fn):
 473     """Execute the tasklet.
 474
 475     This method should implement the actual work. It should raise
 476     errors.OpExecError for failures that are somewhat dealt with in code, or
 477     expected.
 478
 479     """
 480     raise NotImplementedError
 481
 482
 483 class _QueryBase:
 484   """Base for query utility classes.
 485
 486   """
 487   #: Attribute holding field definitions
 488   FIELDS = None
 489
 490   def __init__(self, qfilter, fields, use_locking):
 491     """Initializes this class.
 492
 493     """
 494     self.use_locking = use_locking
 495
 496     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 497                              namefield="name")
 498     self.requested_data = self.query.RequestedData()
 499     self.names = self.query.RequestedNames()
 500
 501     # Sort only if no names were requested
 502     self.sort_by_name = not self.names
 503
 504     self.do_locking = None
 505     self.wanted = None
 506
 507   def _GetNames(self, lu, all_names, lock_level):
 508     """Helper function to determine names asked for in the query.
 509
 510     """
 511     if self.do_locking:
 512       names = lu.owned_locks(lock_level)
 513     else:
 514       names = all_names
 515
 516     if self.wanted == locking.ALL_SET:
 517       assert not self.names
 518       # caller didn't specify names, so ordering is not important
 519       return utils.NiceSort(names)
 520
 521     # caller specified names and we must keep the same order
 522     assert self.names
 523     assert not self.do_locking or lu.glm.is_owned(lock_level)
 524
 525     missing = set(self.wanted).difference(names)
 526     if missing:
 527       raise errors.OpExecError("Some items were removed before retrieving"
 528                                " their data: %s" % missing)
 529
 530     # Return expanded names
 531     return self.wanted
 532
 533   def ExpandNames(self, lu):
 534     """Expand names for this query.
 535
 536     See L{LogicalUnit.ExpandNames}.
 537
 538     """
 539     raise NotImplementedError()
 540
 541   def DeclareLocks(self, lu, level):
 542     """Declare locks for this query.
 543
 544     See L{LogicalUnit.DeclareLocks}.
 545
 546     """
 547     raise NotImplementedError()
 548
 549   def _GetQueryData(self, lu):
 550     """Collects all data for this query.
 551
 552     @return: Query data object
 553
 554     """
 555     raise NotImplementedError()
 556
 557   def NewStyleQuery(self, lu):
 558     """Collect data and execute query.
 559
 560     """
 561     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 562                                   sort_by_name=self.sort_by_name)
 563
 564   def OldStyleQuery(self, lu):
 565     """Collect data and execute query.
 566
 567     """
 568     return self.query.OldStyleQuery(self._GetQueryData(lu),
 569                                     sort_by_name=self.sort_by_name)
 570
 571
 572 def _ShareAll():
 573   """Returns a dict declaring all lock levels shared.
 574
 575   """
 576   return dict.fromkeys(locking.LEVELS, 1)
 577
 578
 579 def _MakeLegacyNodeInfo(data):
 580   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 581
 582   Converts the data into a single dictionary. This is fine for most use cases,
 583   but some require information from more than one volume group or hypervisor.
 584
 585   """
 586   (bootid, (vg_info, ), (hv_info, )) = data
 587
 588   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 589     "bootid": bootid,
 590     })
 591
 592
 593 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 594   """Checks if the owned node groups are still correct for an instance.
 595
 596   @type cfg: L{config.ConfigWriter}
 597   @param cfg: The cluster configuration
 598   @type instance_name: string
 599   @param instance_name: Instance name
 600   @type owned_groups: set or frozenset
 601   @param owned_groups: List of currently owned node groups
 602
 603   """
 604   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 605
 606   if not owned_groups.issuperset(inst_groups):
 607     raise errors.OpPrereqError("Instance %s's node groups changed since"
 608                                " locks were acquired, current groups are"
 609                                " are '%s', owning groups '%s'; retry the"
 610                                " operation" %
 611                                (instance_name,
 612                                 utils.CommaJoin(inst_groups),
 613                                 utils.CommaJoin(owned_groups)),
 614                                errors.ECODE_STATE)
 615
 616   return inst_groups
 617
 618
 619 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 620   """Checks if the instances in a node group are still correct.
 621
 622   @type cfg: L{config.ConfigWriter}
 623   @param cfg: The cluster configuration
 624   @type group_uuid: string
 625   @param group_uuid: Node group UUID
 626   @type owned_instances: set or frozenset
 627   @param owned_instances: List of currently owned instances
 628
 629   """
 630   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 631   if owned_instances != wanted_instances:
 632     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 633                                " locks were acquired, wanted '%s', have '%s';"
 634                                " retry the operation" %
 635                                (group_uuid,
 636                                 utils.CommaJoin(wanted_instances),
 637                                 utils.CommaJoin(owned_instances)),
 638                                errors.ECODE_STATE)
 639
 640   return wanted_instances
 641
 642
 643 def _SupportsOob(cfg, node):
 644   """Tells if node supports OOB.
 645
 646   @type cfg: L{config.ConfigWriter}
 647   @param cfg: The cluster configuration
 648   @type node: L{objects.Node}
 649   @param node: The node
 650   @return: The OOB script if supported or an empty string otherwise
 651
 652   """
 653   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 654
 655
 656 def _GetWantedNodes(lu, nodes):
 657   """Returns list of checked and expanded node names.
 658
 659   @type lu: L{LogicalUnit}
 660   @param lu: the logical unit on whose behalf we execute
 661   @type nodes: list
 662   @param nodes: list of node names or None for all nodes
 663   @rtype: list
 664   @return: the list of nodes, sorted
 665   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 666
 667   """
 668   if nodes:
 669     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 670
 671   return utils.NiceSort(lu.cfg.GetNodeList())
 672
 673
 674 def _GetWantedInstances(lu, instances):
 675   """Returns list of checked and expanded instance names.
 676
 677   @type lu: L{LogicalUnit}
 678   @param lu: the logical unit on whose behalf we execute
 679   @type instances: list
 680   @param instances: list of instance names or None for all instances
 681   @rtype: list
 682   @return: the list of instances, sorted
 683   @raise errors.OpPrereqError: if the instances parameter is wrong type
 684   @raise errors.OpPrereqError: if any of the passed instances is not found
 685
 686   """
 687   if instances:
 688     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 689   else:
 690     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 691   return wanted
 692
 693
 694 def _GetUpdatedParams(old_params, update_dict,
 695                       use_default=True, use_none=False):
 696   """Return the new version of a parameter dictionary.
 697
 698   @type old_params: dict
 699   @param old_params: old parameters
 700   @type update_dict: dict
 701   @param update_dict: dict containing new parameter values, or
 702       constants.VALUE_DEFAULT to reset the parameter to its default
 703       value
 704   @param use_default: boolean
 705   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 706       values as 'to be deleted' values
 707   @param use_none: boolean
 708   @type use_none: whether to recognise C{None} values as 'to be
 709       deleted' values
 710   @rtype: dict
 711   @return: the new parameter dictionary
 712
 713   """
 714   params_copy = copy.deepcopy(old_params)
 715   for key, val in update_dict.iteritems():
 716     if ((use_default and val == constants.VALUE_DEFAULT) or
 717         (use_none and val is None)):
 718       try:
 719         del params_copy[key]
 720       except KeyError:
 721         pass
 722     else:
 723       params_copy[key] = val
 724   return params_copy
 725
 726
 727 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
 728   """Return the new version of a instance policy.
 729
 730   @param group_policy: whether this policy applies to a group and thus
 731     we should support removal of policy entries
 732
 733   """
 734   use_none = use_default = group_policy
 735   ipolicy = copy.deepcopy(old_ipolicy)
 736   for key, value in new_ipolicy.items():
 737     if key not in constants.IPOLICY_ALL_KEYS:
 738       raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
 739                                  errors.ECODE_INVAL)
 740     if key in constants.IPOLICY_ISPECS:
 741       utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
 742       ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
 743                                        use_none=use_none,
 744                                        use_default=use_default)
 745     else:
 746       if not value or value == [constants.VALUE_DEFAULT]:
 747         if group_policy:
 748           del ipolicy[key]
 749         else:
 750           raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
 751                                      " on the cluster'" % key,
 752                                      errors.ECODE_INVAL)
 753       else:
 754         if key in constants.IPOLICY_PARAMETERS:
 755           # FIXME: we assume all such values are float
 756           try:
 757             ipolicy[key] = float(value)
 758           except (TypeError, ValueError), err:
 759             raise errors.OpPrereqError("Invalid value for attribute"
 760                                        " '%s': '%s', error: %s" %
 761                                        (key, value, err), errors.ECODE_INVAL)
 762         else:
 763           # FIXME: we assume all others are lists; this should be redone
 764           # in a nicer way
 765           ipolicy[key] = list(value)
 766   try:
 767     objects.InstancePolicy.CheckParameterSyntax(ipolicy)
 768   except errors.ConfigurationError, err:
 769     raise errors.OpPrereqError("Invalid instance policy: %s" % err,
 770                                errors.ECODE_INVAL)
 771   return ipolicy
 772
 773
 774 def _UpdateAndVerifySubDict(base, updates, type_check):
 775   """Updates and verifies a dict with sub dicts of the same type.
 776
 777   @param base: The dict with the old data
 778   @param updates: The dict with the new data
 779   @param type_check: Dict suitable to ForceDictType to verify correct types
 780   @returns: A new dict with updated and verified values
 781
 782   """
 783   def fn(old, value):
 784     new = _GetUpdatedParams(old, value)
 785     utils.ForceDictType(new, type_check)
 786     return new
 787
 788   ret = copy.deepcopy(base)
 789   ret.update(dict((key, fn(base.get(key, {}), value))
 790                   for key, value in updates.items()))
 791   return ret
 792
 793
 794 def _MergeAndVerifyHvState(op_input, obj_input):
 795   """Combines the hv state from an opcode with the one of the object
 796
 797   @param op_input: The input dict from the opcode
 798   @param obj_input: The input dict from the objects
 799   @return: The verified and updated dict
 800
 801   """
 802   if op_input:
 803     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 804     if invalid_hvs:
 805       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 806                                  " %s" % utils.CommaJoin(invalid_hvs),
 807                                  errors.ECODE_INVAL)
 808     if obj_input is None:
 809       obj_input = {}
 810     type_check = constants.HVSTS_PARAMETER_TYPES
 811     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 812
 813   return None
 814
 815
 816 def _MergeAndVerifyDiskState(op_input, obj_input):
 817   """Combines the disk state from an opcode with the one of the object
 818
 819   @param op_input: The input dict from the opcode
 820   @param obj_input: The input dict from the objects
 821   @return: The verified and updated dict
 822   """
 823   if op_input:
 824     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 825     if invalid_dst:
 826       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 827                                  utils.CommaJoin(invalid_dst),
 828                                  errors.ECODE_INVAL)
 829     type_check = constants.DSS_PARAMETER_TYPES
 830     if obj_input is None:
 831       obj_input = {}
 832     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 833                                               type_check))
 834                 for key, value in op_input.items())
 835
 836   return None
 837
 838
 839 def _ReleaseLocks(lu, level, names=None, keep=None):
 840   """Releases locks owned by an LU.
 841
 842   @type lu: L{LogicalUnit}
 843   @param level: Lock level
 844   @type names: list or None
 845   @param names: Names of locks to release
 846   @type keep: list or None
 847   @param keep: Names of locks to retain
 848
 849   """
 850   assert not (keep is not None and names is not None), \
 851          "Only one of the 'names' and the 'keep' parameters can be given"
 852
 853   if names is not None:
 854     should_release = names.__contains__
 855   elif keep:
 856     should_release = lambda name: name not in keep
 857   else:
 858     should_release = None
 859
 860   owned = lu.owned_locks(level)
 861   if not owned:
 862     # Not owning any lock at this level, do nothing
 863     pass
 864
 865   elif should_release:
 866     retain = []
 867     release = []
 868
 869     # Determine which locks to release
 870     for name in owned:
 871       if should_release(name):
 872         release.append(name)
 873       else:
 874         retain.append(name)
 875
 876     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 877
 878     # Release just some locks
 879     lu.glm.release(level, names=release)
 880
 881     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 882   else:
 883     # Release everything
 884     lu.glm.release(level)
 885
 886     assert not lu.glm.is_owned(level), "No locks should be owned"
 887
 888
 889 def _MapInstanceDisksToNodes(instances):
 890   """Creates a map from (node, volume) to instance name.
 891
 892   @type instances: list of L{objects.Instance}
 893   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 894
 895   """
 896   return dict(((node, vol), inst.name)
 897               for inst in instances
 898               for (node, vols) in inst.MapLVsByNode().items()
 899               for vol in vols)
 900
 901
 902 def _RunPostHook(lu, node_name):
 903   """Runs the post-hook for an opcode on a single node.
 904
 905   """
 906   hm = lu.proc.BuildHooksManager(lu)
 907   try:
 908     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 909   except:
 910     # pylint: disable=W0702
 911     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 912
 913
 914 def _CheckOutputFields(static, dynamic, selected):
 915   """Checks whether all selected fields are valid.
 916
 917   @type static: L{utils.FieldSet}
 918   @param static: static fields set
 919   @type dynamic: L{utils.FieldSet}
 920   @param dynamic: dynamic fields set
 921
 922   """
 923   f = utils.FieldSet()
 924   f.Extend(static)
 925   f.Extend(dynamic)
 926
 927   delta = f.NonMatching(selected)
 928   if delta:
 929     raise errors.OpPrereqError("Unknown output fields selected: %s"
 930                                % ",".join(delta), errors.ECODE_INVAL)
 931
 932
 933 def _CheckGlobalHvParams(params):
 934   """Validates that given hypervisor params are not global ones.
 935
 936   This will ensure that instances don't get customised versions of
 937   global params.
 938
 939   """
 940   used_globals = constants.HVC_GLOBALS.intersection(params)
 941   if used_globals:
 942     msg = ("The following hypervisor parameters are global and cannot"
 943            " be customized at instance level, please modify them at"
 944            " cluster level: %s" % utils.CommaJoin(used_globals))
 945     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 946
 947
 948 def _CheckNodeOnline(lu, node, msg=None):
 949   """Ensure that a given node is online.
 950
 951   @param lu: the LU on behalf of which we make the check
 952   @param node: the node to check
 953   @param msg: if passed, should be a message to replace the default one
 954   @raise errors.OpPrereqError: if the node is offline
 955
 956   """
 957   if msg is None:
 958     msg = "Can't use offline node"
 959   if lu.cfg.GetNodeInfo(node).offline:
 960     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 961
 962
 963 def _CheckNodeNotDrained(lu, node):
 964   """Ensure that a given node is not drained.
 965
 966   @param lu: the LU on behalf of which we make the check
 967   @param node: the node to check
 968   @raise errors.OpPrereqError: if the node is drained
 969
 970   """
 971   if lu.cfg.GetNodeInfo(node).drained:
 972     raise errors.OpPrereqError("Can't use drained node %s" % node,
 973                                errors.ECODE_STATE)
 974
 975
 976 def _CheckNodeVmCapable(lu, node):
 977   """Ensure that a given node is vm capable.
 978
 979   @param lu: the LU on behalf of which we make the check
 980   @param node: the node to check
 981   @raise errors.OpPrereqError: if the node is not vm capable
 982
 983   """
 984   if not lu.cfg.GetNodeInfo(node).vm_capable:
 985     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 986                                errors.ECODE_STATE)
 987
 988
 989 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 990   """Ensure that a node supports a given OS.
 991
 992   @param lu: the LU on behalf of which we make the check
 993   @param node: the node to check
 994   @param os_name: the OS to query about
 995   @param force_variant: whether to ignore variant errors
 996   @raise errors.OpPrereqError: if the node is not supporting the OS
 997
 998   """
 999   result = lu.rpc.call_os_get(node, os_name)
1000   result.Raise("OS '%s' not in supported OS list for node %s" %
1001                (os_name, node),
1002                prereq=True, ecode=errors.ECODE_INVAL)
1003   if not force_variant:
1004     _CheckOSVariant(result.payload, os_name)
1005
1006
1007 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
1008   """Ensure that a node has the given secondary ip.
1009
1010   @type lu: L{LogicalUnit}
1011   @param lu: the LU on behalf of which we make the check
1012   @type node: string
1013   @param node: the node to check
1014   @type secondary_ip: string
1015   @param secondary_ip: the ip to check
1016   @type prereq: boolean
1017   @param prereq: whether to throw a prerequisite or an execute error
1018   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
1019   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
1020
1021   """
1022   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
1023   result.Raise("Failure checking secondary ip on node %s" % node,
1024                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1025   if not result.payload:
1026     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
1027            " please fix and re-run this command" % secondary_ip)
1028     if prereq:
1029       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
1030     else:
1031       raise errors.OpExecError(msg)
1032
1033
1034 def _GetClusterDomainSecret():
1035   """Reads the cluster domain secret.
1036
1037   """
1038   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
1039                                strict=True)
1040
1041
1042 def _CheckInstanceState(lu, instance, req_states, msg=None):
1043   """Ensure that an instance is in one of the required states.
1044
1045   @param lu: the LU on behalf of which we make the check
1046   @param instance: the instance to check
1047   @param msg: if passed, should be a message to replace the default one
1048   @raise errors.OpPrereqError: if the instance is not in the required state
1049
1050   """
1051   if msg is None:
1052     msg = "can't use instance from outside %s states" % ", ".join(req_states)
1053   if instance.admin_state not in req_states:
1054     raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
1055                                (instance.name, instance.admin_state, msg),
1056                                errors.ECODE_STATE)
1057
1058   if constants.ADMINST_UP not in req_states:
1059     pnode = instance.primary_node
1060     ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1061     ins_l.Raise("Can't contact node %s for instance information" % pnode,
1062                 prereq=True, ecode=errors.ECODE_ENVIRON)
1063
1064     if instance.name in ins_l.payload:
1065       raise errors.OpPrereqError("Instance %s is running, %s" %
1066                                  (instance.name, msg), errors.ECODE_STATE)
1067
1068
1069 def _ComputeMinMaxSpec(name, ipolicy, value):
1070   """Computes if value is in the desired range.
1071
1072   @param name: name of the parameter for which we perform the check
1073   @param ipolicy: dictionary containing min, max and std values
1074   @param value: actual value that we want to use
1075   @return: None or element not meeting the criteria
1076
1077
1078   """
1079   if value in [None, constants.VALUE_AUTO]:
1080     return None
1081   max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1082   min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1083   if value > max_v or min_v > value:
1084     return ("%s value %s is not in range [%s, %s]" %
1085             (name, value, min_v, max_v))
1086   return None
1087
1088
1089 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1090                                  nic_count, disk_sizes,
1091                                  _compute_fn=_ComputeMinMaxSpec):
1092   """Verifies ipolicy against provided specs.
1093
1094   @type ipolicy: dict
1095   @param ipolicy: The ipolicy
1096   @type mem_size: int
1097   @param mem_size: The memory size
1098   @type cpu_count: int
1099   @param cpu_count: Used cpu cores
1100   @type disk_count: int
1101   @param disk_count: Number of disks used
1102   @type nic_count: int
1103   @param nic_count: Number of nics used
1104   @type disk_sizes: list of ints
1105   @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1106   @param _compute_fn: The compute function (unittest only)
1107   @return: A list of violations, or an empty list of no violations are found
1108
1109   """
1110   assert disk_count == len(disk_sizes)
1111
1112   test_settings = [
1113     (constants.ISPEC_MEM_SIZE, mem_size),
1114     (constants.ISPEC_CPU_COUNT, cpu_count),
1115     (constants.ISPEC_DISK_COUNT, disk_count),
1116     (constants.ISPEC_NIC_COUNT, nic_count),
1117     ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1118
1119   return filter(None,
1120                 (_compute_fn(name, ipolicy, value)
1121                  for (name, value) in test_settings))
1122
1123
1124 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1125                                      _compute_fn=_ComputeIPolicySpecViolation):
1126   """Compute if instance meets the specs of ipolicy.
1127
1128   @type ipolicy: dict
1129   @param ipolicy: The ipolicy to verify against
1130   @type instance: L{objects.Instance}
1131   @param instance: The instance to verify
1132   @param _compute_fn: The function to verify ipolicy (unittest only)
1133   @see: L{_ComputeIPolicySpecViolation}
1134
1135   """
1136   mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1137   cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1138   disk_count = len(instance.disks)
1139   disk_sizes = [disk.size for disk in instance.disks]
1140   nic_count = len(instance.nics)
1141
1142   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1143                      disk_sizes)
1144
1145
1146 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1147     _compute_fn=_ComputeIPolicySpecViolation):
1148   """Compute if instance specs meets the specs of ipolicy.
1149
1150   @type ipolicy: dict
1151   @param ipolicy: The ipolicy to verify against
1152   @param instance_spec: dict
1153   @param instance_spec: The instance spec to verify
1154   @param _compute_fn: The function to verify ipolicy (unittest only)
1155   @see: L{_ComputeIPolicySpecViolation}
1156
1157   """
1158   mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1159   cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1160   disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1161   disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1162   nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1163
1164   return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1165                      disk_sizes)
1166
1167
1168 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1169                                  target_group,
1170                                  _compute_fn=_ComputeIPolicyInstanceViolation):
1171   """Compute if instance meets the specs of the new target group.
1172
1173   @param ipolicy: The ipolicy to verify
1174   @param instance: The instance object to verify
1175   @param current_group: The current group of the instance
1176   @param target_group: The new group of the instance
1177   @param _compute_fn: The function to verify ipolicy (unittest only)
1178   @see: L{_ComputeIPolicySpecViolation}
1179
1180   """
1181   if current_group == target_group:
1182     return []
1183   else:
1184     return _compute_fn(ipolicy, instance)
1185
1186
1187 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1188                             _compute_fn=_ComputeIPolicyNodeViolation):
1189   """Checks that the target node is correct in terms of instance policy.
1190
1191   @param ipolicy: The ipolicy to verify
1192   @param instance: The instance object to verify
1193   @param node: The new node to relocate
1194   @param ignore: Ignore violations of the ipolicy
1195   @param _compute_fn: The function to verify ipolicy (unittest only)
1196   @see: L{_ComputeIPolicySpecViolation}
1197
1198   """
1199   primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
1200   res = _compute_fn(ipolicy, instance, primary_node.group, node.group)
1201
1202   if res:
1203     msg = ("Instance does not meet target node group's (%s) instance"
1204            " policy: %s") % (node.group, utils.CommaJoin(res))
1205     if ignore:
1206       lu.LogWarning(msg)
1207     else:
1208       raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1209
1210
1211 def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
1212   """Computes a set of any instances that would violate the new ipolicy.
1213
1214   @param old_ipolicy: The current (still in-place) ipolicy
1215   @param new_ipolicy: The new (to become) ipolicy
1216   @param instances: List of instances to verify
1217   @return: A list of instances which violates the new ipolicy but did not before
1218
1219   """
1220   return (_ComputeViolatingInstances(old_ipolicy, instances) -
1221           _ComputeViolatingInstances(new_ipolicy, instances))
1222
1223
1224 def _ExpandItemName(fn, name, kind):
1225   """Expand an item name.
1226
1227   @param fn: the function to use for expansion
1228   @param name: requested item name
1229   @param kind: text description ('Node' or 'Instance')
1230   @return: the resolved (full) name
1231   @raise errors.OpPrereqError: if the item is not found
1232
1233   """
1234   full_name = fn(name)
1235   if full_name is None:
1236     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1237                                errors.ECODE_NOENT)
1238   return full_name
1239
1240
1241 def _ExpandNodeName(cfg, name):
1242   """Wrapper over L{_ExpandItemName} for nodes."""
1243   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1244
1245
1246 def _ExpandInstanceName(cfg, name):
1247   """Wrapper over L{_ExpandItemName} for instance."""
1248   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1249
1250
1251 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1252                           minmem, maxmem, vcpus, nics, disk_template, disks,
1253                           bep, hvp, hypervisor_name, tags):
1254   """Builds instance related env variables for hooks
1255
1256   This builds the hook environment from individual variables.
1257
1258   @type name: string
1259   @param name: the name of the instance
1260   @type primary_node: string
1261   @param primary_node: the name of the instance's primary node
1262   @type secondary_nodes: list
1263   @param secondary_nodes: list of secondary nodes as strings
1264   @type os_type: string
1265   @param os_type: the name of the instance's OS
1266   @type status: string
1267   @param status: the desired status of the instance
1268   @type minmem: string
1269   @param minmem: the minimum memory size of the instance
1270   @type maxmem: string
1271   @param maxmem: the maximum memory size of the instance
1272   @type vcpus: string
1273   @param vcpus: the count of VCPUs the instance has
1274   @type nics: list
1275   @param nics: list of tuples (ip, mac, mode, link) representing
1276       the NICs the instance has
1277   @type disk_template: string
1278   @param disk_template: the disk template of the instance
1279   @type disks: list
1280   @param disks: the list of (size, mode) pairs
1281   @type bep: dict
1282   @param bep: the backend parameters for the instance
1283   @type hvp: dict
1284   @param hvp: the hypervisor parameters for the instance
1285   @type hypervisor_name: string
1286   @param hypervisor_name: the hypervisor for the instance
1287   @type tags: list
1288   @param tags: list of instance tags as strings
1289   @rtype: dict
1290   @return: the hook environment for this instance
1291
1292   """
1293   env = {
1294     "OP_TARGET": name,
1295     "INSTANCE_NAME": name,
1296     "INSTANCE_PRIMARY": primary_node,
1297     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1298     "INSTANCE_OS_TYPE": os_type,
1299     "INSTANCE_STATUS": status,
1300     "INSTANCE_MINMEM": minmem,
1301     "INSTANCE_MAXMEM": maxmem,
1302     # TODO(2.7) remove deprecated "memory" value
1303     "INSTANCE_MEMORY": maxmem,
1304     "INSTANCE_VCPUS": vcpus,
1305     "INSTANCE_DISK_TEMPLATE": disk_template,
1306     "INSTANCE_HYPERVISOR": hypervisor_name,
1307   }
1308   if nics:
1309     nic_count = len(nics)
1310     for idx, (ip, mac, mode, link) in enumerate(nics):
1311       if ip is None:
1312         ip = ""
1313       env["INSTANCE_NIC%d_IP" % idx] = ip
1314       env["INSTANCE_NIC%d_MAC" % idx] = mac
1315       env["INSTANCE_NIC%d_MODE" % idx] = mode
1316       env["INSTANCE_NIC%d_LINK" % idx] = link
1317       if mode == constants.NIC_MODE_BRIDGED:
1318         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1319   else:
1320     nic_count = 0
1321
1322   env["INSTANCE_NIC_COUNT"] = nic_count
1323
1324   if disks:
1325     disk_count = len(disks)
1326     for idx, (size, mode) in enumerate(disks):
1327       env["INSTANCE_DISK%d_SIZE" % idx] = size
1328       env["INSTANCE_DISK%d_MODE" % idx] = mode
1329   else:
1330     disk_count = 0
1331
1332   env["INSTANCE_DISK_COUNT"] = disk_count
1333
1334   if not tags:
1335     tags = []
1336
1337   env["INSTANCE_TAGS"] = " ".join(tags)
1338
1339   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1340     for key, value in source.items():
1341       env["INSTANCE_%s_%s" % (kind, key)] = value
1342
1343   return env
1344
1345
1346 def _NICListToTuple(lu, nics):
1347   """Build a list of nic information tuples.
1348
1349   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1350   value in LUInstanceQueryData.
1351
1352   @type lu:  L{LogicalUnit}
1353   @param lu: the logical unit on whose behalf we execute
1354   @type nics: list of L{objects.NIC}
1355   @param nics: list of nics to convert to hooks tuples
1356
1357   """
1358   hooks_nics = []
1359   cluster = lu.cfg.GetClusterInfo()
1360   for nic in nics:
1361     ip = nic.ip
1362     mac = nic.mac
1363     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1364     mode = filled_params[constants.NIC_MODE]
1365     link = filled_params[constants.NIC_LINK]
1366     hooks_nics.append((ip, mac, mode, link))
1367   return hooks_nics
1368
1369
1370 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1371   """Builds instance related env variables for hooks from an object.
1372
1373   @type lu: L{LogicalUnit}
1374   @param lu: the logical unit on whose behalf we execute
1375   @type instance: L{objects.Instance}
1376   @param instance: the instance for which we should build the
1377       environment
1378   @type override: dict
1379   @param override: dictionary with key/values that will override
1380       our values
1381   @rtype: dict
1382   @return: the hook environment dictionary
1383
1384   """
1385   cluster = lu.cfg.GetClusterInfo()
1386   bep = cluster.FillBE(instance)
1387   hvp = cluster.FillHV(instance)
1388   args = {
1389     "name": instance.name,
1390     "primary_node": instance.primary_node,
1391     "secondary_nodes": instance.secondary_nodes,
1392     "os_type": instance.os,
1393     "status": instance.admin_state,
1394     "maxmem": bep[constants.BE_MAXMEM],
1395     "minmem": bep[constants.BE_MINMEM],
1396     "vcpus": bep[constants.BE_VCPUS],
1397     "nics": _NICListToTuple(lu, instance.nics),
1398     "disk_template": instance.disk_template,
1399     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1400     "bep": bep,
1401     "hvp": hvp,
1402     "hypervisor_name": instance.hypervisor,
1403     "tags": instance.tags,
1404   }
1405   if override:
1406     args.update(override)
1407   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1408
1409
1410 def _AdjustCandidatePool(lu, exceptions):
1411   """Adjust the candidate pool after node operations.
1412
1413   """
1414   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1415   if mod_list:
1416     lu.LogInfo("Promoted nodes to master candidate role: %s",
1417                utils.CommaJoin(node.name for node in mod_list))
1418     for name in mod_list:
1419       lu.context.ReaddNode(name)
1420   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1421   if mc_now > mc_max:
1422     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1423                (mc_now, mc_max))
1424
1425
1426 def _DecideSelfPromotion(lu, exceptions=None):
1427   """Decide whether I should promote myself as a master candidate.
1428
1429   """
1430   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1431   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1432   # the new node will increase mc_max with one, so:
1433   mc_should = min(mc_should + 1, cp_size)
1434   return mc_now < mc_should
1435
1436
1437 def _CalculateGroupIPolicy(cluster, group):
1438   """Calculate instance policy for group.
1439
1440   """
1441   return cluster.SimpleFillIPolicy(group.ipolicy)
1442
1443
1444 def _ComputeViolatingInstances(ipolicy, instances):
1445   """Computes a set of instances who violates given ipolicy.
1446
1447   @param ipolicy: The ipolicy to verify
1448   @type instances: object.Instance
1449   @param instances: List of instances to verify
1450   @return: A frozenset of instance names violating the ipolicy
1451
1452   """
1453   return frozenset([inst.name for inst in instances
1454                     if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
1455
1456
1457 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1458   """Check that the brigdes needed by a list of nics exist.
1459
1460   """
1461   cluster = lu.cfg.GetClusterInfo()
1462   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1463   brlist = [params[constants.NIC_LINK] for params in paramslist
1464             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1465   if brlist:
1466     result = lu.rpc.call_bridges_exist(target_node, brlist)
1467     result.Raise("Error checking bridges on destination node '%s'" %
1468                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1469
1470
1471 def _CheckInstanceBridgesExist(lu, instance, node=None):
1472   """Check that the brigdes needed by an instance exist.
1473
1474   """
1475   if node is None:
1476     node = instance.primary_node
1477   _CheckNicsBridgesExist(lu, instance.nics, node)
1478
1479
1480 def _CheckOSVariant(os_obj, name):
1481   """Check whether an OS name conforms to the os variants specification.
1482
1483   @type os_obj: L{objects.OS}
1484   @param os_obj: OS object to check
1485   @type name: string
1486   @param name: OS name passed by the user, to check for validity
1487
1488   """
1489   variant = objects.OS.GetVariant(name)
1490   if not os_obj.supported_variants:
1491     if variant:
1492       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1493                                  " passed)" % (os_obj.name, variant),
1494                                  errors.ECODE_INVAL)
1495     return
1496   if not variant:
1497     raise errors.OpPrereqError("OS name must include a variant",
1498                                errors.ECODE_INVAL)
1499
1500   if variant not in os_obj.supported_variants:
1501     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1502
1503
1504 def _GetNodeInstancesInner(cfg, fn):
1505   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1506
1507
1508 def _GetNodeInstances(cfg, node_name):
1509   """Returns a list of all primary and secondary instances on a node.
1510
1511   """
1512
1513   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1514
1515
1516 def _GetNodePrimaryInstances(cfg, node_name):
1517   """Returns primary instances on a node.
1518
1519   """
1520   return _GetNodeInstancesInner(cfg,
1521                                 lambda inst: node_name == inst.primary_node)
1522
1523
1524 def _GetNodeSecondaryInstances(cfg, node_name):
1525   """Returns secondary instances on a node.
1526
1527   """
1528   return _GetNodeInstancesInner(cfg,
1529                                 lambda inst: node_name in inst.secondary_nodes)
1530
1531
1532 def _GetStorageTypeArgs(cfg, storage_type):
1533   """Returns the arguments for a storage type.
1534
1535   """
1536   # Special case for file storage
1537   if storage_type == constants.ST_FILE:
1538     # storage.FileStorage wants a list of storage directories
1539     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1540
1541   return []
1542
1543
1544 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1545   faulty = []
1546
1547   for dev in instance.disks:
1548     cfg.SetDiskID(dev, node_name)
1549
1550   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1551   result.Raise("Failed to get disk status from node %s" % node_name,
1552                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1553
1554   for idx, bdev_status in enumerate(result.payload):
1555     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1556       faulty.append(idx)
1557
1558   return faulty
1559
1560
1561 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1562   """Check the sanity of iallocator and node arguments and use the
1563   cluster-wide iallocator if appropriate.
1564
1565   Check that at most one of (iallocator, node) is specified. If none is
1566   specified, then the LU's opcode's iallocator slot is filled with the
1567   cluster-wide default iallocator.
1568
1569   @type iallocator_slot: string
1570   @param iallocator_slot: the name of the opcode iallocator slot
1571   @type node_slot: string
1572   @param node_slot: the name of the opcode target node slot
1573
1574   """
1575   node = getattr(lu.op, node_slot, None)
1576   iallocator = getattr(lu.op, iallocator_slot, None)
1577
1578   if node is not None and iallocator is not None:
1579     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1580                                errors.ECODE_INVAL)
1581   elif node is None and iallocator is None:
1582     default_iallocator = lu.cfg.GetDefaultIAllocator()
1583     if default_iallocator:
1584       setattr(lu.op, iallocator_slot, default_iallocator)
1585     else:
1586       raise errors.OpPrereqError("No iallocator or node given and no"
1587                                  " cluster-wide default iallocator found;"
1588                                  " please specify either an iallocator or a"
1589                                  " node, or set a cluster-wide default"
1590                                  " iallocator")
1591
1592
1593 def _GetDefaultIAllocator(cfg, iallocator):
1594   """Decides on which iallocator to use.
1595
1596   @type cfg: L{config.ConfigWriter}
1597   @param cfg: Cluster configuration object
1598   @type iallocator: string or None
1599   @param iallocator: Iallocator specified in opcode
1600   @rtype: string
1601   @return: Iallocator name
1602
1603   """
1604   if not iallocator:
1605     # Use default iallocator
1606     iallocator = cfg.GetDefaultIAllocator()
1607
1608   if not iallocator:
1609     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1610                                " opcode nor as a cluster-wide default",
1611                                errors.ECODE_INVAL)
1612
1613   return iallocator
1614
1615
1616 class LUClusterPostInit(LogicalUnit):
1617   """Logical unit for running hooks after cluster initialization.
1618
1619   """
1620   HPATH = "cluster-init"
1621   HTYPE = constants.HTYPE_CLUSTER
1622
1623   def BuildHooksEnv(self):
1624     """Build hooks env.
1625
1626     """
1627     return {
1628       "OP_TARGET": self.cfg.GetClusterName(),
1629       }
1630
1631   def BuildHooksNodes(self):
1632     """Build hooks nodes.
1633
1634     """
1635     return ([], [self.cfg.GetMasterNode()])
1636
1637   def Exec(self, feedback_fn):
1638     """Nothing to do.
1639
1640     """
1641     return True
1642
1643
1644 class LUClusterDestroy(LogicalUnit):
1645   """Logical unit for destroying the cluster.
1646
1647   """
1648   HPATH = "cluster-destroy"
1649   HTYPE = constants.HTYPE_CLUSTER
1650
1651   def BuildHooksEnv(self):
1652     """Build hooks env.
1653
1654     """
1655     return {
1656       "OP_TARGET": self.cfg.GetClusterName(),
1657       }
1658
1659   def BuildHooksNodes(self):
1660     """Build hooks nodes.
1661
1662     """
1663     return ([], [])
1664
1665   def CheckPrereq(self):
1666     """Check prerequisites.
1667
1668     This checks whether the cluster is empty.
1669
1670     Any errors are signaled by raising errors.OpPrereqError.
1671
1672     """
1673     master = self.cfg.GetMasterNode()
1674
1675     nodelist = self.cfg.GetNodeList()
1676     if len(nodelist) != 1 or nodelist[0] != master:
1677       raise errors.OpPrereqError("There are still %d node(s) in"
1678                                  " this cluster." % (len(nodelist) - 1),
1679                                  errors.ECODE_INVAL)
1680     instancelist = self.cfg.GetInstanceList()
1681     if instancelist:
1682       raise errors.OpPrereqError("There are still %d instance(s) in"
1683                                  " this cluster." % len(instancelist),
1684                                  errors.ECODE_INVAL)
1685
1686   def Exec(self, feedback_fn):
1687     """Destroys the cluster.
1688
1689     """
1690     master_params = self.cfg.GetMasterNetworkParameters()
1691
1692     # Run post hooks on master node before it's removed
1693     _RunPostHook(self, master_params.name)
1694
1695     ems = self.cfg.GetUseExternalMipScript()
1696     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1697                                                      master_params, ems)
1698     if result.fail_msg:
1699       self.LogWarning("Error disabling the master IP address: %s",
1700                       result.fail_msg)
1701
1702     return master_params.name
1703
1704
1705 def _VerifyCertificate(filename):
1706   """Verifies a certificate for L{LUClusterVerifyConfig}.
1707
1708   @type filename: string
1709   @param filename: Path to PEM file
1710
1711   """
1712   try:
1713     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1714                                            utils.ReadFile(filename))
1715   except Exception, err: # pylint: disable=W0703
1716     return (LUClusterVerifyConfig.ETYPE_ERROR,
1717             "Failed to load X509 certificate %s: %s" % (filename, err))
1718
1719   (errcode, msg) = \
1720     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1721                                 constants.SSL_CERT_EXPIRATION_ERROR)
1722
1723   if msg:
1724     fnamemsg = "While verifying %s: %s" % (filename, msg)
1725   else:
1726     fnamemsg = None
1727
1728   if errcode is None:
1729     return (None, fnamemsg)
1730   elif errcode == utils.CERT_WARNING:
1731     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1732   elif errcode == utils.CERT_ERROR:
1733     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1734
1735   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1736
1737
1738 def _GetAllHypervisorParameters(cluster, instances):
1739   """Compute the set of all hypervisor parameters.
1740
1741   @type cluster: L{objects.Cluster}
1742   @param cluster: the cluster object
1743   @param instances: list of L{objects.Instance}
1744   @param instances: additional instances from which to obtain parameters
1745   @rtype: list of (origin, hypervisor, parameters)
1746   @return: a list with all parameters found, indicating the hypervisor they
1747        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1748
1749   """
1750   hvp_data = []
1751
1752   for hv_name in cluster.enabled_hypervisors:
1753     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1754
1755   for os_name, os_hvp in cluster.os_hvp.items():
1756     for hv_name, hv_params in os_hvp.items():
1757       if hv_params:
1758         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1759         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1760
1761   # TODO: collapse identical parameter values in a single one
1762   for instance in instances:
1763     if instance.hvparams:
1764       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1765                        cluster.FillHV(instance)))
1766
1767   return hvp_data
1768
1769
1770 class _VerifyErrors(object):
1771   """Mix-in for cluster/group verify LUs.
1772
1773   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1774   self.op and self._feedback_fn to be available.)
1775
1776   """
1777
1778   ETYPE_FIELD = "code"
1779   ETYPE_ERROR = "ERROR"
1780   ETYPE_WARNING = "WARNING"
1781
1782   def _Error(self, ecode, item, msg, *args, **kwargs):
1783     """Format an error message.
1784
1785     Based on the opcode's error_codes parameter, either format a
1786     parseable error code, or a simpler error string.
1787
1788     This must be called only from Exec and functions called from Exec.
1789
1790     """
1791     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1792     itype, etxt, _ = ecode
1793     # first complete the msg
1794     if args:
1795       msg = msg % args
1796     # then format the whole message
1797     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1798       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1799     else:
1800       if item:
1801         item = " " + item
1802       else:
1803         item = ""
1804       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1805     # and finally report it via the feedback_fn
1806     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1807
1808   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1809     """Log an error message if the passed condition is True.
1810
1811     """
1812     cond = (bool(cond)
1813             or self.op.debug_simulate_errors) # pylint: disable=E1101
1814
1815     # If the error code is in the list of ignored errors, demote the error to a
1816     # warning
1817     (_, etxt, _) = ecode
1818     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1819       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1820
1821     if cond:
1822       self._Error(ecode, *args, **kwargs)
1823
1824     # do not mark the operation as failed for WARN cases only
1825     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1826       self.bad = self.bad or cond
1827
1828
1829 class LUClusterVerify(NoHooksLU):
1830   """Submits all jobs necessary to verify the cluster.
1831
1832   """
1833   REQ_BGL = False
1834
1835   def ExpandNames(self):
1836     self.needed_locks = {}
1837
1838   def Exec(self, feedback_fn):
1839     jobs = []
1840
1841     if self.op.group_name:
1842       groups = [self.op.group_name]
1843       depends_fn = lambda: None
1844     else:
1845       groups = self.cfg.GetNodeGroupList()
1846
1847       # Verify global configuration
1848       jobs.append([
1849         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1850         ])
1851
1852       # Always depend on global verification
1853       depends_fn = lambda: [(-len(jobs), [])]
1854
1855     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1856                                             ignore_errors=self.op.ignore_errors,
1857                                             depends=depends_fn())]
1858                 for group in groups)
1859
1860     # Fix up all parameters
1861     for op in itertools.chain(*jobs): # pylint: disable=W0142
1862       op.debug_simulate_errors = self.op.debug_simulate_errors
1863       op.verbose = self.op.verbose
1864       op.error_codes = self.op.error_codes
1865       try:
1866         op.skip_checks = self.op.skip_checks
1867       except AttributeError:
1868         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1869
1870     return ResultWithJobs(jobs)
1871
1872
1873 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1874   """Verifies the cluster config.
1875
1876   """
1877   REQ_BGL = True
1878
1879   def _VerifyHVP(self, hvp_data):
1880     """Verifies locally the syntax of the hypervisor parameters.
1881
1882     """
1883     for item, hv_name, hv_params in hvp_data:
1884       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1885              (item, hv_name))
1886       try:
1887         hv_class = hypervisor.GetHypervisor(hv_name)
1888         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1889         hv_class.CheckParameterSyntax(hv_params)
1890       except errors.GenericError, err:
1891         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1892
1893   def ExpandNames(self):
1894     # Information can be safely retrieved as the BGL is acquired in exclusive
1895     # mode
1896     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1897     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1898     self.all_node_info = self.cfg.GetAllNodesInfo()
1899     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1900     self.needed_locks = {}
1901
1902   def Exec(self, feedback_fn):
1903     """Verify integrity of cluster, performing various test on nodes.
1904
1905     """
1906     self.bad = False
1907     self._feedback_fn = feedback_fn
1908
1909     feedback_fn("* Verifying cluster config")
1910
1911     for msg in self.cfg.VerifyConfig():
1912       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1913
1914     feedback_fn("* Verifying cluster certificate files")
1915
1916     for cert_filename in constants.ALL_CERT_FILES:
1917       (errcode, msg) = _VerifyCertificate(cert_filename)
1918       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1919
1920     feedback_fn("* Verifying hypervisor parameters")
1921
1922     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1923                                                 self.all_inst_info.values()))
1924
1925     feedback_fn("* Verifying all nodes belong to an existing group")
1926
1927     # We do this verification here because, should this bogus circumstance
1928     # occur, it would never be caught by VerifyGroup, which only acts on
1929     # nodes/instances reachable from existing node groups.
1930
1931     dangling_nodes = set(node.name for node in self.all_node_info.values()
1932                          if node.group not in self.all_group_info)
1933
1934     dangling_instances = {}
1935     no_node_instances = []
1936
1937     for inst in self.all_inst_info.values():
1938       if inst.primary_node in dangling_nodes:
1939         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1940       elif inst.primary_node not in self.all_node_info:
1941         no_node_instances.append(inst.name)
1942
1943     pretty_dangling = [
1944         "%s (%s)" %
1945         (node.name,
1946          utils.CommaJoin(dangling_instances.get(node.name,
1947                                                 ["no instances"])))
1948         for node in dangling_nodes]
1949
1950     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1951                   None,
1952                   "the following nodes (and their instances) belong to a non"
1953                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1954
1955     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1956                   None,
1957                   "the following instances have a non-existing primary-node:"
1958                   " %s", utils.CommaJoin(no_node_instances))
1959
1960     return not self.bad
1961
1962
1963 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1964   """Verifies the status of a node group.
1965
1966   """
1967   HPATH = "cluster-verify"
1968   HTYPE = constants.HTYPE_CLUSTER
1969   REQ_BGL = False
1970
1971   _HOOKS_INDENT_RE = re.compile("^", re.M)
1972
1973   class NodeImage(object):
1974     """A class representing the logical and physical status of a node.
1975
1976     @type name: string
1977     @ivar name: the node name to which this object refers
1978     @ivar volumes: a structure as returned from
1979         L{ganeti.backend.GetVolumeList} (runtime)
1980     @ivar instances: a list of running instances (runtime)
1981     @ivar pinst: list of configured primary instances (config)
1982     @ivar sinst: list of configured secondary instances (config)
1983     @ivar sbp: dictionary of {primary-node: list of instances} for all
1984         instances for which this node is secondary (config)
1985     @ivar mfree: free memory, as reported by hypervisor (runtime)
1986     @ivar dfree: free disk, as reported by the node (runtime)
1987     @ivar offline: the offline status (config)
1988     @type rpc_fail: boolean
1989     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1990         not whether the individual keys were correct) (runtime)
1991     @type lvm_fail: boolean
1992     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1993     @type hyp_fail: boolean
1994     @ivar hyp_fail: whether the RPC call didn't return the instance list
1995     @type ghost: boolean
1996     @ivar ghost: whether this is a known node or not (config)
1997     @type os_fail: boolean
1998     @ivar os_fail: whether the RPC call didn't return valid OS data
1999     @type oslist: list
2000     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2001     @type vm_capable: boolean
2002     @ivar vm_capable: whether the node can host instances
2003
2004     """
2005     def __init__(self, offline=False, name=None, vm_capable=True):
2006       self.name = name
2007       self.volumes = {}
2008       self.instances = []
2009       self.pinst = []
2010       self.sinst = []
2011       self.sbp = {}
2012       self.mfree = 0
2013       self.dfree = 0
2014       self.offline = offline
2015       self.vm_capable = vm_capable
2016       self.rpc_fail = False
2017       self.lvm_fail = False
2018       self.hyp_fail = False
2019       self.ghost = False
2020       self.os_fail = False
2021       self.oslist = {}
2022
2023   def ExpandNames(self):
2024     # This raises errors.OpPrereqError on its own:
2025     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2026
2027     # Get instances in node group; this is unsafe and needs verification later
2028     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
2029
2030     self.needed_locks = {
2031       locking.LEVEL_INSTANCE: inst_names,
2032       locking.LEVEL_NODEGROUP: [self.group_uuid],
2033       locking.LEVEL_NODE: [],
2034       }
2035
2036     self.share_locks = _ShareAll()
2037
2038   def DeclareLocks(self, level):
2039     if level == locking.LEVEL_NODE:
2040       # Get members of node group; this is unsafe and needs verification later
2041       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2042
2043       all_inst_info = self.cfg.GetAllInstancesInfo()
2044
2045       # In Exec(), we warn about mirrored instances that have primary and
2046       # secondary living in separate node groups. To fully verify that
2047       # volumes for these instances are healthy, we will need to do an
2048       # extra call to their secondaries. We ensure here those nodes will
2049       # be locked.
2050       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2051         # Important: access only the instances whose lock is owned
2052         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2053           nodes.update(all_inst_info[inst].secondary_nodes)
2054
2055       self.needed_locks[locking.LEVEL_NODE] = nodes
2056
2057   def CheckPrereq(self):
2058     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2059     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2060
2061     group_nodes = set(self.group_info.members)
2062     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2063
2064     unlocked_nodes = \
2065         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2066
2067     unlocked_instances = \
2068         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2069
2070     if unlocked_nodes:
2071       raise errors.OpPrereqError("Missing lock for nodes: %s" %
2072                                  utils.CommaJoin(unlocked_nodes))
2073
2074     if unlocked_instances:
2075       raise errors.OpPrereqError("Missing lock for instances: %s" %
2076                                  utils.CommaJoin(unlocked_instances))
2077
2078     self.all_node_info = self.cfg.GetAllNodesInfo()
2079     self.all_inst_info = self.cfg.GetAllInstancesInfo()
2080
2081     self.my_node_names = utils.NiceSort(group_nodes)
2082     self.my_inst_names = utils.NiceSort(group_instances)
2083
2084     self.my_node_info = dict((name, self.all_node_info[name])
2085                              for name in self.my_node_names)
2086
2087     self.my_inst_info = dict((name, self.all_inst_info[name])
2088                              for name in self.my_inst_names)
2089
2090     # We detect here the nodes that will need the extra RPC calls for verifying
2091     # split LV volumes; they should be locked.
2092     extra_lv_nodes = set()
2093
2094     for inst in self.my_inst_info.values():
2095       if inst.disk_template in constants.DTS_INT_MIRROR:
2096         group = self.my_node_info[inst.primary_node].group
2097         for nname in inst.secondary_nodes:
2098           if self.all_node_info[nname].group != group:
2099             extra_lv_nodes.add(nname)
2100
2101     unlocked_lv_nodes = \
2102         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2103
2104     if unlocked_lv_nodes:
2105       raise errors.OpPrereqError("these nodes could be locked: %s" %
2106                                  utils.CommaJoin(unlocked_lv_nodes))
2107     self.extra_lv_nodes = list(extra_lv_nodes)
2108
2109   def _VerifyNode(self, ninfo, nresult):
2110     """Perform some basic validation on data returned from a node.
2111
2112       - check the result data structure is well formed and has all the
2113         mandatory fields
2114       - check ganeti version
2115
2116     @type ninfo: L{objects.Node}
2117     @param ninfo: the node to check
2118     @param nresult: the results from the node
2119     @rtype: boolean
2120     @return: whether overall this call was successful (and we can expect
2121          reasonable values in the respose)
2122
2123     """
2124     node = ninfo.name
2125     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2126
2127     # main result, nresult should be a non-empty dict
2128     test = not nresult or not isinstance(nresult, dict)
2129     _ErrorIf(test, constants.CV_ENODERPC, node,
2130                   "unable to verify node: no data returned")
2131     if test:
2132       return False
2133
2134     # compares ganeti version
2135     local_version = constants.PROTOCOL_VERSION
2136     remote_version = nresult.get("version", None)
2137     test = not (remote_version and
2138                 isinstance(remote_version, (list, tuple)) and
2139                 len(remote_version) == 2)
2140     _ErrorIf(test, constants.CV_ENODERPC, node,
2141              "connection to node returned invalid data")
2142     if test:
2143       return False
2144
2145     test = local_version != remote_version[0]
2146     _ErrorIf(test, constants.CV_ENODEVERSION, node,
2147              "incompatible protocol versions: master %s,"
2148              " node %s", local_version, remote_version[0])
2149     if test:
2150       return False
2151
2152     # node seems compatible, we can actually try to look into its results
2153
2154     # full package version
2155     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2156                   constants.CV_ENODEVERSION, node,
2157                   "software version mismatch: master %s, node %s",
2158                   constants.RELEASE_VERSION, remote_version[1],
2159                   code=self.ETYPE_WARNING)
2160
2161     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2162     if ninfo.vm_capable and isinstance(hyp_result, dict):
2163       for hv_name, hv_result in hyp_result.iteritems():
2164         test = hv_result is not None
2165         _ErrorIf(test, constants.CV_ENODEHV, node,
2166                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2167
2168     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2169     if ninfo.vm_capable and isinstance(hvp_result, list):
2170       for item, hv_name, hv_result in hvp_result:
2171         _ErrorIf(True, constants.CV_ENODEHV, node,
2172                  "hypervisor %s parameter verify failure (source %s): %s",
2173                  hv_name, item, hv_result)
2174
2175     test = nresult.get(constants.NV_NODESETUP,
2176                        ["Missing NODESETUP results"])
2177     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2178              "; ".join(test))
2179
2180     return True
2181
2182   def _VerifyNodeTime(self, ninfo, nresult,
2183                       nvinfo_starttime, nvinfo_endtime):
2184     """Check the node time.
2185
2186     @type ninfo: L{objects.Node}
2187     @param ninfo: the node to check
2188     @param nresult: the remote results for the node
2189     @param nvinfo_starttime: the start time of the RPC call
2190     @param nvinfo_endtime: the end time of the RPC call
2191
2192     """
2193     node = ninfo.name
2194     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2195
2196     ntime = nresult.get(constants.NV_TIME, None)
2197     try:
2198       ntime_merged = utils.MergeTime(ntime)
2199     except (ValueError, TypeError):
2200       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2201       return
2202
2203     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2204       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2205     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2206       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2207     else:
2208       ntime_diff = None
2209
2210     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2211              "Node time diverges by at least %s from master node time",
2212              ntime_diff)
2213
2214   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2215     """Check the node LVM results.
2216
2217     @type ninfo: L{objects.Node}
2218     @param ninfo: the node to check
2219     @param nresult: the remote results for the node
2220     @param vg_name: the configured VG name
2221
2222     """
2223     if vg_name is None:
2224       return
2225
2226     node = ninfo.name
2227     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2228
2229     # checks vg existence and size > 20G
2230     vglist = nresult.get(constants.NV_VGLIST, None)
2231     test = not vglist
2232     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2233     if not test:
2234       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2235                                             constants.MIN_VG_SIZE)
2236       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2237
2238     # check pv names
2239     pvlist = nresult.get(constants.NV_PVLIST, None)
2240     test = pvlist is None
2241     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2242     if not test:
2243       # check that ':' is not present in PV names, since it's a
2244       # special character for lvcreate (denotes the range of PEs to
2245       # use on the PV)
2246       for _, pvname, owner_vg in pvlist:
2247         test = ":" in pvname
2248         _ErrorIf(test, constants.CV_ENODELVM, node,
2249                  "Invalid character ':' in PV '%s' of VG '%s'",
2250                  pvname, owner_vg)
2251
2252   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2253     """Check the node bridges.
2254
2255     @type ninfo: L{objects.Node}
2256     @param ninfo: the node to check
2257     @param nresult: the remote results for the node
2258     @param bridges: the expected list of bridges
2259
2260     """
2261     if not bridges:
2262       return
2263
2264     node = ninfo.name
2265     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2266
2267     missing = nresult.get(constants.NV_BRIDGES, None)
2268     test = not isinstance(missing, list)
2269     _ErrorIf(test, constants.CV_ENODENET, node,
2270              "did not return valid bridge information")
2271     if not test:
2272       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2273                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2274
2275   def _VerifyNodeUserScripts(self, ninfo, nresult):
2276     """Check the results of user scripts presence and executability on the node
2277
2278     @type ninfo: L{objects.Node}
2279     @param ninfo: the node to check
2280     @param nresult: the remote results for the node
2281
2282     """
2283     node = ninfo.name
2284
2285     test = not constants.NV_USERSCRIPTS in nresult
2286     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2287                   "did not return user scripts information")
2288
2289     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2290     if not test:
2291       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2292                     "user scripts not present or not executable: %s" %
2293                     utils.CommaJoin(sorted(broken_scripts)))
2294
2295   def _VerifyNodeNetwork(self, ninfo, nresult):
2296     """Check the node network connectivity results.
2297
2298     @type ninfo: L{objects.Node}
2299     @param ninfo: the node to check
2300     @param nresult: the remote results for the node
2301
2302     """
2303     node = ninfo.name
2304     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2305
2306     test = constants.NV_NODELIST not in nresult
2307     _ErrorIf(test, constants.CV_ENODESSH, node,
2308              "node hasn't returned node ssh connectivity data")
2309     if not test:
2310       if nresult[constants.NV_NODELIST]:
2311         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2312           _ErrorIf(True, constants.CV_ENODESSH, node,
2313                    "ssh communication with node '%s': %s", a_node, a_msg)
2314
2315     test = constants.NV_NODENETTEST not in nresult
2316     _ErrorIf(test, constants.CV_ENODENET, node,
2317              "node hasn't returned node tcp connectivity data")
2318     if not test:
2319       if nresult[constants.NV_NODENETTEST]:
2320         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2321         for anode in nlist:
2322           _ErrorIf(True, constants.CV_ENODENET, node,
2323                    "tcp communication with node '%s': %s",
2324                    anode, nresult[constants.NV_NODENETTEST][anode])
2325
2326     test = constants.NV_MASTERIP not in nresult
2327     _ErrorIf(test, constants.CV_ENODENET, node,
2328              "node hasn't returned node master IP reachability data")
2329     if not test:
2330       if not nresult[constants.NV_MASTERIP]:
2331         if node == self.master_node:
2332           msg = "the master node cannot reach the master IP (not configured?)"
2333         else:
2334           msg = "cannot reach the master IP"
2335         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2336
2337   def _VerifyInstance(self, instance, instanceconfig, node_image,
2338                       diskstatus):
2339     """Verify an instance.
2340
2341     This function checks to see if the required block devices are
2342     available on the instance's node.
2343
2344     """
2345     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2346     node_current = instanceconfig.primary_node
2347
2348     node_vol_should = {}
2349     instanceconfig.MapLVsByNode(node_vol_should)
2350
2351     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2352     err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2353     _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2354
2355     for node in node_vol_should:
2356       n_img = node_image[node]
2357       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2358         # ignore missing volumes on offline or broken nodes
2359         continue
2360       for volume in node_vol_should[node]:
2361         test = volume not in n_img.volumes
2362         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2363                  "volume %s missing on node %s", volume, node)
2364
2365     if instanceconfig.admin_state == constants.ADMINST_UP:
2366       pri_img = node_image[node_current]
2367       test = instance not in pri_img.instances and not pri_img.offline
2368       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2369                "instance not running on its primary node %s",
2370                node_current)
2371
2372     diskdata = [(nname, success, status, idx)
2373                 for (nname, disks) in diskstatus.items()
2374                 for idx, (success, status) in enumerate(disks)]
2375
2376     for nname, success, bdev_status, idx in diskdata:
2377       # the 'ghost node' construction in Exec() ensures that we have a
2378       # node here
2379       snode = node_image[nname]
2380       bad_snode = snode.ghost or snode.offline
2381       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2382                not success and not bad_snode,
2383                constants.CV_EINSTANCEFAULTYDISK, instance,
2384                "couldn't retrieve status for disk/%s on %s: %s",
2385                idx, nname, bdev_status)
2386       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2387                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2388                constants.CV_EINSTANCEFAULTYDISK, instance,
2389                "disk/%s on %s is faulty", idx, nname)
2390
2391   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2392     """Verify if there are any unknown volumes in the cluster.
2393
2394     The .os, .swap and backup volumes are ignored. All other volumes are
2395     reported as unknown.
2396
2397     @type reserved: L{ganeti.utils.FieldSet}
2398     @param reserved: a FieldSet of reserved volume names
2399
2400     """
2401     for node, n_img in node_image.items():
2402       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2403         # skip non-healthy nodes
2404         continue
2405       for volume in n_img.volumes:
2406         test = ((node not in node_vol_should or
2407                 volume not in node_vol_should[node]) and
2408                 not reserved.Matches(volume))
2409         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2410                       "volume %s is unknown", volume)
2411
2412   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2413     """Verify N+1 Memory Resilience.
2414
2415     Check that if one single node dies we can still start all the
2416     instances it was primary for.
2417
2418     """
2419     cluster_info = self.cfg.GetClusterInfo()
2420     for node, n_img in node_image.items():
2421       # This code checks that every node which is now listed as
2422       # secondary has enough memory to host all instances it is
2423       # supposed to should a single other node in the cluster fail.
2424       # FIXME: not ready for failover to an arbitrary node
2425       # FIXME: does not support file-backed instances
2426       # WARNING: we currently take into account down instances as well
2427       # as up ones, considering that even if they're down someone
2428       # might want to start them even in the event of a node failure.
2429       if n_img.offline:
2430         # we're skipping offline nodes from the N+1 warning, since
2431         # most likely we don't have good memory infromation from them;
2432         # we already list instances living on such nodes, and that's
2433         # enough warning
2434         continue
2435       #TODO(dynmem): also consider ballooning out other instances
2436       for prinode, instances in n_img.sbp.items():
2437         needed_mem = 0
2438         for instance in instances:
2439           bep = cluster_info.FillBE(instance_cfg[instance])
2440           if bep[constants.BE_AUTO_BALANCE]:
2441             needed_mem += bep[constants.BE_MINMEM]
2442         test = n_img.mfree < needed_mem
2443         self._ErrorIf(test, constants.CV_ENODEN1, node,
2444                       "not enough memory to accomodate instance failovers"
2445                       " should node %s fail (%dMiB needed, %dMiB available)",
2446                       prinode, needed_mem, n_img.mfree)
2447
2448   @classmethod
2449   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2450                    (files_all, files_opt, files_mc, files_vm)):
2451     """Verifies file checksums collected from all nodes.
2452
2453     @param errorif: Callback for reporting errors
2454     @param nodeinfo: List of L{objects.Node} objects
2455     @param master_node: Name of master node
2456     @param all_nvinfo: RPC results
2457
2458     """
2459     # Define functions determining which nodes to consider for a file
2460     files2nodefn = [
2461       (files_all, None),
2462       (files_mc, lambda node: (node.master_candidate or
2463                                node.name == master_node)),
2464       (files_vm, lambda node: node.vm_capable),
2465       ]
2466
2467     # Build mapping from filename to list of nodes which should have the file
2468     nodefiles = {}
2469     for (files, fn) in files2nodefn:
2470       if fn is None:
2471         filenodes = nodeinfo
2472       else:
2473         filenodes = filter(fn, nodeinfo)
2474       nodefiles.update((filename,
2475                         frozenset(map(operator.attrgetter("name"), filenodes)))
2476                        for filename in files)
2477
2478     assert set(nodefiles) == (files_all | files_mc | files_vm)
2479
2480     fileinfo = dict((filename, {}) for filename in nodefiles)
2481     ignore_nodes = set()
2482
2483     for node in nodeinfo:
2484       if node.offline:
2485         ignore_nodes.add(node.name)
2486         continue
2487
2488       nresult = all_nvinfo[node.name]
2489
2490       if nresult.fail_msg or not nresult.payload:
2491         node_files = None
2492       else:
2493         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2494
2495       test = not (node_files and isinstance(node_files, dict))
2496       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2497               "Node did not return file checksum data")
2498       if test:
2499         ignore_nodes.add(node.name)
2500         continue
2501
2502       # Build per-checksum mapping from filename to nodes having it
2503       for (filename, checksum) in node_files.items():
2504         assert filename in nodefiles
2505         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2506
2507     for (filename, checksums) in fileinfo.items():
2508       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2509
2510       # Nodes having the file
2511       with_file = frozenset(node_name
2512                             for nodes in fileinfo[filename].values()
2513                             for node_name in nodes) - ignore_nodes
2514
2515       expected_nodes = nodefiles[filename] - ignore_nodes
2516
2517       # Nodes missing file
2518       missing_file = expected_nodes - with_file
2519
2520       if filename in files_opt:
2521         # All or no nodes
2522         errorif(missing_file and missing_file != expected_nodes,
2523                 constants.CV_ECLUSTERFILECHECK, None,
2524                 "File %s is optional, but it must exist on all or no"
2525                 " nodes (not found on %s)",
2526                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2527       else:
2528         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2529                 "File %s is missing from node(s) %s", filename,
2530                 utils.CommaJoin(utils.NiceSort(missing_file)))
2531
2532         # Warn if a node has a file it shouldn't
2533         unexpected = with_file - expected_nodes
2534         errorif(unexpected,
2535                 constants.CV_ECLUSTERFILECHECK, None,
2536                 "File %s should not exist on node(s) %s",
2537                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2538
2539       # See if there are multiple versions of the file
2540       test = len(checksums) > 1
2541       if test:
2542         variants = ["variant %s on %s" %
2543                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2544                     for (idx, (checksum, nodes)) in
2545                       enumerate(sorted(checksums.items()))]
2546       else:
2547         variants = []
2548
2549       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2550               "File %s found with %s different checksums (%s)",
2551               filename, len(checksums), "; ".join(variants))
2552
2553   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2554                       drbd_map):
2555     """Verifies and the node DRBD status.
2556
2557     @type ninfo: L{objects.Node}
2558     @param ninfo: the node to check
2559     @param nresult: the remote results for the node
2560     @param instanceinfo: the dict of instances
2561     @param drbd_helper: the configured DRBD usermode helper
2562     @param drbd_map: the DRBD map as returned by
2563         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2564
2565     """
2566     node = ninfo.name
2567     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2568
2569     if drbd_helper:
2570       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2571       test = (helper_result == None)
2572       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2573                "no drbd usermode helper returned")
2574       if helper_result:
2575         status, payload = helper_result
2576         test = not status
2577         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2578                  "drbd usermode helper check unsuccessful: %s", payload)
2579         test = status and (payload != drbd_helper)
2580         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2581                  "wrong drbd usermode helper: %s", payload)
2582
2583     # compute the DRBD minors
2584     node_drbd = {}
2585     for minor, instance in drbd_map[node].items():
2586       test = instance not in instanceinfo
2587       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2588                "ghost instance '%s' in temporary DRBD map", instance)
2589         # ghost instance should not be running, but otherwise we
2590         # don't give double warnings (both ghost instance and
2591         # unallocated minor in use)
2592       if test:
2593         node_drbd[minor] = (instance, False)
2594       else:
2595         instance = instanceinfo[instance]
2596         node_drbd[minor] = (instance.name,
2597                             instance.admin_state == constants.ADMINST_UP)
2598
2599     # and now check them
2600     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2601     test = not isinstance(used_minors, (tuple, list))
2602     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2603              "cannot parse drbd status file: %s", str(used_minors))
2604     if test:
2605       # we cannot check drbd status
2606       return
2607
2608     for minor, (iname, must_exist) in node_drbd.items():
2609       test = minor not in used_minors and must_exist
2610       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2611                "drbd minor %d of instance %s is not active", minor, iname)
2612     for minor in used_minors:
2613       test = minor not in node_drbd
2614       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2615                "unallocated drbd minor %d is in use", minor)
2616
2617   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2618     """Builds the node OS structures.
2619
2620     @type ninfo: L{objects.Node}
2621     @param ninfo: the node to check
2622     @param nresult: the remote results for the node
2623     @param nimg: the node image object
2624
2625     """
2626     node = ninfo.name
2627     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2628
2629     remote_os = nresult.get(constants.NV_OSLIST, None)
2630     test = (not isinstance(remote_os, list) or
2631             not compat.all(isinstance(v, list) and len(v) == 7
2632                            for v in remote_os))
2633
2634     _ErrorIf(test, constants.CV_ENODEOS, node,
2635              "node hasn't returned valid OS data")
2636
2637     nimg.os_fail = test
2638
2639     if test:
2640       return
2641
2642     os_dict = {}
2643
2644     for (name, os_path, status, diagnose,
2645          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2646
2647       if name not in os_dict:
2648         os_dict[name] = []
2649
2650       # parameters is a list of lists instead of list of tuples due to
2651       # JSON lacking a real tuple type, fix it:
2652       parameters = [tuple(v) for v in parameters]
2653       os_dict[name].append((os_path, status, diagnose,
2654                             set(variants), set(parameters), set(api_ver)))
2655
2656     nimg.oslist = os_dict
2657
2658   def _VerifyNodeOS(self, ninfo, nimg, base):
2659     """Verifies the node OS list.
2660
2661     @type ninfo: L{objects.Node}
2662     @param ninfo: the node to check
2663     @param nimg: the node image object
2664     @param base: the 'template' node we match against (e.g. from the master)
2665
2666     """
2667     node = ninfo.name
2668     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2669
2670     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2671
2672     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2673     for os_name, os_data in nimg.oslist.items():
2674       assert os_data, "Empty OS status for OS %s?!" % os_name
2675       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2676       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2677                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2678       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2679                "OS '%s' has multiple entries (first one shadows the rest): %s",
2680                os_name, utils.CommaJoin([v[0] for v in os_data]))
2681       # comparisons with the 'base' image
2682       test = os_name not in base.oslist
2683       _ErrorIf(test, constants.CV_ENODEOS, node,
2684                "Extra OS %s not present on reference node (%s)",
2685                os_name, base.name)
2686       if test:
2687         continue
2688       assert base.oslist[os_name], "Base node has empty OS status?"
2689       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2690       if not b_status:
2691         # base OS is invalid, skipping
2692         continue
2693       for kind, a, b in [("API version", f_api, b_api),
2694                          ("variants list", f_var, b_var),
2695                          ("parameters", beautify_params(f_param),
2696                           beautify_params(b_param))]:
2697         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2698                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2699                  kind, os_name, base.name,
2700                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2701
2702     # check any missing OSes
2703     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2704     _ErrorIf(missing, constants.CV_ENODEOS, node,
2705              "OSes present on reference node %s but missing on this node: %s",
2706              base.name, utils.CommaJoin(missing))
2707
2708   def _VerifyOob(self, ninfo, nresult):
2709     """Verifies out of band functionality of a node.
2710
2711     @type ninfo: L{objects.Node}
2712     @param ninfo: the node to check
2713     @param nresult: the remote results for the node
2714
2715     """
2716     node = ninfo.name
2717     # We just have to verify the paths on master and/or master candidates
2718     # as the oob helper is invoked on the master
2719     if ((ninfo.master_candidate or ninfo.master_capable) and
2720         constants.NV_OOB_PATHS in nresult):
2721       for path_result in nresult[constants.NV_OOB_PATHS]:
2722         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2723
2724   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2725     """Verifies and updates the node volume data.
2726
2727     This function will update a L{NodeImage}'s internal structures
2728     with data from the remote call.
2729
2730     @type ninfo: L{objects.Node}
2731     @param ninfo: the node to check
2732     @param nresult: the remote results for the node
2733     @param nimg: the node image object
2734     @param vg_name: the configured VG name
2735
2736     """
2737     node = ninfo.name
2738     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2739
2740     nimg.lvm_fail = True
2741     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2742     if vg_name is None:
2743       pass
2744     elif isinstance(lvdata, basestring):
2745       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2746                utils.SafeEncode(lvdata))
2747     elif not isinstance(lvdata, dict):
2748       _ErrorIf(True, constants.CV_ENODELVM, node,
2749                "rpc call to node failed (lvlist)")
2750     else:
2751       nimg.volumes = lvdata
2752       nimg.lvm_fail = False
2753
2754   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2755     """Verifies and updates the node instance list.
2756
2757     If the listing was successful, then updates this node's instance
2758     list. Otherwise, it marks the RPC call as failed for the instance
2759     list key.
2760
2761     @type ninfo: L{objects.Node}
2762     @param ninfo: the node to check
2763     @param nresult: the remote results for the node
2764     @param nimg: the node image object
2765
2766     """
2767     idata = nresult.get(constants.NV_INSTANCELIST, None)
2768     test = not isinstance(idata, list)
2769     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2770                   "rpc call to node failed (instancelist): %s",
2771                   utils.SafeEncode(str(idata)))
2772     if test:
2773       nimg.hyp_fail = True
2774     else:
2775       nimg.instances = idata
2776
2777   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2778     """Verifies and computes a node information map
2779
2780     @type ninfo: L{objects.Node}
2781     @param ninfo: the node to check
2782     @param nresult: the remote results for the node
2783     @param nimg: the node image object
2784     @param vg_name: the configured VG name
2785
2786     """
2787     node = ninfo.name
2788     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2789
2790     # try to read free memory (from the hypervisor)
2791     hv_info = nresult.get(constants.NV_HVINFO, None)
2792     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2793     _ErrorIf(test, constants.CV_ENODEHV, node,
2794              "rpc call to node failed (hvinfo)")
2795     if not test:
2796       try:
2797         nimg.mfree = int(hv_info["memory_free"])
2798       except (ValueError, TypeError):
2799         _ErrorIf(True, constants.CV_ENODERPC, node,
2800                  "node returned invalid nodeinfo, check hypervisor")
2801
2802     # FIXME: devise a free space model for file based instances as well
2803     if vg_name is not None:
2804       test = (constants.NV_VGLIST not in nresult or
2805               vg_name not in nresult[constants.NV_VGLIST])
2806       _ErrorIf(test, constants.CV_ENODELVM, node,
2807                "node didn't return data for the volume group '%s'"
2808                " - it is either missing or broken", vg_name)
2809       if not test:
2810         try:
2811           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2812         except (ValueError, TypeError):
2813           _ErrorIf(True, constants.CV_ENODERPC, node,
2814                    "node returned invalid LVM info, check LVM status")
2815
2816   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2817     """Gets per-disk status information for all instances.
2818
2819     @type nodelist: list of strings
2820     @param nodelist: Node names
2821     @type node_image: dict of (name, L{objects.Node})
2822     @param node_image: Node objects
2823     @type instanceinfo: dict of (name, L{objects.Instance})
2824     @param instanceinfo: Instance objects
2825     @rtype: {instance: {node: [(succes, payload)]}}
2826     @return: a dictionary of per-instance dictionaries with nodes as
2827         keys and disk information as values; the disk information is a
2828         list of tuples (success, payload)
2829
2830     """
2831     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2832
2833     node_disks = {}
2834     node_disks_devonly = {}
2835     diskless_instances = set()
2836     diskless = constants.DT_DISKLESS
2837
2838     for nname in nodelist:
2839       node_instances = list(itertools.chain(node_image[nname].pinst,
2840                                             node_image[nname].sinst))
2841       diskless_instances.update(inst for inst in node_instances
2842                                 if instanceinfo[inst].disk_template == diskless)
2843       disks = [(inst, disk)
2844                for inst in node_instances
2845                for disk in instanceinfo[inst].disks]
2846
2847       if not disks:
2848         # No need to collect data
2849         continue
2850
2851       node_disks[nname] = disks
2852
2853       # Creating copies as SetDiskID below will modify the objects and that can
2854       # lead to incorrect data returned from nodes
2855       devonly = [dev.Copy() for (_, dev) in disks]
2856
2857       for dev in devonly:
2858         self.cfg.SetDiskID(dev, nname)
2859
2860       node_disks_devonly[nname] = devonly
2861
2862     assert len(node_disks) == len(node_disks_devonly)
2863
2864     # Collect data from all nodes with disks
2865     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2866                                                           node_disks_devonly)
2867
2868     assert len(result) == len(node_disks)
2869
2870     instdisk = {}
2871
2872     for (nname, nres) in result.items():
2873       disks = node_disks[nname]
2874
2875       if nres.offline:
2876         # No data from this node
2877         data = len(disks) * [(False, "node offline")]
2878       else:
2879         msg = nres.fail_msg
2880         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2881                  "while getting disk information: %s", msg)
2882         if msg:
2883           # No data from this node
2884           data = len(disks) * [(False, msg)]
2885         else:
2886           data = []
2887           for idx, i in enumerate(nres.payload):
2888             if isinstance(i, (tuple, list)) and len(i) == 2:
2889               data.append(i)
2890             else:
2891               logging.warning("Invalid result from node %s, entry %d: %s",
2892                               nname, idx, i)
2893               data.append((False, "Invalid result from the remote node"))
2894
2895       for ((inst, _), status) in zip(disks, data):
2896         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2897
2898     # Add empty entries for diskless instances.
2899     for inst in diskless_instances:
2900       assert inst not in instdisk
2901       instdisk[inst] = {}
2902
2903     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2904                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2905                       compat.all(isinstance(s, (tuple, list)) and
2906                                  len(s) == 2 for s in statuses)
2907                       for inst, nnames in instdisk.items()
2908                       for nname, statuses in nnames.items())
2909     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2910
2911     return instdisk
2912
2913   @staticmethod
2914   def _SshNodeSelector(group_uuid, all_nodes):
2915     """Create endless iterators for all potential SSH check hosts.
2916
2917     """
2918     nodes = [node for node in all_nodes
2919              if (node.group != group_uuid and
2920                  not node.offline)]
2921     keyfunc = operator.attrgetter("group")
2922
2923     return map(itertools.cycle,
2924                [sorted(map(operator.attrgetter("name"), names))
2925                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2926                                                   keyfunc)])
2927
2928   @classmethod
2929   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2930     """Choose which nodes should talk to which other nodes.
2931
2932     We will make nodes contact all nodes in their group, and one node from
2933     every other group.
2934
2935     @warning: This algorithm has a known issue if one node group is much
2936       smaller than others (e.g. just one node). In such a case all other
2937       nodes will talk to the single node.
2938
2939     """
2940     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2941     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2942
2943     return (online_nodes,
2944             dict((name, sorted([i.next() for i in sel]))
2945                  for name in online_nodes))
2946
2947   def BuildHooksEnv(self):
2948     """Build hooks env.
2949
2950     Cluster-Verify hooks just ran in the post phase and their failure makes
2951     the output be logged in the verify output and the verification to fail.
2952
2953     """
2954     env = {
2955       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2956       }
2957
2958     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2959                for node in self.my_node_info.values())
2960
2961     return env
2962
2963   def BuildHooksNodes(self):
2964     """Build hooks nodes.
2965
2966     """
2967     return ([], self.my_node_names)
2968
2969   def Exec(self, feedback_fn):
2970     """Verify integrity of the node group, performing various test on nodes.
2971
2972     """
2973     # This method has too many local variables. pylint: disable=R0914
2974     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2975
2976     if not self.my_node_names:
2977       # empty node group
2978       feedback_fn("* Empty node group, skipping verification")
2979       return True
2980
2981     self.bad = False
2982     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2983     verbose = self.op.verbose
2984     self._feedback_fn = feedback_fn
2985
2986     vg_name = self.cfg.GetVGName()
2987     drbd_helper = self.cfg.GetDRBDHelper()
2988     cluster = self.cfg.GetClusterInfo()
2989     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2990     hypervisors = cluster.enabled_hypervisors
2991     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2992
2993     i_non_redundant = [] # Non redundant instances
2994     i_non_a_balanced = [] # Non auto-balanced instances
2995     i_offline = 0 # Count of offline instances
2996     n_offline = 0 # Count of offline nodes
2997     n_drained = 0 # Count of nodes being drained
2998     node_vol_should = {}
2999
3000     # FIXME: verify OS list
3001
3002     # File verification
3003     filemap = _ComputeAncillaryFiles(cluster, False)
3004
3005     # do local checksums
3006     master_node = self.master_node = self.cfg.GetMasterNode()
3007     master_ip = self.cfg.GetMasterIP()
3008
3009     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
3010
3011     user_scripts = []
3012     if self.cfg.GetUseExternalMipScript():
3013       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
3014
3015     node_verify_param = {
3016       constants.NV_FILELIST:
3017         utils.UniqueSequence(filename
3018                              for files in filemap
3019                              for filename in files),
3020       constants.NV_NODELIST:
3021         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
3022                                   self.all_node_info.values()),
3023       constants.NV_HYPERVISOR: hypervisors,
3024       constants.NV_HVPARAMS:
3025         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
3026       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
3027                                  for node in node_data_list
3028                                  if not node.offline],
3029       constants.NV_INSTANCELIST: hypervisors,
3030       constants.NV_VERSION: None,
3031       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
3032       constants.NV_NODESETUP: None,
3033       constants.NV_TIME: None,
3034       constants.NV_MASTERIP: (master_node, master_ip),
3035       constants.NV_OSLIST: None,
3036       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
3037       constants.NV_USERSCRIPTS: user_scripts,
3038       }
3039
3040     if vg_name is not None:
3041       node_verify_param[constants.NV_VGLIST] = None
3042       node_verify_param[constants.NV_LVLIST] = vg_name
3043       node_verify_param[constants.NV_PVLIST] = [vg_name]
3044       node_verify_param[constants.NV_DRBDLIST] = None
3045
3046     if drbd_helper:
3047       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3048
3049     # bridge checks
3050     # FIXME: this needs to be changed per node-group, not cluster-wide
3051     bridges = set()
3052     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3053     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3054       bridges.add(default_nicpp[constants.NIC_LINK])
3055     for instance in self.my_inst_info.values():
3056       for nic in instance.nics:
3057         full_nic = cluster.SimpleFillNIC(nic.nicparams)
3058         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3059           bridges.add(full_nic[constants.NIC_LINK])
3060
3061     if bridges:
3062       node_verify_param[constants.NV_BRIDGES] = list(bridges)
3063
3064     # Build our expected cluster state
3065     node_image = dict((node.name, self.NodeImage(offline=node.offline,
3066                                                  name=node.name,
3067                                                  vm_capable=node.vm_capable))
3068                       for node in node_data_list)
3069
3070     # Gather OOB paths
3071     oob_paths = []
3072     for node in self.all_node_info.values():
3073       path = _SupportsOob(self.cfg, node)
3074       if path and path not in oob_paths:
3075         oob_paths.append(path)
3076
3077     if oob_paths:
3078       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3079
3080     for instance in self.my_inst_names:
3081       inst_config = self.my_inst_info[instance]
3082
3083       for nname in inst_config.all_nodes:
3084         if nname not in node_image:
3085           gnode = self.NodeImage(name=nname)
3086           gnode.ghost = (nname not in self.all_node_info)
3087           node_image[nname] = gnode
3088
3089       inst_config.MapLVsByNode(node_vol_should)
3090
3091       pnode = inst_config.primary_node
3092       node_image[pnode].pinst.append(instance)
3093
3094       for snode in inst_config.secondary_nodes:
3095         nimg = node_image[snode]
3096         nimg.sinst.append(instance)
3097         if pnode not in nimg.sbp:
3098           nimg.sbp[pnode] = []
3099         nimg.sbp[pnode].append(instance)
3100
3101     # At this point, we have the in-memory data structures complete,
3102     # except for the runtime information, which we'll gather next
3103
3104     # Due to the way our RPC system works, exact response times cannot be
3105     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3106     # time before and after executing the request, we can at least have a time
3107     # window.
3108     nvinfo_starttime = time.time()
3109     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3110                                            node_verify_param,
3111                                            self.cfg.GetClusterName())
3112     nvinfo_endtime = time.time()
3113
3114     if self.extra_lv_nodes and vg_name is not None:
3115       extra_lv_nvinfo = \
3116           self.rpc.call_node_verify(self.extra_lv_nodes,
3117                                     {constants.NV_LVLIST: vg_name},
3118                                     self.cfg.GetClusterName())
3119     else:
3120       extra_lv_nvinfo = {}
3121
3122     all_drbd_map = self.cfg.ComputeDRBDMap()
3123
3124     feedback_fn("* Gathering disk information (%s nodes)" %
3125                 len(self.my_node_names))
3126     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3127                                      self.my_inst_info)
3128
3129     feedback_fn("* Verifying configuration file consistency")
3130
3131     # If not all nodes are being checked, we need to make sure the master node
3132     # and a non-checked vm_capable node are in the list.
3133     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3134     if absent_nodes:
3135       vf_nvinfo = all_nvinfo.copy()
3136       vf_node_info = list(self.my_node_info.values())
3137       additional_nodes = []
3138       if master_node not in self.my_node_info:
3139         additional_nodes.append(master_node)
3140         vf_node_info.append(self.all_node_info[master_node])
3141       # Add the first vm_capable node we find which is not included
3142       for node in absent_nodes:
3143         nodeinfo = self.all_node_info[node]
3144         if nodeinfo.vm_capable and not nodeinfo.offline:
3145           additional_nodes.append(node)
3146           vf_node_info.append(self.all_node_info[node])
3147           break
3148       key = constants.NV_FILELIST
3149       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3150                                                  {key: node_verify_param[key]},
3151                                                  self.cfg.GetClusterName()))
3152     else:
3153       vf_nvinfo = all_nvinfo
3154       vf_node_info = self.my_node_info.values()
3155
3156     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3157
3158     feedback_fn("* Verifying node status")
3159
3160     refos_img = None
3161
3162     for node_i in node_data_list:
3163       node = node_i.name
3164       nimg = node_image[node]
3165
3166       if node_i.offline:
3167         if verbose:
3168           feedback_fn("* Skipping offline node %s" % (node,))
3169         n_offline += 1
3170         continue
3171
3172       if node == master_node:
3173         ntype = "master"
3174       elif node_i.master_candidate:
3175         ntype = "master candidate"
3176       elif node_i.drained:
3177         ntype = "drained"
3178         n_drained += 1
3179       else:
3180         ntype = "regular"
3181       if verbose:
3182         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3183
3184       msg = all_nvinfo[node].fail_msg
3185       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3186                msg)
3187       if msg:
3188         nimg.rpc_fail = True
3189         continue
3190
3191       nresult = all_nvinfo[node].payload
3192
3193       nimg.call_ok = self._VerifyNode(node_i, nresult)
3194       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3195       self._VerifyNodeNetwork(node_i, nresult)
3196       self._VerifyNodeUserScripts(node_i, nresult)
3197       self._VerifyOob(node_i, nresult)
3198
3199       if nimg.vm_capable:
3200         self._VerifyNodeLVM(node_i, nresult, vg_name)
3201         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3202                              all_drbd_map)
3203
3204         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3205         self._UpdateNodeInstances(node_i, nresult, nimg)
3206         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3207         self._UpdateNodeOS(node_i, nresult, nimg)
3208
3209         if not nimg.os_fail:
3210           if refos_img is None:
3211             refos_img = nimg
3212           self._VerifyNodeOS(node_i, nimg, refos_img)
3213         self._VerifyNodeBridges(node_i, nresult, bridges)
3214
3215         # Check whether all running instancies are primary for the node. (This
3216         # can no longer be done from _VerifyInstance below, since some of the
3217         # wrong instances could be from other node groups.)
3218         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3219
3220         for inst in non_primary_inst:
3221           # FIXME: investigate best way to handle offline insts
3222           if inst.admin_state == constants.ADMINST_OFFLINE:
3223             if verbose:
3224               feedback_fn("* Skipping offline instance %s" % inst.name)
3225             i_offline += 1
3226             continue
3227           test = inst in self.all_inst_info
3228           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3229                    "instance should not run on node %s", node_i.name)
3230           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3231                    "node is running unknown instance %s", inst)
3232
3233     for node, result in extra_lv_nvinfo.items():
3234       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3235                               node_image[node], vg_name)
3236
3237     feedback_fn("* Verifying instance status")
3238     for instance in self.my_inst_names:
3239       if verbose:
3240         feedback_fn("* Verifying instance %s" % instance)
3241       inst_config = self.my_inst_info[instance]
3242       self._VerifyInstance(instance, inst_config, node_image,
3243                            instdisk[instance])
3244       inst_nodes_offline = []
3245
3246       pnode = inst_config.primary_node
3247       pnode_img = node_image[pnode]
3248       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3249                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3250                " primary node failed", instance)
3251
3252       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3253                pnode_img.offline,
3254                constants.CV_EINSTANCEBADNODE, instance,
3255                "instance is marked as running and lives on offline node %s",
3256                inst_config.primary_node)
3257
3258       # If the instance is non-redundant we cannot survive losing its primary
3259       # node, so we are not N+1 compliant. On the other hand we have no disk
3260       # templates with more than one secondary so that situation is not well
3261       # supported either.
3262       # FIXME: does not support file-backed instances
3263       if not inst_config.secondary_nodes:
3264         i_non_redundant.append(instance)
3265
3266       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3267                constants.CV_EINSTANCELAYOUT,
3268                instance, "instance has multiple secondary nodes: %s",
3269                utils.CommaJoin(inst_config.secondary_nodes),
3270                code=self.ETYPE_WARNING)
3271
3272       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3273         pnode = inst_config.primary_node
3274         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3275         instance_groups = {}
3276
3277         for node in instance_nodes:
3278           instance_groups.setdefault(self.all_node_info[node].group,
3279                                      []).append(node)
3280
3281         pretty_list = [
3282           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3283           # Sort so that we always list the primary node first.
3284           for group, nodes in sorted(instance_groups.items(),
3285                                      key=lambda (_, nodes): pnode in nodes,
3286                                      reverse=True)]
3287
3288         self._ErrorIf(len(instance_groups) > 1,
3289                       constants.CV_EINSTANCESPLITGROUPS,
3290                       instance, "instance has primary and secondary nodes in"
3291                       " different groups: %s", utils.CommaJoin(pretty_list),
3292                       code=self.ETYPE_WARNING)
3293
3294       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3295         i_non_a_balanced.append(instance)
3296
3297       for snode in inst_config.secondary_nodes:
3298         s_img = node_image[snode]
3299         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3300                  snode, "instance %s, connection to secondary node failed",
3301                  instance)
3302
3303         if s_img.offline:
3304           inst_nodes_offline.append(snode)
3305
3306       # warn that the instance lives on offline nodes
3307       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3308                "instance has offline secondary node(s) %s",
3309                utils.CommaJoin(inst_nodes_offline))
3310       # ... or ghost/non-vm_capable nodes
3311       for node in inst_config.all_nodes:
3312         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3313                  instance, "instance lives on ghost node %s", node)
3314         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3315                  instance, "instance lives on non-vm_capable node %s", node)
3316
3317     feedback_fn("* Verifying orphan volumes")
3318     reserved = utils.FieldSet(*cluster.reserved_lvs)
3319
3320     # We will get spurious "unknown volume" warnings if any node of this group
3321     # is secondary for an instance whose primary is in another group. To avoid
3322     # them, we find these instances and add their volumes to node_vol_should.
3323     for inst in self.all_inst_info.values():
3324       for secondary in inst.secondary_nodes:
3325         if (secondary in self.my_node_info
3326             and inst.name not in self.my_inst_info):
3327           inst.MapLVsByNode(node_vol_should)
3328           break
3329
3330     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3331
3332     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3333       feedback_fn("* Verifying N+1 Memory redundancy")
3334       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3335
3336     feedback_fn("* Other Notes")
3337     if i_non_redundant:
3338       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3339                   % len(i_non_redundant))
3340
3341     if i_non_a_balanced:
3342       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3343                   % len(i_non_a_balanced))
3344
3345     if i_offline:
3346       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3347
3348     if n_offline:
3349       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3350
3351     if n_drained:
3352       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3353
3354     return not self.bad
3355
3356   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3357     """Analyze the post-hooks' result
3358
3359     This method analyses the hook result, handles it, and sends some
3360     nicely-formatted feedback back to the user.
3361
3362     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3363         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3364     @param hooks_results: the results of the multi-node hooks rpc call
3365     @param feedback_fn: function used send feedback back to the caller
3366     @param lu_result: previous Exec result
3367     @return: the new Exec result, based on the previous result
3368         and hook results
3369
3370     """
3371     # We only really run POST phase hooks, only for non-empty groups,
3372     # and are only interested in their results
3373     if not self.my_node_names:
3374       # empty node group
3375       pass
3376     elif phase == constants.HOOKS_PHASE_POST:
3377       # Used to change hooks' output to proper indentation
3378       feedback_fn("* Hooks Results")
3379       assert hooks_results, "invalid result from hooks"
3380
3381       for node_name in hooks_results:
3382         res = hooks_results[node_name]
3383         msg = res.fail_msg
3384         test = msg and not res.offline
3385         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3386                       "Communication failure in hooks execution: %s", msg)
3387         if res.offline or msg:
3388           # No need to investigate payload if node is offline or gave
3389           # an error.
3390           continue
3391         for script, hkr, output in res.payload:
3392           test = hkr == constants.HKR_FAIL
3393           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3394                         "Script %s failed, output:", script)
3395           if test:
3396             output = self._HOOKS_INDENT_RE.sub("      ", output)
3397             feedback_fn("%s" % output)
3398             lu_result = False
3399
3400     return lu_result
3401
3402
3403 class LUClusterVerifyDisks(NoHooksLU):
3404   """Verifies the cluster disks status.
3405
3406   """
3407   REQ_BGL = False
3408
3409   def ExpandNames(self):
3410     self.share_locks = _ShareAll()
3411     self.needed_locks = {
3412       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3413       }
3414
3415   def Exec(self, feedback_fn):
3416     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3417
3418     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3419     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3420                            for group in group_names])
3421
3422
3423 class LUGroupVerifyDisks(NoHooksLU):
3424   """Verifies the status of all disks in a node group.
3425
3426   """
3427   REQ_BGL = False
3428
3429   def ExpandNames(self):
3430     # Raises errors.OpPrereqError on its own if group can't be found
3431     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3432
3433     self.share_locks = _ShareAll()
3434     self.needed_locks = {
3435       locking.LEVEL_INSTANCE: [],
3436       locking.LEVEL_NODEGROUP: [],
3437       locking.LEVEL_NODE: [],
3438       }
3439
3440   def DeclareLocks(self, level):
3441     if level == locking.LEVEL_INSTANCE:
3442       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3443
3444       # Lock instances optimistically, needs verification once node and group
3445       # locks have been acquired
3446       self.needed_locks[locking.LEVEL_INSTANCE] = \
3447         self.cfg.GetNodeGroupInstances(self.group_uuid)
3448
3449     elif level == locking.LEVEL_NODEGROUP:
3450       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3451
3452       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3453         set([self.group_uuid] +
3454             # Lock all groups used by instances optimistically; this requires
3455             # going via the node before it's locked, requiring verification
3456             # later on
3457             [group_uuid
3458              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3459              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3460
3461     elif level == locking.LEVEL_NODE:
3462       # This will only lock the nodes in the group to be verified which contain
3463       # actual instances
3464       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3465       self._LockInstancesNodes()
3466
3467       # Lock all nodes in group to be verified
3468       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3469       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3470       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3471
3472   def CheckPrereq(self):
3473     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3474     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3475     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3476
3477     assert self.group_uuid in owned_groups
3478
3479     # Check if locked instances are still correct
3480     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3481
3482     # Get instance information
3483     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3484
3485     # Check if node groups for locked instances are still correct
3486     for (instance_name, inst) in self.instances.items():
3487       assert owned_nodes.issuperset(inst.all_nodes), \
3488         "Instance %s's nodes changed while we kept the lock" % instance_name
3489
3490       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3491                                              owned_groups)
3492
3493       assert self.group_uuid in inst_groups, \
3494         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3495
3496   def Exec(self, feedback_fn):
3497     """Verify integrity of cluster disks.
3498
3499     @rtype: tuple of three items
3500     @return: a tuple of (dict of node-to-node_error, list of instances
3501         which need activate-disks, dict of instance: (node, volume) for
3502         missing volumes
3503
3504     """
3505     res_nodes = {}
3506     res_instances = set()
3507     res_missing = {}
3508
3509     nv_dict = _MapInstanceDisksToNodes([inst
3510             for inst in self.instances.values()
3511             if inst.admin_state == constants.ADMINST_UP])
3512
3513     if nv_dict:
3514       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3515                              set(self.cfg.GetVmCapableNodeList()))
3516
3517       node_lvs = self.rpc.call_lv_list(nodes, [])
3518
3519       for (node, node_res) in node_lvs.items():
3520         if node_res.offline:
3521           continue
3522
3523         msg = node_res.fail_msg
3524         if msg:
3525           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3526           res_nodes[node] = msg
3527           continue
3528
3529         for lv_name, (_, _, lv_online) in node_res.payload.items():
3530           inst = nv_dict.pop((node, lv_name), None)
3531           if not (lv_online or inst is None):
3532             res_instances.add(inst)
3533
3534       # any leftover items in nv_dict are missing LVs, let's arrange the data
3535       # better
3536       for key, inst in nv_dict.iteritems():
3537         res_missing.setdefault(inst, []).append(list(key))
3538
3539     return (res_nodes, list(res_instances), res_missing)
3540
3541
3542 class LUClusterRepairDiskSizes(NoHooksLU):
3543   """Verifies the cluster disks sizes.
3544
3545   """
3546   REQ_BGL = False
3547
3548   def ExpandNames(self):
3549     if self.op.instances:
3550       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3551       self.needed_locks = {
3552         locking.LEVEL_NODE_RES: [],
3553         locking.LEVEL_INSTANCE: self.wanted_names,
3554         }
3555       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3556     else:
3557       self.wanted_names = None
3558       self.needed_locks = {
3559         locking.LEVEL_NODE_RES: locking.ALL_SET,
3560         locking.LEVEL_INSTANCE: locking.ALL_SET,
3561         }
3562     self.share_locks = {
3563       locking.LEVEL_NODE_RES: 1,
3564       locking.LEVEL_INSTANCE: 0,
3565       }
3566
3567   def DeclareLocks(self, level):
3568     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3569       self._LockInstancesNodes(primary_only=True, level=level)
3570
3571   def CheckPrereq(self):
3572     """Check prerequisites.
3573
3574     This only checks the optional instance list against the existing names.
3575
3576     """
3577     if self.wanted_names is None:
3578       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3579
3580     self.wanted_instances = \
3581         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3582
3583   def _EnsureChildSizes(self, disk):
3584     """Ensure children of the disk have the needed disk size.
3585
3586     This is valid mainly for DRBD8 and fixes an issue where the
3587     children have smaller disk size.
3588
3589     @param disk: an L{ganeti.objects.Disk} object
3590
3591     """
3592     if disk.dev_type == constants.LD_DRBD8:
3593       assert disk.children, "Empty children for DRBD8?"
3594       fchild = disk.children[0]
3595       mismatch = fchild.size < disk.size
3596       if mismatch:
3597         self.LogInfo("Child disk has size %d, parent %d, fixing",
3598                      fchild.size, disk.size)
3599         fchild.size = disk.size
3600
3601       # and we recurse on this child only, not on the metadev
3602       return self._EnsureChildSizes(fchild) or mismatch
3603     else:
3604       return False
3605
3606   def Exec(self, feedback_fn):
3607     """Verify the size of cluster disks.
3608
3609     """
3610     # TODO: check child disks too
3611     # TODO: check differences in size between primary/secondary nodes
3612     per_node_disks = {}
3613     for instance in self.wanted_instances:
3614       pnode = instance.primary_node
3615       if pnode not in per_node_disks:
3616         per_node_disks[pnode] = []
3617       for idx, disk in enumerate(instance.disks):
3618         per_node_disks[pnode].append((instance, idx, disk))
3619
3620     assert not (frozenset(per_node_disks.keys()) -
3621                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3622       "Not owning correct locks"
3623     assert not self.owned_locks(locking.LEVEL_NODE)
3624
3625     changed = []
3626     for node, dskl in per_node_disks.items():
3627       newl = [v[2].Copy() for v in dskl]
3628       for dsk in newl:
3629         self.cfg.SetDiskID(dsk, node)
3630       result = self.rpc.call_blockdev_getsize(node, newl)
3631       if result.fail_msg:
3632         self.LogWarning("Failure in blockdev_getsize call to node"
3633                         " %s, ignoring", node)
3634         continue
3635       if len(result.payload) != len(dskl):
3636         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3637                         " result.payload=%s", node, len(dskl), result.payload)
3638         self.LogWarning("Invalid result from node %s, ignoring node results",
3639                         node)
3640         continue
3641       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3642         if size is None:
3643           self.LogWarning("Disk %d of instance %s did not return size"
3644                           " information, ignoring", idx, instance.name)
3645           continue
3646         if not isinstance(size, (int, long)):
3647           self.LogWarning("Disk %d of instance %s did not return valid"
3648                           " size information, ignoring", idx, instance.name)
3649           continue
3650         size = size >> 20
3651         if size != disk.size:
3652           self.LogInfo("Disk %d of instance %s has mismatched size,"
3653                        " correcting: recorded %d, actual %d", idx,
3654                        instance.name, disk.size, size)
3655           disk.size = size
3656           self.cfg.Update(instance, feedback_fn)
3657           changed.append((instance.name, idx, size))
3658         if self._EnsureChildSizes(disk):
3659           self.cfg.Update(instance, feedback_fn)
3660           changed.append((instance.name, idx, disk.size))
3661     return changed
3662
3663
3664 class LUClusterRename(LogicalUnit):
3665   """Rename the cluster.
3666
3667   """
3668   HPATH = "cluster-rename"
3669   HTYPE = constants.HTYPE_CLUSTER
3670
3671   def BuildHooksEnv(self):
3672     """Build hooks env.
3673
3674     """
3675     return {
3676       "OP_TARGET": self.cfg.GetClusterName(),
3677       "NEW_NAME": self.op.name,
3678       }
3679
3680   def BuildHooksNodes(self):
3681     """Build hooks nodes.
3682
3683     """
3684     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3685
3686   def CheckPrereq(self):
3687     """Verify that the passed name is a valid one.
3688
3689     """
3690     hostname = netutils.GetHostname(name=self.op.name,
3691                                     family=self.cfg.GetPrimaryIPFamily())
3692
3693     new_name = hostname.name
3694     self.ip = new_ip = hostname.ip
3695     old_name = self.cfg.GetClusterName()
3696     old_ip = self.cfg.GetMasterIP()
3697     if new_name == old_name and new_ip == old_ip:
3698       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3699                                  " cluster has changed",
3700                                  errors.ECODE_INVAL)
3701     if new_ip != old_ip:
3702       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3703         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3704                                    " reachable on the network" %
3705                                    new_ip, errors.ECODE_NOTUNIQUE)
3706
3707     self.op.name = new_name
3708
3709   def Exec(self, feedback_fn):
3710     """Rename the cluster.
3711
3712     """
3713     clustername = self.op.name
3714     new_ip = self.ip
3715
3716     # shutdown the master IP
3717     master_params = self.cfg.GetMasterNetworkParameters()
3718     ems = self.cfg.GetUseExternalMipScript()
3719     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3720                                                      master_params, ems)
3721     result.Raise("Could not disable the master role")
3722
3723     try:
3724       cluster = self.cfg.GetClusterInfo()
3725       cluster.cluster_name = clustername
3726       cluster.master_ip = new_ip
3727       self.cfg.Update(cluster, feedback_fn)
3728
3729       # update the known hosts file
3730       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3731       node_list = self.cfg.GetOnlineNodeList()
3732       try:
3733         node_list.remove(master_params.name)
3734       except ValueError:
3735         pass
3736       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3737     finally:
3738       master_params.ip = new_ip
3739       result = self.rpc.call_node_activate_master_ip(master_params.name,
3740                                                      master_params, ems)
3741       msg = result.fail_msg
3742       if msg:
3743         self.LogWarning("Could not re-enable the master role on"
3744                         " the master, please restart manually: %s", msg)
3745
3746     return clustername
3747
3748
3749 def _ValidateNetmask(cfg, netmask):
3750   """Checks if a netmask is valid.
3751
3752   @type cfg: L{config.ConfigWriter}
3753   @param cfg: The cluster configuration
3754   @type netmask: int
3755   @param netmask: the netmask to be verified
3756   @raise errors.OpPrereqError: if the validation fails
3757
3758   """
3759   ip_family = cfg.GetPrimaryIPFamily()
3760   try:
3761     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3762   except errors.ProgrammerError:
3763     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3764                                ip_family)
3765   if not ipcls.ValidateNetmask(netmask):
3766     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3767                                 (netmask))
3768
3769
3770 class LUClusterSetParams(LogicalUnit):
3771   """Change the parameters of the cluster.
3772
3773   """
3774   HPATH = "cluster-modify"
3775   HTYPE = constants.HTYPE_CLUSTER
3776   REQ_BGL = False
3777
3778   def CheckArguments(self):
3779     """Check parameters
3780
3781     """
3782     if self.op.uid_pool:
3783       uidpool.CheckUidPool(self.op.uid_pool)
3784
3785     if self.op.add_uids:
3786       uidpool.CheckUidPool(self.op.add_uids)
3787
3788     if self.op.remove_uids:
3789       uidpool.CheckUidPool(self.op.remove_uids)
3790
3791     if self.op.master_netmask is not None:
3792       _ValidateNetmask(self.cfg, self.op.master_netmask)
3793
3794     if self.op.diskparams:
3795       for dt_params in self.op.diskparams.values():
3796         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3797
3798   def ExpandNames(self):
3799     # FIXME: in the future maybe other cluster params won't require checking on
3800     # all nodes to be modified.
3801     self.needed_locks = {
3802       locking.LEVEL_NODE: locking.ALL_SET,
3803       locking.LEVEL_INSTANCE: locking.ALL_SET,
3804       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3805     }
3806     self.share_locks = {
3807         locking.LEVEL_NODE: 1,
3808         locking.LEVEL_INSTANCE: 1,
3809         locking.LEVEL_NODEGROUP: 1,
3810     }
3811
3812   def BuildHooksEnv(self):
3813     """Build hooks env.
3814
3815     """
3816     return {
3817       "OP_TARGET": self.cfg.GetClusterName(),
3818       "NEW_VG_NAME": self.op.vg_name,
3819       }
3820
3821   def BuildHooksNodes(self):
3822     """Build hooks nodes.
3823
3824     """
3825     mn = self.cfg.GetMasterNode()
3826     return ([mn], [mn])
3827
3828   def CheckPrereq(self):
3829     """Check prerequisites.
3830
3831     This checks whether the given params don't conflict and
3832     if the given volume group is valid.
3833
3834     """
3835     if self.op.vg_name is not None and not self.op.vg_name:
3836       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3837         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3838                                    " instances exist", errors.ECODE_INVAL)
3839
3840     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3841       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3842         raise errors.OpPrereqError("Cannot disable drbd helper while"
3843                                    " drbd-based instances exist",
3844                                    errors.ECODE_INVAL)
3845
3846     node_list = self.owned_locks(locking.LEVEL_NODE)
3847
3848     # if vg_name not None, checks given volume group on all nodes
3849     if self.op.vg_name:
3850       vglist = self.rpc.call_vg_list(node_list)
3851       for node in node_list:
3852         msg = vglist[node].fail_msg
3853         if msg:
3854           # ignoring down node
3855           self.LogWarning("Error while gathering data on node %s"
3856                           " (ignoring node): %s", node, msg)
3857           continue
3858         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3859                                               self.op.vg_name,
3860                                               constants.MIN_VG_SIZE)
3861         if vgstatus:
3862           raise errors.OpPrereqError("Error on node '%s': %s" %
3863                                      (node, vgstatus), errors.ECODE_ENVIRON)
3864
3865     if self.op.drbd_helper:
3866       # checks given drbd helper on all nodes
3867       helpers = self.rpc.call_drbd_helper(node_list)
3868       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3869         if ninfo.offline:
3870           self.LogInfo("Not checking drbd helper on offline node %s", node)
3871           continue
3872         msg = helpers[node].fail_msg
3873         if msg:
3874           raise errors.OpPrereqError("Error checking drbd helper on node"
3875                                      " '%s': %s" % (node, msg),
3876                                      errors.ECODE_ENVIRON)
3877         node_helper = helpers[node].payload
3878         if node_helper != self.op.drbd_helper:
3879           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3880                                      (node, node_helper), errors.ECODE_ENVIRON)
3881
3882     self.cluster = cluster = self.cfg.GetClusterInfo()
3883     # validate params changes
3884     if self.op.beparams:
3885       objects.UpgradeBeParams(self.op.beparams)
3886       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3887       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3888
3889     if self.op.ndparams:
3890       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3891       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3892
3893       # TODO: we need a more general way to handle resetting
3894       # cluster-level parameters to default values
3895       if self.new_ndparams["oob_program"] == "":
3896         self.new_ndparams["oob_program"] = \
3897             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3898
3899     if self.op.hv_state:
3900       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3901                                             self.cluster.hv_state_static)
3902       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3903                                for hv, values in new_hv_state.items())
3904
3905     if self.op.disk_state:
3906       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3907                                                 self.cluster.disk_state_static)
3908       self.new_disk_state = \
3909         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3910                             for name, values in svalues.items()))
3911              for storage, svalues in new_disk_state.items())
3912
3913     if self.op.ipolicy:
3914       self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
3915                                             group_policy=False)
3916
3917       all_instances = self.cfg.GetAllInstancesInfo().values()
3918       violations = set()
3919       for group in self.cfg.GetAllNodeGroupsInfo().values():
3920         instances = frozenset([inst for inst in all_instances
3921                                if compat.any(node in group.members
3922                                              for node in inst.all_nodes)])
3923         new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
3924         new = _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
3925                                                                    group),
3926                                             new_ipolicy, instances)
3927         if new:
3928           violations.update(new)
3929
3930       if violations:
3931         self.LogWarning("After the ipolicy change the following instances"
3932                         " violate them: %s",
3933                         utils.CommaJoin(violations))
3934
3935     if self.op.nicparams:
3936       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3937       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3938       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3939       nic_errors = []
3940
3941       # check all instances for consistency
3942       for instance in self.cfg.GetAllInstancesInfo().values():
3943         for nic_idx, nic in enumerate(instance.nics):
3944           params_copy = copy.deepcopy(nic.nicparams)
3945           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3946
3947           # check parameter syntax
3948           try:
3949             objects.NIC.CheckParameterSyntax(params_filled)
3950           except errors.ConfigurationError, err:
3951             nic_errors.append("Instance %s, nic/%d: %s" %
3952                               (instance.name, nic_idx, err))
3953
3954           # if we're moving instances to routed, check that they have an ip
3955           target_mode = params_filled[constants.NIC_MODE]
3956           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3957             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3958                               " address" % (instance.name, nic_idx))
3959       if nic_errors:
3960         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3961                                    "\n".join(nic_errors))
3962
3963     # hypervisor list/parameters
3964     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3965     if self.op.hvparams:
3966       for hv_name, hv_dict in self.op.hvparams.items():
3967         if hv_name not in self.new_hvparams:
3968           self.new_hvparams[hv_name] = hv_dict
3969         else:
3970           self.new_hvparams[hv_name].update(hv_dict)
3971
3972     # disk template parameters
3973     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3974     if self.op.diskparams:
3975       for dt_name, dt_params in self.op.diskparams.items():
3976         if dt_name not in self.op.diskparams:
3977           self.new_diskparams[dt_name] = dt_params
3978         else:
3979           self.new_diskparams[dt_name].update(dt_params)
3980
3981     # os hypervisor parameters
3982     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3983     if self.op.os_hvp:
3984       for os_name, hvs in self.op.os_hvp.items():
3985         if os_name not in self.new_os_hvp:
3986           self.new_os_hvp[os_name] = hvs
3987         else:
3988           for hv_name, hv_dict in hvs.items():
3989             if hv_name not in self.new_os_hvp[os_name]:
3990               self.new_os_hvp[os_name][hv_name] = hv_dict
3991             else:
3992               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3993
3994     # os parameters
3995     self.new_osp = objects.FillDict(cluster.osparams, {})
3996     if self.op.osparams:
3997       for os_name, osp in self.op.osparams.items():
3998         if os_name not in self.new_osp:
3999           self.new_osp[os_name] = {}
4000
4001         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
4002                                                   use_none=True)
4003
4004         if not self.new_osp[os_name]:
4005           # we removed all parameters
4006           del self.new_osp[os_name]
4007         else:
4008           # check the parameter validity (remote check)
4009           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
4010                          os_name, self.new_osp[os_name])
4011
4012     # changes to the hypervisor list
4013     if self.op.enabled_hypervisors is not None:
4014       self.hv_list = self.op.enabled_hypervisors
4015       for hv in self.hv_list:
4016         # if the hypervisor doesn't already exist in the cluster
4017         # hvparams, we initialize it to empty, and then (in both
4018         # cases) we make sure to fill the defaults, as we might not
4019         # have a complete defaults list if the hypervisor wasn't
4020         # enabled before
4021         if hv not in new_hvp:
4022           new_hvp[hv] = {}
4023         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
4024         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
4025     else:
4026       self.hv_list = cluster.enabled_hypervisors
4027
4028     if self.op.hvparams or self.op.enabled_hypervisors is not None:
4029       # either the enabled list has changed, or the parameters have, validate
4030       for hv_name, hv_params in self.new_hvparams.items():
4031         if ((self.op.hvparams and hv_name in self.op.hvparams) or
4032             (self.op.enabled_hypervisors and
4033              hv_name in self.op.enabled_hypervisors)):
4034           # either this is a new hypervisor, or its parameters have changed
4035           hv_class = hypervisor.GetHypervisor(hv_name)
4036           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4037           hv_class.CheckParameterSyntax(hv_params)
4038           _CheckHVParams(self, node_list, hv_name, hv_params)
4039
4040     if self.op.os_hvp:
4041       # no need to check any newly-enabled hypervisors, since the
4042       # defaults have already been checked in the above code-block
4043       for os_name, os_hvp in self.new_os_hvp.items():
4044         for hv_name, hv_params in os_hvp.items():
4045           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
4046           # we need to fill in the new os_hvp on top of the actual hv_p
4047           cluster_defaults = self.new_hvparams.get(hv_name, {})
4048           new_osp = objects.FillDict(cluster_defaults, hv_params)
4049           hv_class = hypervisor.GetHypervisor(hv_name)
4050           hv_class.CheckParameterSyntax(new_osp)
4051           _CheckHVParams(self, node_list, hv_name, new_osp)
4052
4053     if self.op.default_iallocator:
4054       alloc_script = utils.FindFile(self.op.default_iallocator,
4055                                     constants.IALLOCATOR_SEARCH_PATH,
4056                                     os.path.isfile)
4057       if alloc_script is None:
4058         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
4059                                    " specified" % self.op.default_iallocator,
4060                                    errors.ECODE_INVAL)
4061
4062   def Exec(self, feedback_fn):
4063     """Change the parameters of the cluster.
4064
4065     """
4066     if self.op.vg_name is not None:
4067       new_volume = self.op.vg_name
4068       if not new_volume:
4069         new_volume = None
4070       if new_volume != self.cfg.GetVGName():
4071         self.cfg.SetVGName(new_volume)
4072       else:
4073         feedback_fn("Cluster LVM configuration already in desired"
4074                     " state, not changing")
4075     if self.op.drbd_helper is not None:
4076       new_helper = self.op.drbd_helper
4077       if not new_helper:
4078         new_helper = None
4079       if new_helper != self.cfg.GetDRBDHelper():
4080         self.cfg.SetDRBDHelper(new_helper)
4081       else:
4082         feedback_fn("Cluster DRBD helper already in desired state,"
4083                     " not changing")
4084     if self.op.hvparams:
4085       self.cluster.hvparams = self.new_hvparams
4086     if self.op.os_hvp:
4087       self.cluster.os_hvp = self.new_os_hvp
4088     if self.op.enabled_hypervisors is not None:
4089       self.cluster.hvparams = self.new_hvparams
4090       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4091     if self.op.beparams:
4092       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4093     if self.op.nicparams:
4094       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4095     if self.op.ipolicy:
4096       self.cluster.ipolicy = self.new_ipolicy
4097     if self.op.osparams:
4098       self.cluster.osparams = self.new_osp
4099     if self.op.ndparams:
4100       self.cluster.ndparams = self.new_ndparams
4101     if self.op.diskparams:
4102       self.cluster.diskparams = self.new_diskparams
4103     if self.op.hv_state:
4104       self.cluster.hv_state_static = self.new_hv_state
4105     if self.op.disk_state:
4106       self.cluster.disk_state_static = self.new_disk_state
4107
4108     if self.op.candidate_pool_size is not None:
4109       self.cluster.candidate_pool_size = self.op.candidate_pool_size
4110       # we need to update the pool size here, otherwise the save will fail
4111       _AdjustCandidatePool(self, [])
4112
4113     if self.op.maintain_node_health is not None:
4114       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4115         feedback_fn("Note: CONFD was disabled at build time, node health"
4116                     " maintenance is not useful (still enabling it)")
4117       self.cluster.maintain_node_health = self.op.maintain_node_health
4118
4119     if self.op.prealloc_wipe_disks is not None:
4120       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4121
4122     if self.op.add_uids is not None:
4123       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4124
4125     if self.op.remove_uids is not None:
4126       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4127
4128     if self.op.uid_pool is not None:
4129       self.cluster.uid_pool = self.op.uid_pool
4130
4131     if self.op.default_iallocator is not None:
4132       self.cluster.default_iallocator = self.op.default_iallocator
4133
4134     if self.op.reserved_lvs is not None:
4135       self.cluster.reserved_lvs = self.op.reserved_lvs
4136
4137     if self.op.use_external_mip_script is not None:
4138       self.cluster.use_external_mip_script = self.op.use_external_mip_script
4139
4140     def helper_os(aname, mods, desc):
4141       desc += " OS list"
4142       lst = getattr(self.cluster, aname)
4143       for key, val in mods:
4144         if key == constants.DDM_ADD:
4145           if val in lst:
4146             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4147           else:
4148             lst.append(val)
4149         elif key == constants.DDM_REMOVE:
4150           if val in lst:
4151             lst.remove(val)
4152           else:
4153             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4154         else:
4155           raise errors.ProgrammerError("Invalid modification '%s'" % key)
4156
4157     if self.op.hidden_os:
4158       helper_os("hidden_os", self.op.hidden_os, "hidden")
4159
4160     if self.op.blacklisted_os:
4161       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4162
4163     if self.op.master_netdev:
4164       master_params = self.cfg.GetMasterNetworkParameters()
4165       ems = self.cfg.GetUseExternalMipScript()
4166       feedback_fn("Shutting down master ip on the current netdev (%s)" %
4167                   self.cluster.master_netdev)
4168       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4169                                                        master_params, ems)
4170       result.Raise("Could not disable the master ip")
4171       feedback_fn("Changing master_netdev from %s to %s" %
4172                   (master_params.netdev, self.op.master_netdev))
4173       self.cluster.master_netdev = self.op.master_netdev
4174
4175     if self.op.master_netmask:
4176       master_params = self.cfg.GetMasterNetworkParameters()
4177       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4178       result = self.rpc.call_node_change_master_netmask(master_params.name,
4179                                                         master_params.netmask,
4180                                                         self.op.master_netmask,
4181                                                         master_params.ip,
4182                                                         master_params.netdev)
4183       if result.fail_msg:
4184         msg = "Could not change the master IP netmask: %s" % result.fail_msg
4185         feedback_fn(msg)
4186
4187       self.cluster.master_netmask = self.op.master_netmask
4188
4189     self.cfg.Update(self.cluster, feedback_fn)
4190
4191     if self.op.master_netdev:
4192       master_params = self.cfg.GetMasterNetworkParameters()
4193       feedback_fn("Starting the master ip on the new master netdev (%s)" %
4194                   self.op.master_netdev)
4195       ems = self.cfg.GetUseExternalMipScript()
4196       result = self.rpc.call_node_activate_master_ip(master_params.name,
4197                                                      master_params, ems)
4198       if result.fail_msg:
4199         self.LogWarning("Could not re-enable the master ip on"
4200                         " the master, please restart manually: %s",
4201                         result.fail_msg)
4202
4203
4204 def _UploadHelper(lu, nodes, fname):
4205   """Helper for uploading a file and showing warnings.
4206
4207   """
4208   if os.path.exists(fname):
4209     result = lu.rpc.call_upload_file(nodes, fname)
4210     for to_node, to_result in result.items():
4211       msg = to_result.fail_msg
4212       if msg:
4213         msg = ("Copy of file %s to node %s failed: %s" %
4214                (fname, to_node, msg))
4215         lu.proc.LogWarning(msg)
4216
4217
4218 def _ComputeAncillaryFiles(cluster, redist):
4219   """Compute files external to Ganeti which need to be consistent.
4220
4221   @type redist: boolean
4222   @param redist: Whether to include files which need to be redistributed
4223
4224   """
4225   # Compute files for all nodes
4226   files_all = set([
4227     constants.SSH_KNOWN_HOSTS_FILE,
4228     constants.CONFD_HMAC_KEY,
4229     constants.CLUSTER_DOMAIN_SECRET_FILE,
4230     constants.SPICE_CERT_FILE,
4231     constants.SPICE_CACERT_FILE,
4232     constants.RAPI_USERS_FILE,
4233     ])
4234
4235   if not redist:
4236     files_all.update(constants.ALL_CERT_FILES)
4237     files_all.update(ssconf.SimpleStore().GetFileList())
4238   else:
4239     # we need to ship at least the RAPI certificate
4240     files_all.add(constants.RAPI_CERT_FILE)
4241
4242   if cluster.modify_etc_hosts:
4243     files_all.add(constants.ETC_HOSTS)
4244
4245   # Files which are optional, these must:
4246   # - be present in one other category as well
4247   # - either exist or not exist on all nodes of that category (mc, vm all)
4248   files_opt = set([
4249     constants.RAPI_USERS_FILE,
4250     ])
4251
4252   # Files which should only be on master candidates
4253   files_mc = set()
4254
4255   if not redist:
4256     files_mc.add(constants.CLUSTER_CONF_FILE)
4257
4258     # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4259     # replication
4260     files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4261
4262   # Files which should only be on VM-capable nodes
4263   files_vm = set(filename
4264     for hv_name in cluster.enabled_hypervisors
4265     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4266
4267   files_opt |= set(filename
4268     for hv_name in cluster.enabled_hypervisors
4269     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4270
4271   # Filenames in each category must be unique
4272   all_files_set = files_all | files_mc | files_vm
4273   assert (len(all_files_set) ==
4274           sum(map(len, [files_all, files_mc, files_vm]))), \
4275          "Found file listed in more than one file list"
4276
4277   # Optional files must be present in one other category
4278   assert all_files_set.issuperset(files_opt), \
4279          "Optional file not in a different required list"
4280
4281   return (files_all, files_opt, files_mc, files_vm)
4282
4283
4284 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4285   """Distribute additional files which are part of the cluster configuration.
4286
4287   ConfigWriter takes care of distributing the config and ssconf files, but
4288   there are more files which should be distributed to all nodes. This function
4289   makes sure those are copied.
4290
4291   @param lu: calling logical unit
4292   @param additional_nodes: list of nodes not in the config to distribute to
4293   @type additional_vm: boolean
4294   @param additional_vm: whether the additional nodes are vm-capable or not
4295
4296   """
4297   # Gather target nodes
4298   cluster = lu.cfg.GetClusterInfo()
4299   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4300
4301   online_nodes = lu.cfg.GetOnlineNodeList()
4302   vm_nodes = lu.cfg.GetVmCapableNodeList()
4303
4304   if additional_nodes is not None:
4305     online_nodes.extend(additional_nodes)
4306     if additional_vm:
4307       vm_nodes.extend(additional_nodes)
4308
4309   # Never distribute to master node
4310   for nodelist in [online_nodes, vm_nodes]:
4311     if master_info.name in nodelist:
4312       nodelist.remove(master_info.name)
4313
4314   # Gather file lists
4315   (files_all, _, files_mc, files_vm) = \
4316     _ComputeAncillaryFiles(cluster, True)
4317
4318   # Never re-distribute configuration file from here
4319   assert not (constants.CLUSTER_CONF_FILE in files_all or
4320               constants.CLUSTER_CONF_FILE in files_vm)
4321   assert not files_mc, "Master candidates not handled in this function"
4322
4323   filemap = [
4324     (online_nodes, files_all),
4325     (vm_nodes, files_vm),
4326     ]
4327
4328   # Upload the files
4329   for (node_list, files) in filemap:
4330     for fname in files:
4331       _UploadHelper(lu, node_list, fname)
4332
4333
4334 class LUClusterRedistConf(NoHooksLU):
4335   """Force the redistribution of cluster configuration.
4336
4337   This is a very simple LU.
4338
4339   """
4340   REQ_BGL = False
4341
4342   def ExpandNames(self):
4343     self.needed_locks = {
4344       locking.LEVEL_NODE: locking.ALL_SET,
4345     }
4346     self.share_locks[locking.LEVEL_NODE] = 1
4347
4348   def Exec(self, feedback_fn):
4349     """Redistribute the configuration.
4350
4351     """
4352     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4353     _RedistributeAncillaryFiles(self)
4354
4355
4356 class LUClusterActivateMasterIp(NoHooksLU):
4357   """Activate the master IP on the master node.
4358
4359   """
4360   def Exec(self, feedback_fn):
4361     """Activate the master IP.
4362
4363     """
4364     master_params = self.cfg.GetMasterNetworkParameters()
4365     ems = self.cfg.GetUseExternalMipScript()
4366     result = self.rpc.call_node_activate_master_ip(master_params.name,
4367                                                    master_params, ems)
4368     result.Raise("Could not activate the master IP")
4369
4370
4371 class LUClusterDeactivateMasterIp(NoHooksLU):
4372   """Deactivate the master IP on the master node.
4373
4374   """
4375   def Exec(self, feedback_fn):
4376     """Deactivate the master IP.
4377
4378     """
4379     master_params = self.cfg.GetMasterNetworkParameters()
4380     ems = self.cfg.GetUseExternalMipScript()
4381     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4382                                                      master_params, ems)
4383     result.Raise("Could not deactivate the master IP")
4384
4385
4386 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4387   """Sleep and poll for an instance's disk to sync.
4388
4389   """
4390   if not instance.disks or disks is not None and not disks:
4391     return True
4392
4393   disks = _ExpandCheckDisks(instance, disks)
4394
4395   if not oneshot:
4396     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4397
4398   node = instance.primary_node
4399
4400   for dev in disks:
4401     lu.cfg.SetDiskID(dev, node)
4402
4403   # TODO: Convert to utils.Retry
4404
4405   retries = 0
4406   degr_retries = 10 # in seconds, as we sleep 1 second each time
4407   while True:
4408     max_time = 0
4409     done = True
4410     cumul_degraded = False
4411     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4412     msg = rstats.fail_msg
4413     if msg:
4414       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4415       retries += 1
4416       if retries >= 10:
4417         raise errors.RemoteError("Can't contact node %s for mirror data,"
4418                                  " aborting." % node)
4419       time.sleep(6)
4420       continue
4421     rstats = rstats.payload
4422     retries = 0
4423     for i, mstat in enumerate(rstats):
4424       if mstat is None:
4425         lu.LogWarning("Can't compute data for node %s/%s",
4426                            node, disks[i].iv_name)
4427         continue
4428
4429       cumul_degraded = (cumul_degraded or
4430                         (mstat.is_degraded and mstat.sync_percent is None))
4431       if mstat.sync_percent is not None:
4432         done = False
4433         if mstat.estimated_time is not None:
4434           rem_time = ("%s remaining (estimated)" %
4435                       utils.FormatSeconds(mstat.estimated_time))
4436           max_time = mstat.estimated_time
4437         else:
4438           rem_time = "no time estimate"
4439         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4440                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4441
4442     # if we're done but degraded, let's do a few small retries, to
4443     # make sure we see a stable and not transient situation; therefore
4444     # we force restart of the loop
4445     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4446       logging.info("Degraded disks found, %d retries left", degr_retries)
4447       degr_retries -= 1
4448       time.sleep(1)
4449       continue
4450
4451     if done or oneshot:
4452       break
4453
4454     time.sleep(min(60, max_time))
4455
4456   if done:
4457     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4458   return not cumul_degraded
4459
4460
4461 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4462   """Check that mirrors are not degraded.
4463
4464   The ldisk parameter, if True, will change the test from the
4465   is_degraded attribute (which represents overall non-ok status for
4466   the device(s)) to the ldisk (representing the local storage status).
4467
4468   """
4469   lu.cfg.SetDiskID(dev, node)
4470
4471   result = True
4472
4473   if on_primary or dev.AssembleOnSecondary():
4474     rstats = lu.rpc.call_blockdev_find(node, dev)
4475     msg = rstats.fail_msg
4476     if msg:
4477       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4478       result = False
4479     elif not rstats.payload:
4480       lu.LogWarning("Can't find disk on node %s", node)
4481       result = False
4482     else:
4483       if ldisk:
4484         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4485       else:
4486         result = result and not rstats.payload.is_degraded
4487
4488   if dev.children:
4489     for child in dev.children:
4490       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4491
4492   return result
4493
4494
4495 class LUOobCommand(NoHooksLU):
4496   """Logical unit for OOB handling.
4497
4498   """
4499   REG_BGL = False
4500   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4501
4502   def ExpandNames(self):
4503     """Gather locks we need.
4504
4505     """
4506     if self.op.node_names:
4507       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4508       lock_names = self.op.node_names
4509     else:
4510       lock_names = locking.ALL_SET
4511
4512     self.needed_locks = {
4513       locking.LEVEL_NODE: lock_names,
4514       }
4515
4516   def CheckPrereq(self):
4517     """Check prerequisites.
4518
4519     This checks:
4520      - the node exists in the configuration
4521      - OOB is supported
4522
4523     Any errors are signaled by raising errors.OpPrereqError.
4524
4525     """
4526     self.nodes = []
4527     self.master_node = self.cfg.GetMasterNode()
4528
4529     assert self.op.power_delay >= 0.0
4530
4531     if self.op.node_names:
4532       if (self.op.command in self._SKIP_MASTER and
4533           self.master_node in self.op.node_names):
4534         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4535         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4536
4537         if master_oob_handler:
4538           additional_text = ("run '%s %s %s' if you want to operate on the"
4539                              " master regardless") % (master_oob_handler,
4540                                                       self.op.command,
4541                                                       self.master_node)
4542         else:
4543           additional_text = "it does not support out-of-band operations"
4544
4545         raise errors.OpPrereqError(("Operating on the master node %s is not"
4546                                     " allowed for %s; %s") %
4547                                    (self.master_node, self.op.command,
4548                                     additional_text), errors.ECODE_INVAL)
4549     else:
4550       self.op.node_names = self.cfg.GetNodeList()
4551       if self.op.command in self._SKIP_MASTER:
4552         self.op.node_names.remove(self.master_node)
4553
4554     if self.op.command in self._SKIP_MASTER:
4555       assert self.master_node not in self.op.node_names
4556
4557     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4558       if node is None:
4559         raise errors.OpPrereqError("Node %s not found" % node_name,
4560                                    errors.ECODE_NOENT)
4561       else:
4562         self.nodes.append(node)
4563
4564       if (not self.op.ignore_status and
4565           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4566         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4567                                     " not marked offline") % node_name,
4568                                    errors.ECODE_STATE)
4569
4570   def Exec(self, feedback_fn):
4571     """Execute OOB and return result if we expect any.
4572
4573     """
4574     master_node = self.master_node
4575     ret = []
4576
4577     for idx, node in enumerate(utils.NiceSort(self.nodes,
4578                                               key=lambda node: node.name)):
4579       node_entry = [(constants.RS_NORMAL, node.name)]
4580       ret.append(node_entry)
4581
4582       oob_program = _SupportsOob(self.cfg, node)
4583
4584       if not oob_program:
4585         node_entry.append((constants.RS_UNAVAIL, None))
4586         continue
4587
4588       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4589                    self.op.command, oob_program, node.name)
4590       result = self.rpc.call_run_oob(master_node, oob_program,
4591                                      self.op.command, node.name,
4592                                      self.op.timeout)
4593
4594       if result.fail_msg:
4595         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4596                         node.name, result.fail_msg)
4597         node_entry.append((constants.RS_NODATA, None))
4598       else:
4599         try:
4600           self._CheckPayload(result)
4601         except errors.OpExecError, err:
4602           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4603                           node.name, err)
4604           node_entry.append((constants.RS_NODATA, None))
4605         else:
4606           if self.op.command == constants.OOB_HEALTH:
4607             # For health we should log important events
4608             for item, status in result.payload:
4609               if status in [constants.OOB_STATUS_WARNING,
4610                             constants.OOB_STATUS_CRITICAL]:
4611                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4612                                 item, node.name, status)
4613
4614           if self.op.command == constants.OOB_POWER_ON:
4615             node.powered = True
4616           elif self.op.command == constants.OOB_POWER_OFF:
4617             node.powered = False
4618           elif self.op.command == constants.OOB_POWER_STATUS:
4619             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4620             if powered != node.powered:
4621               logging.warning(("Recorded power state (%s) of node '%s' does not"
4622                                " match actual power state (%s)"), node.powered,
4623                               node.name, powered)
4624
4625           # For configuration changing commands we should update the node
4626           if self.op.command in (constants.OOB_POWER_ON,
4627                                  constants.OOB_POWER_OFF):
4628             self.cfg.Update(node, feedback_fn)
4629
4630           node_entry.append((constants.RS_NORMAL, result.payload))
4631
4632           if (self.op.command == constants.OOB_POWER_ON and
4633               idx < len(self.nodes) - 1):
4634             time.sleep(self.op.power_delay)
4635
4636     return ret
4637
4638   def _CheckPayload(self, result):
4639     """Checks if the payload is valid.
4640
4641     @param result: RPC result
4642     @raises errors.OpExecError: If payload is not valid
4643
4644     """
4645     errs = []
4646     if self.op.command == constants.OOB_HEALTH:
4647       if not isinstance(result.payload, list):
4648         errs.append("command 'health' is expected to return a list but got %s" %
4649                     type(result.payload))
4650       else:
4651         for item, status in result.payload:
4652           if status not in constants.OOB_STATUSES:
4653             errs.append("health item '%s' has invalid status '%s'" %
4654                         (item, status))
4655
4656     if self.op.command == constants.OOB_POWER_STATUS:
4657       if not isinstance(result.payload, dict):
4658         errs.append("power-status is expected to return a dict but got %s" %
4659                     type(result.payload))
4660
4661     if self.op.command in [
4662         constants.OOB_POWER_ON,
4663         constants.OOB_POWER_OFF,
4664         constants.OOB_POWER_CYCLE,
4665         ]:
4666       if result.payload is not None:
4667         errs.append("%s is expected to not return payload but got '%s'" %
4668                     (self.op.command, result.payload))
4669
4670     if errs:
4671       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4672                                utils.CommaJoin(errs))
4673
4674
4675 class _OsQuery(_QueryBase):
4676   FIELDS = query.OS_FIELDS
4677
4678   def ExpandNames(self, lu):
4679     # Lock all nodes in shared mode
4680     # Temporary removal of locks, should be reverted later
4681     # TODO: reintroduce locks when they are lighter-weight
4682     lu.needed_locks = {}
4683     #self.share_locks[locking.LEVEL_NODE] = 1
4684     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4685
4686     # The following variables interact with _QueryBase._GetNames
4687     if self.names:
4688       self.wanted = self.names
4689     else:
4690       self.wanted = locking.ALL_SET
4691
4692     self.do_locking = self.use_locking
4693
4694   def DeclareLocks(self, lu, level):
4695     pass
4696
4697   @staticmethod
4698   def _DiagnoseByOS(rlist):
4699     """Remaps a per-node return list into an a per-os per-node dictionary
4700
4701     @param rlist: a map with node names as keys and OS objects as values
4702
4703     @rtype: dict
4704     @return: a dictionary with osnames as keys and as value another
4705         map, with nodes as keys and tuples of (path, status, diagnose,
4706         variants, parameters, api_versions) as values, eg::
4707
4708           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4709                                      (/srv/..., False, "invalid api")],
4710                            "node2": [(/srv/..., True, "", [], [])]}
4711           }
4712
4713     """
4714     all_os = {}
4715     # we build here the list of nodes that didn't fail the RPC (at RPC
4716     # level), so that nodes with a non-responding node daemon don't
4717     # make all OSes invalid
4718     good_nodes = [node_name for node_name in rlist
4719                   if not rlist[node_name].fail_msg]
4720     for node_name, nr in rlist.items():
4721       if nr.fail_msg or not nr.payload:
4722         continue
4723       for (name, path, status, diagnose, variants,
4724            params, api_versions) in nr.payload:
4725         if name not in all_os:
4726           # build a list of nodes for this os containing empty lists
4727           # for each node in node_list
4728           all_os[name] = {}
4729           for nname in good_nodes:
4730             all_os[name][nname] = []
4731         # convert params from [name, help] to (name, help)
4732         params = [tuple(v) for v in params]
4733         all_os[name][node_name].append((path, status, diagnose,
4734                                         variants, params, api_versions))
4735     return all_os
4736
4737   def _GetQueryData(self, lu):
4738     """Computes the list of nodes and their attributes.
4739
4740     """
4741     # Locking is not used
4742     assert not (compat.any(lu.glm.is_owned(level)
4743                            for level in locking.LEVELS
4744                            if level != locking.LEVEL_CLUSTER) or
4745                 self.do_locking or self.use_locking)
4746
4747     valid_nodes = [node.name
4748                    for node in lu.cfg.GetAllNodesInfo().values()
4749                    if not node.offline and node.vm_capable]
4750     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4751     cluster = lu.cfg.GetClusterInfo()
4752
4753     data = {}
4754
4755     for (os_name, os_data) in pol.items():
4756       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4757                           hidden=(os_name in cluster.hidden_os),
4758                           blacklisted=(os_name in cluster.blacklisted_os))
4759
4760       variants = set()
4761       parameters = set()
4762       api_versions = set()
4763
4764       for idx, osl in enumerate(os_data.values()):
4765         info.valid = bool(info.valid and osl and osl[0][1])
4766         if not info.valid:
4767           break
4768
4769         (node_variants, node_params, node_api) = osl[0][3:6]
4770         if idx == 0:
4771           # First entry
4772           variants.update(node_variants)
4773           parameters.update(node_params)
4774           api_versions.update(node_api)
4775         else:
4776           # Filter out inconsistent values
4777           variants.intersection_update(node_variants)
4778           parameters.intersection_update(node_params)
4779           api_versions.intersection_update(node_api)
4780
4781       info.variants = list(variants)
4782       info.parameters = list(parameters)
4783       info.api_versions = list(api_versions)
4784
4785       data[os_name] = info
4786
4787     # Prepare data in requested order
4788     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4789             if name in data]
4790
4791
4792 class LUOsDiagnose(NoHooksLU):
4793   """Logical unit for OS diagnose/query.
4794
4795   """
4796   REQ_BGL = False
4797
4798   @staticmethod
4799   def _BuildFilter(fields, names):
4800     """Builds a filter for querying OSes.
4801
4802     """
4803     name_filter = qlang.MakeSimpleFilter("name", names)
4804
4805     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4806     # respective field is not requested
4807     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4808                      for fname in ["hidden", "blacklisted"]
4809                      if fname not in fields]
4810     if "valid" not in fields:
4811       status_filter.append([qlang.OP_TRUE, "valid"])
4812
4813     if status_filter:
4814       status_filter.insert(0, qlang.OP_AND)
4815     else:
4816       status_filter = None
4817
4818     if name_filter and status_filter:
4819       return [qlang.OP_AND, name_filter, status_filter]
4820     elif name_filter:
4821       return name_filter
4822     else:
4823       return status_filter
4824
4825   def CheckArguments(self):
4826     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4827                        self.op.output_fields, False)
4828
4829   def ExpandNames(self):
4830     self.oq.ExpandNames(self)
4831
4832   def Exec(self, feedback_fn):
4833     return self.oq.OldStyleQuery(self)
4834
4835
4836 class LUNodeRemove(LogicalUnit):
4837   """Logical unit for removing a node.
4838
4839   """
4840   HPATH = "node-remove"
4841   HTYPE = constants.HTYPE_NODE
4842
4843   def BuildHooksEnv(self):
4844     """Build hooks env.
4845
4846     This doesn't run on the target node in the pre phase as a failed
4847     node would then be impossible to remove.
4848
4849     """
4850     return {
4851       "OP_TARGET": self.op.node_name,
4852       "NODE_NAME": self.op.node_name,
4853       }
4854
4855   def BuildHooksNodes(self):
4856     """Build hooks nodes.
4857
4858     """
4859     all_nodes = self.cfg.GetNodeList()
4860     try:
4861       all_nodes.remove(self.op.node_name)
4862     except ValueError:
4863       logging.warning("Node '%s', which is about to be removed, was not found"
4864                       " in the list of all nodes", self.op.node_name)
4865     return (all_nodes, all_nodes)
4866
4867   def CheckPrereq(self):
4868     """Check prerequisites.
4869
4870     This checks:
4871      - the node exists in the configuration
4872      - it does not have primary or secondary instances
4873      - it's not the master
4874
4875     Any errors are signaled by raising errors.OpPrereqError.
4876
4877     """
4878     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4879     node = self.cfg.GetNodeInfo(self.op.node_name)
4880     assert node is not None
4881
4882     masternode = self.cfg.GetMasterNode()
4883     if node.name == masternode:
4884       raise errors.OpPrereqError("Node is the master node, failover to another"
4885                                  " node is required", errors.ECODE_INVAL)
4886
4887     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4888       if node.name in instance.all_nodes:
4889         raise errors.OpPrereqError("Instance %s is still running on the node,"
4890                                    " please remove first" % instance_name,
4891                                    errors.ECODE_INVAL)
4892     self.op.node_name = node.name
4893     self.node = node
4894
4895   def Exec(self, feedback_fn):
4896     """Removes the node from the cluster.
4897
4898     """
4899     node = self.node
4900     logging.info("Stopping the node daemon and removing configs from node %s",
4901                  node.name)
4902
4903     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4904
4905     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4906       "Not owning BGL"
4907
4908     # Promote nodes to master candidate as needed
4909     _AdjustCandidatePool(self, exceptions=[node.name])
4910     self.context.RemoveNode(node.name)
4911
4912     # Run post hooks on the node before it's removed
4913     _RunPostHook(self, node.name)
4914
4915     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4916     msg = result.fail_msg
4917     if msg:
4918       self.LogWarning("Errors encountered on the remote node while leaving"
4919                       " the cluster: %s", msg)
4920
4921     # Remove node from our /etc/hosts
4922     if self.cfg.GetClusterInfo().modify_etc_hosts:
4923       master_node = self.cfg.GetMasterNode()
4924       result = self.rpc.call_etc_hosts_modify(master_node,
4925                                               constants.ETC_HOSTS_REMOVE,
4926                                               node.name, None)
4927       result.Raise("Can't update hosts file with new host data")
4928       _RedistributeAncillaryFiles(self)
4929
4930
4931 class _NodeQuery(_QueryBase):
4932   FIELDS = query.NODE_FIELDS
4933
4934   def ExpandNames(self, lu):
4935     lu.needed_locks = {}
4936     lu.share_locks = _ShareAll()
4937
4938     if self.names:
4939       self.wanted = _GetWantedNodes(lu, self.names)
4940     else:
4941       self.wanted = locking.ALL_SET
4942
4943     self.do_locking = (self.use_locking and
4944                        query.NQ_LIVE in self.requested_data)
4945
4946     if self.do_locking:
4947       # If any non-static field is requested we need to lock the nodes
4948       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4949
4950   def DeclareLocks(self, lu, level):
4951     pass
4952
4953   def _GetQueryData(self, lu):
4954     """Computes the list of nodes and their attributes.
4955
4956     """
4957     all_info = lu.cfg.GetAllNodesInfo()
4958
4959     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4960
4961     # Gather data as requested
4962     if query.NQ_LIVE in self.requested_data:
4963       # filter out non-vm_capable nodes
4964       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4965
4966       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4967                                         [lu.cfg.GetHypervisorType()])
4968       live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4969                        for (name, nresult) in node_data.items()
4970                        if not nresult.fail_msg and nresult.payload)
4971     else:
4972       live_data = None
4973
4974     if query.NQ_INST in self.requested_data:
4975       node_to_primary = dict([(name, set()) for name in nodenames])
4976       node_to_secondary = dict([(name, set()) for name in nodenames])
4977
4978       inst_data = lu.cfg.GetAllInstancesInfo()
4979
4980       for inst in inst_data.values():
4981         if inst.primary_node in node_to_primary:
4982           node_to_primary[inst.primary_node].add(inst.name)
4983         for secnode in inst.secondary_nodes:
4984           if secnode in node_to_secondary:
4985             node_to_secondary[secnode].add(inst.name)
4986     else:
4987       node_to_primary = None
4988       node_to_secondary = None
4989
4990     if query.NQ_OOB in self.requested_data:
4991       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4992                          for name, node in all_info.iteritems())
4993     else:
4994       oob_support = None
4995
4996     if query.NQ_GROUP in self.requested_data:
4997       groups = lu.cfg.GetAllNodeGroupsInfo()
4998     else:
4999       groups = {}
5000
5001     return query.NodeQueryData([all_info[name] for name in nodenames],
5002                                live_data, lu.cfg.GetMasterNode(),
5003                                node_to_primary, node_to_secondary, groups,
5004                                oob_support, lu.cfg.GetClusterInfo())
5005
5006
5007 class LUNodeQuery(NoHooksLU):
5008   """Logical unit for querying nodes.
5009
5010   """
5011   # pylint: disable=W0142
5012   REQ_BGL = False
5013
5014   def CheckArguments(self):
5015     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
5016                          self.op.output_fields, self.op.use_locking)
5017
5018   def ExpandNames(self):
5019     self.nq.ExpandNames(self)
5020
5021   def DeclareLocks(self, level):
5022     self.nq.DeclareLocks(self, level)
5023
5024   def Exec(self, feedback_fn):
5025     return self.nq.OldStyleQuery(self)
5026
5027
5028 class LUNodeQueryvols(NoHooksLU):
5029   """Logical unit for getting volumes on node(s).
5030
5031   """
5032   REQ_BGL = False
5033   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
5034   _FIELDS_STATIC = utils.FieldSet("node")
5035
5036   def CheckArguments(self):
5037     _CheckOutputFields(static=self._FIELDS_STATIC,
5038                        dynamic=self._FIELDS_DYNAMIC,
5039                        selected=self.op.output_fields)
5040
5041   def ExpandNames(self):
5042     self.share_locks = _ShareAll()
5043     self.needed_locks = {}
5044
5045     if not self.op.nodes:
5046       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5047     else:
5048       self.needed_locks[locking.LEVEL_NODE] = \
5049         _GetWantedNodes(self, self.op.nodes)
5050
5051   def Exec(self, feedback_fn):
5052     """Computes the list of nodes and their attributes.
5053
5054     """
5055     nodenames = self.owned_locks(locking.LEVEL_NODE)
5056     volumes = self.rpc.call_node_volumes(nodenames)
5057
5058     ilist = self.cfg.GetAllInstancesInfo()
5059     vol2inst = _MapInstanceDisksToNodes(ilist.values())
5060
5061     output = []
5062     for node in nodenames:
5063       nresult = volumes[node]
5064       if nresult.offline:
5065         continue
5066       msg = nresult.fail_msg
5067       if msg:
5068         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5069         continue
5070
5071       node_vols = sorted(nresult.payload,
5072                          key=operator.itemgetter("dev"))
5073
5074       for vol in node_vols:
5075         node_output = []
5076         for field in self.op.output_fields:
5077           if field == "node":
5078             val = node
5079           elif field == "phys":
5080             val = vol["dev"]
5081           elif field == "vg":
5082             val = vol["vg"]
5083           elif field == "name":
5084             val = vol["name"]
5085           elif field == "size":
5086             val = int(float(vol["size"]))
5087           elif field == "instance":
5088             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5089           else:
5090             raise errors.ParameterError(field)
5091           node_output.append(str(val))
5092
5093         output.append(node_output)
5094
5095     return output
5096
5097
5098 class LUNodeQueryStorage(NoHooksLU):
5099   """Logical unit for getting information on storage units on node(s).
5100
5101   """
5102   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5103   REQ_BGL = False
5104
5105   def CheckArguments(self):
5106     _CheckOutputFields(static=self._FIELDS_STATIC,
5107                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5108                        selected=self.op.output_fields)
5109
5110   def ExpandNames(self):
5111     self.share_locks = _ShareAll()
5112     self.needed_locks = {}
5113
5114     if self.op.nodes:
5115       self.needed_locks[locking.LEVEL_NODE] = \
5116         _GetWantedNodes(self, self.op.nodes)
5117     else:
5118       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5119
5120   def Exec(self, feedback_fn):
5121     """Computes the list of nodes and their attributes.
5122
5123     """
5124     self.nodes = self.owned_locks(locking.LEVEL_NODE)
5125
5126     # Always get name to sort by
5127     if constants.SF_NAME in self.op.output_fields:
5128       fields = self.op.output_fields[:]
5129     else:
5130       fields = [constants.SF_NAME] + self.op.output_fields
5131
5132     # Never ask for node or type as it's only known to the LU
5133     for extra in [constants.SF_NODE, constants.SF_TYPE]:
5134       while extra in fields:
5135         fields.remove(extra)
5136
5137     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5138     name_idx = field_idx[constants.SF_NAME]
5139
5140     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5141     data = self.rpc.call_storage_list(self.nodes,
5142                                       self.op.storage_type, st_args,
5143                                       self.op.name, fields)
5144
5145     result = []
5146
5147     for node in utils.NiceSort(self.nodes):
5148       nresult = data[node]
5149       if nresult.offline:
5150         continue
5151
5152       msg = nresult.fail_msg
5153       if msg:
5154         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5155         continue
5156
5157       rows = dict([(row[name_idx], row) for row in nresult.payload])
5158
5159       for name in utils.NiceSort(rows.keys()):
5160         row = rows[name]
5161
5162         out = []
5163
5164         for field in self.op.output_fields:
5165           if field == constants.SF_NODE:
5166             val = node
5167           elif field == constants.SF_TYPE:
5168             val = self.op.storage_type
5169           elif field in field_idx:
5170             val = row[field_idx[field]]
5171           else:
5172             raise errors.ParameterError(field)
5173
5174           out.append(val)
5175
5176         result.append(out)
5177
5178     return result
5179
5180
5181 class _InstanceQuery(_QueryBase):
5182   FIELDS = query.INSTANCE_FIELDS
5183
5184   def ExpandNames(self, lu):
5185     lu.needed_locks = {}
5186     lu.share_locks = _ShareAll()
5187
5188     if self.names:
5189       self.wanted = _GetWantedInstances(lu, self.names)
5190     else:
5191       self.wanted = locking.ALL_SET
5192
5193     self.do_locking = (self.use_locking and
5194                        query.IQ_LIVE in self.requested_data)
5195     if self.do_locking:
5196       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5197       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5198       lu.needed_locks[locking.LEVEL_NODE] = []
5199       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5200
5201     self.do_grouplocks = (self.do_locking and
5202                           query.IQ_NODES in self.requested_data)
5203
5204   def DeclareLocks(self, lu, level):
5205     if self.do_locking:
5206       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5207         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5208
5209         # Lock all groups used by instances optimistically; this requires going
5210         # via the node before it's locked, requiring verification later on
5211         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5212           set(group_uuid
5213               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5214               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5215       elif level == locking.LEVEL_NODE:
5216         lu._LockInstancesNodes() # pylint: disable=W0212
5217
5218   @staticmethod
5219   def _CheckGroupLocks(lu):
5220     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5221     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5222
5223     # Check if node groups for locked instances are still correct
5224     for instance_name in owned_instances:
5225       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5226
5227   def _GetQueryData(self, lu):
5228     """Computes the list of instances and their attributes.
5229
5230     """
5231     if self.do_grouplocks:
5232       self._CheckGroupLocks(lu)
5233
5234     cluster = lu.cfg.GetClusterInfo()
5235     all_info = lu.cfg.GetAllInstancesInfo()
5236
5237     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5238
5239     instance_list = [all_info[name] for name in instance_names]
5240     nodes = frozenset(itertools.chain(*(inst.all_nodes
5241                                         for inst in instance_list)))
5242     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5243     bad_nodes = []
5244     offline_nodes = []
5245     wrongnode_inst = set()
5246
5247     # Gather data as requested
5248     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5249       live_data = {}
5250       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5251       for name in nodes:
5252         result = node_data[name]
5253         if result.offline:
5254           # offline nodes will be in both lists
5255           assert result.fail_msg
5256           offline_nodes.append(name)
5257         if result.fail_msg:
5258           bad_nodes.append(name)
5259         elif result.payload:
5260           for inst in result.payload:
5261             if inst in all_info:
5262               if all_info[inst].primary_node == name:
5263                 live_data.update(result.payload)
5264               else:
5265                 wrongnode_inst.add(inst)
5266             else:
5267               # orphan instance; we don't list it here as we don't
5268               # handle this case yet in the output of instance listing
5269               logging.warning("Orphan instance '%s' found on node %s",
5270                               inst, name)
5271         # else no instance is alive
5272     else:
5273       live_data = {}
5274
5275     if query.IQ_DISKUSAGE in self.requested_data:
5276       disk_usage = dict((inst.name,
5277                          _ComputeDiskSize(inst.disk_template,
5278                                           [{constants.IDISK_SIZE: disk.size}
5279                                            for disk in inst.disks]))
5280                         for inst in instance_list)
5281     else:
5282       disk_usage = None
5283
5284     if query.IQ_CONSOLE in self.requested_data:
5285       consinfo = {}
5286       for inst in instance_list:
5287         if inst.name in live_data:
5288           # Instance is running
5289           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5290         else:
5291           consinfo[inst.name] = None
5292       assert set(consinfo.keys()) == set(instance_names)
5293     else:
5294       consinfo = None
5295
5296     if query.IQ_NODES in self.requested_data:
5297       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5298                                             instance_list)))
5299       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5300       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5301                     for uuid in set(map(operator.attrgetter("group"),
5302                                         nodes.values())))
5303     else:
5304       nodes = None
5305       groups = None
5306
5307     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5308                                    disk_usage, offline_nodes, bad_nodes,
5309                                    live_data, wrongnode_inst, consinfo,
5310                                    nodes, groups)
5311
5312
5313 class LUQuery(NoHooksLU):
5314   """Query for resources/items of a certain kind.
5315
5316   """
5317   # pylint: disable=W0142
5318   REQ_BGL = False
5319
5320   def CheckArguments(self):
5321     qcls = _GetQueryImplementation(self.op.what)
5322
5323     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5324
5325   def ExpandNames(self):
5326     self.impl.ExpandNames(self)
5327
5328   def DeclareLocks(self, level):
5329     self.impl.DeclareLocks(self, level)
5330
5331   def Exec(self, feedback_fn):
5332     return self.impl.NewStyleQuery(self)
5333
5334
5335 class LUQueryFields(NoHooksLU):
5336   """Query for resources/items of a certain kind.
5337
5338   """
5339   # pylint: disable=W0142
5340   REQ_BGL = False
5341
5342   def CheckArguments(self):
5343     self.qcls = _GetQueryImplementation(self.op.what)
5344
5345   def ExpandNames(self):
5346     self.needed_locks = {}
5347
5348   def Exec(self, feedback_fn):
5349     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5350
5351
5352 class LUNodeModifyStorage(NoHooksLU):
5353   """Logical unit for modifying a storage volume on a node.
5354
5355   """
5356   REQ_BGL = False
5357
5358   def CheckArguments(self):
5359     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5360
5361     storage_type = self.op.storage_type
5362
5363     try:
5364       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5365     except KeyError:
5366       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5367                                  " modified" % storage_type,
5368                                  errors.ECODE_INVAL)
5369
5370     diff = set(self.op.changes.keys()) - modifiable
5371     if diff:
5372       raise errors.OpPrereqError("The following fields can not be modified for"
5373                                  " storage units of type '%s': %r" %
5374                                  (storage_type, list(diff)),
5375                                  errors.ECODE_INVAL)
5376
5377   def ExpandNames(self):
5378     self.needed_locks = {
5379       locking.LEVEL_NODE: self.op.node_name,
5380       }
5381
5382   def Exec(self, feedback_fn):
5383     """Computes the list of nodes and their attributes.
5384
5385     """
5386     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5387     result = self.rpc.call_storage_modify(self.op.node_name,
5388                                           self.op.storage_type, st_args,
5389                                           self.op.name, self.op.changes)
5390     result.Raise("Failed to modify storage unit '%s' on %s" %
5391                  (self.op.name, self.op.node_name))
5392
5393
5394 class LUNodeAdd(LogicalUnit):
5395   """Logical unit for adding node to the cluster.
5396
5397   """
5398   HPATH = "node-add"
5399   HTYPE = constants.HTYPE_NODE
5400   _NFLAGS = ["master_capable", "vm_capable"]
5401
5402   def CheckArguments(self):
5403     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5404     # validate/normalize the node name
5405     self.hostname = netutils.GetHostname(name=self.op.node_name,
5406                                          family=self.primary_ip_family)
5407     self.op.node_name = self.hostname.name
5408
5409     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5410       raise errors.OpPrereqError("Cannot readd the master node",
5411                                  errors.ECODE_STATE)
5412
5413     if self.op.readd and self.op.group:
5414       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5415                                  " being readded", errors.ECODE_INVAL)
5416
5417   def BuildHooksEnv(self):
5418     """Build hooks env.
5419
5420     This will run on all nodes before, and on all nodes + the new node after.
5421
5422     """
5423     return {
5424       "OP_TARGET": self.op.node_name,
5425       "NODE_NAME": self.op.node_name,
5426       "NODE_PIP": self.op.primary_ip,
5427       "NODE_SIP": self.op.secondary_ip,
5428       "MASTER_CAPABLE": str(self.op.master_capable),
5429       "VM_CAPABLE": str(self.op.vm_capable),
5430       }
5431
5432   def BuildHooksNodes(self):
5433     """Build hooks nodes.
5434
5435     """
5436     # Exclude added node
5437     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5438     post_nodes = pre_nodes + [self.op.node_name, ]
5439
5440     return (pre_nodes, post_nodes)
5441
5442   def CheckPrereq(self):
5443     """Check prerequisites.
5444
5445     This checks:
5446      - the new node is not already in the config
5447      - it is resolvable
5448      - its parameters (single/dual homed) matches the cluster
5449
5450     Any errors are signaled by raising errors.OpPrereqError.
5451
5452     """
5453     cfg = self.cfg
5454     hostname = self.hostname
5455     node = hostname.name
5456     primary_ip = self.op.primary_ip = hostname.ip
5457     if self.op.secondary_ip is None:
5458       if self.primary_ip_family == netutils.IP6Address.family:
5459         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5460                                    " IPv4 address must be given as secondary",
5461                                    errors.ECODE_INVAL)
5462       self.op.secondary_ip = primary_ip
5463
5464     secondary_ip = self.op.secondary_ip
5465     if not netutils.IP4Address.IsValid(secondary_ip):
5466       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5467                                  " address" % secondary_ip, errors.ECODE_INVAL)
5468
5469     node_list = cfg.GetNodeList()
5470     if not self.op.readd and node in node_list:
5471       raise errors.OpPrereqError("Node %s is already in the configuration" %
5472                                  node, errors.ECODE_EXISTS)
5473     elif self.op.readd and node not in node_list:
5474       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5475                                  errors.ECODE_NOENT)
5476
5477     self.changed_primary_ip = False
5478
5479     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5480       if self.op.readd and node == existing_node_name:
5481         if existing_node.secondary_ip != secondary_ip:
5482           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5483                                      " address configuration as before",
5484                                      errors.ECODE_INVAL)
5485         if existing_node.primary_ip != primary_ip:
5486           self.changed_primary_ip = True
5487
5488         continue
5489
5490       if (existing_node.primary_ip == primary_ip or
5491           existing_node.secondary_ip == primary_ip or
5492           existing_node.primary_ip == secondary_ip or
5493           existing_node.secondary_ip == secondary_ip):
5494         raise errors.OpPrereqError("New node ip address(es) conflict with"
5495                                    " existing node %s" % existing_node.name,
5496                                    errors.ECODE_NOTUNIQUE)
5497
5498     # After this 'if' block, None is no longer a valid value for the
5499     # _capable op attributes
5500     if self.op.readd:
5501       old_node = self.cfg.GetNodeInfo(node)
5502       assert old_node is not None, "Can't retrieve locked node %s" % node
5503       for attr in self._NFLAGS:
5504         if getattr(self.op, attr) is None:
5505           setattr(self.op, attr, getattr(old_node, attr))
5506     else:
5507       for attr in self._NFLAGS:
5508         if getattr(self.op, attr) is None:
5509           setattr(self.op, attr, True)
5510
5511     if self.op.readd and not self.op.vm_capable:
5512       pri, sec = cfg.GetNodeInstances(node)
5513       if pri or sec:
5514         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5515                                    " flag set to false, but it already holds"
5516                                    " instances" % node,
5517                                    errors.ECODE_STATE)
5518
5519     # check that the type of the node (single versus dual homed) is the
5520     # same as for the master
5521     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5522     master_singlehomed = myself.secondary_ip == myself.primary_ip
5523     newbie_singlehomed = secondary_ip == primary_ip
5524     if master_singlehomed != newbie_singlehomed:
5525       if master_singlehomed:
5526         raise errors.OpPrereqError("The master has no secondary ip but the"
5527                                    " new node has one",
5528                                    errors.ECODE_INVAL)
5529       else:
5530         raise errors.OpPrereqError("The master has a secondary ip but the"
5531                                    " new node doesn't have one",
5532                                    errors.ECODE_INVAL)
5533
5534     # checks reachability
5535     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5536       raise errors.OpPrereqError("Node not reachable by ping",
5537                                  errors.ECODE_ENVIRON)
5538
5539     if not newbie_singlehomed:
5540       # check reachability from my secondary ip to newbie's secondary ip
5541       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5542                            source=myself.secondary_ip):
5543         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5544                                    " based ping to node daemon port",
5545                                    errors.ECODE_ENVIRON)
5546
5547     if self.op.readd:
5548       exceptions = [node]
5549     else:
5550       exceptions = []
5551
5552     if self.op.master_capable:
5553       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5554     else:
5555       self.master_candidate = False
5556
5557     if self.op.readd:
5558       self.new_node = old_node
5559     else:
5560       node_group = cfg.LookupNodeGroup(self.op.group)
5561       self.new_node = objects.Node(name=node,
5562                                    primary_ip=primary_ip,
5563                                    secondary_ip=secondary_ip,
5564                                    master_candidate=self.master_candidate,
5565                                    offline=False, drained=False,
5566                                    group=node_group)
5567
5568     if self.op.ndparams:
5569       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5570
5571     if self.op.hv_state:
5572       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5573
5574     if self.op.disk_state:
5575       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5576
5577   def Exec(self, feedback_fn):
5578     """Adds the new node to the cluster.
5579
5580     """
5581     new_node = self.new_node
5582     node = new_node.name
5583
5584     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5585       "Not owning BGL"
5586
5587     # We adding a new node so we assume it's powered
5588     new_node.powered = True
5589
5590     # for re-adds, reset the offline/drained/master-candidate flags;
5591     # we need to reset here, otherwise offline would prevent RPC calls
5592     # later in the procedure; this also means that if the re-add
5593     # fails, we are left with a non-offlined, broken node
5594     if self.op.readd:
5595       new_node.drained = new_node.offline = False # pylint: disable=W0201
5596       self.LogInfo("Readding a node, the offline/drained flags were reset")
5597       # if we demote the node, we do cleanup later in the procedure
5598       new_node.master_candidate = self.master_candidate
5599       if self.changed_primary_ip:
5600         new_node.primary_ip = self.op.primary_ip
5601
5602     # copy the master/vm_capable flags
5603     for attr in self._NFLAGS:
5604       setattr(new_node, attr, getattr(self.op, attr))
5605
5606     # notify the user about any possible mc promotion
5607     if new_node.master_candidate:
5608       self.LogInfo("Node will be a master candidate")
5609
5610     if self.op.ndparams:
5611       new_node.ndparams = self.op.ndparams
5612     else:
5613       new_node.ndparams = {}
5614
5615     if self.op.hv_state:
5616       new_node.hv_state_static = self.new_hv_state
5617
5618     if self.op.disk_state:
5619       new_node.disk_state_static = self.new_disk_state
5620
5621     # check connectivity
5622     result = self.rpc.call_version([node])[node]
5623     result.Raise("Can't get version information from node %s" % node)
5624     if constants.PROTOCOL_VERSION == result.payload:
5625       logging.info("Communication to node %s fine, sw version %s match",
5626                    node, result.payload)
5627     else:
5628       raise errors.OpExecError("Version mismatch master version %s,"
5629                                " node version %s" %
5630                                (constants.PROTOCOL_VERSION, result.payload))
5631
5632     # Add node to our /etc/hosts, and add key to known_hosts
5633     if self.cfg.GetClusterInfo().modify_etc_hosts:
5634       master_node = self.cfg.GetMasterNode()
5635       result = self.rpc.call_etc_hosts_modify(master_node,
5636                                               constants.ETC_HOSTS_ADD,
5637                                               self.hostname.name,
5638                                               self.hostname.ip)
5639       result.Raise("Can't update hosts file with new host data")
5640
5641     if new_node.secondary_ip != new_node.primary_ip:
5642       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5643                                False)
5644
5645     node_verify_list = [self.cfg.GetMasterNode()]
5646     node_verify_param = {
5647       constants.NV_NODELIST: ([node], {}),
5648       # TODO: do a node-net-test as well?
5649     }
5650
5651     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5652                                        self.cfg.GetClusterName())
5653     for verifier in node_verify_list:
5654       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5655       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5656       if nl_payload:
5657         for failed in nl_payload:
5658           feedback_fn("ssh/hostname verification failed"
5659                       " (checking from %s): %s" %
5660                       (verifier, nl_payload[failed]))
5661         raise errors.OpExecError("ssh/hostname verification failed")
5662
5663     if self.op.readd:
5664       _RedistributeAncillaryFiles(self)
5665       self.context.ReaddNode(new_node)
5666       # make sure we redistribute the config
5667       self.cfg.Update(new_node, feedback_fn)
5668       # and make sure the new node will not have old files around
5669       if not new_node.master_candidate:
5670         result = self.rpc.call_node_demote_from_mc(new_node.name)
5671         msg = result.fail_msg
5672         if msg:
5673           self.LogWarning("Node failed to demote itself from master"
5674                           " candidate status: %s" % msg)
5675     else:
5676       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5677                                   additional_vm=self.op.vm_capable)
5678       self.context.AddNode(new_node, self.proc.GetECId())
5679
5680
5681 class LUNodeSetParams(LogicalUnit):
5682   """Modifies the parameters of a node.
5683
5684   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5685       to the node role (as _ROLE_*)
5686   @cvar _R2F: a dictionary from node role to tuples of flags
5687   @cvar _FLAGS: a list of attribute names corresponding to the flags
5688
5689   """
5690   HPATH = "node-modify"
5691   HTYPE = constants.HTYPE_NODE
5692   REQ_BGL = False
5693   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5694   _F2R = {
5695     (True, False, False): _ROLE_CANDIDATE,
5696     (False, True, False): _ROLE_DRAINED,
5697     (False, False, True): _ROLE_OFFLINE,
5698     (False, False, False): _ROLE_REGULAR,
5699     }
5700   _R2F = dict((v, k) for k, v in _F2R.items())
5701   _FLAGS = ["master_candidate", "drained", "offline"]
5702
5703   def CheckArguments(self):
5704     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5705     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5706                 self.op.master_capable, self.op.vm_capable,
5707                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5708                 self.op.disk_state]
5709     if all_mods.count(None) == len(all_mods):
5710       raise errors.OpPrereqError("Please pass at least one modification",
5711                                  errors.ECODE_INVAL)
5712     if all_mods.count(True) > 1:
5713       raise errors.OpPrereqError("Can't set the node into more than one"
5714                                  " state at the same time",
5715                                  errors.ECODE_INVAL)
5716
5717     # Boolean value that tells us whether we might be demoting from MC
5718     self.might_demote = (self.op.master_candidate == False or
5719                          self.op.offline == True or
5720                          self.op.drained == True or
5721                          self.op.master_capable == False)
5722
5723     if self.op.secondary_ip:
5724       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5725         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5726                                    " address" % self.op.secondary_ip,
5727                                    errors.ECODE_INVAL)
5728
5729     self.lock_all = self.op.auto_promote and self.might_demote
5730     self.lock_instances = self.op.secondary_ip is not None
5731
5732   def _InstanceFilter(self, instance):
5733     """Filter for getting affected instances.
5734
5735     """
5736     return (instance.disk_template in constants.DTS_INT_MIRROR and
5737             self.op.node_name in instance.all_nodes)
5738
5739   def ExpandNames(self):
5740     if self.lock_all:
5741       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5742     else:
5743       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5744
5745     # Since modifying a node can have severe effects on currently running
5746     # operations the resource lock is at least acquired in shared mode
5747     self.needed_locks[locking.LEVEL_NODE_RES] = \
5748       self.needed_locks[locking.LEVEL_NODE]
5749
5750     # Get node resource and instance locks in shared mode; they are not used
5751     # for anything but read-only access
5752     self.share_locks[locking.LEVEL_NODE_RES] = 1
5753     self.share_locks[locking.LEVEL_INSTANCE] = 1
5754
5755     if self.lock_instances:
5756       self.needed_locks[locking.LEVEL_INSTANCE] = \
5757         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5758
5759   def BuildHooksEnv(self):
5760     """Build hooks env.
5761
5762     This runs on the master node.
5763
5764     """
5765     return {
5766       "OP_TARGET": self.op.node_name,
5767       "MASTER_CANDIDATE": str(self.op.master_candidate),
5768       "OFFLINE": str(self.op.offline),
5769       "DRAINED": str(self.op.drained),
5770       "MASTER_CAPABLE": str(self.op.master_capable),
5771       "VM_CAPABLE": str(self.op.vm_capable),
5772       }
5773
5774   def BuildHooksNodes(self):
5775     """Build hooks nodes.
5776
5777     """
5778     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5779     return (nl, nl)
5780
5781   def CheckPrereq(self):
5782     """Check prerequisites.
5783
5784     This only checks the instance list against the existing names.
5785
5786     """
5787     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5788
5789     if self.lock_instances:
5790       affected_instances = \
5791         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5792
5793       # Verify instance locks
5794       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5795       wanted_instances = frozenset(affected_instances.keys())
5796       if wanted_instances - owned_instances:
5797         raise errors.OpPrereqError("Instances affected by changing node %s's"
5798                                    " secondary IP address have changed since"
5799                                    " locks were acquired, wanted '%s', have"
5800                                    " '%s'; retry the operation" %
5801                                    (self.op.node_name,
5802                                     utils.CommaJoin(wanted_instances),
5803                                     utils.CommaJoin(owned_instances)),
5804                                    errors.ECODE_STATE)
5805     else:
5806       affected_instances = None
5807
5808     if (self.op.master_candidate is not None or
5809         self.op.drained is not None or
5810         self.op.offline is not None):
5811       # we can't change the master's node flags
5812       if self.op.node_name == self.cfg.GetMasterNode():
5813         raise errors.OpPrereqError("The master role can be changed"
5814                                    " only via master-failover",
5815                                    errors.ECODE_INVAL)
5816
5817     if self.op.master_candidate and not node.master_capable:
5818       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5819                                  " it a master candidate" % node.name,
5820                                  errors.ECODE_STATE)
5821
5822     if self.op.vm_capable == False:
5823       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5824       if ipri or isec:
5825         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5826                                    " the vm_capable flag" % node.name,
5827                                    errors.ECODE_STATE)
5828
5829     if node.master_candidate and self.might_demote and not self.lock_all:
5830       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5831       # check if after removing the current node, we're missing master
5832       # candidates
5833       (mc_remaining, mc_should, _) = \
5834           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5835       if mc_remaining < mc_should:
5836         raise errors.OpPrereqError("Not enough master candidates, please"
5837                                    " pass auto promote option to allow"
5838                                    " promotion", errors.ECODE_STATE)
5839
5840     self.old_flags = old_flags = (node.master_candidate,
5841                                   node.drained, node.offline)
5842     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5843     self.old_role = old_role = self._F2R[old_flags]
5844
5845     # Check for ineffective changes
5846     for attr in self._FLAGS:
5847       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5848         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5849         setattr(self.op, attr, None)
5850
5851     # Past this point, any flag change to False means a transition
5852     # away from the respective state, as only real changes are kept
5853
5854     # TODO: We might query the real power state if it supports OOB
5855     if _SupportsOob(self.cfg, node):
5856       if self.op.offline is False and not (node.powered or
5857                                            self.op.powered == True):
5858         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5859                                     " offline status can be reset") %
5860                                    self.op.node_name)
5861     elif self.op.powered is not None:
5862       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5863                                   " as it does not support out-of-band"
5864                                   " handling") % self.op.node_name)
5865
5866     # If we're being deofflined/drained, we'll MC ourself if needed
5867     if (self.op.drained == False or self.op.offline == False or
5868         (self.op.master_capable and not node.master_capable)):
5869       if _DecideSelfPromotion(self):
5870         self.op.master_candidate = True
5871         self.LogInfo("Auto-promoting node to master candidate")
5872
5873     # If we're no longer master capable, we'll demote ourselves from MC
5874     if self.op.master_capable == False and node.master_candidate:
5875       self.LogInfo("Demoting from master candidate")
5876       self.op.master_candidate = False
5877
5878     # Compute new role
5879     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5880     if self.op.master_candidate:
5881       new_role = self._ROLE_CANDIDATE
5882     elif self.op.drained:
5883       new_role = self._ROLE_DRAINED
5884     elif self.op.offline:
5885       new_role = self._ROLE_OFFLINE
5886     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5887       # False is still in new flags, which means we're un-setting (the
5888       # only) True flag
5889       new_role = self._ROLE_REGULAR
5890     else: # no new flags, nothing, keep old role
5891       new_role = old_role
5892
5893     self.new_role = new_role
5894
5895     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5896       # Trying to transition out of offline status
5897       # TODO: Use standard RPC runner, but make sure it works when the node is
5898       # still marked offline
5899       result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5900       if result.fail_msg:
5901         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5902                                    " to report its version: %s" %
5903                                    (node.name, result.fail_msg),
5904                                    errors.ECODE_STATE)
5905       else:
5906         self.LogWarning("Transitioning node from offline to online state"
5907                         " without using re-add. Please make sure the node"
5908                         " is healthy!")
5909
5910     if self.op.secondary_ip:
5911       # Ok even without locking, because this can't be changed by any LU
5912       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5913       master_singlehomed = master.secondary_ip == master.primary_ip
5914       if master_singlehomed and self.op.secondary_ip:
5915         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5916                                    " homed cluster", errors.ECODE_INVAL)
5917
5918       assert not (frozenset(affected_instances) -
5919                   self.owned_locks(locking.LEVEL_INSTANCE))
5920
5921       if node.offline:
5922         if affected_instances:
5923           raise errors.OpPrereqError("Cannot change secondary IP address:"
5924                                      " offline node has instances (%s)"
5925                                      " configured to use it" %
5926                                      utils.CommaJoin(affected_instances.keys()))
5927       else:
5928         # On online nodes, check that no instances are running, and that
5929         # the node has the new ip and we can reach it.
5930         for instance in affected_instances.values():
5931           _CheckInstanceState(self, instance, INSTANCE_DOWN,
5932                               msg="cannot change secondary ip")
5933
5934         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5935         if master.name != node.name:
5936           # check reachability from master secondary ip to new secondary ip
5937           if not netutils.TcpPing(self.op.secondary_ip,
5938                                   constants.DEFAULT_NODED_PORT,
5939                                   source=master.secondary_ip):
5940             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5941                                        " based ping to node daemon port",
5942                                        errors.ECODE_ENVIRON)
5943
5944     if self.op.ndparams:
5945       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5946       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5947       self.new_ndparams = new_ndparams
5948
5949     if self.op.hv_state:
5950       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5951                                                  self.node.hv_state_static)
5952
5953     if self.op.disk_state:
5954       self.new_disk_state = \
5955         _MergeAndVerifyDiskState(self.op.disk_state,
5956                                  self.node.disk_state_static)
5957
5958   def Exec(self, feedback_fn):
5959     """Modifies a node.
5960
5961     """
5962     node = self.node
5963     old_role = self.old_role
5964     new_role = self.new_role
5965
5966     result = []
5967
5968     if self.op.ndparams:
5969       node.ndparams = self.new_ndparams
5970
5971     if self.op.powered is not None:
5972       node.powered = self.op.powered
5973
5974     if self.op.hv_state:
5975       node.hv_state_static = self.new_hv_state
5976
5977     if self.op.disk_state:
5978       node.disk_state_static = self.new_disk_state
5979
5980     for attr in ["master_capable", "vm_capable"]:
5981       val = getattr(self.op, attr)
5982       if val is not None:
5983         setattr(node, attr, val)
5984         result.append((attr, str(val)))
5985
5986     if new_role != old_role:
5987       # Tell the node to demote itself, if no longer MC and not offline
5988       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5989         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5990         if msg:
5991           self.LogWarning("Node failed to demote itself: %s", msg)
5992
5993       new_flags = self._R2F[new_role]
5994       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5995         if of != nf:
5996           result.append((desc, str(nf)))
5997       (node.master_candidate, node.drained, node.offline) = new_flags
5998
5999       # we locked all nodes, we adjust the CP before updating this node
6000       if self.lock_all:
6001         _AdjustCandidatePool(self, [node.name])
6002
6003     if self.op.secondary_ip:
6004       node.secondary_ip = self.op.secondary_ip
6005       result.append(("secondary_ip", self.op.secondary_ip))
6006
6007     # this will trigger configuration file update, if needed
6008     self.cfg.Update(node, feedback_fn)
6009
6010     # this will trigger job queue propagation or cleanup if the mc
6011     # flag changed
6012     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
6013       self.context.ReaddNode(node)
6014
6015     return result
6016
6017
6018 class LUNodePowercycle(NoHooksLU):
6019   """Powercycles a node.
6020
6021   """
6022   REQ_BGL = False
6023
6024   def CheckArguments(self):
6025     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6026     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
6027       raise errors.OpPrereqError("The node is the master and the force"
6028                                  " parameter was not set",
6029                                  errors.ECODE_INVAL)
6030
6031   def ExpandNames(self):
6032     """Locking for PowercycleNode.
6033
6034     This is a last-resort option and shouldn't block on other
6035     jobs. Therefore, we grab no locks.
6036
6037     """
6038     self.needed_locks = {}
6039
6040   def Exec(self, feedback_fn):
6041     """Reboots a node.
6042
6043     """
6044     result = self.rpc.call_node_powercycle(self.op.node_name,
6045                                            self.cfg.GetHypervisorType())
6046     result.Raise("Failed to schedule the reboot")
6047     return result.payload
6048
6049
6050 class LUClusterQuery(NoHooksLU):
6051   """Query cluster configuration.
6052
6053   """
6054   REQ_BGL = False
6055
6056   def ExpandNames(self):
6057     self.needed_locks = {}
6058
6059   def Exec(self, feedback_fn):
6060     """Return cluster config.
6061
6062     """
6063     cluster = self.cfg.GetClusterInfo()
6064     os_hvp = {}
6065
6066     # Filter just for enabled hypervisors
6067     for os_name, hv_dict in cluster.os_hvp.items():
6068       os_hvp[os_name] = {}
6069       for hv_name, hv_params in hv_dict.items():
6070         if hv_name in cluster.enabled_hypervisors:
6071           os_hvp[os_name][hv_name] = hv_params
6072
6073     # Convert ip_family to ip_version
6074     primary_ip_version = constants.IP4_VERSION
6075     if cluster.primary_ip_family == netutils.IP6Address.family:
6076       primary_ip_version = constants.IP6_VERSION
6077
6078     result = {
6079       "software_version": constants.RELEASE_VERSION,
6080       "protocol_version": constants.PROTOCOL_VERSION,
6081       "config_version": constants.CONFIG_VERSION,
6082       "os_api_version": max(constants.OS_API_VERSIONS),
6083       "export_version": constants.EXPORT_VERSION,
6084       "architecture": (platform.architecture()[0], platform.machine()),
6085       "name": cluster.cluster_name,
6086       "master": cluster.master_node,
6087       "default_hypervisor": cluster.primary_hypervisor,
6088       "enabled_hypervisors": cluster.enabled_hypervisors,
6089       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6090                         for hypervisor_name in cluster.enabled_hypervisors]),
6091       "os_hvp": os_hvp,
6092       "beparams": cluster.beparams,
6093       "osparams": cluster.osparams,
6094       "ipolicy": cluster.ipolicy,
6095       "nicparams": cluster.nicparams,
6096       "ndparams": cluster.ndparams,
6097       "candidate_pool_size": cluster.candidate_pool_size,
6098       "master_netdev": cluster.master_netdev,
6099       "master_netmask": cluster.master_netmask,
6100       "use_external_mip_script": cluster.use_external_mip_script,
6101       "volume_group_name": cluster.volume_group_name,
6102       "drbd_usermode_helper": cluster.drbd_usermode_helper,
6103       "file_storage_dir": cluster.file_storage_dir,
6104       "shared_file_storage_dir": cluster.shared_file_storage_dir,
6105       "maintain_node_health": cluster.maintain_node_health,
6106       "ctime": cluster.ctime,
6107       "mtime": cluster.mtime,
6108       "uuid": cluster.uuid,
6109       "tags": list(cluster.GetTags()),
6110       "uid_pool": cluster.uid_pool,
6111       "default_iallocator": cluster.default_iallocator,
6112       "reserved_lvs": cluster.reserved_lvs,
6113       "primary_ip_version": primary_ip_version,
6114       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6115       "hidden_os": cluster.hidden_os,
6116       "blacklisted_os": cluster.blacklisted_os,
6117       }
6118
6119     return result
6120
6121
6122 class LUClusterConfigQuery(NoHooksLU):
6123   """Return configuration values.
6124
6125   """
6126   REQ_BGL = False
6127   _FIELDS_DYNAMIC = utils.FieldSet()
6128   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6129                                   "watcher_pause", "volume_group_name")
6130
6131   def CheckArguments(self):
6132     _CheckOutputFields(static=self._FIELDS_STATIC,
6133                        dynamic=self._FIELDS_DYNAMIC,
6134                        selected=self.op.output_fields)
6135
6136   def ExpandNames(self):
6137     self.needed_locks = {}
6138
6139   def Exec(self, feedback_fn):
6140     """Dump a representation of the cluster config to the standard output.
6141
6142     """
6143     values = []
6144     for field in self.op.output_fields:
6145       if field == "cluster_name":
6146         entry = self.cfg.GetClusterName()
6147       elif field == "master_node":
6148         entry = self.cfg.GetMasterNode()
6149       elif field == "drain_flag":
6150         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6151       elif field == "watcher_pause":
6152         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6153       elif field == "volume_group_name":
6154         entry = self.cfg.GetVGName()
6155       else:
6156         raise errors.ParameterError(field)
6157       values.append(entry)
6158     return values
6159
6160
6161 class LUInstanceActivateDisks(NoHooksLU):
6162   """Bring up an instance's disks.
6163
6164   """
6165   REQ_BGL = False
6166
6167   def ExpandNames(self):
6168     self._ExpandAndLockInstance()
6169     self.needed_locks[locking.LEVEL_NODE] = []
6170     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6171
6172   def DeclareLocks(self, level):
6173     if level == locking.LEVEL_NODE:
6174       self._LockInstancesNodes()
6175
6176   def CheckPrereq(self):
6177     """Check prerequisites.
6178
6179     This checks that the instance is in the cluster.
6180
6181     """
6182     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6183     assert self.instance is not None, \
6184       "Cannot retrieve locked instance %s" % self.op.instance_name
6185     _CheckNodeOnline(self, self.instance.primary_node)
6186
6187   def Exec(self, feedback_fn):
6188     """Activate the disks.
6189
6190     """
6191     disks_ok, disks_info = \
6192               _AssembleInstanceDisks(self, self.instance,
6193                                      ignore_size=self.op.ignore_size)
6194     if not disks_ok:
6195       raise errors.OpExecError("Cannot activate block devices")
6196
6197     return disks_info
6198
6199
6200 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6201                            ignore_size=False):
6202   """Prepare the block devices for an instance.
6203
6204   This sets up the block devices on all nodes.
6205
6206   @type lu: L{LogicalUnit}
6207   @param lu: the logical unit on whose behalf we execute
6208   @type instance: L{objects.Instance}
6209   @param instance: the instance for whose disks we assemble
6210   @type disks: list of L{objects.Disk} or None
6211   @param disks: which disks to assemble (or all, if None)
6212   @type ignore_secondaries: boolean
6213   @param ignore_secondaries: if true, errors on secondary nodes
6214       won't result in an error return from the function
6215   @type ignore_size: boolean
6216   @param ignore_size: if true, the current known size of the disk
6217       will not be used during the disk activation, useful for cases
6218       when the size is wrong
6219   @return: False if the operation failed, otherwise a list of
6220       (host, instance_visible_name, node_visible_name)
6221       with the mapping from node devices to instance devices
6222
6223   """
6224   device_info = []
6225   disks_ok = True
6226   iname = instance.name
6227   disks = _ExpandCheckDisks(instance, disks)
6228
6229   # With the two passes mechanism we try to reduce the window of
6230   # opportunity for the race condition of switching DRBD to primary
6231   # before handshaking occured, but we do not eliminate it
6232
6233   # The proper fix would be to wait (with some limits) until the
6234   # connection has been made and drbd transitions from WFConnection
6235   # into any other network-connected state (Connected, SyncTarget,
6236   # SyncSource, etc.)
6237
6238   # 1st pass, assemble on all nodes in secondary mode
6239   for idx, inst_disk in enumerate(disks):
6240     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6241       if ignore_size:
6242         node_disk = node_disk.Copy()
6243         node_disk.UnsetSize()
6244       lu.cfg.SetDiskID(node_disk, node)
6245       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6246       msg = result.fail_msg
6247       if msg:
6248         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6249                            " (is_primary=False, pass=1): %s",
6250                            inst_disk.iv_name, node, msg)
6251         if not ignore_secondaries:
6252           disks_ok = False
6253
6254   # FIXME: race condition on drbd migration to primary
6255
6256   # 2nd pass, do only the primary node
6257   for idx, inst_disk in enumerate(disks):
6258     dev_path = None
6259
6260     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6261       if node != instance.primary_node:
6262         continue
6263       if ignore_size:
6264         node_disk = node_disk.Copy()
6265         node_disk.UnsetSize()
6266       lu.cfg.SetDiskID(node_disk, node)
6267       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6268       msg = result.fail_msg
6269       if msg:
6270         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6271                            " (is_primary=True, pass=2): %s",
6272                            inst_disk.iv_name, node, msg)
6273         disks_ok = False
6274       else:
6275         dev_path = result.payload
6276
6277     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6278
6279   # leave the disks configured for the primary node
6280   # this is a workaround that would be fixed better by
6281   # improving the logical/physical id handling
6282   for disk in disks:
6283     lu.cfg.SetDiskID(disk, instance.primary_node)
6284
6285   return disks_ok, device_info
6286
6287
6288 def _StartInstanceDisks(lu, instance, force):
6289   """Start the disks of an instance.
6290
6291   """
6292   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6293                                            ignore_secondaries=force)
6294   if not disks_ok:
6295     _ShutdownInstanceDisks(lu, instance)
6296     if force is not None and not force:
6297       lu.proc.LogWarning("", hint="If the message above refers to a"
6298                          " secondary node,"
6299                          " you can retry the operation using '--force'.")
6300     raise errors.OpExecError("Disk consistency error")
6301
6302
6303 class LUInstanceDeactivateDisks(NoHooksLU):
6304   """Shutdown an instance's disks.
6305
6306   """
6307   REQ_BGL = False
6308
6309   def ExpandNames(self):
6310     self._ExpandAndLockInstance()
6311     self.needed_locks[locking.LEVEL_NODE] = []
6312     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6313
6314   def DeclareLocks(self, level):
6315     if level == locking.LEVEL_NODE:
6316       self._LockInstancesNodes()
6317
6318   def CheckPrereq(self):
6319     """Check prerequisites.
6320
6321     This checks that the instance is in the cluster.
6322
6323     """
6324     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6325     assert self.instance is not None, \
6326       "Cannot retrieve locked instance %s" % self.op.instance_name
6327
6328   def Exec(self, feedback_fn):
6329     """Deactivate the disks
6330
6331     """
6332     instance = self.instance
6333     if self.op.force:
6334       _ShutdownInstanceDisks(self, instance)
6335     else:
6336       _SafeShutdownInstanceDisks(self, instance)
6337
6338
6339 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6340   """Shutdown block devices of an instance.
6341
6342   This function checks if an instance is running, before calling
6343   _ShutdownInstanceDisks.
6344
6345   """
6346   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6347   _ShutdownInstanceDisks(lu, instance, disks=disks)
6348
6349
6350 def _ExpandCheckDisks(instance, disks):
6351   """Return the instance disks selected by the disks list
6352
6353   @type disks: list of L{objects.Disk} or None
6354   @param disks: selected disks
6355   @rtype: list of L{objects.Disk}
6356   @return: selected instance disks to act on
6357
6358   """
6359   if disks is None:
6360     return instance.disks
6361   else:
6362     if not set(disks).issubset(instance.disks):
6363       raise errors.ProgrammerError("Can only act on disks belonging to the"
6364                                    " target instance")
6365     return disks
6366
6367
6368 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6369   """Shutdown block devices of an instance.
6370
6371   This does the shutdown on all nodes of the instance.
6372
6373   If the ignore_primary is false, errors on the primary node are
6374   ignored.
6375
6376   """
6377   all_result = True
6378   disks = _ExpandCheckDisks(instance, disks)
6379
6380   for disk in disks:
6381     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6382       lu.cfg.SetDiskID(top_disk, node)
6383       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6384       msg = result.fail_msg
6385       if msg:
6386         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6387                       disk.iv_name, node, msg)
6388         if ((node == instance.primary_node and not ignore_primary) or
6389             (node != instance.primary_node and not result.offline)):
6390           all_result = False
6391   return all_result
6392
6393
6394 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6395   """Checks if a node has enough free memory.
6396
6397   This function check if a given node has the needed amount of free
6398   memory. In case the node has less memory or we cannot get the
6399   information from the node, this function raise an OpPrereqError
6400   exception.
6401
6402   @type lu: C{LogicalUnit}
6403   @param lu: a logical unit from which we get configuration data
6404   @type node: C{str}
6405   @param node: the node to check
6406   @type reason: C{str}
6407   @param reason: string to use in the error message
6408   @type requested: C{int}
6409   @param requested: the amount of memory in MiB to check for
6410   @type hypervisor_name: C{str}
6411   @param hypervisor_name: the hypervisor to ask for memory stats
6412   @rtype: integer
6413   @return: node current free memory
6414   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6415       we cannot check the node
6416
6417   """
6418   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6419   nodeinfo[node].Raise("Can't get data from node %s" % node,
6420                        prereq=True, ecode=errors.ECODE_ENVIRON)
6421   (_, _, (hv_info, )) = nodeinfo[node].payload
6422
6423   free_mem = hv_info.get("memory_free", None)
6424   if not isinstance(free_mem, int):
6425     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6426                                " was '%s'" % (node, free_mem),
6427                                errors.ECODE_ENVIRON)
6428   if requested > free_mem:
6429     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6430                                " needed %s MiB, available %s MiB" %
6431                                (node, reason, requested, free_mem),
6432                                errors.ECODE_NORES)
6433   return free_mem
6434
6435
6436 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6437   """Checks if nodes have enough free disk space in the all VGs.
6438
6439   This function check if all given nodes have the needed amount of
6440   free disk. In case any node has less disk or we cannot get the
6441   information from the node, this function raise an OpPrereqError
6442   exception.
6443
6444   @type lu: C{LogicalUnit}
6445   @param lu: a logical unit from which we get configuration data
6446   @type nodenames: C{list}
6447   @param nodenames: the list of node names to check
6448   @type req_sizes: C{dict}
6449   @param req_sizes: the hash of vg and corresponding amount of disk in
6450       MiB to check for
6451   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6452       or we cannot check the node
6453
6454   """
6455   for vg, req_size in req_sizes.items():
6456     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6457
6458
6459 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6460   """Checks if nodes have enough free disk space in the specified VG.
6461
6462   This function check if all given nodes have the needed amount of
6463   free disk. In case any node has less disk or we cannot get the
6464   information from the node, this function raise an OpPrereqError
6465   exception.
6466
6467   @type lu: C{LogicalUnit}
6468   @param lu: a logical unit from which we get configuration data
6469   @type nodenames: C{list}
6470   @param nodenames: the list of node names to check
6471   @type vg: C{str}
6472   @param vg: the volume group to check
6473   @type requested: C{int}
6474   @param requested: the amount of disk in MiB to check for
6475   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6476       or we cannot check the node
6477
6478   """
6479   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6480   for node in nodenames:
6481     info = nodeinfo[node]
6482     info.Raise("Cannot get current information from node %s" % node,
6483                prereq=True, ecode=errors.ECODE_ENVIRON)
6484     (_, (vg_info, ), _) = info.payload
6485     vg_free = vg_info.get("vg_free", None)
6486     if not isinstance(vg_free, int):
6487       raise errors.OpPrereqError("Can't compute free disk space on node"
6488                                  " %s for vg %s, result was '%s'" %
6489                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6490     if requested > vg_free:
6491       raise errors.OpPrereqError("Not enough disk space on target node %s"
6492                                  " vg %s: required %d MiB, available %d MiB" %
6493                                  (node, vg, requested, vg_free),
6494                                  errors.ECODE_NORES)
6495
6496
6497 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6498   """Checks if nodes have enough physical CPUs
6499
6500   This function checks if all given nodes have the needed number of
6501   physical CPUs. In case any node has less CPUs or we cannot get the
6502   information from the node, this function raises an OpPrereqError
6503   exception.
6504
6505   @type lu: C{LogicalUnit}
6506   @param lu: a logical unit from which we get configuration data
6507   @type nodenames: C{list}
6508   @param nodenames: the list of node names to check
6509   @type requested: C{int}
6510   @param requested: the minimum acceptable number of physical CPUs
6511   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6512       or we cannot check the node
6513
6514   """
6515   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6516   for node in nodenames:
6517     info = nodeinfo[node]
6518     info.Raise("Cannot get current information from node %s" % node,
6519                prereq=True, ecode=errors.ECODE_ENVIRON)
6520     (_, _, (hv_info, )) = info.payload
6521     num_cpus = hv_info.get("cpu_total", None)
6522     if not isinstance(num_cpus, int):
6523       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6524                                  " on node %s, result was '%s'" %
6525                                  (node, num_cpus), errors.ECODE_ENVIRON)
6526     if requested > num_cpus:
6527       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6528                                  "required" % (node, num_cpus, requested),
6529                                  errors.ECODE_NORES)
6530
6531
6532 class LUInstanceStartup(LogicalUnit):
6533   """Starts an instance.
6534
6535   """
6536   HPATH = "instance-start"
6537   HTYPE = constants.HTYPE_INSTANCE
6538   REQ_BGL = False
6539
6540   def CheckArguments(self):
6541     # extra beparams
6542     if self.op.beparams:
6543       # fill the beparams dict
6544       objects.UpgradeBeParams(self.op.beparams)
6545       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6546
6547   def ExpandNames(self):
6548     self._ExpandAndLockInstance()
6549     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
6550
6551   def DeclareLocks(self, level):
6552     if level == locking.LEVEL_NODE_RES:
6553       self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)
6554
6555   def BuildHooksEnv(self):
6556     """Build hooks env.
6557
6558     This runs on master, primary and secondary nodes of the instance.
6559
6560     """
6561     env = {
6562       "FORCE": self.op.force,
6563       }
6564
6565     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6566
6567     return env
6568
6569   def BuildHooksNodes(self):
6570     """Build hooks nodes.
6571
6572     """
6573     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6574     return (nl, nl)
6575
6576   def CheckPrereq(self):
6577     """Check prerequisites.
6578
6579     This checks that the instance is in the cluster.
6580
6581     """
6582     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6583     assert self.instance is not None, \
6584       "Cannot retrieve locked instance %s" % self.op.instance_name
6585
6586     # extra hvparams
6587     if self.op.hvparams:
6588       # check hypervisor parameter syntax (locally)
6589       cluster = self.cfg.GetClusterInfo()
6590       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6591       filled_hvp = cluster.FillHV(instance)
6592       filled_hvp.update(self.op.hvparams)
6593       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6594       hv_type.CheckParameterSyntax(filled_hvp)
6595       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6596
6597     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6598
6599     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6600
6601     if self.primary_offline and self.op.ignore_offline_nodes:
6602       self.proc.LogWarning("Ignoring offline primary node")
6603
6604       if self.op.hvparams or self.op.beparams:
6605         self.proc.LogWarning("Overridden parameters are ignored")
6606     else:
6607       _CheckNodeOnline(self, instance.primary_node)
6608
6609       bep = self.cfg.GetClusterInfo().FillBE(instance)
6610       bep.update(self.op.beparams)
6611
6612       # check bridges existence
6613       _CheckInstanceBridgesExist(self, instance)
6614
6615       remote_info = self.rpc.call_instance_info(instance.primary_node,
6616                                                 instance.name,
6617                                                 instance.hypervisor)
6618       remote_info.Raise("Error checking node %s" % instance.primary_node,
6619                         prereq=True, ecode=errors.ECODE_ENVIRON)
6620       if not remote_info.payload: # not running already
6621         _CheckNodeFreeMemory(self, instance.primary_node,
6622                              "starting instance %s" % instance.name,
6623                              bep[constants.BE_MINMEM], instance.hypervisor)
6624
6625   def Exec(self, feedback_fn):
6626     """Start the instance.
6627
6628     """
6629     instance = self.instance
6630     force = self.op.force
6631
6632     if not self.op.no_remember:
6633       self.cfg.MarkInstanceUp(instance.name)
6634
6635     if self.primary_offline:
6636       assert self.op.ignore_offline_nodes
6637       self.proc.LogInfo("Primary node offline, marked instance as started")
6638     else:
6639       node_current = instance.primary_node
6640
6641       _StartInstanceDisks(self, instance, force)
6642
6643       result = \
6644         self.rpc.call_instance_start(node_current,
6645                                      (instance, self.op.hvparams,
6646                                       self.op.beparams),
6647                                      self.op.startup_paused)
6648       msg = result.fail_msg
6649       if msg:
6650         _ShutdownInstanceDisks(self, instance)
6651         raise errors.OpExecError("Could not start instance: %s" % msg)
6652
6653
6654 class LUInstanceReboot(LogicalUnit):
6655   """Reboot an instance.
6656
6657   """
6658   HPATH = "instance-reboot"
6659   HTYPE = constants.HTYPE_INSTANCE
6660   REQ_BGL = False
6661
6662   def ExpandNames(self):
6663     self._ExpandAndLockInstance()
6664
6665   def BuildHooksEnv(self):
6666     """Build hooks env.
6667
6668     This runs on master, primary and secondary nodes of the instance.
6669
6670     """
6671     env = {
6672       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6673       "REBOOT_TYPE": self.op.reboot_type,
6674       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6675       }
6676
6677     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6678
6679     return env
6680
6681   def BuildHooksNodes(self):
6682     """Build hooks nodes.
6683
6684     """
6685     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6686     return (nl, nl)
6687
6688   def CheckPrereq(self):
6689     """Check prerequisites.
6690
6691     This checks that the instance is in the cluster.
6692
6693     """
6694     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6695     assert self.instance is not None, \
6696       "Cannot retrieve locked instance %s" % self.op.instance_name
6697     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6698     _CheckNodeOnline(self, instance.primary_node)
6699
6700     # check bridges existence
6701     _CheckInstanceBridgesExist(self, instance)
6702
6703   def Exec(self, feedback_fn):
6704     """Reboot the instance.
6705
6706     """
6707     instance = self.instance
6708     ignore_secondaries = self.op.ignore_secondaries
6709     reboot_type = self.op.reboot_type
6710
6711     remote_info = self.rpc.call_instance_info(instance.primary_node,
6712                                               instance.name,
6713                                               instance.hypervisor)
6714     remote_info.Raise("Error checking node %s" % instance.primary_node)
6715     instance_running = bool(remote_info.payload)
6716
6717     node_current = instance.primary_node
6718
6719     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6720                                             constants.INSTANCE_REBOOT_HARD]:
6721       for disk in instance.disks:
6722         self.cfg.SetDiskID(disk, node_current)
6723       result = self.rpc.call_instance_reboot(node_current, instance,
6724                                              reboot_type,
6725                                              self.op.shutdown_timeout)
6726       result.Raise("Could not reboot instance")
6727     else:
6728       if instance_running:
6729         result = self.rpc.call_instance_shutdown(node_current, instance,
6730                                                  self.op.shutdown_timeout)
6731         result.Raise("Could not shutdown instance for full reboot")
6732         _ShutdownInstanceDisks(self, instance)
6733       else:
6734         self.LogInfo("Instance %s was already stopped, starting now",
6735                      instance.name)
6736       _StartInstanceDisks(self, instance, ignore_secondaries)
6737       result = self.rpc.call_instance_start(node_current,
6738                                             (instance, None, None), False)
6739       msg = result.fail_msg
6740       if msg:
6741         _ShutdownInstanceDisks(self, instance)
6742         raise errors.OpExecError("Could not start instance for"
6743                                  " full reboot: %s" % msg)
6744
6745     self.cfg.MarkInstanceUp(instance.name)
6746
6747
6748 class LUInstanceShutdown(LogicalUnit):
6749   """Shutdown an instance.
6750
6751   """
6752   HPATH = "instance-stop"
6753   HTYPE = constants.HTYPE_INSTANCE
6754   REQ_BGL = False
6755
6756   def ExpandNames(self):
6757     self._ExpandAndLockInstance()
6758
6759   def BuildHooksEnv(self):
6760     """Build hooks env.
6761
6762     This runs on master, primary and secondary nodes of the instance.
6763
6764     """
6765     env = _BuildInstanceHookEnvByObject(self, self.instance)
6766     env["TIMEOUT"] = self.op.timeout
6767     return env
6768
6769   def BuildHooksNodes(self):
6770     """Build hooks nodes.
6771
6772     """
6773     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6774     return (nl, nl)
6775
6776   def CheckPrereq(self):
6777     """Check prerequisites.
6778
6779     This checks that the instance is in the cluster.
6780
6781     """
6782     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6783     assert self.instance is not None, \
6784       "Cannot retrieve locked instance %s" % self.op.instance_name
6785
6786     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6787
6788     self.primary_offline = \
6789       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6790
6791     if self.primary_offline and self.op.ignore_offline_nodes:
6792       self.proc.LogWarning("Ignoring offline primary node")
6793     else:
6794       _CheckNodeOnline(self, self.instance.primary_node)
6795
6796   def Exec(self, feedback_fn):
6797     """Shutdown the instance.
6798
6799     """
6800     instance = self.instance
6801     node_current = instance.primary_node
6802     timeout = self.op.timeout
6803
6804     if not self.op.no_remember:
6805       self.cfg.MarkInstanceDown(instance.name)
6806
6807     if self.primary_offline:
6808       assert self.op.ignore_offline_nodes
6809       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6810     else:
6811       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6812       msg = result.fail_msg
6813       if msg:
6814         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6815
6816       _ShutdownInstanceDisks(self, instance)
6817
6818
6819 class LUInstanceReinstall(LogicalUnit):
6820   """Reinstall an instance.
6821
6822   """
6823   HPATH = "instance-reinstall"
6824   HTYPE = constants.HTYPE_INSTANCE
6825   REQ_BGL = False
6826
6827   def ExpandNames(self):
6828     self._ExpandAndLockInstance()
6829
6830   def BuildHooksEnv(self):
6831     """Build hooks env.
6832
6833     This runs on master, primary and secondary nodes of the instance.
6834
6835     """
6836     return _BuildInstanceHookEnvByObject(self, self.instance)
6837
6838   def BuildHooksNodes(self):
6839     """Build hooks nodes.
6840
6841     """
6842     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6843     return (nl, nl)
6844
6845   def CheckPrereq(self):
6846     """Check prerequisites.
6847
6848     This checks that the instance is in the cluster and is not running.
6849
6850     """
6851     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6852     assert instance is not None, \
6853       "Cannot retrieve locked instance %s" % self.op.instance_name
6854     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6855                      " offline, cannot reinstall")
6856     for node in instance.secondary_nodes:
6857       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6858                        " cannot reinstall")
6859
6860     if instance.disk_template == constants.DT_DISKLESS:
6861       raise errors.OpPrereqError("Instance '%s' has no disks" %
6862                                  self.op.instance_name,
6863                                  errors.ECODE_INVAL)
6864     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6865
6866     if self.op.os_type is not None:
6867       # OS verification
6868       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6869       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6870       instance_os = self.op.os_type
6871     else:
6872       instance_os = instance.os
6873
6874     nodelist = list(instance.all_nodes)
6875
6876     if self.op.osparams:
6877       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6878       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6879       self.os_inst = i_osdict # the new dict (without defaults)
6880     else:
6881       self.os_inst = None
6882
6883     self.instance = instance
6884
6885   def Exec(self, feedback_fn):
6886     """Reinstall the instance.
6887
6888     """
6889     inst = self.instance
6890
6891     if self.op.os_type is not None:
6892       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6893       inst.os = self.op.os_type
6894       # Write to configuration
6895       self.cfg.Update(inst, feedback_fn)
6896
6897     _StartInstanceDisks(self, inst, None)
6898     try:
6899       feedback_fn("Running the instance OS create scripts...")
6900       # FIXME: pass debug option from opcode to backend
6901       result = self.rpc.call_instance_os_add(inst.primary_node,
6902                                              (inst, self.os_inst), True,
6903                                              self.op.debug_level)
6904       result.Raise("Could not install OS for instance %s on node %s" %
6905                    (inst.name, inst.primary_node))
6906     finally:
6907       _ShutdownInstanceDisks(self, inst)
6908
6909
6910 class LUInstanceRecreateDisks(LogicalUnit):
6911   """Recreate an instance's missing disks.
6912
6913   """
6914   HPATH = "instance-recreate-disks"
6915   HTYPE = constants.HTYPE_INSTANCE
6916   REQ_BGL = False
6917
6918   _MODIFYABLE = frozenset([
6919     constants.IDISK_SIZE,
6920     constants.IDISK_MODE,
6921     ])
6922
6923   # New or changed disk parameters may have different semantics
6924   assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
6925     constants.IDISK_ADOPT,
6926
6927     # TODO: Implement support changing VG while recreating
6928     constants.IDISK_VG,
6929     constants.IDISK_METAVG,
6930     ]))
6931
6932   def CheckArguments(self):
6933     if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
6934       # Normalize and convert deprecated list of disk indices
6935       self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
6936
6937     duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
6938     if duplicates:
6939       raise errors.OpPrereqError("Some disks have been specified more than"
6940                                  " once: %s" % utils.CommaJoin(duplicates),
6941                                  errors.ECODE_INVAL)
6942
6943     for (idx, params) in self.op.disks:
6944       utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
6945       unsupported = frozenset(params.keys()) - self._MODIFYABLE
6946       if unsupported:
6947         raise errors.OpPrereqError("Parameters for disk %s try to change"
6948                                    " unmodifyable parameter(s): %s" %
6949                                    (idx, utils.CommaJoin(unsupported)),
6950                                    errors.ECODE_INVAL)
6951
6952   def ExpandNames(self):
6953     self._ExpandAndLockInstance()
6954     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6955     if self.op.nodes:
6956       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6957       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6958     else:
6959       self.needed_locks[locking.LEVEL_NODE] = []
6960     self.needed_locks[locking.LEVEL_NODE_RES] = []
6961
6962   def DeclareLocks(self, level):
6963     if level == locking.LEVEL_NODE:
6964       # if we replace the nodes, we only need to lock the old primary,
6965       # otherwise we need to lock all nodes for disk re-creation
6966       primary_only = bool(self.op.nodes)
6967       self._LockInstancesNodes(primary_only=primary_only)
6968     elif level == locking.LEVEL_NODE_RES:
6969       # Copy node locks
6970       self.needed_locks[locking.LEVEL_NODE_RES] = \
6971         self.needed_locks[locking.LEVEL_NODE][:]
6972
6973   def BuildHooksEnv(self):
6974     """Build hooks env.
6975
6976     This runs on master, primary and secondary nodes of the instance.
6977
6978     """
6979     return _BuildInstanceHookEnvByObject(self, self.instance)
6980
6981   def BuildHooksNodes(self):
6982     """Build hooks nodes.
6983
6984     """
6985     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6986     return (nl, nl)
6987
6988   def CheckPrereq(self):
6989     """Check prerequisites.
6990
6991     This checks that the instance is in the cluster and is not running.
6992
6993     """
6994     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6995     assert instance is not None, \
6996       "Cannot retrieve locked instance %s" % self.op.instance_name
6997     if self.op.nodes:
6998       if len(self.op.nodes) != len(instance.all_nodes):
6999         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7000                                    " %d replacement nodes were specified" %
7001                                    (instance.name, len(instance.all_nodes),
7002                                     len(self.op.nodes)),
7003                                    errors.ECODE_INVAL)
7004       assert instance.disk_template != constants.DT_DRBD8 or \
7005           len(self.op.nodes) == 2
7006       assert instance.disk_template != constants.DT_PLAIN or \
7007           len(self.op.nodes) == 1
7008       primary_node = self.op.nodes[0]
7009     else:
7010       primary_node = instance.primary_node
7011     _CheckNodeOnline(self, primary_node)
7012
7013     if instance.disk_template == constants.DT_DISKLESS:
7014       raise errors.OpPrereqError("Instance '%s' has no disks" %
7015                                  self.op.instance_name, errors.ECODE_INVAL)
7016
7017     # if we replace nodes *and* the old primary is offline, we don't
7018     # check
7019     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
7020     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
7021     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7022     if not (self.op.nodes and old_pnode.offline):
7023       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7024                           msg="cannot recreate disks")
7025
7026     if self.op.disks:
7027       self.disks = dict(self.op.disks)
7028     else:
7029       self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7030
7031     maxidx = max(self.disks.keys())
7032     if maxidx >= len(instance.disks):
7033       raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
7034                                  errors.ECODE_INVAL)
7035
7036     if (self.op.nodes and
7037         sorted(self.disks.keys()) != range(len(instance.disks))):
7038       raise errors.OpPrereqError("Can't recreate disks partially and"
7039                                  " change the nodes at the same time",
7040                                  errors.ECODE_INVAL)
7041
7042     self.instance = instance
7043
7044   def Exec(self, feedback_fn):
7045     """Recreate the disks.
7046
7047     """
7048     instance = self.instance
7049
7050     assert (self.owned_locks(locking.LEVEL_NODE) ==
7051             self.owned_locks(locking.LEVEL_NODE_RES))
7052
7053     to_skip = []
7054     mods = [] # keeps track of needed changes
7055
7056     for idx, disk in enumerate(instance.disks):
7057       try:
7058         changes = self.disks[idx]
7059       except KeyError:
7060         # Disk should not be recreated
7061         to_skip.append(idx)
7062         continue
7063
7064       # update secondaries for disks, if needed
7065       if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7066         # need to update the nodes and minors
7067         assert len(self.op.nodes) == 2
7068         assert len(disk.logical_id) == 6 # otherwise disk internals
7069                                          # have changed
7070         (_, _, old_port, _, _, old_secret) = disk.logical_id
7071         new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7072         new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7073                   new_minors[0], new_minors[1], old_secret)
7074         assert len(disk.logical_id) == len(new_id)
7075       else:
7076         new_id = None
7077
7078       mods.append((idx, new_id, changes))
7079
7080     # now that we have passed all asserts above, we can apply the mods
7081     # in a single run (to avoid partial changes)
7082     for idx, new_id, changes in mods:
7083       disk = instance.disks[idx]
7084       if new_id is not None:
7085         assert disk.dev_type == constants.LD_DRBD8
7086         disk.logical_id = new_id
7087       if changes:
7088         disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7089                     mode=changes.get(constants.IDISK_MODE, None))
7090
7091     # change primary node, if needed
7092     if self.op.nodes:
7093       instance.primary_node = self.op.nodes[0]
7094       self.LogWarning("Changing the instance's nodes, you will have to"
7095                       " remove any disks left on the older nodes manually")
7096
7097     if self.op.nodes:
7098       self.cfg.Update(instance, feedback_fn)
7099
7100     _CreateDisks(self, instance, to_skip=to_skip)
7101
7102
7103 class LUInstanceRename(LogicalUnit):
7104   """Rename an instance.
7105
7106   """
7107   HPATH = "instance-rename"
7108   HTYPE = constants.HTYPE_INSTANCE
7109
7110   def CheckArguments(self):
7111     """Check arguments.
7112
7113     """
7114     if self.op.ip_check and not self.op.name_check:
7115       # TODO: make the ip check more flexible and not depend on the name check
7116       raise errors.OpPrereqError("IP address check requires a name check",
7117                                  errors.ECODE_INVAL)
7118
7119   def BuildHooksEnv(self):
7120     """Build hooks env.
7121
7122     This runs on master, primary and secondary nodes of the instance.
7123
7124     """
7125     env = _BuildInstanceHookEnvByObject(self, self.instance)
7126     env["INSTANCE_NEW_NAME"] = self.op.new_name
7127     return env
7128
7129   def BuildHooksNodes(self):
7130     """Build hooks nodes.
7131
7132     """
7133     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7134     return (nl, nl)
7135
7136   def CheckPrereq(self):
7137     """Check prerequisites.
7138
7139     This checks that the instance is in the cluster and is not running.
7140
7141     """
7142     self.op.instance_name = _ExpandInstanceName(self.cfg,
7143                                                 self.op.instance_name)
7144     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7145     assert instance is not None
7146     _CheckNodeOnline(self, instance.primary_node)
7147     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7148                         msg="cannot rename")
7149     self.instance = instance
7150
7151     new_name = self.op.new_name
7152     if self.op.name_check:
7153       hostname = netutils.GetHostname(name=new_name)
7154       if hostname.name != new_name:
7155         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7156                      hostname.name)
7157       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7158         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7159                                     " same as given hostname '%s'") %
7160                                     (hostname.name, self.op.new_name),
7161                                     errors.ECODE_INVAL)
7162       new_name = self.op.new_name = hostname.name
7163       if (self.op.ip_check and
7164           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7165         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7166                                    (hostname.ip, new_name),
7167                                    errors.ECODE_NOTUNIQUE)
7168
7169     instance_list = self.cfg.GetInstanceList()
7170     if new_name in instance_list and new_name != instance.name:
7171       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7172                                  new_name, errors.ECODE_EXISTS)
7173
7174   def Exec(self, feedback_fn):
7175     """Rename the instance.
7176
7177     """
7178     inst = self.instance
7179     old_name = inst.name
7180
7181     rename_file_storage = False
7182     if (inst.disk_template in constants.DTS_FILEBASED and
7183         self.op.new_name != inst.name):
7184       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7185       rename_file_storage = True
7186
7187     self.cfg.RenameInstance(inst.name, self.op.new_name)
7188     # Change the instance lock. This is definitely safe while we hold the BGL.
7189     # Otherwise the new lock would have to be added in acquired mode.
7190     assert self.REQ_BGL
7191     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7192     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7193
7194     # re-read the instance from the configuration after rename
7195     inst = self.cfg.GetInstanceInfo(self.op.new_name)
7196
7197     if rename_file_storage:
7198       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7199       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7200                                                      old_file_storage_dir,
7201                                                      new_file_storage_dir)
7202       result.Raise("Could not rename on node %s directory '%s' to '%s'"
7203                    " (but the instance has been renamed in Ganeti)" %
7204                    (inst.primary_node, old_file_storage_dir,
7205                     new_file_storage_dir))
7206
7207     _StartInstanceDisks(self, inst, None)
7208     try:
7209       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7210                                                  old_name, self.op.debug_level)
7211       msg = result.fail_msg
7212       if msg:
7213         msg = ("Could not run OS rename script for instance %s on node %s"
7214                " (but the instance has been renamed in Ganeti): %s" %
7215                (inst.name, inst.primary_node, msg))
7216         self.proc.LogWarning(msg)
7217     finally:
7218       _ShutdownInstanceDisks(self, inst)
7219
7220     return inst.name
7221
7222
7223 class LUInstanceRemove(LogicalUnit):
7224   """Remove an instance.
7225
7226   """
7227   HPATH = "instance-remove"
7228   HTYPE = constants.HTYPE_INSTANCE
7229   REQ_BGL = False
7230
7231   def ExpandNames(self):
7232     self._ExpandAndLockInstance()
7233     self.needed_locks[locking.LEVEL_NODE] = []
7234     self.needed_locks[locking.LEVEL_NODE_RES] = []
7235     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7236
7237   def DeclareLocks(self, level):
7238     if level == locking.LEVEL_NODE:
7239       self._LockInstancesNodes()
7240     elif level == locking.LEVEL_NODE_RES:
7241       # Copy node locks
7242       self.needed_locks[locking.LEVEL_NODE_RES] = \
7243         self.needed_locks[locking.LEVEL_NODE][:]
7244
7245   def BuildHooksEnv(self):
7246     """Build hooks env.
7247
7248     This runs on master, primary and secondary nodes of the instance.
7249
7250     """
7251     env = _BuildInstanceHookEnvByObject(self, self.instance)
7252     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7253     return env
7254
7255   def BuildHooksNodes(self):
7256     """Build hooks nodes.
7257
7258     """
7259     nl = [self.cfg.GetMasterNode()]
7260     nl_post = list(self.instance.all_nodes) + nl
7261     return (nl, nl_post)
7262
7263   def CheckPrereq(self):
7264     """Check prerequisites.
7265
7266     This checks that the instance is in the cluster.
7267
7268     """
7269     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7270     assert self.instance is not None, \
7271       "Cannot retrieve locked instance %s" % self.op.instance_name
7272
7273   def Exec(self, feedback_fn):
7274     """Remove the instance.
7275
7276     """
7277     instance = self.instance
7278     logging.info("Shutting down instance %s on node %s",
7279                  instance.name, instance.primary_node)
7280
7281     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7282                                              self.op.shutdown_timeout)
7283     msg = result.fail_msg
7284     if msg:
7285       if self.op.ignore_failures:
7286         feedback_fn("Warning: can't shutdown instance: %s" % msg)
7287       else:
7288         raise errors.OpExecError("Could not shutdown instance %s on"
7289                                  " node %s: %s" %
7290                                  (instance.name, instance.primary_node, msg))
7291
7292     assert (self.owned_locks(locking.LEVEL_NODE) ==
7293             self.owned_locks(locking.LEVEL_NODE_RES))
7294     assert not (set(instance.all_nodes) -
7295                 self.owned_locks(locking.LEVEL_NODE)), \
7296       "Not owning correct locks"
7297
7298     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7299
7300
7301 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7302   """Utility function to remove an instance.
7303
7304   """
7305   logging.info("Removing block devices for instance %s", instance.name)
7306
7307   if not _RemoveDisks(lu, instance):
7308     if not ignore_failures:
7309       raise errors.OpExecError("Can't remove instance's disks")
7310     feedback_fn("Warning: can't remove instance's disks")
7311
7312   logging.info("Removing instance %s out of cluster config", instance.name)
7313
7314   lu.cfg.RemoveInstance(instance.name)
7315
7316   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7317     "Instance lock removal conflict"
7318
7319   # Remove lock for the instance
7320   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7321
7322
7323 class LUInstanceQuery(NoHooksLU):
7324   """Logical unit for querying instances.
7325
7326   """
7327   # pylint: disable=W0142
7328   REQ_BGL = False
7329
7330   def CheckArguments(self):
7331     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7332                              self.op.output_fields, self.op.use_locking)
7333
7334   def ExpandNames(self):
7335     self.iq.ExpandNames(self)
7336
7337   def DeclareLocks(self, level):
7338     self.iq.DeclareLocks(self, level)
7339
7340   def Exec(self, feedback_fn):
7341     return self.iq.OldStyleQuery(self)
7342
7343
7344 class LUInstanceFailover(LogicalUnit):
7345   """Failover an instance.
7346
7347   """
7348   HPATH = "instance-failover"
7349   HTYPE = constants.HTYPE_INSTANCE
7350   REQ_BGL = False
7351
7352   def CheckArguments(self):
7353     """Check the arguments.
7354
7355     """
7356     self.iallocator = getattr(self.op, "iallocator", None)
7357     self.target_node = getattr(self.op, "target_node", None)
7358
7359   def ExpandNames(self):
7360     self._ExpandAndLockInstance()
7361
7362     if self.op.target_node is not None:
7363       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7364
7365     self.needed_locks[locking.LEVEL_NODE] = []
7366     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7367
7368     self.needed_locks[locking.LEVEL_NODE_RES] = []
7369     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7370
7371     ignore_consistency = self.op.ignore_consistency
7372     shutdown_timeout = self.op.shutdown_timeout
7373     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7374                                        cleanup=False,
7375                                        failover=True,
7376                                        ignore_consistency=ignore_consistency,
7377                                        shutdown_timeout=shutdown_timeout,
7378                                        ignore_ipolicy=self.op.ignore_ipolicy)
7379     self.tasklets = [self._migrater]
7380
7381   def DeclareLocks(self, level):
7382     if level == locking.LEVEL_NODE:
7383       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7384       if instance.disk_template in constants.DTS_EXT_MIRROR:
7385         if self.op.target_node is None:
7386           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7387         else:
7388           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7389                                                    self.op.target_node]
7390         del self.recalculate_locks[locking.LEVEL_NODE]
7391       else:
7392         self._LockInstancesNodes()
7393     elif level == locking.LEVEL_NODE_RES:
7394       # Copy node locks
7395       self.needed_locks[locking.LEVEL_NODE_RES] = \
7396         self.needed_locks[locking.LEVEL_NODE][:]
7397
7398   def BuildHooksEnv(self):
7399     """Build hooks env.
7400
7401     This runs on master, primary and secondary nodes of the instance.
7402
7403     """
7404     instance = self._migrater.instance
7405     source_node = instance.primary_node
7406     target_node = self.op.target_node
7407     env = {
7408       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7409       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7410       "OLD_PRIMARY": source_node,
7411       "NEW_PRIMARY": target_node,
7412       }
7413
7414     if instance.disk_template in constants.DTS_INT_MIRROR:
7415       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7416       env["NEW_SECONDARY"] = source_node
7417     else:
7418       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7419
7420     env.update(_BuildInstanceHookEnvByObject(self, instance))
7421
7422     return env
7423
7424   def BuildHooksNodes(self):
7425     """Build hooks nodes.
7426
7427     """
7428     instance = self._migrater.instance
7429     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7430     return (nl, nl + [instance.primary_node])
7431
7432
7433 class LUInstanceMigrate(LogicalUnit):
7434   """Migrate an instance.
7435
7436   This is migration without shutting down, compared to the failover,
7437   which is done with shutdown.
7438
7439   """
7440   HPATH = "instance-migrate"
7441   HTYPE = constants.HTYPE_INSTANCE
7442   REQ_BGL = False
7443
7444   def ExpandNames(self):
7445     self._ExpandAndLockInstance()
7446
7447     if self.op.target_node is not None:
7448       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7449
7450     self.needed_locks[locking.LEVEL_NODE] = []
7451     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7452
7453     self.needed_locks[locking.LEVEL_NODE] = []
7454     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7455
7456     self._migrater = \
7457       TLMigrateInstance(self, self.op.instance_name,
7458                         cleanup=self.op.cleanup,
7459                         failover=False,
7460                         fallback=self.op.allow_failover,
7461                         allow_runtime_changes=self.op.allow_runtime_changes,
7462                         ignore_ipolicy=self.op.ignore_ipolicy)
7463     self.tasklets = [self._migrater]
7464
7465   def DeclareLocks(self, level):
7466     if level == locking.LEVEL_NODE:
7467       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7468       if instance.disk_template in constants.DTS_EXT_MIRROR:
7469         if self.op.target_node is None:
7470           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7471         else:
7472           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7473                                                    self.op.target_node]
7474         del self.recalculate_locks[locking.LEVEL_NODE]
7475       else:
7476         self._LockInstancesNodes()
7477     elif level == locking.LEVEL_NODE_RES:
7478       # Copy node locks
7479       self.needed_locks[locking.LEVEL_NODE_RES] = \
7480         self.needed_locks[locking.LEVEL_NODE][:]
7481
7482   def BuildHooksEnv(self):
7483     """Build hooks env.
7484
7485     This runs on master, primary and secondary nodes of the instance.
7486
7487     """
7488     instance = self._migrater.instance
7489     source_node = instance.primary_node
7490     target_node = self.op.target_node
7491     env = _BuildInstanceHookEnvByObject(self, instance)
7492     env.update({
7493       "MIGRATE_LIVE": self._migrater.live,
7494       "MIGRATE_CLEANUP": self.op.cleanup,
7495       "OLD_PRIMARY": source_node,
7496       "NEW_PRIMARY": target_node,
7497       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7498       })
7499
7500     if instance.disk_template in constants.DTS_INT_MIRROR:
7501       env["OLD_SECONDARY"] = target_node
7502       env["NEW_SECONDARY"] = source_node
7503     else:
7504       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7505
7506     return env
7507
7508   def BuildHooksNodes(self):
7509     """Build hooks nodes.
7510
7511     """
7512     instance = self._migrater.instance
7513     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7514     return (nl, nl + [instance.primary_node])
7515
7516
7517 class LUInstanceMove(LogicalUnit):
7518   """Move an instance by data-copying.
7519
7520   """
7521   HPATH = "instance-move"
7522   HTYPE = constants.HTYPE_INSTANCE
7523   REQ_BGL = False
7524
7525   def ExpandNames(self):
7526     self._ExpandAndLockInstance()
7527     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7528     self.op.target_node = target_node
7529     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7530     self.needed_locks[locking.LEVEL_NODE_RES] = []
7531     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7532
7533   def DeclareLocks(self, level):
7534     if level == locking.LEVEL_NODE:
7535       self._LockInstancesNodes(primary_only=True)
7536     elif level == locking.LEVEL_NODE_RES:
7537       # Copy node locks
7538       self.needed_locks[locking.LEVEL_NODE_RES] = \
7539         self.needed_locks[locking.LEVEL_NODE][:]
7540
7541   def BuildHooksEnv(self):
7542     """Build hooks env.
7543
7544     This runs on master, primary and secondary nodes of the instance.
7545
7546     """
7547     env = {
7548       "TARGET_NODE": self.op.target_node,
7549       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7550       }
7551     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7552     return env
7553
7554   def BuildHooksNodes(self):
7555     """Build hooks nodes.
7556
7557     """
7558     nl = [
7559       self.cfg.GetMasterNode(),
7560       self.instance.primary_node,
7561       self.op.target_node,
7562       ]
7563     return (nl, nl)
7564
7565   def CheckPrereq(self):
7566     """Check prerequisites.
7567
7568     This checks that the instance is in the cluster.
7569
7570     """
7571     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7572     assert self.instance is not None, \
7573       "Cannot retrieve locked instance %s" % self.op.instance_name
7574
7575     node = self.cfg.GetNodeInfo(self.op.target_node)
7576     assert node is not None, \
7577       "Cannot retrieve locked node %s" % self.op.target_node
7578
7579     self.target_node = target_node = node.name
7580
7581     if target_node == instance.primary_node:
7582       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7583                                  (instance.name, target_node),
7584                                  errors.ECODE_STATE)
7585
7586     bep = self.cfg.GetClusterInfo().FillBE(instance)
7587
7588     for idx, dsk in enumerate(instance.disks):
7589       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7590         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7591                                    " cannot copy" % idx, errors.ECODE_STATE)
7592
7593     _CheckNodeOnline(self, target_node)
7594     _CheckNodeNotDrained(self, target_node)
7595     _CheckNodeVmCapable(self, target_node)
7596     ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7597                                      self.cfg.GetNodeGroup(node.group))
7598     _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7599                             ignore=self.op.ignore_ipolicy)
7600
7601     if instance.admin_state == constants.ADMINST_UP:
7602       # check memory requirements on the secondary node
7603       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7604                            instance.name, bep[constants.BE_MAXMEM],
7605                            instance.hypervisor)
7606     else:
7607       self.LogInfo("Not checking memory on the secondary node as"
7608                    " instance will not be started")
7609
7610     # check bridge existance
7611     _CheckInstanceBridgesExist(self, instance, node=target_node)
7612
7613   def Exec(self, feedback_fn):
7614     """Move an instance.
7615
7616     The move is done by shutting it down on its present node, copying
7617     the data over (slow) and starting it on the new node.
7618
7619     """
7620     instance = self.instance
7621
7622     source_node = instance.primary_node
7623     target_node = self.target_node
7624
7625     self.LogInfo("Shutting down instance %s on source node %s",
7626                  instance.name, source_node)
7627
7628     assert (self.owned_locks(locking.LEVEL_NODE) ==
7629             self.owned_locks(locking.LEVEL_NODE_RES))
7630
7631     result = self.rpc.call_instance_shutdown(source_node, instance,
7632                                              self.op.shutdown_timeout)
7633     msg = result.fail_msg
7634     if msg:
7635       if self.op.ignore_consistency:
7636         self.proc.LogWarning("Could not shutdown instance %s on node %s."
7637                              " Proceeding anyway. Please make sure node"
7638                              " %s is down. Error details: %s",
7639                              instance.name, source_node, source_node, msg)
7640       else:
7641         raise errors.OpExecError("Could not shutdown instance %s on"
7642                                  " node %s: %s" %
7643                                  (instance.name, source_node, msg))
7644
7645     # create the target disks
7646     try:
7647       _CreateDisks(self, instance, target_node=target_node)
7648     except errors.OpExecError:
7649       self.LogWarning("Device creation failed, reverting...")
7650       try:
7651         _RemoveDisks(self, instance, target_node=target_node)
7652       finally:
7653         self.cfg.ReleaseDRBDMinors(instance.name)
7654         raise
7655
7656     cluster_name = self.cfg.GetClusterInfo().cluster_name
7657
7658     errs = []
7659     # activate, get path, copy the data over
7660     for idx, disk in enumerate(instance.disks):
7661       self.LogInfo("Copying data for disk %d", idx)
7662       result = self.rpc.call_blockdev_assemble(target_node, disk,
7663                                                instance.name, True, idx)
7664       if result.fail_msg:
7665         self.LogWarning("Can't assemble newly created disk %d: %s",
7666                         idx, result.fail_msg)
7667         errs.append(result.fail_msg)
7668         break
7669       dev_path = result.payload
7670       result = self.rpc.call_blockdev_export(source_node, disk,
7671                                              target_node, dev_path,
7672                                              cluster_name)
7673       if result.fail_msg:
7674         self.LogWarning("Can't copy data over for disk %d: %s",
7675                         idx, result.fail_msg)
7676         errs.append(result.fail_msg)
7677         break
7678
7679     if errs:
7680       self.LogWarning("Some disks failed to copy, aborting")
7681       try:
7682         _RemoveDisks(self, instance, target_node=target_node)
7683       finally:
7684         self.cfg.ReleaseDRBDMinors(instance.name)
7685         raise errors.OpExecError("Errors during disk copy: %s" %
7686                                  (",".join(errs),))
7687
7688     instance.primary_node = target_node
7689     self.cfg.Update(instance, feedback_fn)
7690
7691     self.LogInfo("Removing the disks on the original node")
7692     _RemoveDisks(self, instance, target_node=source_node)
7693
7694     # Only start the instance if it's marked as up
7695     if instance.admin_state == constants.ADMINST_UP:
7696       self.LogInfo("Starting instance %s on node %s",
7697                    instance.name, target_node)
7698
7699       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7700                                            ignore_secondaries=True)
7701       if not disks_ok:
7702         _ShutdownInstanceDisks(self, instance)
7703         raise errors.OpExecError("Can't activate the instance's disks")
7704
7705       result = self.rpc.call_instance_start(target_node,
7706                                             (instance, None, None), False)
7707       msg = result.fail_msg
7708       if msg:
7709         _ShutdownInstanceDisks(self, instance)
7710         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7711                                  (instance.name, target_node, msg))
7712
7713
7714 class LUNodeMigrate(LogicalUnit):
7715   """Migrate all instances from a node.
7716
7717   """
7718   HPATH = "node-migrate"
7719   HTYPE = constants.HTYPE_NODE
7720   REQ_BGL = False
7721
7722   def CheckArguments(self):
7723     pass
7724
7725   def ExpandNames(self):
7726     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7727
7728     self.share_locks = _ShareAll()
7729     self.needed_locks = {
7730       locking.LEVEL_NODE: [self.op.node_name],
7731       }
7732
7733   def BuildHooksEnv(self):
7734     """Build hooks env.
7735
7736     This runs on the master, the primary and all the secondaries.
7737
7738     """
7739     return {
7740       "NODE_NAME": self.op.node_name,
7741       "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7742       }
7743
7744   def BuildHooksNodes(self):
7745     """Build hooks nodes.
7746
7747     """
7748     nl = [self.cfg.GetMasterNode()]
7749     return (nl, nl)
7750
7751   def CheckPrereq(self):
7752     pass
7753
7754   def Exec(self, feedback_fn):
7755     # Prepare jobs for migration instances
7756     allow_runtime_changes = self.op.allow_runtime_changes
7757     jobs = [
7758       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7759                                  mode=self.op.mode,
7760                                  live=self.op.live,
7761                                  iallocator=self.op.iallocator,
7762                                  target_node=self.op.target_node,
7763                                  allow_runtime_changes=allow_runtime_changes,
7764                                  ignore_ipolicy=self.op.ignore_ipolicy)]
7765       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7766       ]
7767
7768     # TODO: Run iallocator in this opcode and pass correct placement options to
7769     # OpInstanceMigrate. Since other jobs can modify the cluster between
7770     # running the iallocator and the actual migration, a good consistency model
7771     # will have to be found.
7772
7773     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7774             frozenset([self.op.node_name]))
7775
7776     return ResultWithJobs(jobs)
7777
7778
7779 class TLMigrateInstance(Tasklet):
7780   """Tasklet class for instance migration.
7781
7782   @type live: boolean
7783   @ivar live: whether the migration will be done live or non-live;
7784       this variable is initalized only after CheckPrereq has run
7785   @type cleanup: boolean
7786   @ivar cleanup: Wheater we cleanup from a failed migration
7787   @type iallocator: string
7788   @ivar iallocator: The iallocator used to determine target_node
7789   @type target_node: string
7790   @ivar target_node: If given, the target_node to reallocate the instance to
7791   @type failover: boolean
7792   @ivar failover: Whether operation results in failover or migration
7793   @type fallback: boolean
7794   @ivar fallback: Whether fallback to failover is allowed if migration not
7795                   possible
7796   @type ignore_consistency: boolean
7797   @ivar ignore_consistency: Wheter we should ignore consistency between source
7798                             and target node
7799   @type shutdown_timeout: int
7800   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7801   @type ignore_ipolicy: bool
7802   @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7803
7804   """
7805
7806   # Constants
7807   _MIGRATION_POLL_INTERVAL = 1      # seconds
7808   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7809
7810   def __init__(self, lu, instance_name, cleanup=False,
7811                failover=False, fallback=False,
7812                ignore_consistency=False,
7813                allow_runtime_changes=True,
7814                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7815                ignore_ipolicy=False):
7816     """Initializes this class.
7817
7818     """
7819     Tasklet.__init__(self, lu)
7820
7821     # Parameters
7822     self.instance_name = instance_name
7823     self.cleanup = cleanup
7824     self.live = False # will be overridden later
7825     self.failover = failover
7826     self.fallback = fallback
7827     self.ignore_consistency = ignore_consistency
7828     self.shutdown_timeout = shutdown_timeout
7829     self.ignore_ipolicy = ignore_ipolicy
7830     self.allow_runtime_changes = allow_runtime_changes
7831
7832   def CheckPrereq(self):
7833     """Check prerequisites.
7834
7835     This checks that the instance is in the cluster.
7836
7837     """
7838     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7839     instance = self.cfg.GetInstanceInfo(instance_name)
7840     assert instance is not None
7841     self.instance = instance
7842     cluster = self.cfg.GetClusterInfo()
7843
7844     if (not self.cleanup and
7845         not instance.admin_state == constants.ADMINST_UP and
7846         not self.failover and self.fallback):
7847       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7848                       " switching to failover")
7849       self.failover = True
7850
7851     if instance.disk_template not in constants.DTS_MIRRORED:
7852       if self.failover:
7853         text = "failovers"
7854       else:
7855         text = "migrations"
7856       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7857                                  " %s" % (instance.disk_template, text),
7858                                  errors.ECODE_STATE)
7859
7860     if instance.disk_template in constants.DTS_EXT_MIRROR:
7861       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7862
7863       if self.lu.op.iallocator:
7864         self._RunAllocator()
7865       else:
7866         # We set set self.target_node as it is required by
7867         # BuildHooksEnv
7868         self.target_node = self.lu.op.target_node
7869
7870       # Check that the target node is correct in terms of instance policy
7871       nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7872       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7873       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7874       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7875                               ignore=self.ignore_ipolicy)
7876
7877       # self.target_node is already populated, either directly or by the
7878       # iallocator run
7879       target_node = self.target_node
7880       if self.target_node == instance.primary_node:
7881         raise errors.OpPrereqError("Cannot migrate instance %s"
7882                                    " to its primary (%s)" %
7883                                    (instance.name, instance.primary_node))
7884
7885       if len(self.lu.tasklets) == 1:
7886         # It is safe to release locks only when we're the only tasklet
7887         # in the LU
7888         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7889                       keep=[instance.primary_node, self.target_node])
7890
7891     else:
7892       secondary_nodes = instance.secondary_nodes
7893       if not secondary_nodes:
7894         raise errors.ConfigurationError("No secondary node but using"
7895                                         " %s disk template" %
7896                                         instance.disk_template)
7897       target_node = secondary_nodes[0]
7898       if self.lu.op.iallocator or (self.lu.op.target_node and
7899                                    self.lu.op.target_node != target_node):
7900         if self.failover:
7901           text = "failed over"
7902         else:
7903           text = "migrated"
7904         raise errors.OpPrereqError("Instances with disk template %s cannot"
7905                                    " be %s to arbitrary nodes"
7906                                    " (neither an iallocator nor a target"
7907                                    " node can be passed)" %
7908                                    (instance.disk_template, text),
7909                                    errors.ECODE_INVAL)
7910       nodeinfo = self.cfg.GetNodeInfo(target_node)
7911       group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7912       ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7913       _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7914                               ignore=self.ignore_ipolicy)
7915
7916     i_be = cluster.FillBE(instance)
7917
7918     # check memory requirements on the secondary node
7919     if (not self.cleanup and
7920          (not self.failover or instance.admin_state == constants.ADMINST_UP)):
7921       self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
7922                                                "migrating instance %s" %
7923                                                instance.name,
7924                                                i_be[constants.BE_MINMEM],
7925                                                instance.hypervisor)
7926     else:
7927       self.lu.LogInfo("Not checking memory on the secondary node as"
7928                       " instance will not be started")
7929
7930     # check if failover must be forced instead of migration
7931     if (not self.cleanup and not self.failover and
7932         i_be[constants.BE_ALWAYS_FAILOVER]):
7933       if self.fallback:
7934         self.lu.LogInfo("Instance configured to always failover; fallback"
7935                         " to failover")
7936         self.failover = True
7937       else:
7938         raise errors.OpPrereqError("This instance has been configured to"
7939                                    " always failover, please allow failover",
7940                                    errors.ECODE_STATE)
7941
7942     # check bridge existance
7943     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7944
7945     if not self.cleanup:
7946       _CheckNodeNotDrained(self.lu, target_node)
7947       if not self.failover:
7948         result = self.rpc.call_instance_migratable(instance.primary_node,
7949                                                    instance)
7950         if result.fail_msg and self.fallback:
7951           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7952                           " failover")
7953           self.failover = True
7954         else:
7955           result.Raise("Can't migrate, please use failover",
7956                        prereq=True, ecode=errors.ECODE_STATE)
7957
7958     assert not (self.failover and self.cleanup)
7959
7960     if not self.failover:
7961       if self.lu.op.live is not None and self.lu.op.mode is not None:
7962         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7963                                    " parameters are accepted",
7964                                    errors.ECODE_INVAL)
7965       if self.lu.op.live is not None:
7966         if self.lu.op.live:
7967           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7968         else:
7969           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7970         # reset the 'live' parameter to None so that repeated
7971         # invocations of CheckPrereq do not raise an exception
7972         self.lu.op.live = None
7973       elif self.lu.op.mode is None:
7974         # read the default value from the hypervisor
7975         i_hv = cluster.FillHV(self.instance, skip_globals=False)
7976         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7977
7978       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7979     else:
7980       # Failover is never live
7981       self.live = False
7982
7983     if not (self.failover or self.cleanup):
7984       remote_info = self.rpc.call_instance_info(instance.primary_node,
7985                                                 instance.name,
7986                                                 instance.hypervisor)
7987       remote_info.Raise("Error checking instance on node %s" %
7988                         instance.primary_node)
7989       instance_running = bool(remote_info.payload)
7990       if instance_running:
7991         self.current_mem = int(remote_info.payload["memory"])
7992
7993   def _RunAllocator(self):
7994     """Run the allocator based on input opcode.
7995
7996     """
7997     # FIXME: add a self.ignore_ipolicy option
7998     ial = IAllocator(self.cfg, self.rpc,
7999                      mode=constants.IALLOCATOR_MODE_RELOC,
8000                      name=self.instance_name,
8001                      # TODO See why hail breaks with a single node below
8002                      relocate_from=[self.instance.primary_node,
8003                                     self.instance.primary_node],
8004                      )
8005
8006     ial.Run(self.lu.op.iallocator)
8007
8008     if not ial.success:
8009       raise errors.OpPrereqError("Can't compute nodes using"
8010                                  " iallocator '%s': %s" %
8011                                  (self.lu.op.iallocator, ial.info),
8012                                  errors.ECODE_NORES)
8013     if len(ial.result) != ial.required_nodes:
8014       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8015                                  " of nodes (%s), required %s" %
8016                                  (self.lu.op.iallocator, len(ial.result),
8017                                   ial.required_nodes), errors.ECODE_FAULT)
8018     self.target_node = ial.result[0]
8019     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8020                  self.instance_name, self.lu.op.iallocator,
8021                  utils.CommaJoin(ial.result))
8022
8023   def _WaitUntilSync(self):
8024     """Poll with custom rpc for disk sync.
8025
8026     This uses our own step-based rpc call.
8027
8028     """
8029     self.feedback_fn("* wait until resync is done")
8030     all_done = False
8031     while not all_done:
8032       all_done = True
8033       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8034                                             self.nodes_ip,
8035                                             self.instance.disks)
8036       min_percent = 100
8037       for node, nres in result.items():
8038         nres.Raise("Cannot resync disks on node %s" % node)
8039         node_done, node_percent = nres.payload
8040         all_done = all_done and node_done
8041         if node_percent is not None:
8042           min_percent = min(min_percent, node_percent)
8043       if not all_done:
8044         if min_percent < 100:
8045           self.feedback_fn("   - progress: %.1f%%" % min_percent)
8046         time.sleep(2)
8047
8048   def _EnsureSecondary(self, node):
8049     """Demote a node to secondary.
8050
8051     """
8052     self.feedback_fn("* switching node %s to secondary mode" % node)
8053
8054     for dev in self.instance.disks:
8055       self.cfg.SetDiskID(dev, node)
8056
8057     result = self.rpc.call_blockdev_close(node, self.instance.name,
8058                                           self.instance.disks)
8059     result.Raise("Cannot change disk to secondary on node %s" % node)
8060
8061   def _GoStandalone(self):
8062     """Disconnect from the network.
8063
8064     """
8065     self.feedback_fn("* changing into standalone mode")
8066     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8067                                                self.instance.disks)
8068     for node, nres in result.items():
8069       nres.Raise("Cannot disconnect disks node %s" % node)
8070
8071   def _GoReconnect(self, multimaster):
8072     """Reconnect to the network.
8073
8074     """
8075     if multimaster:
8076       msg = "dual-master"
8077     else:
8078       msg = "single-master"
8079     self.feedback_fn("* changing disks into %s mode" % msg)
8080     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8081                                            self.instance.disks,
8082                                            self.instance.name, multimaster)
8083     for node, nres in result.items():
8084       nres.Raise("Cannot change disks config on node %s" % node)
8085
8086   def _ExecCleanup(self):
8087     """Try to cleanup after a failed migration.
8088
8089     The cleanup is done by:
8090       - check that the instance is running only on one node
8091         (and update the config if needed)
8092       - change disks on its secondary node to secondary
8093       - wait until disks are fully synchronized
8094       - disconnect from the network
8095       - change disks into single-master mode
8096       - wait again until disks are fully synchronized
8097
8098     """
8099     instance = self.instance
8100     target_node = self.target_node
8101     source_node = self.source_node
8102
8103     # check running on only one node
8104     self.feedback_fn("* checking where the instance actually runs"
8105                      " (if this hangs, the hypervisor might be in"
8106                      " a bad state)")
8107     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
8108     for node, result in ins_l.items():
8109       result.Raise("Can't contact node %s" % node)
8110
8111     runningon_source = instance.name in ins_l[source_node].payload
8112     runningon_target = instance.name in ins_l[target_node].payload
8113
8114     if runningon_source and runningon_target:
8115       raise errors.OpExecError("Instance seems to be running on two nodes,"
8116                                " or the hypervisor is confused; you will have"
8117                                " to ensure manually that it runs only on one"
8118                                " and restart this operation")
8119
8120     if not (runningon_source or runningon_target):
8121       raise errors.OpExecError("Instance does not seem to be running at all;"
8122                                " in this case it's safer to repair by"
8123                                " running 'gnt-instance stop' to ensure disk"
8124                                " shutdown, and then restarting it")
8125
8126     if runningon_target:
8127       # the migration has actually succeeded, we need to update the config
8128       self.feedback_fn("* instance running on secondary node (%s),"
8129                        " updating config" % target_node)
8130       instance.primary_node = target_node
8131       self.cfg.Update(instance, self.feedback_fn)
8132       demoted_node = source_node
8133     else:
8134       self.feedback_fn("* instance confirmed to be running on its"
8135                        " primary node (%s)" % source_node)
8136       demoted_node = target_node
8137
8138     if instance.disk_template in constants.DTS_INT_MIRROR:
8139       self._EnsureSecondary(demoted_node)
8140       try:
8141         self._WaitUntilSync()
8142       except errors.OpExecError:
8143         # we ignore here errors, since if the device is standalone, it
8144         # won't be able to sync
8145         pass
8146       self._GoStandalone()
8147       self._GoReconnect(False)
8148       self._WaitUntilSync()
8149
8150     self.feedback_fn("* done")
8151
8152   def _RevertDiskStatus(self):
8153     """Try to revert the disk status after a failed migration.
8154
8155     """
8156     target_node = self.target_node
8157     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
8158       return
8159
8160     try:
8161       self._EnsureSecondary(target_node)
8162       self._GoStandalone()
8163       self._GoReconnect(False)
8164       self._WaitUntilSync()
8165     except errors.OpExecError, err:
8166       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8167                          " please try to recover the instance manually;"
8168                          " error '%s'" % str(err))
8169
8170   def _AbortMigration(self):
8171     """Call the hypervisor code to abort a started migration.
8172
8173     """
8174     instance = self.instance
8175     target_node = self.target_node
8176     source_node = self.source_node
8177     migration_info = self.migration_info
8178
8179     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8180                                                                  instance,
8181                                                                  migration_info,
8182                                                                  False)
8183     abort_msg = abort_result.fail_msg
8184     if abort_msg:
8185       logging.error("Aborting migration failed on target node %s: %s",
8186                     target_node, abort_msg)
8187       # Don't raise an exception here, as we stil have to try to revert the
8188       # disk status, even if this step failed.
8189
8190     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8191         instance, False, self.live)
8192     abort_msg = abort_result.fail_msg
8193     if abort_msg:
8194       logging.error("Aborting migration failed on source node %s: %s",
8195                     source_node, abort_msg)
8196
8197   def _ExecMigration(self):
8198     """Migrate an instance.
8199
8200     The migrate is done by:
8201       - change the disks into dual-master mode
8202       - wait until disks are fully synchronized again
8203       - migrate the instance
8204       - change disks on the new secondary node (the old primary) to secondary
8205       - wait until disks are fully synchronized
8206       - change disks into single-master mode
8207
8208     """
8209     instance = self.instance
8210     target_node = self.target_node
8211     source_node = self.source_node
8212
8213     # Check for hypervisor version mismatch and warn the user.
8214     nodeinfo = self.rpc.call_node_info([source_node, target_node],
8215                                        None, [self.instance.hypervisor])
8216     for ninfo in nodeinfo.values():
8217       ninfo.Raise("Unable to retrieve node information from node '%s'" %
8218                   ninfo.node)
8219     (_, _, (src_info, )) = nodeinfo[source_node].payload
8220     (_, _, (dst_info, )) = nodeinfo[target_node].payload
8221
8222     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8223         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8224       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8225       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8226       if src_version != dst_version:
8227         self.feedback_fn("* warning: hypervisor version mismatch between"
8228                          " source (%s) and target (%s) node" %
8229                          (src_version, dst_version))
8230
8231     self.feedback_fn("* checking disk consistency between source and target")
8232     for dev in instance.disks:
8233       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8234         raise errors.OpExecError("Disk %s is degraded or not fully"
8235                                  " synchronized on target node,"
8236                                  " aborting migration" % dev.iv_name)
8237
8238     if self.current_mem > self.tgt_free_mem:
8239       if not self.allow_runtime_changes:
8240         raise errors.OpExecError("Memory ballooning not allowed and not enough"
8241                                  " free memory to fit instance %s on target"
8242                                  " node %s (have %dMB, need %dMB)" %
8243                                  (instance.name, target_node,
8244                                   self.tgt_free_mem, self.current_mem))
8245       self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
8246       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
8247                                                      instance,
8248                                                      self.tgt_free_mem)
8249       rpcres.Raise("Cannot modify instance runtime memory")
8250
8251     # First get the migration information from the remote node
8252     result = self.rpc.call_migration_info(source_node, instance)
8253     msg = result.fail_msg
8254     if msg:
8255       log_err = ("Failed fetching source migration information from %s: %s" %
8256                  (source_node, msg))
8257       logging.error(log_err)
8258       raise errors.OpExecError(log_err)
8259
8260     self.migration_info = migration_info = result.payload
8261
8262     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8263       # Then switch the disks to master/master mode
8264       self._EnsureSecondary(target_node)
8265       self._GoStandalone()
8266       self._GoReconnect(True)
8267       self._WaitUntilSync()
8268
8269     self.feedback_fn("* preparing %s to accept the instance" % target_node)
8270     result = self.rpc.call_accept_instance(target_node,
8271                                            instance,
8272                                            migration_info,
8273                                            self.nodes_ip[target_node])
8274
8275     msg = result.fail_msg
8276     if msg:
8277       logging.error("Instance pre-migration failed, trying to revert"
8278                     " disk status: %s", msg)
8279       self.feedback_fn("Pre-migration failed, aborting")
8280       self._AbortMigration()
8281       self._RevertDiskStatus()
8282       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8283                                (instance.name, msg))
8284
8285     self.feedback_fn("* migrating instance to %s" % target_node)
8286     result = self.rpc.call_instance_migrate(source_node, instance,
8287                                             self.nodes_ip[target_node],
8288                                             self.live)
8289     msg = result.fail_msg
8290     if msg:
8291       logging.error("Instance migration failed, trying to revert"
8292                     " disk status: %s", msg)
8293       self.feedback_fn("Migration failed, aborting")
8294       self._AbortMigration()
8295       self._RevertDiskStatus()
8296       raise errors.OpExecError("Could not migrate instance %s: %s" %
8297                                (instance.name, msg))
8298
8299     self.feedback_fn("* starting memory transfer")
8300     last_feedback = time.time()
8301     while True:
8302       result = self.rpc.call_instance_get_migration_status(source_node,
8303                                                            instance)
8304       msg = result.fail_msg
8305       ms = result.payload   # MigrationStatus instance
8306       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8307         logging.error("Instance migration failed, trying to revert"
8308                       " disk status: %s", msg)
8309         self.feedback_fn("Migration failed, aborting")
8310         self._AbortMigration()
8311         self._RevertDiskStatus()
8312         raise errors.OpExecError("Could not migrate instance %s: %s" %
8313                                  (instance.name, msg))
8314
8315       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8316         self.feedback_fn("* memory transfer complete")
8317         break
8318
8319       if (utils.TimeoutExpired(last_feedback,
8320                                self._MIGRATION_FEEDBACK_INTERVAL) and
8321           ms.transferred_ram is not None):
8322         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8323         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8324         last_feedback = time.time()
8325
8326       time.sleep(self._MIGRATION_POLL_INTERVAL)
8327
8328     result = self.rpc.call_instance_finalize_migration_src(source_node,
8329                                                            instance,
8330                                                            True,
8331                                                            self.live)
8332     msg = result.fail_msg
8333     if msg:
8334       logging.error("Instance migration succeeded, but finalization failed"
8335                     " on the source node: %s", msg)
8336       raise errors.OpExecError("Could not finalize instance migration: %s" %
8337                                msg)
8338
8339     instance.primary_node = target_node
8340
8341     # distribute new instance config to the other nodes
8342     self.cfg.Update(instance, self.feedback_fn)
8343
8344     result = self.rpc.call_instance_finalize_migration_dst(target_node,
8345                                                            instance,
8346                                                            migration_info,
8347                                                            True)
8348     msg = result.fail_msg
8349     if msg:
8350       logging.error("Instance migration succeeded, but finalization failed"
8351                     " on the target node: %s", msg)
8352       raise errors.OpExecError("Could not finalize instance migration: %s" %
8353                                msg)
8354
8355     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8356       self._EnsureSecondary(source_node)
8357       self._WaitUntilSync()
8358       self._GoStandalone()
8359       self._GoReconnect(False)
8360       self._WaitUntilSync()
8361
8362     # If the instance's disk template is `rbd' and there was a successful
8363     # migration, unmap the device from the source node.
8364     if self.instance.disk_template == constants.DT_RBD:
8365       disks = _ExpandCheckDisks(instance, instance.disks)
8366       self.feedback_fn("* unmapping instance's disks from %s" % source_node)
8367       for disk in disks:
8368         result = self.rpc.call_blockdev_shutdown(source_node, disk)
8369         msg = result.fail_msg
8370         if msg:
8371           logging.error("Migration was successful, but couldn't unmap the"
8372                         " block device %s on source node %s: %s",
8373                         disk.iv_name, source_node, msg)
8374           logging.error("You need to unmap the device %s manually on %s",
8375                         disk.iv_name, source_node)
8376
8377     self.feedback_fn("* done")
8378
8379   def _ExecFailover(self):
8380     """Failover an instance.
8381
8382     The failover is done by shutting it down on its present node and
8383     starting it on the secondary.
8384
8385     """
8386     instance = self.instance
8387     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8388
8389     source_node = instance.primary_node
8390     target_node = self.target_node
8391
8392     if instance.admin_state == constants.ADMINST_UP:
8393       self.feedback_fn("* checking disk consistency between source and target")
8394       for dev in instance.disks:
8395         # for drbd, these are drbd over lvm
8396         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8397           if primary_node.offline:
8398             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8399                              " target node %s" %
8400                              (primary_node.name, dev.iv_name, target_node))
8401           elif not self.ignore_consistency:
8402             raise errors.OpExecError("Disk %s is degraded on target node,"
8403                                      " aborting failover" % dev.iv_name)
8404     else:
8405       self.feedback_fn("* not checking disk consistency as instance is not"
8406                        " running")
8407
8408     self.feedback_fn("* shutting down instance on source node")
8409     logging.info("Shutting down instance %s on node %s",
8410                  instance.name, source_node)
8411
8412     result = self.rpc.call_instance_shutdown(source_node, instance,
8413                                              self.shutdown_timeout)
8414     msg = result.fail_msg
8415     if msg:
8416       if self.ignore_consistency or primary_node.offline:
8417         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8418                            " proceeding anyway; please make sure node"
8419                            " %s is down; error details: %s",
8420                            instance.name, source_node, source_node, msg)
8421       else:
8422         raise errors.OpExecError("Could not shutdown instance %s on"
8423                                  " node %s: %s" %
8424                                  (instance.name, source_node, msg))
8425
8426     self.feedback_fn("* deactivating the instance's disks on source node")
8427     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8428       raise errors.OpExecError("Can't shut down the instance's disks")
8429
8430     instance.primary_node = target_node
8431     # distribute new instance config to the other nodes
8432     self.cfg.Update(instance, self.feedback_fn)
8433
8434     # Only start the instance if it's marked as up
8435     if instance.admin_state == constants.ADMINST_UP:
8436       self.feedback_fn("* activating the instance's disks on target node %s" %
8437                        target_node)
8438       logging.info("Starting instance %s on node %s",
8439                    instance.name, target_node)
8440
8441       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8442                                            ignore_secondaries=True)
8443       if not disks_ok:
8444         _ShutdownInstanceDisks(self.lu, instance)
8445         raise errors.OpExecError("Can't activate the instance's disks")
8446
8447       self.feedback_fn("* starting the instance on the target node %s" %
8448                        target_node)
8449       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8450                                             False)
8451       msg = result.fail_msg
8452       if msg:
8453         _ShutdownInstanceDisks(self.lu, instance)
8454         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8455                                  (instance.name, target_node, msg))
8456
8457   def Exec(self, feedback_fn):
8458     """Perform the migration.
8459
8460     """
8461     self.feedback_fn = feedback_fn
8462     self.source_node = self.instance.primary_node
8463
8464     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8465     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8466       self.target_node = self.instance.secondary_nodes[0]
8467       # Otherwise self.target_node has been populated either
8468       # directly, or through an iallocator.
8469
8470     self.all_nodes = [self.source_node, self.target_node]
8471     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8472                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8473
8474     if self.failover:
8475       feedback_fn("Failover instance %s" % self.instance.name)
8476       self._ExecFailover()
8477     else:
8478       feedback_fn("Migrating instance %s" % self.instance.name)
8479
8480       if self.cleanup:
8481         return self._ExecCleanup()
8482       else:
8483         return self._ExecMigration()
8484
8485
8486 def _CreateBlockDev(lu, node, instance, device, force_create,
8487                     info, force_open):
8488   """Create a tree of block devices on a given node.
8489
8490   If this device type has to be created on secondaries, create it and
8491   all its children.
8492
8493   If not, just recurse to children keeping the same 'force' value.
8494
8495   @param lu: the lu on whose behalf we execute
8496   @param node: the node on which to create the device
8497   @type instance: L{objects.Instance}
8498   @param instance: the instance which owns the device
8499   @type device: L{objects.Disk}
8500   @param device: the device to create
8501   @type force_create: boolean
8502   @param force_create: whether to force creation of this device; this
8503       will be change to True whenever we find a device which has
8504       CreateOnSecondary() attribute
8505   @param info: the extra 'metadata' we should attach to the device
8506       (this will be represented as a LVM tag)
8507   @type force_open: boolean
8508   @param force_open: this parameter will be passes to the
8509       L{backend.BlockdevCreate} function where it specifies
8510       whether we run on primary or not, and it affects both
8511       the child assembly and the device own Open() execution
8512
8513   """
8514   if device.CreateOnSecondary():
8515     force_create = True
8516
8517   if device.children:
8518     for child in device.children:
8519       _CreateBlockDev(lu, node, instance, child, force_create,
8520                       info, force_open)
8521
8522   if not force_create:
8523     return
8524
8525   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8526
8527
8528 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8529   """Create a single block device on a given node.
8530
8531   This will not recurse over children of the device, so they must be
8532   created in advance.
8533
8534   @param lu: the lu on whose behalf we execute
8535   @param node: the node on which to create the device
8536   @type instance: L{objects.Instance}
8537   @param instance: the instance which owns the device
8538   @type device: L{objects.Disk}
8539   @param device: the device to create
8540   @param info: the extra 'metadata' we should attach to the device
8541       (this will be represented as a LVM tag)
8542   @type force_open: boolean
8543   @param force_open: this parameter will be passes to the
8544       L{backend.BlockdevCreate} function where it specifies
8545       whether we run on primary or not, and it affects both
8546       the child assembly and the device own Open() execution
8547
8548   """
8549   lu.cfg.SetDiskID(device, node)
8550   result = lu.rpc.call_blockdev_create(node, device, device.size,
8551                                        instance.name, force_open, info)
8552   result.Raise("Can't create block device %s on"
8553                " node %s for instance %s" % (device, node, instance.name))
8554   if device.physical_id is None:
8555     device.physical_id = result.payload
8556
8557
8558 def _GenerateUniqueNames(lu, exts):
8559   """Generate a suitable LV name.
8560
8561   This will generate a logical volume name for the given instance.
8562
8563   """
8564   results = []
8565   for val in exts:
8566     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8567     results.append("%s%s" % (new_id, val))
8568   return results
8569
8570
8571 def _ComputeLDParams(disk_template, disk_params):
8572   """Computes Logical Disk parameters from Disk Template parameters.
8573
8574   @type disk_template: string
8575   @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8576   @type disk_params: dict
8577   @param disk_params: disk template parameters; dict(template_name -> parameters
8578   @rtype: list(dict)
8579   @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8580     contains the LD parameters of the node. The tree is flattened in-order.
8581
8582   """
8583   if disk_template not in constants.DISK_TEMPLATES:
8584     raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8585
8586   result = list()
8587   dt_params = disk_params[disk_template]
8588   if disk_template == constants.DT_DRBD8:
8589     drbd_params = {
8590       constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8591       constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8592       constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8593       constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8594       constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8595       constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8596       constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8597       constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8598       constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8599       constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8600       constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8601       constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8602       }
8603
8604     drbd_params = \
8605       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8606                        drbd_params)
8607
8608     result.append(drbd_params)
8609
8610     # data LV
8611     data_params = {
8612       constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8613       }
8614     data_params = \
8615       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8616                        data_params)
8617     result.append(data_params)
8618
8619     # metadata LV
8620     meta_params = {
8621       constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8622       }
8623     meta_params = \
8624       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8625                        meta_params)
8626     result.append(meta_params)
8627
8628   elif (disk_template == constants.DT_FILE or
8629         disk_template == constants.DT_SHARED_FILE):
8630     result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8631
8632   elif disk_template == constants.DT_PLAIN:
8633     params = {
8634       constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8635       }
8636     params = \
8637       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8638                        params)
8639     result.append(params)
8640
8641   elif disk_template == constants.DT_BLOCK:
8642     result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8643
8644   elif disk_template == constants.DT_RBD:
8645     params = {
8646       constants.LDP_POOL: dt_params[constants.RBD_POOL]
8647       }
8648     params = \
8649       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_RBD],
8650                        params)
8651     result.append(params)
8652
8653   return result
8654
8655
8656 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8657                          iv_name, p_minor, s_minor, drbd_params, data_params,
8658                          meta_params):
8659   """Generate a drbd8 device complete with its children.
8660
8661   """
8662   assert len(vgnames) == len(names) == 2
8663   port = lu.cfg.AllocatePort()
8664   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8665
8666   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8667                           logical_id=(vgnames[0], names[0]),
8668                           params=data_params)
8669   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8670                           logical_id=(vgnames[1], names[1]),
8671                           params=meta_params)
8672   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8673                           logical_id=(primary, secondary, port,
8674                                       p_minor, s_minor,
8675                                       shared_secret),
8676                           children=[dev_data, dev_meta],
8677                           iv_name=iv_name, params=drbd_params)
8678   return drbd_dev
8679
8680
8681 def _GenerateDiskTemplate(lu, template_name,
8682                           instance_name, primary_node,
8683                           secondary_nodes, disk_info,
8684                           file_storage_dir, file_driver,
8685                           base_index, feedback_fn, disk_params):
8686   """Generate the entire disk layout for a given template type.
8687
8688   """
8689   #TODO: compute space requirements
8690
8691   vgname = lu.cfg.GetVGName()
8692   disk_count = len(disk_info)
8693   disks = []
8694   ld_params = _ComputeLDParams(template_name, disk_params)
8695   if template_name == constants.DT_DISKLESS:
8696     pass
8697   elif template_name == constants.DT_PLAIN:
8698     if secondary_nodes:
8699       raise errors.ProgrammerError("Wrong template configuration")
8700
8701     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8702                                       for i in range(disk_count)])
8703     for idx, disk in enumerate(disk_info):
8704       disk_index = idx + base_index
8705       vg = disk.get(constants.IDISK_VG, vgname)
8706       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8707       disk_dev = objects.Disk(dev_type=constants.LD_LV,
8708                               size=disk[constants.IDISK_SIZE],
8709                               logical_id=(vg, names[idx]),
8710                               iv_name="disk/%d" % disk_index,
8711                               mode=disk[constants.IDISK_MODE],
8712                               params=ld_params[0])
8713       disks.append(disk_dev)
8714   elif template_name == constants.DT_DRBD8:
8715     drbd_params, data_params, meta_params = ld_params
8716     if len(secondary_nodes) != 1:
8717       raise errors.ProgrammerError("Wrong template configuration")
8718     remote_node = secondary_nodes[0]
8719     minors = lu.cfg.AllocateDRBDMinor(
8720       [primary_node, remote_node] * len(disk_info), instance_name)
8721
8722     names = []
8723     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8724                                                for i in range(disk_count)]):
8725       names.append(lv_prefix + "_data")
8726       names.append(lv_prefix + "_meta")
8727     for idx, disk in enumerate(disk_info):
8728       disk_index = idx + base_index
8729       drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8730       data_vg = disk.get(constants.IDISK_VG, vgname)
8731       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8732       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8733                                       disk[constants.IDISK_SIZE],
8734                                       [data_vg, meta_vg],
8735                                       names[idx * 2:idx * 2 + 2],
8736                                       "disk/%d" % disk_index,
8737                                       minors[idx * 2], minors[idx * 2 + 1],
8738                                       drbd_params, data_params, meta_params)
8739       disk_dev.mode = disk[constants.IDISK_MODE]
8740       disks.append(disk_dev)
8741   elif template_name == constants.DT_FILE:
8742     if secondary_nodes:
8743       raise errors.ProgrammerError("Wrong template configuration")
8744
8745     opcodes.RequireFileStorage()
8746
8747     for idx, disk in enumerate(disk_info):
8748       disk_index = idx + base_index
8749       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8750                               size=disk[constants.IDISK_SIZE],
8751                               iv_name="disk/%d" % disk_index,
8752                               logical_id=(file_driver,
8753                                           "%s/disk%d" % (file_storage_dir,
8754                                                          disk_index)),
8755                               mode=disk[constants.IDISK_MODE],
8756                               params=ld_params[0])
8757       disks.append(disk_dev)
8758   elif template_name == constants.DT_SHARED_FILE:
8759     if secondary_nodes:
8760       raise errors.ProgrammerError("Wrong template configuration")
8761
8762     opcodes.RequireSharedFileStorage()
8763
8764     for idx, disk in enumerate(disk_info):
8765       disk_index = idx + base_index
8766       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8767                               size=disk[constants.IDISK_SIZE],
8768                               iv_name="disk/%d" % disk_index,
8769                               logical_id=(file_driver,
8770                                           "%s/disk%d" % (file_storage_dir,
8771                                                          disk_index)),
8772                               mode=disk[constants.IDISK_MODE],
8773                               params=ld_params[0])
8774       disks.append(disk_dev)
8775   elif template_name == constants.DT_BLOCK:
8776     if secondary_nodes:
8777       raise errors.ProgrammerError("Wrong template configuration")
8778
8779     for idx, disk in enumerate(disk_info):
8780       disk_index = idx + base_index
8781       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8782                               size=disk[constants.IDISK_SIZE],
8783                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8784                                           disk[constants.IDISK_ADOPT]),
8785                               iv_name="disk/%d" % disk_index,
8786                               mode=disk[constants.IDISK_MODE],
8787                               params=ld_params[0])
8788       disks.append(disk_dev)
8789   elif template_name == constants.DT_RBD:
8790     if secondary_nodes:
8791       raise errors.ProgrammerError("Wrong template configuration")
8792
8793     names = _GenerateUniqueNames(lu, [".rbd.disk%d" % (base_index + i)
8794                                       for i in range(disk_count)])
8795
8796     for idx, disk in enumerate(disk_info):
8797       disk_index = idx + base_index
8798       disk_dev = objects.Disk(dev_type=constants.LD_RBD,
8799                               size=disk[constants.IDISK_SIZE],
8800                               logical_id=("rbd", names[idx]),
8801                               iv_name="disk/%d" % disk_index,
8802                               mode=disk[constants.IDISK_MODE],
8803                               params=ld_params[0])
8804       disks.append(disk_dev)
8805
8806   else:
8807     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8808   return disks
8809
8810
8811 def _GetInstanceInfoText(instance):
8812   """Compute that text that should be added to the disk's metadata.
8813
8814   """
8815   return "originstname+%s" % instance.name
8816
8817
8818 def _CalcEta(time_taken, written, total_size):
8819   """Calculates the ETA based on size written and total size.
8820
8821   @param time_taken: The time taken so far
8822   @param written: amount written so far
8823   @param total_size: The total size of data to be written
8824   @return: The remaining time in seconds
8825
8826   """
8827   avg_time = time_taken / float(written)
8828   return (total_size - written) * avg_time
8829
8830
8831 def _WipeDisks(lu, instance):
8832   """Wipes instance disks.
8833
8834   @type lu: L{LogicalUnit}
8835   @param lu: the logical unit on whose behalf we execute
8836   @type instance: L{objects.Instance}
8837   @param instance: the instance whose disks we should create
8838   @return: the success of the wipe
8839
8840   """
8841   node = instance.primary_node
8842
8843   for device in instance.disks:
8844     lu.cfg.SetDiskID(device, node)
8845
8846   logging.info("Pause sync of instance %s disks", instance.name)
8847   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8848
8849   for idx, success in enumerate(result.payload):
8850     if not success:
8851       logging.warn("pause-sync of instance %s for disks %d failed",
8852                    instance.name, idx)
8853
8854   try:
8855     for idx, device in enumerate(instance.disks):
8856       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8857       # MAX_WIPE_CHUNK at max
8858       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8859                             constants.MIN_WIPE_CHUNK_PERCENT)
8860       # we _must_ make this an int, otherwise rounding errors will
8861       # occur
8862       wipe_chunk_size = int(wipe_chunk_size)
8863
8864       lu.LogInfo("* Wiping disk %d", idx)
8865       logging.info("Wiping disk %d for instance %s, node %s using"
8866                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8867
8868       offset = 0
8869       size = device.size
8870       last_output = 0
8871       start_time = time.time()
8872
8873       while offset < size:
8874         wipe_size = min(wipe_chunk_size, size - offset)
8875         logging.debug("Wiping disk %d, offset %s, chunk %s",
8876                       idx, offset, wipe_size)
8877         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8878         result.Raise("Could not wipe disk %d at offset %d for size %d" %
8879                      (idx, offset, wipe_size))
8880         now = time.time()
8881         offset += wipe_size
8882         if now - last_output >= 60:
8883           eta = _CalcEta(now - start_time, offset, size)
8884           lu.LogInfo(" - done: %.1f%% ETA: %s" %
8885                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
8886           last_output = now
8887   finally:
8888     logging.info("Resume sync of instance %s disks", instance.name)
8889
8890     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8891
8892     for idx, success in enumerate(result.payload):
8893       if not success:
8894         lu.LogWarning("Resume sync of disk %d failed, please have a"
8895                       " look at the status and troubleshoot the issue", idx)
8896         logging.warn("resume-sync of instance %s for disks %d failed",
8897                      instance.name, idx)
8898
8899
8900 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8901   """Create all disks for an instance.
8902
8903   This abstracts away some work from AddInstance.
8904
8905   @type lu: L{LogicalUnit}
8906   @param lu: the logical unit on whose behalf we execute
8907   @type instance: L{objects.Instance}
8908   @param instance: the instance whose disks we should create
8909   @type to_skip: list
8910   @param to_skip: list of indices to skip
8911   @type target_node: string
8912   @param target_node: if passed, overrides the target node for creation
8913   @rtype: boolean
8914   @return: the success of the creation
8915
8916   """
8917   info = _GetInstanceInfoText(instance)
8918   if target_node is None:
8919     pnode = instance.primary_node
8920     all_nodes = instance.all_nodes
8921   else:
8922     pnode = target_node
8923     all_nodes = [pnode]
8924
8925   if instance.disk_template in constants.DTS_FILEBASED:
8926     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8927     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8928
8929     result.Raise("Failed to create directory '%s' on"
8930                  " node %s" % (file_storage_dir, pnode))
8931
8932   # Note: this needs to be kept in sync with adding of disks in
8933   # LUInstanceSetParams
8934   for idx, device in enumerate(instance.disks):
8935     if to_skip and idx in to_skip:
8936       continue
8937     logging.info("Creating volume %s for instance %s",
8938                  device.iv_name, instance.name)
8939     #HARDCODE
8940     for node in all_nodes:
8941       f_create = node == pnode
8942       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8943
8944
8945 def _RemoveDisks(lu, instance, target_node=None):
8946   """Remove all disks for an instance.
8947
8948   This abstracts away some work from `AddInstance()` and
8949   `RemoveInstance()`. Note that in case some of the devices couldn't
8950   be removed, the removal will continue with the other ones (compare
8951   with `_CreateDisks()`).
8952
8953   @type lu: L{LogicalUnit}
8954   @param lu: the logical unit on whose behalf we execute
8955   @type instance: L{objects.Instance}
8956   @param instance: the instance whose disks we should remove
8957   @type target_node: string
8958   @param target_node: used to override the node on which to remove the disks
8959   @rtype: boolean
8960   @return: the success of the removal
8961
8962   """
8963   logging.info("Removing block devices for instance %s", instance.name)
8964
8965   all_result = True
8966   for device in instance.disks:
8967     if target_node:
8968       edata = [(target_node, device)]
8969     else:
8970       edata = device.ComputeNodeTree(instance.primary_node)
8971     for node, disk in edata:
8972       lu.cfg.SetDiskID(disk, node)
8973       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8974       if msg:
8975         lu.LogWarning("Could not remove block device %s on node %s,"
8976                       " continuing anyway: %s", device.iv_name, node, msg)
8977         all_result = False
8978
8979     # if this is a DRBD disk, return its port to the pool
8980     if device.dev_type in constants.LDS_DRBD:
8981       tcp_port = device.logical_id[2]
8982       lu.cfg.AddTcpUdpPort(tcp_port)
8983
8984   if instance.disk_template == constants.DT_FILE:
8985     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8986     if target_node:
8987       tgt = target_node
8988     else:
8989       tgt = instance.primary_node
8990     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8991     if result.fail_msg:
8992       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8993                     file_storage_dir, instance.primary_node, result.fail_msg)
8994       all_result = False
8995
8996   return all_result
8997
8998
8999 def _ComputeDiskSizePerVG(disk_template, disks):
9000   """Compute disk size requirements in the volume group
9001
9002   """
9003   def _compute(disks, payload):
9004     """Universal algorithm.
9005
9006     """
9007     vgs = {}
9008     for disk in disks:
9009       vgs[disk[constants.IDISK_VG]] = \
9010         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
9011
9012     return vgs
9013
9014   # Required free disk space as a function of disk and swap space
9015   req_size_dict = {
9016     constants.DT_DISKLESS: {},
9017     constants.DT_PLAIN: _compute(disks, 0),
9018     # 128 MB are added for drbd metadata for each disk
9019     constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
9020     constants.DT_FILE: {},
9021     constants.DT_SHARED_FILE: {},
9022   }
9023
9024   if disk_template not in req_size_dict:
9025     raise errors.ProgrammerError("Disk template '%s' size requirement"
9026                                  " is unknown" % disk_template)
9027
9028   return req_size_dict[disk_template]
9029
9030
9031 def _ComputeDiskSize(disk_template, disks):
9032   """Compute disk size requirements in the volume group
9033
9034   """
9035   # Required free disk space as a function of disk and swap space
9036   req_size_dict = {
9037     constants.DT_DISKLESS: None,
9038     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
9039     # 128 MB are added for drbd metadata for each disk
9040     constants.DT_DRBD8:
9041       sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
9042     constants.DT_FILE: None,
9043     constants.DT_SHARED_FILE: 0,
9044     constants.DT_BLOCK: 0,
9045     constants.DT_RBD: 0,
9046   }
9047
9048   if disk_template not in req_size_dict:
9049     raise errors.ProgrammerError("Disk template '%s' size requirement"
9050                                  " is unknown" % disk_template)
9051
9052   return req_size_dict[disk_template]
9053
9054
9055 def _FilterVmNodes(lu, nodenames):
9056   """Filters out non-vm_capable nodes from a list.
9057
9058   @type lu: L{LogicalUnit}
9059   @param lu: the logical unit for which we check
9060   @type nodenames: list
9061   @param nodenames: the list of nodes on which we should check
9062   @rtype: list
9063   @return: the list of vm-capable nodes
9064
9065   """
9066   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
9067   return [name for name in nodenames if name not in vm_nodes]
9068
9069
9070 def _CheckHVParams(lu, nodenames, hvname, hvparams):
9071   """Hypervisor parameter validation.
9072
9073   This function abstract the hypervisor parameter validation to be
9074   used in both instance create and instance modify.
9075
9076   @type lu: L{LogicalUnit}
9077   @param lu: the logical unit for which we check
9078   @type nodenames: list
9079   @param nodenames: the list of nodes on which we should check
9080   @type hvname: string
9081   @param hvname: the name of the hypervisor we should use
9082   @type hvparams: dict
9083   @param hvparams: the parameters which we need to check
9084   @raise errors.OpPrereqError: if the parameters are not valid
9085
9086   """
9087   nodenames = _FilterVmNodes(lu, nodenames)
9088
9089   cluster = lu.cfg.GetClusterInfo()
9090   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
9091
9092   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
9093   for node in nodenames:
9094     info = hvinfo[node]
9095     if info.offline:
9096       continue
9097     info.Raise("Hypervisor parameter validation failed on node %s" % node)
9098
9099
9100 def _CheckOSParams(lu, required, nodenames, osname, osparams):
9101   """OS parameters validation.
9102
9103   @type lu: L{LogicalUnit}
9104   @param lu: the logical unit for which we check
9105   @type required: boolean
9106   @param required: whether the validation should fail if the OS is not
9107       found
9108   @type nodenames: list
9109   @param nodenames: the list of nodes on which we should check
9110   @type osname: string
9111   @param osname: the name of the hypervisor we should use
9112   @type osparams: dict
9113   @param osparams: the parameters which we need to check
9114   @raise errors.OpPrereqError: if the parameters are not valid
9115
9116   """
9117   nodenames = _FilterVmNodes(lu, nodenames)
9118   result = lu.rpc.call_os_validate(nodenames, required, osname,
9119                                    [constants.OS_VALIDATE_PARAMETERS],
9120                                    osparams)
9121   for node, nres in result.items():
9122     # we don't check for offline cases since this should be run only
9123     # against the master node and/or an instance's nodes
9124     nres.Raise("OS Parameters validation failed on node %s" % node)
9125     if not nres.payload:
9126       lu.LogInfo("OS %s not found on node %s, validation skipped",
9127                  osname, node)
9128
9129
9130 class LUInstanceCreate(LogicalUnit):
9131   """Create an instance.
9132
9133   """
9134   HPATH = "instance-add"
9135   HTYPE = constants.HTYPE_INSTANCE
9136   REQ_BGL = False
9137
9138   def CheckArguments(self):
9139     """Check arguments.
9140
9141     """
9142     # do not require name_check to ease forward/backward compatibility
9143     # for tools
9144     if self.op.no_install and self.op.start:
9145       self.LogInfo("No-installation mode selected, disabling startup")
9146       self.op.start = False
9147     # validate/normalize the instance name
9148     self.op.instance_name = \
9149       netutils.Hostname.GetNormalizedName(self.op.instance_name)
9150
9151     if self.op.ip_check and not self.op.name_check:
9152       # TODO: make the ip check more flexible and not depend on the name check
9153       raise errors.OpPrereqError("Cannot do IP address check without a name"
9154                                  " check", errors.ECODE_INVAL)
9155
9156     # check nics' parameter names
9157     for nic in self.op.nics:
9158       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
9159
9160     # check disks. parameter names and consistent adopt/no-adopt strategy
9161     has_adopt = has_no_adopt = False
9162     for disk in self.op.disks:
9163       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
9164       if constants.IDISK_ADOPT in disk:
9165         has_adopt = True
9166       else:
9167         has_no_adopt = True
9168     if has_adopt and has_no_adopt:
9169       raise errors.OpPrereqError("Either all disks are adopted or none is",
9170                                  errors.ECODE_INVAL)
9171     if has_adopt:
9172       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
9173         raise errors.OpPrereqError("Disk adoption is not supported for the"
9174                                    " '%s' disk template" %
9175                                    self.op.disk_template,
9176                                    errors.ECODE_INVAL)
9177       if self.op.iallocator is not None:
9178         raise errors.OpPrereqError("Disk adoption not allowed with an"
9179                                    " iallocator script", errors.ECODE_INVAL)
9180       if self.op.mode == constants.INSTANCE_IMPORT:
9181         raise errors.OpPrereqError("Disk adoption not allowed for"
9182                                    " instance import", errors.ECODE_INVAL)
9183     else:
9184       if self.op.disk_template in constants.DTS_MUST_ADOPT:
9185         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
9186                                    " but no 'adopt' parameter given" %
9187                                    self.op.disk_template,
9188                                    errors.ECODE_INVAL)
9189
9190     self.adopt_disks = has_adopt
9191
9192     # instance name verification
9193     if self.op.name_check:
9194       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
9195       self.op.instance_name = self.hostname1.name
9196       # used in CheckPrereq for ip ping check
9197       self.check_ip = self.hostname1.ip
9198     else:
9199       self.check_ip = None
9200
9201     # file storage checks
9202     if (self.op.file_driver and
9203         not self.op.file_driver in constants.FILE_DRIVER):
9204       raise errors.OpPrereqError("Invalid file driver name '%s'" %
9205                                  self.op.file_driver, errors.ECODE_INVAL)
9206
9207     if self.op.disk_template == constants.DT_FILE:
9208       opcodes.RequireFileStorage()
9209     elif self.op.disk_template == constants.DT_SHARED_FILE:
9210       opcodes.RequireSharedFileStorage()
9211
9212     ### Node/iallocator related checks
9213     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
9214
9215     if self.op.pnode is not None:
9216       if self.op.disk_template in constants.DTS_INT_MIRROR:
9217         if self.op.snode is None:
9218           raise errors.OpPrereqError("The networked disk templates need"
9219                                      " a mirror node", errors.ECODE_INVAL)
9220       elif self.op.snode:
9221         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9222                         " template")
9223         self.op.snode = None
9224
9225     self._cds = _GetClusterDomainSecret()
9226
9227     if self.op.mode == constants.INSTANCE_IMPORT:
9228       # On import force_variant must be True, because if we forced it at
9229       # initial install, our only chance when importing it back is that it
9230       # works again!
9231       self.op.force_variant = True
9232
9233       if self.op.no_install:
9234         self.LogInfo("No-installation mode has no effect during import")
9235
9236     elif self.op.mode == constants.INSTANCE_CREATE:
9237       if self.op.os_type is None:
9238         raise errors.OpPrereqError("No guest OS specified",
9239                                    errors.ECODE_INVAL)
9240       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9241         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9242                                    " installation" % self.op.os_type,
9243                                    errors.ECODE_STATE)
9244       if self.op.disk_template is None:
9245         raise errors.OpPrereqError("No disk template specified",
9246                                    errors.ECODE_INVAL)
9247
9248     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9249       # Check handshake to ensure both clusters have the same domain secret
9250       src_handshake = self.op.source_handshake
9251       if not src_handshake:
9252         raise errors.OpPrereqError("Missing source handshake",
9253                                    errors.ECODE_INVAL)
9254
9255       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9256                                                            src_handshake)
9257       if errmsg:
9258         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9259                                    errors.ECODE_INVAL)
9260
9261       # Load and check source CA
9262       self.source_x509_ca_pem = self.op.source_x509_ca
9263       if not self.source_x509_ca_pem:
9264         raise errors.OpPrereqError("Missing source X509 CA",
9265                                    errors.ECODE_INVAL)
9266
9267       try:
9268         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9269                                                     self._cds)
9270       except OpenSSL.crypto.Error, err:
9271         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9272                                    (err, ), errors.ECODE_INVAL)
9273
9274       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9275       if errcode is not None:
9276         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9277                                    errors.ECODE_INVAL)
9278
9279       self.source_x509_ca = cert
9280
9281       src_instance_name = self.op.source_instance_name
9282       if not src_instance_name:
9283         raise errors.OpPrereqError("Missing source instance name",
9284                                    errors.ECODE_INVAL)
9285
9286       self.source_instance_name = \
9287           netutils.GetHostname(name=src_instance_name).name
9288
9289     else:
9290       raise errors.OpPrereqError("Invalid instance creation mode %r" %
9291                                  self.op.mode, errors.ECODE_INVAL)
9292
9293   def ExpandNames(self):
9294     """ExpandNames for CreateInstance.
9295
9296     Figure out the right locks for instance creation.
9297
9298     """
9299     self.needed_locks = {}
9300
9301     instance_name = self.op.instance_name
9302     # this is just a preventive check, but someone might still add this
9303     # instance in the meantime, and creation will fail at lock-add time
9304     if instance_name in self.cfg.GetInstanceList():
9305       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9306                                  instance_name, errors.ECODE_EXISTS)
9307
9308     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9309
9310     if self.op.iallocator:
9311       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9312       # specifying a group on instance creation and then selecting nodes from
9313       # that group
9314       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9315       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9316     else:
9317       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9318       nodelist = [self.op.pnode]
9319       if self.op.snode is not None:
9320         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9321         nodelist.append(self.op.snode)
9322       self.needed_locks[locking.LEVEL_NODE] = nodelist
9323       # Lock resources of instance's primary and secondary nodes (copy to
9324       # prevent accidential modification)
9325       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9326
9327     # in case of import lock the source node too
9328     if self.op.mode == constants.INSTANCE_IMPORT:
9329       src_node = self.op.src_node
9330       src_path = self.op.src_path
9331
9332       if src_path is None:
9333         self.op.src_path = src_path = self.op.instance_name
9334
9335       if src_node is None:
9336         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9337         self.op.src_node = None
9338         if os.path.isabs(src_path):
9339           raise errors.OpPrereqError("Importing an instance from a path"
9340                                      " requires a source node option",
9341                                      errors.ECODE_INVAL)
9342       else:
9343         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9344         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9345           self.needed_locks[locking.LEVEL_NODE].append(src_node)
9346         if not os.path.isabs(src_path):
9347           self.op.src_path = src_path = \
9348             utils.PathJoin(constants.EXPORT_DIR, src_path)
9349
9350   def _RunAllocator(self):
9351     """Run the allocator based on input opcode.
9352
9353     """
9354     nics = [n.ToDict() for n in self.nics]
9355     ial = IAllocator(self.cfg, self.rpc,
9356                      mode=constants.IALLOCATOR_MODE_ALLOC,
9357                      name=self.op.instance_name,
9358                      disk_template=self.op.disk_template,
9359                      tags=self.op.tags,
9360                      os=self.op.os_type,
9361                      vcpus=self.be_full[constants.BE_VCPUS],
9362                      memory=self.be_full[constants.BE_MAXMEM],
9363                      disks=self.disks,
9364                      nics=nics,
9365                      hypervisor=self.op.hypervisor,
9366                      )
9367
9368     ial.Run(self.op.iallocator)
9369
9370     if not ial.success:
9371       raise errors.OpPrereqError("Can't compute nodes using"
9372                                  " iallocator '%s': %s" %
9373                                  (self.op.iallocator, ial.info),
9374                                  errors.ECODE_NORES)
9375     if len(ial.result) != ial.required_nodes:
9376       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9377                                  " of nodes (%s), required %s" %
9378                                  (self.op.iallocator, len(ial.result),
9379                                   ial.required_nodes), errors.ECODE_FAULT)
9380     self.op.pnode = ial.result[0]
9381     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9382                  self.op.instance_name, self.op.iallocator,
9383                  utils.CommaJoin(ial.result))
9384     if ial.required_nodes == 2:
9385       self.op.snode = ial.result[1]
9386
9387   def BuildHooksEnv(self):
9388     """Build hooks env.
9389
9390     This runs on master, primary and secondary nodes of the instance.
9391
9392     """
9393     env = {
9394       "ADD_MODE": self.op.mode,
9395       }
9396     if self.op.mode == constants.INSTANCE_IMPORT:
9397       env["SRC_NODE"] = self.op.src_node
9398       env["SRC_PATH"] = self.op.src_path
9399       env["SRC_IMAGES"] = self.src_images
9400
9401     env.update(_BuildInstanceHookEnv(
9402       name=self.op.instance_name,
9403       primary_node=self.op.pnode,
9404       secondary_nodes=self.secondaries,
9405       status=self.op.start,
9406       os_type=self.op.os_type,
9407       minmem=self.be_full[constants.BE_MINMEM],
9408       maxmem=self.be_full[constants.BE_MAXMEM],
9409       vcpus=self.be_full[constants.BE_VCPUS],
9410       nics=_NICListToTuple(self, self.nics),
9411       disk_template=self.op.disk_template,
9412       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9413              for d in self.disks],
9414       bep=self.be_full,
9415       hvp=self.hv_full,
9416       hypervisor_name=self.op.hypervisor,
9417       tags=self.op.tags,
9418     ))
9419
9420     return env
9421
9422   def BuildHooksNodes(self):
9423     """Build hooks nodes.
9424
9425     """
9426     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9427     return nl, nl
9428
9429   def _ReadExportInfo(self):
9430     """Reads the export information from disk.
9431
9432     It will override the opcode source node and path with the actual
9433     information, if these two were not specified before.
9434
9435     @return: the export information
9436
9437     """
9438     assert self.op.mode == constants.INSTANCE_IMPORT
9439
9440     src_node = self.op.src_node
9441     src_path = self.op.src_path
9442
9443     if src_node is None:
9444       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9445       exp_list = self.rpc.call_export_list(locked_nodes)
9446       found = False
9447       for node in exp_list:
9448         if exp_list[node].fail_msg:
9449           continue
9450         if src_path in exp_list[node].payload:
9451           found = True
9452           self.op.src_node = src_node = node
9453           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9454                                                        src_path)
9455           break
9456       if not found:
9457         raise errors.OpPrereqError("No export found for relative path %s" %
9458                                     src_path, errors.ECODE_INVAL)
9459
9460     _CheckNodeOnline(self, src_node)
9461     result = self.rpc.call_export_info(src_node, src_path)
9462     result.Raise("No export or invalid export found in dir %s" % src_path)
9463
9464     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9465     if not export_info.has_section(constants.INISECT_EXP):
9466       raise errors.ProgrammerError("Corrupted export config",
9467                                    errors.ECODE_ENVIRON)
9468
9469     ei_version = export_info.get(constants.INISECT_EXP, "version")
9470     if (int(ei_version) != constants.EXPORT_VERSION):
9471       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9472                                  (ei_version, constants.EXPORT_VERSION),
9473                                  errors.ECODE_ENVIRON)
9474     return export_info
9475
9476   def _ReadExportParams(self, einfo):
9477     """Use export parameters as defaults.
9478
9479     In case the opcode doesn't specify (as in override) some instance
9480     parameters, then try to use them from the export information, if
9481     that declares them.
9482
9483     """
9484     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9485
9486     if self.op.disk_template is None:
9487       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9488         self.op.disk_template = einfo.get(constants.INISECT_INS,
9489                                           "disk_template")
9490         if self.op.disk_template not in constants.DISK_TEMPLATES:
9491           raise errors.OpPrereqError("Disk template specified in configuration"
9492                                      " file is not one of the allowed values:"
9493                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9494       else:
9495         raise errors.OpPrereqError("No disk template specified and the export"
9496                                    " is missing the disk_template information",
9497                                    errors.ECODE_INVAL)
9498
9499     if not self.op.disks:
9500       disks = []
9501       # TODO: import the disk iv_name too
9502       for idx in range(constants.MAX_DISKS):
9503         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9504           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9505           disks.append({constants.IDISK_SIZE: disk_sz})
9506       self.op.disks = disks
9507       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9508         raise errors.OpPrereqError("No disk info specified and the export"
9509                                    " is missing the disk information",
9510                                    errors.ECODE_INVAL)
9511
9512     if not self.op.nics:
9513       nics = []
9514       for idx in range(constants.MAX_NICS):
9515         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9516           ndict = {}
9517           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9518             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9519             ndict[name] = v
9520           nics.append(ndict)
9521         else:
9522           break
9523       self.op.nics = nics
9524
9525     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9526       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9527
9528     if (self.op.hypervisor is None and
9529         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9530       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9531
9532     if einfo.has_section(constants.INISECT_HYP):
9533       # use the export parameters but do not override the ones
9534       # specified by the user
9535       for name, value in einfo.items(constants.INISECT_HYP):
9536         if name not in self.op.hvparams:
9537           self.op.hvparams[name] = value
9538
9539     if einfo.has_section(constants.INISECT_BEP):
9540       # use the parameters, without overriding
9541       for name, value in einfo.items(constants.INISECT_BEP):
9542         if name not in self.op.beparams:
9543           self.op.beparams[name] = value
9544         # Compatibility for the old "memory" be param
9545         if name == constants.BE_MEMORY:
9546           if constants.BE_MAXMEM not in self.op.beparams:
9547             self.op.beparams[constants.BE_MAXMEM] = value
9548           if constants.BE_MINMEM not in self.op.beparams:
9549             self.op.beparams[constants.BE_MINMEM] = value
9550     else:
9551       # try to read the parameters old style, from the main section
9552       for name in constants.BES_PARAMETERS:
9553         if (name not in self.op.beparams and
9554             einfo.has_option(constants.INISECT_INS, name)):
9555           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9556
9557     if einfo.has_section(constants.INISECT_OSP):
9558       # use the parameters, without overriding
9559       for name, value in einfo.items(constants.INISECT_OSP):
9560         if name not in self.op.osparams:
9561           self.op.osparams[name] = value
9562
9563   def _RevertToDefaults(self, cluster):
9564     """Revert the instance parameters to the default values.
9565
9566     """
9567     # hvparams
9568     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9569     for name in self.op.hvparams.keys():
9570       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9571         del self.op.hvparams[name]
9572     # beparams
9573     be_defs = cluster.SimpleFillBE({})
9574     for name in self.op.beparams.keys():
9575       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9576         del self.op.beparams[name]
9577     # nic params
9578     nic_defs = cluster.SimpleFillNIC({})
9579     for nic in self.op.nics:
9580       for name in constants.NICS_PARAMETERS:
9581         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9582           del nic[name]
9583     # osparams
9584     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9585     for name in self.op.osparams.keys():
9586       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9587         del self.op.osparams[name]
9588
9589   def _CalculateFileStorageDir(self):
9590     """Calculate final instance file storage dir.
9591
9592     """
9593     # file storage dir calculation/check
9594     self.instance_file_storage_dir = None
9595     if self.op.disk_template in constants.DTS_FILEBASED:
9596       # build the full file storage dir path
9597       joinargs = []
9598
9599       if self.op.disk_template == constants.DT_SHARED_FILE:
9600         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9601       else:
9602         get_fsd_fn = self.cfg.GetFileStorageDir
9603
9604       cfg_storagedir = get_fsd_fn()
9605       if not cfg_storagedir:
9606         raise errors.OpPrereqError("Cluster file storage dir not defined")
9607       joinargs.append(cfg_storagedir)
9608
9609       if self.op.file_storage_dir is not None:
9610         joinargs.append(self.op.file_storage_dir)
9611
9612       joinargs.append(self.op.instance_name)
9613
9614       # pylint: disable=W0142
9615       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9616
9617   def CheckPrereq(self): # pylint: disable=R0914
9618     """Check prerequisites.
9619
9620     """
9621     self._CalculateFileStorageDir()
9622
9623     if self.op.mode == constants.INSTANCE_IMPORT:
9624       export_info = self._ReadExportInfo()
9625       self._ReadExportParams(export_info)
9626
9627     if (not self.cfg.GetVGName() and
9628         self.op.disk_template not in constants.DTS_NOT_LVM):
9629       raise errors.OpPrereqError("Cluster does not support lvm-based"
9630                                  " instances", errors.ECODE_STATE)
9631
9632     if (self.op.hypervisor is None or
9633         self.op.hypervisor == constants.VALUE_AUTO):
9634       self.op.hypervisor = self.cfg.GetHypervisorType()
9635
9636     cluster = self.cfg.GetClusterInfo()
9637     enabled_hvs = cluster.enabled_hypervisors
9638     if self.op.hypervisor not in enabled_hvs:
9639       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9640                                  " cluster (%s)" % (self.op.hypervisor,
9641                                   ",".join(enabled_hvs)),
9642                                  errors.ECODE_STATE)
9643
9644     # Check tag validity
9645     for tag in self.op.tags:
9646       objects.TaggableObject.ValidateTag(tag)
9647
9648     # check hypervisor parameter syntax (locally)
9649     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9650     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9651                                       self.op.hvparams)
9652     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9653     hv_type.CheckParameterSyntax(filled_hvp)
9654     self.hv_full = filled_hvp
9655     # check that we don't specify global parameters on an instance
9656     _CheckGlobalHvParams(self.op.hvparams)
9657
9658     # fill and remember the beparams dict
9659     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9660     for param, value in self.op.beparams.iteritems():
9661       if value == constants.VALUE_AUTO:
9662         self.op.beparams[param] = default_beparams[param]
9663     objects.UpgradeBeParams(self.op.beparams)
9664     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9665     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9666
9667     # build os parameters
9668     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9669
9670     # now that hvp/bep are in final format, let's reset to defaults,
9671     # if told to do so
9672     if self.op.identify_defaults:
9673       self._RevertToDefaults(cluster)
9674
9675     # NIC buildup
9676     self.nics = []
9677     for idx, nic in enumerate(self.op.nics):
9678       nic_mode_req = nic.get(constants.INIC_MODE, None)
9679       nic_mode = nic_mode_req
9680       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9681         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9682
9683       # in routed mode, for the first nic, the default ip is 'auto'
9684       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9685         default_ip_mode = constants.VALUE_AUTO
9686       else:
9687         default_ip_mode = constants.VALUE_NONE
9688
9689       # ip validity checks
9690       ip = nic.get(constants.INIC_IP, default_ip_mode)
9691       if ip is None or ip.lower() == constants.VALUE_NONE:
9692         nic_ip = None
9693       elif ip.lower() == constants.VALUE_AUTO:
9694         if not self.op.name_check:
9695           raise errors.OpPrereqError("IP address set to auto but name checks"
9696                                      " have been skipped",
9697                                      errors.ECODE_INVAL)
9698         nic_ip = self.hostname1.ip
9699       else:
9700         if not netutils.IPAddress.IsValid(ip):
9701           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9702                                      errors.ECODE_INVAL)
9703         nic_ip = ip
9704
9705       # TODO: check the ip address for uniqueness
9706       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9707         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9708                                    errors.ECODE_INVAL)
9709
9710       # MAC address verification
9711       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9712       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9713         mac = utils.NormalizeAndValidateMac(mac)
9714
9715         try:
9716           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9717         except errors.ReservationError:
9718           raise errors.OpPrereqError("MAC address %s already in use"
9719                                      " in cluster" % mac,
9720                                      errors.ECODE_NOTUNIQUE)
9721
9722       #  Build nic parameters
9723       link = nic.get(constants.INIC_LINK, None)
9724       if link == constants.VALUE_AUTO:
9725         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9726       nicparams = {}
9727       if nic_mode_req:
9728         nicparams[constants.NIC_MODE] = nic_mode
9729       if link:
9730         nicparams[constants.NIC_LINK] = link
9731
9732       check_params = cluster.SimpleFillNIC(nicparams)
9733       objects.NIC.CheckParameterSyntax(check_params)
9734       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9735
9736     # disk checks/pre-build
9737     default_vg = self.cfg.GetVGName()
9738     self.disks = []
9739     for disk in self.op.disks:
9740       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9741       if mode not in constants.DISK_ACCESS_SET:
9742         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9743                                    mode, errors.ECODE_INVAL)
9744       size = disk.get(constants.IDISK_SIZE, None)
9745       if size is None:
9746         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9747       try:
9748         size = int(size)
9749       except (TypeError, ValueError):
9750         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9751                                    errors.ECODE_INVAL)
9752
9753       data_vg = disk.get(constants.IDISK_VG, default_vg)
9754       new_disk = {
9755         constants.IDISK_SIZE: size,
9756         constants.IDISK_MODE: mode,
9757         constants.IDISK_VG: data_vg,
9758         }
9759       if constants.IDISK_METAVG in disk:
9760         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9761       if constants.IDISK_ADOPT in disk:
9762         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9763       self.disks.append(new_disk)
9764
9765     if self.op.mode == constants.INSTANCE_IMPORT:
9766       disk_images = []
9767       for idx in range(len(self.disks)):
9768         option = "disk%d_dump" % idx
9769         if export_info.has_option(constants.INISECT_INS, option):
9770           # FIXME: are the old os-es, disk sizes, etc. useful?
9771           export_name = export_info.get(constants.INISECT_INS, option)
9772           image = utils.PathJoin(self.op.src_path, export_name)
9773           disk_images.append(image)
9774         else:
9775           disk_images.append(False)
9776
9777       self.src_images = disk_images
9778
9779       old_name = export_info.get(constants.INISECT_INS, "name")
9780       if self.op.instance_name == old_name:
9781         for idx, nic in enumerate(self.nics):
9782           if nic.mac == constants.VALUE_AUTO:
9783             nic_mac_ini = "nic%d_mac" % idx
9784             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9785
9786     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9787
9788     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9789     if self.op.ip_check:
9790       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9791         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9792                                    (self.check_ip, self.op.instance_name),
9793                                    errors.ECODE_NOTUNIQUE)
9794
9795     #### mac address generation
9796     # By generating here the mac address both the allocator and the hooks get
9797     # the real final mac address rather than the 'auto' or 'generate' value.
9798     # There is a race condition between the generation and the instance object
9799     # creation, which means that we know the mac is valid now, but we're not
9800     # sure it will be when we actually add the instance. If things go bad
9801     # adding the instance will abort because of a duplicate mac, and the
9802     # creation job will fail.
9803     for nic in self.nics:
9804       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9805         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9806
9807     #### allocator run
9808
9809     if self.op.iallocator is not None:
9810       self._RunAllocator()
9811
9812     # Release all unneeded node locks
9813     _ReleaseLocks(self, locking.LEVEL_NODE,
9814                   keep=filter(None, [self.op.pnode, self.op.snode,
9815                                      self.op.src_node]))
9816     _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9817                   keep=filter(None, [self.op.pnode, self.op.snode,
9818                                      self.op.src_node]))
9819
9820     #### node related checks
9821
9822     # check primary node
9823     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9824     assert self.pnode is not None, \
9825       "Cannot retrieve locked node %s" % self.op.pnode
9826     if pnode.offline:
9827       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9828                                  pnode.name, errors.ECODE_STATE)
9829     if pnode.drained:
9830       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9831                                  pnode.name, errors.ECODE_STATE)
9832     if not pnode.vm_capable:
9833       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9834                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9835
9836     self.secondaries = []
9837
9838     # mirror node verification
9839     if self.op.disk_template in constants.DTS_INT_MIRROR:
9840       if self.op.snode == pnode.name:
9841         raise errors.OpPrereqError("The secondary node cannot be the"
9842                                    " primary node", errors.ECODE_INVAL)
9843       _CheckNodeOnline(self, self.op.snode)
9844       _CheckNodeNotDrained(self, self.op.snode)
9845       _CheckNodeVmCapable(self, self.op.snode)
9846       self.secondaries.append(self.op.snode)
9847
9848       snode = self.cfg.GetNodeInfo(self.op.snode)
9849       if pnode.group != snode.group:
9850         self.LogWarning("The primary and secondary nodes are in two"
9851                         " different node groups; the disk parameters"
9852                         " from the first disk's node group will be"
9853                         " used")
9854
9855     nodenames = [pnode.name] + self.secondaries
9856
9857     # Verify instance specs
9858     ispec = {
9859       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9860       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9861       constants.ISPEC_DISK_COUNT: len(self.disks),
9862       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9863       constants.ISPEC_NIC_COUNT: len(self.nics),
9864       }
9865
9866     group_info = self.cfg.GetNodeGroup(pnode.group)
9867     ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9868     res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9869     if not self.op.ignore_ipolicy and res:
9870       raise errors.OpPrereqError(("Instance allocation to group %s violates"
9871                                   " policy: %s") % (pnode.group,
9872                                                     utils.CommaJoin(res)),
9873                                   errors.ECODE_INVAL)
9874
9875     # disk parameters (not customizable at instance or node level)
9876     # just use the primary node parameters, ignoring the secondary.
9877     self.diskparams = group_info.diskparams
9878
9879     if not self.adopt_disks:
9880       if self.op.disk_template == constants.DT_RBD:
9881         # _CheckRADOSFreeSpace() is just a placeholder.
9882         # Any function that checks prerequisites can be placed here.
9883         # Check if there is enough space on the RADOS cluster.
9884         _CheckRADOSFreeSpace()
9885       else:
9886         # Check lv size requirements, if not adopting
9887         req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9888         _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9889
9890     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9891       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9892                                 disk[constants.IDISK_ADOPT])
9893                      for disk in self.disks])
9894       if len(all_lvs) != len(self.disks):
9895         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9896                                    errors.ECODE_INVAL)
9897       for lv_name in all_lvs:
9898         try:
9899           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9900           # to ReserveLV uses the same syntax
9901           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9902         except errors.ReservationError:
9903           raise errors.OpPrereqError("LV named %s used by another instance" %
9904                                      lv_name, errors.ECODE_NOTUNIQUE)
9905
9906       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9907       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9908
9909       node_lvs = self.rpc.call_lv_list([pnode.name],
9910                                        vg_names.payload.keys())[pnode.name]
9911       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9912       node_lvs = node_lvs.payload
9913
9914       delta = all_lvs.difference(node_lvs.keys())
9915       if delta:
9916         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9917                                    utils.CommaJoin(delta),
9918                                    errors.ECODE_INVAL)
9919       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9920       if online_lvs:
9921         raise errors.OpPrereqError("Online logical volumes found, cannot"
9922                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9923                                    errors.ECODE_STATE)
9924       # update the size of disk based on what is found
9925       for dsk in self.disks:
9926         dsk[constants.IDISK_SIZE] = \
9927           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9928                                         dsk[constants.IDISK_ADOPT])][0]))
9929
9930     elif self.op.disk_template == constants.DT_BLOCK:
9931       # Normalize and de-duplicate device paths
9932       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9933                        for disk in self.disks])
9934       if len(all_disks) != len(self.disks):
9935         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9936                                    errors.ECODE_INVAL)
9937       baddisks = [d for d in all_disks
9938                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9939       if baddisks:
9940         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9941                                    " cannot be adopted" %
9942                                    (", ".join(baddisks),
9943                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
9944                                    errors.ECODE_INVAL)
9945
9946       node_disks = self.rpc.call_bdev_sizes([pnode.name],
9947                                             list(all_disks))[pnode.name]
9948       node_disks.Raise("Cannot get block device information from node %s" %
9949                        pnode.name)
9950       node_disks = node_disks.payload
9951       delta = all_disks.difference(node_disks.keys())
9952       if delta:
9953         raise errors.OpPrereqError("Missing block device(s): %s" %
9954                                    utils.CommaJoin(delta),
9955                                    errors.ECODE_INVAL)
9956       for dsk in self.disks:
9957         dsk[constants.IDISK_SIZE] = \
9958           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9959
9960     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9961
9962     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9963     # check OS parameters (remotely)
9964     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9965
9966     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9967
9968     # memory check on primary node
9969     #TODO(dynmem): use MINMEM for checking
9970     if self.op.start:
9971       _CheckNodeFreeMemory(self, self.pnode.name,
9972                            "creating instance %s" % self.op.instance_name,
9973                            self.be_full[constants.BE_MAXMEM],
9974                            self.op.hypervisor)
9975
9976     self.dry_run_result = list(nodenames)
9977
9978   def Exec(self, feedback_fn):
9979     """Create and add the instance to the cluster.
9980
9981     """
9982     instance = self.op.instance_name
9983     pnode_name = self.pnode.name
9984
9985     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9986                 self.owned_locks(locking.LEVEL_NODE)), \
9987       "Node locks differ from node resource locks"
9988
9989     ht_kind = self.op.hypervisor
9990     if ht_kind in constants.HTS_REQ_PORT:
9991       network_port = self.cfg.AllocatePort()
9992     else:
9993       network_port = None
9994
9995     disks = _GenerateDiskTemplate(self,
9996                                   self.op.disk_template,
9997                                   instance, pnode_name,
9998                                   self.secondaries,
9999                                   self.disks,
10000                                   self.instance_file_storage_dir,
10001                                   self.op.file_driver,
10002                                   0,
10003                                   feedback_fn,
10004                                   self.diskparams)
10005
10006     iobj = objects.Instance(name=instance, os=self.op.os_type,
10007                             primary_node=pnode_name,
10008                             nics=self.nics, disks=disks,
10009                             disk_template=self.op.disk_template,
10010                             admin_state=constants.ADMINST_DOWN,
10011                             network_port=network_port,
10012                             beparams=self.op.beparams,
10013                             hvparams=self.op.hvparams,
10014                             hypervisor=self.op.hypervisor,
10015                             osparams=self.op.osparams,
10016                             )
10017
10018     if self.op.tags:
10019       for tag in self.op.tags:
10020         iobj.AddTag(tag)
10021
10022     if self.adopt_disks:
10023       if self.op.disk_template == constants.DT_PLAIN:
10024         # rename LVs to the newly-generated names; we need to construct
10025         # 'fake' LV disks with the old data, plus the new unique_id
10026         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
10027         rename_to = []
10028         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
10029           rename_to.append(t_dsk.logical_id)
10030           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
10031           self.cfg.SetDiskID(t_dsk, pnode_name)
10032         result = self.rpc.call_blockdev_rename(pnode_name,
10033                                                zip(tmp_disks, rename_to))
10034         result.Raise("Failed to rename adoped LVs")
10035     else:
10036       feedback_fn("* creating instance disks...")
10037       try:
10038         _CreateDisks(self, iobj)
10039       except errors.OpExecError:
10040         self.LogWarning("Device creation failed, reverting...")
10041         try:
10042           _RemoveDisks(self, iobj)
10043         finally:
10044           self.cfg.ReleaseDRBDMinors(instance)
10045           raise
10046
10047     feedback_fn("adding instance %s to cluster config" % instance)
10048
10049     self.cfg.AddInstance(iobj, self.proc.GetECId())
10050
10051     # Declare that we don't want to remove the instance lock anymore, as we've
10052     # added the instance to the config
10053     del self.remove_locks[locking.LEVEL_INSTANCE]
10054
10055     if self.op.mode == constants.INSTANCE_IMPORT:
10056       # Release unused nodes
10057       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
10058     else:
10059       # Release all nodes
10060       _ReleaseLocks(self, locking.LEVEL_NODE)
10061
10062     disk_abort = False
10063     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
10064       feedback_fn("* wiping instance disks...")
10065       try:
10066         _WipeDisks(self, iobj)
10067       except errors.OpExecError, err:
10068         logging.exception("Wiping disks failed")
10069         self.LogWarning("Wiping instance disks failed (%s)", err)
10070         disk_abort = True
10071
10072     if disk_abort:
10073       # Something is already wrong with the disks, don't do anything else
10074       pass
10075     elif self.op.wait_for_sync:
10076       disk_abort = not _WaitForSync(self, iobj)
10077     elif iobj.disk_template in constants.DTS_INT_MIRROR:
10078       # make sure the disks are not degraded (still sync-ing is ok)
10079       feedback_fn("* checking mirrors status")
10080       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
10081     else:
10082       disk_abort = False
10083
10084     if disk_abort:
10085       _RemoveDisks(self, iobj)
10086       self.cfg.RemoveInstance(iobj.name)
10087       # Make sure the instance lock gets removed
10088       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
10089       raise errors.OpExecError("There are some degraded disks for"
10090                                " this instance")
10091
10092     # Release all node resource locks
10093     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
10094
10095     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
10096       if self.op.mode == constants.INSTANCE_CREATE:
10097         if not self.op.no_install:
10098           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
10099                         not self.op.wait_for_sync)
10100           if pause_sync:
10101             feedback_fn("* pausing disk sync to install instance OS")
10102             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10103                                                               iobj.disks, True)
10104             for idx, success in enumerate(result.payload):
10105               if not success:
10106                 logging.warn("pause-sync of instance %s for disk %d failed",
10107                              instance, idx)
10108
10109           feedback_fn("* running the instance OS create scripts...")
10110           # FIXME: pass debug option from opcode to backend
10111           os_add_result = \
10112             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
10113                                           self.op.debug_level)
10114           if pause_sync:
10115             feedback_fn("* resuming disk sync")
10116             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
10117                                                               iobj.disks, False)
10118             for idx, success in enumerate(result.payload):
10119               if not success:
10120                 logging.warn("resume-sync of instance %s for disk %d failed",
10121                              instance, idx)
10122
10123           os_add_result.Raise("Could not add os for instance %s"
10124                               " on node %s" % (instance, pnode_name))
10125
10126       elif self.op.mode == constants.INSTANCE_IMPORT:
10127         feedback_fn("* running the instance OS import scripts...")
10128
10129         transfers = []
10130
10131         for idx, image in enumerate(self.src_images):
10132           if not image:
10133             continue
10134
10135           # FIXME: pass debug option from opcode to backend
10136           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
10137                                              constants.IEIO_FILE, (image, ),
10138                                              constants.IEIO_SCRIPT,
10139                                              (iobj.disks[idx], idx),
10140                                              None)
10141           transfers.append(dt)
10142
10143         import_result = \
10144           masterd.instance.TransferInstanceData(self, feedback_fn,
10145                                                 self.op.src_node, pnode_name,
10146                                                 self.pnode.secondary_ip,
10147                                                 iobj, transfers)
10148         if not compat.all(import_result):
10149           self.LogWarning("Some disks for instance %s on node %s were not"
10150                           " imported successfully" % (instance, pnode_name))
10151
10152       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
10153         feedback_fn("* preparing remote import...")
10154         # The source cluster will stop the instance before attempting to make a
10155         # connection. In some cases stopping an instance can take a long time,
10156         # hence the shutdown timeout is added to the connection timeout.
10157         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
10158                            self.op.source_shutdown_timeout)
10159         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10160
10161         assert iobj.primary_node == self.pnode.name
10162         disk_results = \
10163           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
10164                                         self.source_x509_ca,
10165                                         self._cds, timeouts)
10166         if not compat.all(disk_results):
10167           # TODO: Should the instance still be started, even if some disks
10168           # failed to import (valid for local imports, too)?
10169           self.LogWarning("Some disks for instance %s on node %s were not"
10170                           " imported successfully" % (instance, pnode_name))
10171
10172         # Run rename script on newly imported instance
10173         assert iobj.name == instance
10174         feedback_fn("Running rename script for %s" % instance)
10175         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
10176                                                    self.source_instance_name,
10177                                                    self.op.debug_level)
10178         if result.fail_msg:
10179           self.LogWarning("Failed to run rename script for %s on node"
10180                           " %s: %s" % (instance, pnode_name, result.fail_msg))
10181
10182       else:
10183         # also checked in the prereq part
10184         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
10185                                      % self.op.mode)
10186
10187     assert not self.owned_locks(locking.LEVEL_NODE_RES)
10188
10189     if self.op.start:
10190       iobj.admin_state = constants.ADMINST_UP
10191       self.cfg.Update(iobj, feedback_fn)
10192       logging.info("Starting instance %s on node %s", instance, pnode_name)
10193       feedback_fn("* starting instance...")
10194       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
10195                                             False)
10196       result.Raise("Could not start instance")
10197
10198     return list(iobj.all_nodes)
10199
10200
10201 def _CheckRADOSFreeSpace():
10202   """Compute disk size requirements inside the RADOS cluster.
10203
10204   """
10205   # For the RADOS cluster we assume there is always enough space.
10206   pass
10207
10208
10209 class LUInstanceConsole(NoHooksLU):
10210   """Connect to an instance's console.
10211
10212   This is somewhat special in that it returns the command line that
10213   you need to run on the master node in order to connect to the
10214   console.
10215
10216   """
10217   REQ_BGL = False
10218
10219   def ExpandNames(self):
10220     self.share_locks = _ShareAll()
10221     self._ExpandAndLockInstance()
10222
10223   def CheckPrereq(self):
10224     """Check prerequisites.
10225
10226     This checks that the instance is in the cluster.
10227
10228     """
10229     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10230     assert self.instance is not None, \
10231       "Cannot retrieve locked instance %s" % self.op.instance_name
10232     _CheckNodeOnline(self, self.instance.primary_node)
10233
10234   def Exec(self, feedback_fn):
10235     """Connect to the console of an instance
10236
10237     """
10238     instance = self.instance
10239     node = instance.primary_node
10240
10241     node_insts = self.rpc.call_instance_list([node],
10242                                              [instance.hypervisor])[node]
10243     node_insts.Raise("Can't get node information from %s" % node)
10244
10245     if instance.name not in node_insts.payload:
10246       if instance.admin_state == constants.ADMINST_UP:
10247         state = constants.INSTST_ERRORDOWN
10248       elif instance.admin_state == constants.ADMINST_DOWN:
10249         state = constants.INSTST_ADMINDOWN
10250       else:
10251         state = constants.INSTST_ADMINOFFLINE
10252       raise errors.OpExecError("Instance %s is not running (state %s)" %
10253                                (instance.name, state))
10254
10255     logging.debug("Connecting to console of %s on %s", instance.name, node)
10256
10257     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10258
10259
10260 def _GetInstanceConsole(cluster, instance):
10261   """Returns console information for an instance.
10262
10263   @type cluster: L{objects.Cluster}
10264   @type instance: L{objects.Instance}
10265   @rtype: dict
10266
10267   """
10268   hyper = hypervisor.GetHypervisor(instance.hypervisor)
10269   # beparams and hvparams are passed separately, to avoid editing the
10270   # instance and then saving the defaults in the instance itself.
10271   hvparams = cluster.FillHV(instance)
10272   beparams = cluster.FillBE(instance)
10273   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10274
10275   assert console.instance == instance.name
10276   assert console.Validate()
10277
10278   return console.ToDict()
10279
10280
10281 class LUInstanceReplaceDisks(LogicalUnit):
10282   """Replace the disks of an instance.
10283
10284   """
10285   HPATH = "mirrors-replace"
10286   HTYPE = constants.HTYPE_INSTANCE
10287   REQ_BGL = False
10288
10289   def CheckArguments(self):
10290     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10291                                   self.op.iallocator)
10292
10293   def ExpandNames(self):
10294     self._ExpandAndLockInstance()
10295
10296     assert locking.LEVEL_NODE not in self.needed_locks
10297     assert locking.LEVEL_NODE_RES not in self.needed_locks
10298     assert locking.LEVEL_NODEGROUP not in self.needed_locks
10299
10300     assert self.op.iallocator is None or self.op.remote_node is None, \
10301       "Conflicting options"
10302
10303     if self.op.remote_node is not None:
10304       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10305
10306       # Warning: do not remove the locking of the new secondary here
10307       # unless DRBD8.AddChildren is changed to work in parallel;
10308       # currently it doesn't since parallel invocations of
10309       # FindUnusedMinor will conflict
10310       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10311       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10312     else:
10313       self.needed_locks[locking.LEVEL_NODE] = []
10314       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10315
10316       if self.op.iallocator is not None:
10317         # iallocator will select a new node in the same group
10318         self.needed_locks[locking.LEVEL_NODEGROUP] = []
10319
10320     self.needed_locks[locking.LEVEL_NODE_RES] = []
10321
10322     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10323                                    self.op.iallocator, self.op.remote_node,
10324                                    self.op.disks, False, self.op.early_release,
10325                                    self.op.ignore_ipolicy)
10326
10327     self.tasklets = [self.replacer]
10328
10329   def DeclareLocks(self, level):
10330     if level == locking.LEVEL_NODEGROUP:
10331       assert self.op.remote_node is None
10332       assert self.op.iallocator is not None
10333       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10334
10335       self.share_locks[locking.LEVEL_NODEGROUP] = 1
10336       # Lock all groups used by instance optimistically; this requires going
10337       # via the node before it's locked, requiring verification later on
10338       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10339         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10340
10341     elif level == locking.LEVEL_NODE:
10342       if self.op.iallocator is not None:
10343         assert self.op.remote_node is None
10344         assert not self.needed_locks[locking.LEVEL_NODE]
10345
10346         # Lock member nodes of all locked groups
10347         self.needed_locks[locking.LEVEL_NODE] = [node_name
10348           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10349           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10350       else:
10351         self._LockInstancesNodes()
10352     elif level == locking.LEVEL_NODE_RES:
10353       # Reuse node locks
10354       self.needed_locks[locking.LEVEL_NODE_RES] = \
10355         self.needed_locks[locking.LEVEL_NODE]
10356
10357   def BuildHooksEnv(self):
10358     """Build hooks env.
10359
10360     This runs on the master, the primary and all the secondaries.
10361
10362     """
10363     instance = self.replacer.instance
10364     env = {
10365       "MODE": self.op.mode,
10366       "NEW_SECONDARY": self.op.remote_node,
10367       "OLD_SECONDARY": instance.secondary_nodes[0],
10368       }
10369     env.update(_BuildInstanceHookEnvByObject(self, instance))
10370     return env
10371
10372   def BuildHooksNodes(self):
10373     """Build hooks nodes.
10374
10375     """
10376     instance = self.replacer.instance
10377     nl = [
10378       self.cfg.GetMasterNode(),
10379       instance.primary_node,
10380       ]
10381     if self.op.remote_node is not None:
10382       nl.append(self.op.remote_node)
10383     return nl, nl
10384
10385   def CheckPrereq(self):
10386     """Check prerequisites.
10387
10388     """
10389     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10390             self.op.iallocator is None)
10391
10392     # Verify if node group locks are still correct
10393     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10394     if owned_groups:
10395       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10396
10397     return LogicalUnit.CheckPrereq(self)
10398
10399
10400 class TLReplaceDisks(Tasklet):
10401   """Replaces disks for an instance.
10402
10403   Note: Locking is not within the scope of this class.
10404
10405   """
10406   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10407                disks, delay_iallocator, early_release, ignore_ipolicy):
10408     """Initializes this class.
10409
10410     """
10411     Tasklet.__init__(self, lu)
10412
10413     # Parameters
10414     self.instance_name = instance_name
10415     self.mode = mode
10416     self.iallocator_name = iallocator_name
10417     self.remote_node = remote_node
10418     self.disks = disks
10419     self.delay_iallocator = delay_iallocator
10420     self.early_release = early_release
10421     self.ignore_ipolicy = ignore_ipolicy
10422
10423     # Runtime data
10424     self.instance = None
10425     self.new_node = None
10426     self.target_node = None
10427     self.other_node = None
10428     self.remote_node_info = None
10429     self.node_secondary_ip = None
10430
10431   @staticmethod
10432   def CheckArguments(mode, remote_node, iallocator):
10433     """Helper function for users of this class.
10434
10435     """
10436     # check for valid parameter combination
10437     if mode == constants.REPLACE_DISK_CHG:
10438       if remote_node is None and iallocator is None:
10439         raise errors.OpPrereqError("When changing the secondary either an"
10440                                    " iallocator script must be used or the"
10441                                    " new node given", errors.ECODE_INVAL)
10442
10443       if remote_node is not None and iallocator is not None:
10444         raise errors.OpPrereqError("Give either the iallocator or the new"
10445                                    " secondary, not both", errors.ECODE_INVAL)
10446
10447     elif remote_node is not None or iallocator is not None:
10448       # Not replacing the secondary
10449       raise errors.OpPrereqError("The iallocator and new node options can"
10450                                  " only be used when changing the"
10451                                  " secondary node", errors.ECODE_INVAL)
10452
10453   @staticmethod
10454   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10455     """Compute a new secondary node using an IAllocator.
10456
10457     """
10458     ial = IAllocator(lu.cfg, lu.rpc,
10459                      mode=constants.IALLOCATOR_MODE_RELOC,
10460                      name=instance_name,
10461                      relocate_from=list(relocate_from))
10462
10463     ial.Run(iallocator_name)
10464
10465     if not ial.success:
10466       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10467                                  " %s" % (iallocator_name, ial.info),
10468                                  errors.ECODE_NORES)
10469
10470     if len(ial.result) != ial.required_nodes:
10471       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10472                                  " of nodes (%s), required %s" %
10473                                  (iallocator_name,
10474                                   len(ial.result), ial.required_nodes),
10475                                  errors.ECODE_FAULT)
10476
10477     remote_node_name = ial.result[0]
10478
10479     lu.LogInfo("Selected new secondary for instance '%s': %s",
10480                instance_name, remote_node_name)
10481
10482     return remote_node_name
10483
10484   def _FindFaultyDisks(self, node_name):
10485     """Wrapper for L{_FindFaultyInstanceDisks}.
10486
10487     """
10488     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10489                                     node_name, True)
10490
10491   def _CheckDisksActivated(self, instance):
10492     """Checks if the instance disks are activated.
10493
10494     @param instance: The instance to check disks
10495     @return: True if they are activated, False otherwise
10496
10497     """
10498     nodes = instance.all_nodes
10499
10500     for idx, dev in enumerate(instance.disks):
10501       for node in nodes:
10502         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10503         self.cfg.SetDiskID(dev, node)
10504
10505         result = self.rpc.call_blockdev_find(node, dev)
10506
10507         if result.offline:
10508           continue
10509         elif result.fail_msg or not result.payload:
10510           return False
10511
10512     return True
10513
10514   def CheckPrereq(self):
10515     """Check prerequisites.
10516
10517     This checks that the instance is in the cluster.
10518
10519     """
10520     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10521     assert instance is not None, \
10522       "Cannot retrieve locked instance %s" % self.instance_name
10523
10524     if instance.disk_template != constants.DT_DRBD8:
10525       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10526                                  " instances", errors.ECODE_INVAL)
10527
10528     if len(instance.secondary_nodes) != 1:
10529       raise errors.OpPrereqError("The instance has a strange layout,"
10530                                  " expected one secondary but found %d" %
10531                                  len(instance.secondary_nodes),
10532                                  errors.ECODE_FAULT)
10533
10534     if not self.delay_iallocator:
10535       self._CheckPrereq2()
10536
10537   def _CheckPrereq2(self):
10538     """Check prerequisites, second part.
10539
10540     This function should always be part of CheckPrereq. It was separated and is
10541     now called from Exec because during node evacuation iallocator was only
10542     called with an unmodified cluster model, not taking planned changes into
10543     account.
10544
10545     """
10546     instance = self.instance
10547     secondary_node = instance.secondary_nodes[0]
10548
10549     if self.iallocator_name is None:
10550       remote_node = self.remote_node
10551     else:
10552       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10553                                        instance.name, instance.secondary_nodes)
10554
10555     if remote_node is None:
10556       self.remote_node_info = None
10557     else:
10558       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10559              "Remote node '%s' is not locked" % remote_node
10560
10561       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10562       assert self.remote_node_info is not None, \
10563         "Cannot retrieve locked node %s" % remote_node
10564
10565     if remote_node == self.instance.primary_node:
10566       raise errors.OpPrereqError("The specified node is the primary node of"
10567                                  " the instance", errors.ECODE_INVAL)
10568
10569     if remote_node == secondary_node:
10570       raise errors.OpPrereqError("The specified node is already the"
10571                                  " secondary node of the instance",
10572                                  errors.ECODE_INVAL)
10573
10574     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10575                                     constants.REPLACE_DISK_CHG):
10576       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10577                                  errors.ECODE_INVAL)
10578
10579     if self.mode == constants.REPLACE_DISK_AUTO:
10580       if not self._CheckDisksActivated(instance):
10581         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10582                                    " first" % self.instance_name,
10583                                    errors.ECODE_STATE)
10584       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10585       faulty_secondary = self._FindFaultyDisks(secondary_node)
10586
10587       if faulty_primary and faulty_secondary:
10588         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10589                                    " one node and can not be repaired"
10590                                    " automatically" % self.instance_name,
10591                                    errors.ECODE_STATE)
10592
10593       if faulty_primary:
10594         self.disks = faulty_primary
10595         self.target_node = instance.primary_node
10596         self.other_node = secondary_node
10597         check_nodes = [self.target_node, self.other_node]
10598       elif faulty_secondary:
10599         self.disks = faulty_secondary
10600         self.target_node = secondary_node
10601         self.other_node = instance.primary_node
10602         check_nodes = [self.target_node, self.other_node]
10603       else:
10604         self.disks = []
10605         check_nodes = []
10606
10607     else:
10608       # Non-automatic modes
10609       if self.mode == constants.REPLACE_DISK_PRI:
10610         self.target_node = instance.primary_node
10611         self.other_node = secondary_node
10612         check_nodes = [self.target_node, self.other_node]
10613
10614       elif self.mode == constants.REPLACE_DISK_SEC:
10615         self.target_node = secondary_node
10616         self.other_node = instance.primary_node
10617         check_nodes = [self.target_node, self.other_node]
10618
10619       elif self.mode == constants.REPLACE_DISK_CHG:
10620         self.new_node = remote_node
10621         self.other_node = instance.primary_node
10622         self.target_node = secondary_node
10623         check_nodes = [self.new_node, self.other_node]
10624
10625         _CheckNodeNotDrained(self.lu, remote_node)
10626         _CheckNodeVmCapable(self.lu, remote_node)
10627
10628         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10629         assert old_node_info is not None
10630         if old_node_info.offline and not self.early_release:
10631           # doesn't make sense to delay the release
10632           self.early_release = True
10633           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10634                           " early-release mode", secondary_node)
10635
10636       else:
10637         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10638                                      self.mode)
10639
10640       # If not specified all disks should be replaced
10641       if not self.disks:
10642         self.disks = range(len(self.instance.disks))
10643
10644     # TODO: This is ugly, but right now we can't distinguish between internal
10645     # submitted opcode and external one. We should fix that.
10646     if self.remote_node_info:
10647       # We change the node, lets verify it still meets instance policy
10648       new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
10649       ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
10650                                        new_group_info)
10651       _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
10652                               ignore=self.ignore_ipolicy)
10653
10654     # TODO: compute disk parameters
10655     primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10656     secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10657     if primary_node_info.group != secondary_node_info.group:
10658       self.lu.LogInfo("The instance primary and secondary nodes are in two"
10659                       " different node groups; the disk parameters of the"
10660                       " primary node's group will be applied.")
10661
10662     self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10663
10664     for node in check_nodes:
10665       _CheckNodeOnline(self.lu, node)
10666
10667     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10668                                                           self.other_node,
10669                                                           self.target_node]
10670                               if node_name is not None)
10671
10672     # Release unneeded node and node resource locks
10673     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10674     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10675
10676     # Release any owned node group
10677     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10678       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10679
10680     # Check whether disks are valid
10681     for disk_idx in self.disks:
10682       instance.FindDisk(disk_idx)
10683
10684     # Get secondary node IP addresses
10685     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10686                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10687
10688   def Exec(self, feedback_fn):
10689     """Execute disk replacement.
10690
10691     This dispatches the disk replacement to the appropriate handler.
10692
10693     """
10694     if self.delay_iallocator:
10695       self._CheckPrereq2()
10696
10697     if __debug__:
10698       # Verify owned locks before starting operation
10699       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10700       assert set(owned_nodes) == set(self.node_secondary_ip), \
10701           ("Incorrect node locks, owning %s, expected %s" %
10702            (owned_nodes, self.node_secondary_ip.keys()))
10703       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10704               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10705
10706       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10707       assert list(owned_instances) == [self.instance_name], \
10708           "Instance '%s' not locked" % self.instance_name
10709
10710       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10711           "Should not own any node group lock at this point"
10712
10713     if not self.disks:
10714       feedback_fn("No disks need replacement")
10715       return
10716
10717     feedback_fn("Replacing disk(s) %s for %s" %
10718                 (utils.CommaJoin(self.disks), self.instance.name))
10719
10720     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10721
10722     # Activate the instance disks if we're replacing them on a down instance
10723     if activate_disks:
10724       _StartInstanceDisks(self.lu, self.instance, True)
10725
10726     try:
10727       # Should we replace the secondary node?
10728       if self.new_node is not None:
10729         fn = self._ExecDrbd8Secondary
10730       else:
10731         fn = self._ExecDrbd8DiskOnly
10732
10733       result = fn(feedback_fn)
10734     finally:
10735       # Deactivate the instance disks if we're replacing them on a
10736       # down instance
10737       if activate_disks:
10738         _SafeShutdownInstanceDisks(self.lu, self.instance)
10739
10740     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10741
10742     if __debug__:
10743       # Verify owned locks
10744       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10745       nodes = frozenset(self.node_secondary_ip)
10746       assert ((self.early_release and not owned_nodes) or
10747               (not self.early_release and not (set(owned_nodes) - nodes))), \
10748         ("Not owning the correct locks, early_release=%s, owned=%r,"
10749          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10750
10751     return result
10752
10753   def _CheckVolumeGroup(self, nodes):
10754     self.lu.LogInfo("Checking volume groups")
10755
10756     vgname = self.cfg.GetVGName()
10757
10758     # Make sure volume group exists on all involved nodes
10759     results = self.rpc.call_vg_list(nodes)
10760     if not results:
10761       raise errors.OpExecError("Can't list volume groups on the nodes")
10762
10763     for node in nodes:
10764       res = results[node]
10765       res.Raise("Error checking node %s" % node)
10766       if vgname not in res.payload:
10767         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10768                                  (vgname, node))
10769
10770   def _CheckDisksExistence(self, nodes):
10771     # Check disk existence
10772     for idx, dev in enumerate(self.instance.disks):
10773       if idx not in self.disks:
10774         continue
10775
10776       for node in nodes:
10777         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10778         self.cfg.SetDiskID(dev, node)
10779
10780         result = self.rpc.call_blockdev_find(node, dev)
10781
10782         msg = result.fail_msg
10783         if msg or not result.payload:
10784           if not msg:
10785             msg = "disk not found"
10786           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10787                                    (idx, node, msg))
10788
10789   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10790     for idx, dev in enumerate(self.instance.disks):
10791       if idx not in self.disks:
10792         continue
10793
10794       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10795                       (idx, node_name))
10796
10797       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10798                                    ldisk=ldisk):
10799         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10800                                  " replace disks for instance %s" %
10801                                  (node_name, self.instance.name))
10802
10803   def _CreateNewStorage(self, node_name):
10804     """Create new storage on the primary or secondary node.
10805
10806     This is only used for same-node replaces, not for changing the
10807     secondary node, hence we don't want to modify the existing disk.
10808
10809     """
10810     iv_names = {}
10811
10812     for idx, dev in enumerate(self.instance.disks):
10813       if idx not in self.disks:
10814         continue
10815
10816       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10817
10818       self.cfg.SetDiskID(dev, node_name)
10819
10820       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10821       names = _GenerateUniqueNames(self.lu, lv_names)
10822
10823       _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10824
10825       vg_data = dev.children[0].logical_id[0]
10826       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10827                              logical_id=(vg_data, names[0]), params=data_p)
10828       vg_meta = dev.children[1].logical_id[0]
10829       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10830                              logical_id=(vg_meta, names[1]), params=meta_p)
10831
10832       new_lvs = [lv_data, lv_meta]
10833       old_lvs = [child.Copy() for child in dev.children]
10834       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10835
10836       # we pass force_create=True to force the LVM creation
10837       for new_lv in new_lvs:
10838         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10839                         _GetInstanceInfoText(self.instance), False)
10840
10841     return iv_names
10842
10843   def _CheckDevices(self, node_name, iv_names):
10844     for name, (dev, _, _) in iv_names.iteritems():
10845       self.cfg.SetDiskID(dev, node_name)
10846
10847       result = self.rpc.call_blockdev_find(node_name, dev)
10848
10849       msg = result.fail_msg
10850       if msg or not result.payload:
10851         if not msg:
10852           msg = "disk not found"
10853         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10854                                  (name, msg))
10855
10856       if result.payload.is_degraded:
10857         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10858
10859   def _RemoveOldStorage(self, node_name, iv_names):
10860     for name, (_, old_lvs, _) in iv_names.iteritems():
10861       self.lu.LogInfo("Remove logical volumes for %s" % name)
10862
10863       for lv in old_lvs:
10864         self.cfg.SetDiskID(lv, node_name)
10865
10866         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10867         if msg:
10868           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10869                              hint="remove unused LVs manually")
10870
10871   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10872     """Replace a disk on the primary or secondary for DRBD 8.
10873
10874     The algorithm for replace is quite complicated:
10875
10876       1. for each disk to be replaced:
10877
10878         1. create new LVs on the target node with unique names
10879         1. detach old LVs from the drbd device
10880         1. rename old LVs to name_replaced.<time_t>
10881         1. rename new LVs to old LVs
10882         1. attach the new LVs (with the old names now) to the drbd device
10883
10884       1. wait for sync across all devices
10885
10886       1. for each modified disk:
10887
10888         1. remove old LVs (which have the name name_replaces.<time_t>)
10889
10890     Failures are not very well handled.
10891
10892     """
10893     steps_total = 6
10894
10895     # Step: check device activation
10896     self.lu.LogStep(1, steps_total, "Check device existence")
10897     self._CheckDisksExistence([self.other_node, self.target_node])
10898     self._CheckVolumeGroup([self.target_node, self.other_node])
10899
10900     # Step: check other node consistency
10901     self.lu.LogStep(2, steps_total, "Check peer consistency")
10902     self._CheckDisksConsistency(self.other_node,
10903                                 self.other_node == self.instance.primary_node,
10904                                 False)
10905
10906     # Step: create new storage
10907     self.lu.LogStep(3, steps_total, "Allocate new storage")
10908     iv_names = self._CreateNewStorage(self.target_node)
10909
10910     # Step: for each lv, detach+rename*2+attach
10911     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10912     for dev, old_lvs, new_lvs in iv_names.itervalues():
10913       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10914
10915       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10916                                                      old_lvs)
10917       result.Raise("Can't detach drbd from local storage on node"
10918                    " %s for device %s" % (self.target_node, dev.iv_name))
10919       #dev.children = []
10920       #cfg.Update(instance)
10921
10922       # ok, we created the new LVs, so now we know we have the needed
10923       # storage; as such, we proceed on the target node to rename
10924       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10925       # using the assumption that logical_id == physical_id (which in
10926       # turn is the unique_id on that node)
10927
10928       # FIXME(iustin): use a better name for the replaced LVs
10929       temp_suffix = int(time.time())
10930       ren_fn = lambda d, suff: (d.physical_id[0],
10931                                 d.physical_id[1] + "_replaced-%s" % suff)
10932
10933       # Build the rename list based on what LVs exist on the node
10934       rename_old_to_new = []
10935       for to_ren in old_lvs:
10936         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10937         if not result.fail_msg and result.payload:
10938           # device exists
10939           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10940
10941       self.lu.LogInfo("Renaming the old LVs on the target node")
10942       result = self.rpc.call_blockdev_rename(self.target_node,
10943                                              rename_old_to_new)
10944       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10945
10946       # Now we rename the new LVs to the old LVs
10947       self.lu.LogInfo("Renaming the new LVs on the target node")
10948       rename_new_to_old = [(new, old.physical_id)
10949                            for old, new in zip(old_lvs, new_lvs)]
10950       result = self.rpc.call_blockdev_rename(self.target_node,
10951                                              rename_new_to_old)
10952       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10953
10954       # Intermediate steps of in memory modifications
10955       for old, new in zip(old_lvs, new_lvs):
10956         new.logical_id = old.logical_id
10957         self.cfg.SetDiskID(new, self.target_node)
10958
10959       # We need to modify old_lvs so that removal later removes the
10960       # right LVs, not the newly added ones; note that old_lvs is a
10961       # copy here
10962       for disk in old_lvs:
10963         disk.logical_id = ren_fn(disk, temp_suffix)
10964         self.cfg.SetDiskID(disk, self.target_node)
10965
10966       # Now that the new lvs have the old name, we can add them to the device
10967       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10968       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10969                                                   new_lvs)
10970       msg = result.fail_msg
10971       if msg:
10972         for new_lv in new_lvs:
10973           msg2 = self.rpc.call_blockdev_remove(self.target_node,
10974                                                new_lv).fail_msg
10975           if msg2:
10976             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10977                                hint=("cleanup manually the unused logical"
10978                                      "volumes"))
10979         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10980
10981     cstep = itertools.count(5)
10982
10983     if self.early_release:
10984       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10985       self._RemoveOldStorage(self.target_node, iv_names)
10986       # TODO: Check if releasing locks early still makes sense
10987       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10988     else:
10989       # Release all resource locks except those used by the instance
10990       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10991                     keep=self.node_secondary_ip.keys())
10992
10993     # Release all node locks while waiting for sync
10994     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10995
10996     # TODO: Can the instance lock be downgraded here? Take the optional disk
10997     # shutdown in the caller into consideration.
10998
10999     # Wait for sync
11000     # This can fail as the old devices are degraded and _WaitForSync
11001     # does a combined result over all disks, so we don't check its return value
11002     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11003     _WaitForSync(self.lu, self.instance)
11004
11005     # Check all devices manually
11006     self._CheckDevices(self.instance.primary_node, iv_names)
11007
11008     # Step: remove old storage
11009     if not self.early_release:
11010       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11011       self._RemoveOldStorage(self.target_node, iv_names)
11012
11013   def _ExecDrbd8Secondary(self, feedback_fn):
11014     """Replace the secondary node for DRBD 8.
11015
11016     The algorithm for replace is quite complicated:
11017       - for all disks of the instance:
11018         - create new LVs on the new node with same names
11019         - shutdown the drbd device on the old secondary
11020         - disconnect the drbd network on the primary
11021         - create the drbd device on the new secondary
11022         - network attach the drbd on the primary, using an artifice:
11023           the drbd code for Attach() will connect to the network if it
11024           finds a device which is connected to the good local disks but
11025           not network enabled
11026       - wait for sync across all devices
11027       - remove all disks from the old secondary
11028
11029     Failures are not very well handled.
11030
11031     """
11032     steps_total = 6
11033
11034     pnode = self.instance.primary_node
11035
11036     # Step: check device activation
11037     self.lu.LogStep(1, steps_total, "Check device existence")
11038     self._CheckDisksExistence([self.instance.primary_node])
11039     self._CheckVolumeGroup([self.instance.primary_node])
11040
11041     # Step: check other node consistency
11042     self.lu.LogStep(2, steps_total, "Check peer consistency")
11043     self._CheckDisksConsistency(self.instance.primary_node, True, True)
11044
11045     # Step: create new storage
11046     self.lu.LogStep(3, steps_total, "Allocate new storage")
11047     for idx, dev in enumerate(self.instance.disks):
11048       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
11049                       (self.new_node, idx))
11050       # we pass force_create=True to force LVM creation
11051       for new_lv in dev.children:
11052         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
11053                         _GetInstanceInfoText(self.instance), False)
11054
11055     # Step 4: dbrd minors and drbd setups changes
11056     # after this, we must manually remove the drbd minors on both the
11057     # error and the success paths
11058     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
11059     minors = self.cfg.AllocateDRBDMinor([self.new_node
11060                                          for dev in self.instance.disks],
11061                                         self.instance.name)
11062     logging.debug("Allocated minors %r", minors)
11063
11064     iv_names = {}
11065     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
11066       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
11067                       (self.new_node, idx))
11068       # create new devices on new_node; note that we create two IDs:
11069       # one without port, so the drbd will be activated without
11070       # networking information on the new node at this stage, and one
11071       # with network, for the latter activation in step 4
11072       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
11073       if self.instance.primary_node == o_node1:
11074         p_minor = o_minor1
11075       else:
11076         assert self.instance.primary_node == o_node2, "Three-node instance?"
11077         p_minor = o_minor2
11078
11079       new_alone_id = (self.instance.primary_node, self.new_node, None,
11080                       p_minor, new_minor, o_secret)
11081       new_net_id = (self.instance.primary_node, self.new_node, o_port,
11082                     p_minor, new_minor, o_secret)
11083
11084       iv_names[idx] = (dev, dev.children, new_net_id)
11085       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
11086                     new_net_id)
11087       drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
11088       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
11089                               logical_id=new_alone_id,
11090                               children=dev.children,
11091                               size=dev.size,
11092                               params=drbd_params)
11093       try:
11094         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
11095                               _GetInstanceInfoText(self.instance), False)
11096       except errors.GenericError:
11097         self.cfg.ReleaseDRBDMinors(self.instance.name)
11098         raise
11099
11100     # We have new devices, shutdown the drbd on the old secondary
11101     for idx, dev in enumerate(self.instance.disks):
11102       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
11103       self.cfg.SetDiskID(dev, self.target_node)
11104       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
11105       if msg:
11106         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
11107                            "node: %s" % (idx, msg),
11108                            hint=("Please cleanup this device manually as"
11109                                  " soon as possible"))
11110
11111     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
11112     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
11113                                                self.instance.disks)[pnode]
11114
11115     msg = result.fail_msg
11116     if msg:
11117       # detaches didn't succeed (unlikely)
11118       self.cfg.ReleaseDRBDMinors(self.instance.name)
11119       raise errors.OpExecError("Can't detach the disks from the network on"
11120                                " old node: %s" % (msg,))
11121
11122     # if we managed to detach at least one, we update all the disks of
11123     # the instance to point to the new secondary
11124     self.lu.LogInfo("Updating instance configuration")
11125     for dev, _, new_logical_id in iv_names.itervalues():
11126       dev.logical_id = new_logical_id
11127       self.cfg.SetDiskID(dev, self.instance.primary_node)
11128
11129     self.cfg.Update(self.instance, feedback_fn)
11130
11131     # Release all node locks (the configuration has been updated)
11132     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
11133
11134     # and now perform the drbd attach
11135     self.lu.LogInfo("Attaching primary drbds to new secondary"
11136                     " (standalone => connected)")
11137     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
11138                                             self.new_node],
11139                                            self.node_secondary_ip,
11140                                            self.instance.disks,
11141                                            self.instance.name,
11142                                            False)
11143     for to_node, to_result in result.items():
11144       msg = to_result.fail_msg
11145       if msg:
11146         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
11147                            to_node, msg,
11148                            hint=("please do a gnt-instance info to see the"
11149                                  " status of disks"))
11150
11151     cstep = itertools.count(5)
11152
11153     if self.early_release:
11154       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11155       self._RemoveOldStorage(self.target_node, iv_names)
11156       # TODO: Check if releasing locks early still makes sense
11157       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
11158     else:
11159       # Release all resource locks except those used by the instance
11160       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
11161                     keep=self.node_secondary_ip.keys())
11162
11163     # TODO: Can the instance lock be downgraded here? Take the optional disk
11164     # shutdown in the caller into consideration.
11165
11166     # Wait for sync
11167     # This can fail as the old devices are degraded and _WaitForSync
11168     # does a combined result over all disks, so we don't check its return value
11169     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
11170     _WaitForSync(self.lu, self.instance)
11171
11172     # Check all devices manually
11173     self._CheckDevices(self.instance.primary_node, iv_names)
11174
11175     # Step: remove old storage
11176     if not self.early_release:
11177       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
11178       self._RemoveOldStorage(self.target_node, iv_names)
11179
11180
11181 class LURepairNodeStorage(NoHooksLU):
11182   """Repairs the volume group on a node.
11183
11184   """
11185   REQ_BGL = False
11186
11187   def CheckArguments(self):
11188     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11189
11190     storage_type = self.op.storage_type
11191
11192     if (constants.SO_FIX_CONSISTENCY not in
11193         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
11194       raise errors.OpPrereqError("Storage units of type '%s' can not be"
11195                                  " repaired" % storage_type,
11196                                  errors.ECODE_INVAL)
11197
11198   def ExpandNames(self):
11199     self.needed_locks = {
11200       locking.LEVEL_NODE: [self.op.node_name],
11201       }
11202
11203   def _CheckFaultyDisks(self, instance, node_name):
11204     """Ensure faulty disks abort the opcode or at least warn."""
11205     try:
11206       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
11207                                   node_name, True):
11208         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
11209                                    " node '%s'" % (instance.name, node_name),
11210                                    errors.ECODE_STATE)
11211     except errors.OpPrereqError, err:
11212       if self.op.ignore_consistency:
11213         self.proc.LogWarning(str(err.args[0]))
11214       else:
11215         raise
11216
11217   def CheckPrereq(self):
11218     """Check prerequisites.
11219
11220     """
11221     # Check whether any instance on this node has faulty disks
11222     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
11223       if inst.admin_state != constants.ADMINST_UP:
11224         continue
11225       check_nodes = set(inst.all_nodes)
11226       check_nodes.discard(self.op.node_name)
11227       for inst_node_name in check_nodes:
11228         self._CheckFaultyDisks(inst, inst_node_name)
11229
11230   def Exec(self, feedback_fn):
11231     feedback_fn("Repairing storage unit '%s' on %s ..." %
11232                 (self.op.name, self.op.node_name))
11233
11234     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
11235     result = self.rpc.call_storage_execute(self.op.node_name,
11236                                            self.op.storage_type, st_args,
11237                                            self.op.name,
11238                                            constants.SO_FIX_CONSISTENCY)
11239     result.Raise("Failed to repair storage unit '%s' on %s" %
11240                  (self.op.name, self.op.node_name))
11241
11242
11243 class LUNodeEvacuate(NoHooksLU):
11244   """Evacuates instances off a list of nodes.
11245
11246   """
11247   REQ_BGL = False
11248
11249   _MODE2IALLOCATOR = {
11250     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
11251     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
11252     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
11253     }
11254   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
11255   assert (frozenset(_MODE2IALLOCATOR.values()) ==
11256           constants.IALLOCATOR_NEVAC_MODES)
11257
11258   def CheckArguments(self):
11259     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
11260
11261   def ExpandNames(self):
11262     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
11263
11264     if self.op.remote_node is not None:
11265       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11266       assert self.op.remote_node
11267
11268       if self.op.remote_node == self.op.node_name:
11269         raise errors.OpPrereqError("Can not use evacuated node as a new"
11270                                    " secondary node", errors.ECODE_INVAL)
11271
11272       if self.op.mode != constants.NODE_EVAC_SEC:
11273         raise errors.OpPrereqError("Without the use of an iallocator only"
11274                                    " secondary instances can be evacuated",
11275                                    errors.ECODE_INVAL)
11276
11277     # Declare locks
11278     self.share_locks = _ShareAll()
11279     self.needed_locks = {
11280       locking.LEVEL_INSTANCE: [],
11281       locking.LEVEL_NODEGROUP: [],
11282       locking.LEVEL_NODE: [],
11283       }
11284
11285     # Determine nodes (via group) optimistically, needs verification once locks
11286     # have been acquired
11287     self.lock_nodes = self._DetermineNodes()
11288
11289   def _DetermineNodes(self):
11290     """Gets the list of nodes to operate on.
11291
11292     """
11293     if self.op.remote_node is None:
11294       # Iallocator will choose any node(s) in the same group
11295       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11296     else:
11297       group_nodes = frozenset([self.op.remote_node])
11298
11299     # Determine nodes to be locked
11300     return set([self.op.node_name]) | group_nodes
11301
11302   def _DetermineInstances(self):
11303     """Builds list of instances to operate on.
11304
11305     """
11306     assert self.op.mode in constants.NODE_EVAC_MODES
11307
11308     if self.op.mode == constants.NODE_EVAC_PRI:
11309       # Primary instances only
11310       inst_fn = _GetNodePrimaryInstances
11311       assert self.op.remote_node is None, \
11312         "Evacuating primary instances requires iallocator"
11313     elif self.op.mode == constants.NODE_EVAC_SEC:
11314       # Secondary instances only
11315       inst_fn = _GetNodeSecondaryInstances
11316     else:
11317       # All instances
11318       assert self.op.mode == constants.NODE_EVAC_ALL
11319       inst_fn = _GetNodeInstances
11320       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11321       # per instance
11322       raise errors.OpPrereqError("Due to an issue with the iallocator"
11323                                  " interface it is not possible to evacuate"
11324                                  " all instances at once; specify explicitly"
11325                                  " whether to evacuate primary or secondary"
11326                                  " instances",
11327                                  errors.ECODE_INVAL)
11328
11329     return inst_fn(self.cfg, self.op.node_name)
11330
11331   def DeclareLocks(self, level):
11332     if level == locking.LEVEL_INSTANCE:
11333       # Lock instances optimistically, needs verification once node and group
11334       # locks have been acquired
11335       self.needed_locks[locking.LEVEL_INSTANCE] = \
11336         set(i.name for i in self._DetermineInstances())
11337
11338     elif level == locking.LEVEL_NODEGROUP:
11339       # Lock node groups for all potential target nodes optimistically, needs
11340       # verification once nodes have been acquired
11341       self.needed_locks[locking.LEVEL_NODEGROUP] = \
11342         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11343
11344     elif level == locking.LEVEL_NODE:
11345       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11346
11347   def CheckPrereq(self):
11348     # Verify locks
11349     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11350     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11351     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11352
11353     need_nodes = self._DetermineNodes()
11354
11355     if not owned_nodes.issuperset(need_nodes):
11356       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11357                                  " locks were acquired, current nodes are"
11358                                  " are '%s', used to be '%s'; retry the"
11359                                  " operation" %
11360                                  (self.op.node_name,
11361                                   utils.CommaJoin(need_nodes),
11362                                   utils.CommaJoin(owned_nodes)),
11363                                  errors.ECODE_STATE)
11364
11365     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11366     if owned_groups != wanted_groups:
11367       raise errors.OpExecError("Node groups changed since locks were acquired,"
11368                                " current groups are '%s', used to be '%s';"
11369                                " retry the operation" %
11370                                (utils.CommaJoin(wanted_groups),
11371                                 utils.CommaJoin(owned_groups)))
11372
11373     # Determine affected instances
11374     self.instances = self._DetermineInstances()
11375     self.instance_names = [i.name for i in self.instances]
11376
11377     if set(self.instance_names) != owned_instances:
11378       raise errors.OpExecError("Instances on node '%s' changed since locks"
11379                                " were acquired, current instances are '%s',"
11380                                " used to be '%s'; retry the operation" %
11381                                (self.op.node_name,
11382                                 utils.CommaJoin(self.instance_names),
11383                                 utils.CommaJoin(owned_instances)))
11384
11385     if self.instance_names:
11386       self.LogInfo("Evacuating instances from node '%s': %s",
11387                    self.op.node_name,
11388                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
11389     else:
11390       self.LogInfo("No instances to evacuate from node '%s'",
11391                    self.op.node_name)
11392
11393     if self.op.remote_node is not None:
11394       for i in self.instances:
11395         if i.primary_node == self.op.remote_node:
11396           raise errors.OpPrereqError("Node %s is the primary node of"
11397                                      " instance %s, cannot use it as"
11398                                      " secondary" %
11399                                      (self.op.remote_node, i.name),
11400                                      errors.ECODE_INVAL)
11401
11402   def Exec(self, feedback_fn):
11403     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11404
11405     if not self.instance_names:
11406       # No instances to evacuate
11407       jobs = []
11408
11409     elif self.op.iallocator is not None:
11410       # TODO: Implement relocation to other group
11411       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11412                        evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11413                        instances=list(self.instance_names))
11414
11415       ial.Run(self.op.iallocator)
11416
11417       if not ial.success:
11418         raise errors.OpPrereqError("Can't compute node evacuation using"
11419                                    " iallocator '%s': %s" %
11420                                    (self.op.iallocator, ial.info),
11421                                    errors.ECODE_NORES)
11422
11423       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11424
11425     elif self.op.remote_node is not None:
11426       assert self.op.mode == constants.NODE_EVAC_SEC
11427       jobs = [
11428         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11429                                         remote_node=self.op.remote_node,
11430                                         disks=[],
11431                                         mode=constants.REPLACE_DISK_CHG,
11432                                         early_release=self.op.early_release)]
11433         for instance_name in self.instance_names
11434         ]
11435
11436     else:
11437       raise errors.ProgrammerError("No iallocator or remote node")
11438
11439     return ResultWithJobs(jobs)
11440
11441
11442 def _SetOpEarlyRelease(early_release, op):
11443   """Sets C{early_release} flag on opcodes if available.
11444
11445   """
11446   try:
11447     op.early_release = early_release
11448   except AttributeError:
11449     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11450
11451   return op
11452
11453
11454 def _NodeEvacDest(use_nodes, group, nodes):
11455   """Returns group or nodes depending on caller's choice.
11456
11457   """
11458   if use_nodes:
11459     return utils.CommaJoin(nodes)
11460   else:
11461     return group
11462
11463
11464 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11465   """Unpacks the result of change-group and node-evacuate iallocator requests.
11466
11467   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11468   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11469
11470   @type lu: L{LogicalUnit}
11471   @param lu: Logical unit instance
11472   @type alloc_result: tuple/list
11473   @param alloc_result: Result from iallocator
11474   @type early_release: bool
11475   @param early_release: Whether to release locks early if possible
11476   @type use_nodes: bool
11477   @param use_nodes: Whether to display node names instead of groups
11478
11479   """
11480   (moved, failed, jobs) = alloc_result
11481
11482   if failed:
11483     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11484                                  for (name, reason) in failed)
11485     lu.LogWarning("Unable to evacuate instances %s", failreason)
11486     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11487
11488   if moved:
11489     lu.LogInfo("Instances to be moved: %s",
11490                utils.CommaJoin("%s (to %s)" %
11491                                (name, _NodeEvacDest(use_nodes, group, nodes))
11492                                for (name, group, nodes) in moved))
11493
11494   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11495               map(opcodes.OpCode.LoadOpCode, ops))
11496           for ops in jobs]
11497
11498
11499 class LUInstanceGrowDisk(LogicalUnit):
11500   """Grow a disk of an instance.
11501
11502   """
11503   HPATH = "disk-grow"
11504   HTYPE = constants.HTYPE_INSTANCE
11505   REQ_BGL = False
11506
11507   def ExpandNames(self):
11508     self._ExpandAndLockInstance()
11509     self.needed_locks[locking.LEVEL_NODE] = []
11510     self.needed_locks[locking.LEVEL_NODE_RES] = []
11511     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11512     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11513
11514   def DeclareLocks(self, level):
11515     if level == locking.LEVEL_NODE:
11516       self._LockInstancesNodes()
11517     elif level == locking.LEVEL_NODE_RES:
11518       # Copy node locks
11519       self.needed_locks[locking.LEVEL_NODE_RES] = \
11520         self.needed_locks[locking.LEVEL_NODE][:]
11521
11522   def BuildHooksEnv(self):
11523     """Build hooks env.
11524
11525     This runs on the master, the primary and all the secondaries.
11526
11527     """
11528     env = {
11529       "DISK": self.op.disk,
11530       "AMOUNT": self.op.amount,
11531       }
11532     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11533     return env
11534
11535   def BuildHooksNodes(self):
11536     """Build hooks nodes.
11537
11538     """
11539     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11540     return (nl, nl)
11541
11542   def CheckPrereq(self):
11543     """Check prerequisites.
11544
11545     This checks that the instance is in the cluster.
11546
11547     """
11548     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11549     assert instance is not None, \
11550       "Cannot retrieve locked instance %s" % self.op.instance_name
11551     nodenames = list(instance.all_nodes)
11552     for node in nodenames:
11553       _CheckNodeOnline(self, node)
11554
11555     self.instance = instance
11556
11557     if instance.disk_template not in constants.DTS_GROWABLE:
11558       raise errors.OpPrereqError("Instance's disk layout does not support"
11559                                  " growing", errors.ECODE_INVAL)
11560
11561     self.disk = instance.FindDisk(self.op.disk)
11562
11563     if instance.disk_template not in (constants.DT_FILE,
11564                                       constants.DT_SHARED_FILE,
11565                                       constants.DT_RBD):
11566       # TODO: check the free disk space for file, when that feature will be
11567       # supported
11568       _CheckNodesFreeDiskPerVG(self, nodenames,
11569                                self.disk.ComputeGrowth(self.op.amount))
11570
11571   def Exec(self, feedback_fn):
11572     """Execute disk grow.
11573
11574     """
11575     instance = self.instance
11576     disk = self.disk
11577
11578     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11579     assert (self.owned_locks(locking.LEVEL_NODE) ==
11580             self.owned_locks(locking.LEVEL_NODE_RES))
11581
11582     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11583     if not disks_ok:
11584       raise errors.OpExecError("Cannot activate block device to grow")
11585
11586     feedback_fn("Growing disk %s of instance '%s' by %s" %
11587                 (self.op.disk, instance.name,
11588                  utils.FormatUnit(self.op.amount, "h")))
11589
11590     # First run all grow ops in dry-run mode
11591     for node in instance.all_nodes:
11592       self.cfg.SetDiskID(disk, node)
11593       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11594       result.Raise("Grow request failed to node %s" % node)
11595
11596     # We know that (as far as we can test) operations across different
11597     # nodes will succeed, time to run it for real
11598     for node in instance.all_nodes:
11599       self.cfg.SetDiskID(disk, node)
11600       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11601       result.Raise("Grow request failed to node %s" % node)
11602
11603       # TODO: Rewrite code to work properly
11604       # DRBD goes into sync mode for a short amount of time after executing the
11605       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11606       # calling "resize" in sync mode fails. Sleeping for a short amount of
11607       # time is a work-around.
11608       time.sleep(5)
11609
11610     disk.RecordGrow(self.op.amount)
11611     self.cfg.Update(instance, feedback_fn)
11612
11613     # Changes have been recorded, release node lock
11614     _ReleaseLocks(self, locking.LEVEL_NODE)
11615
11616     # Downgrade lock while waiting for sync
11617     self.glm.downgrade(locking.LEVEL_INSTANCE)
11618
11619     if self.op.wait_for_sync:
11620       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11621       if disk_abort:
11622         self.proc.LogWarning("Disk sync-ing has not returned a good"
11623                              " status; please check the instance")
11624       if instance.admin_state != constants.ADMINST_UP:
11625         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11626     elif instance.admin_state != constants.ADMINST_UP:
11627       self.proc.LogWarning("Not shutting down the disk even if the instance is"
11628                            " not supposed to be running because no wait for"
11629                            " sync mode was requested")
11630
11631     assert self.owned_locks(locking.LEVEL_NODE_RES)
11632     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11633
11634
11635 class LUInstanceQueryData(NoHooksLU):
11636   """Query runtime instance data.
11637
11638   """
11639   REQ_BGL = False
11640
11641   def ExpandNames(self):
11642     self.needed_locks = {}
11643
11644     # Use locking if requested or when non-static information is wanted
11645     if not (self.op.static or self.op.use_locking):
11646       self.LogWarning("Non-static data requested, locks need to be acquired")
11647       self.op.use_locking = True
11648
11649     if self.op.instances or not self.op.use_locking:
11650       # Expand instance names right here
11651       self.wanted_names = _GetWantedInstances(self, self.op.instances)
11652     else:
11653       # Will use acquired locks
11654       self.wanted_names = None
11655
11656     if self.op.use_locking:
11657       self.share_locks = _ShareAll()
11658
11659       if self.wanted_names is None:
11660         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11661       else:
11662         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11663
11664       self.needed_locks[locking.LEVEL_NODE] = []
11665       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11666
11667   def DeclareLocks(self, level):
11668     if self.op.use_locking and level == locking.LEVEL_NODE:
11669       self._LockInstancesNodes()
11670
11671   def CheckPrereq(self):
11672     """Check prerequisites.
11673
11674     This only checks the optional instance list against the existing names.
11675
11676     """
11677     if self.wanted_names is None:
11678       assert self.op.use_locking, "Locking was not used"
11679       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11680
11681     self.wanted_instances = \
11682         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11683
11684   def _ComputeBlockdevStatus(self, node, instance_name, dev):
11685     """Returns the status of a block device
11686
11687     """
11688     if self.op.static or not node:
11689       return None
11690
11691     self.cfg.SetDiskID(dev, node)
11692
11693     result = self.rpc.call_blockdev_find(node, dev)
11694     if result.offline:
11695       return None
11696
11697     result.Raise("Can't compute disk status for %s" % instance_name)
11698
11699     status = result.payload
11700     if status is None:
11701       return None
11702
11703     return (status.dev_path, status.major, status.minor,
11704             status.sync_percent, status.estimated_time,
11705             status.is_degraded, status.ldisk_status)
11706
11707   def _ComputeDiskStatus(self, instance, snode, dev):
11708     """Compute block device status.
11709
11710     """
11711     if dev.dev_type in constants.LDS_DRBD:
11712       # we change the snode then (otherwise we use the one passed in)
11713       if dev.logical_id[0] == instance.primary_node:
11714         snode = dev.logical_id[1]
11715       else:
11716         snode = dev.logical_id[0]
11717
11718     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11719                                               instance.name, dev)
11720     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11721
11722     if dev.children:
11723       dev_children = map(compat.partial(self._ComputeDiskStatus,
11724                                         instance, snode),
11725                          dev.children)
11726     else:
11727       dev_children = []
11728
11729     return {
11730       "iv_name": dev.iv_name,
11731       "dev_type": dev.dev_type,
11732       "logical_id": dev.logical_id,
11733       "physical_id": dev.physical_id,
11734       "pstatus": dev_pstatus,
11735       "sstatus": dev_sstatus,
11736       "children": dev_children,
11737       "mode": dev.mode,
11738       "size": dev.size,
11739       }
11740
11741   def Exec(self, feedback_fn):
11742     """Gather and return data"""
11743     result = {}
11744
11745     cluster = self.cfg.GetClusterInfo()
11746
11747     pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11748                                           for i in self.wanted_instances)
11749     for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11750       if self.op.static or pnode.offline:
11751         remote_state = None
11752         if pnode.offline:
11753           self.LogWarning("Primary node %s is marked offline, returning static"
11754                           " information only for instance %s" %
11755                           (pnode.name, instance.name))
11756       else:
11757         remote_info = self.rpc.call_instance_info(instance.primary_node,
11758                                                   instance.name,
11759                                                   instance.hypervisor)
11760         remote_info.Raise("Error checking node %s" % instance.primary_node)
11761         remote_info = remote_info.payload
11762         if remote_info and "state" in remote_info:
11763           remote_state = "up"
11764         else:
11765           if instance.admin_state == constants.ADMINST_UP:
11766             remote_state = "down"
11767           else:
11768             remote_state = instance.admin_state
11769
11770       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11771                   instance.disks)
11772
11773       result[instance.name] = {
11774         "name": instance.name,
11775         "config_state": instance.admin_state,
11776         "run_state": remote_state,
11777         "pnode": instance.primary_node,
11778         "snodes": instance.secondary_nodes,
11779         "os": instance.os,
11780         # this happens to be the same format used for hooks
11781         "nics": _NICListToTuple(self, instance.nics),
11782         "disk_template": instance.disk_template,
11783         "disks": disks,
11784         "hypervisor": instance.hypervisor,
11785         "network_port": instance.network_port,
11786         "hv_instance": instance.hvparams,
11787         "hv_actual": cluster.FillHV(instance, skip_globals=True),
11788         "be_instance": instance.beparams,
11789         "be_actual": cluster.FillBE(instance),
11790         "os_instance": instance.osparams,
11791         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11792         "serial_no": instance.serial_no,
11793         "mtime": instance.mtime,
11794         "ctime": instance.ctime,
11795         "uuid": instance.uuid,
11796         }
11797
11798     return result
11799
11800
11801 def PrepareContainerMods(mods, private_fn):
11802   """Prepares a list of container modifications by adding a private data field.
11803
11804   @type mods: list of tuples; (operation, index, parameters)
11805   @param mods: List of modifications
11806   @type private_fn: callable or None
11807   @param private_fn: Callable for constructing a private data field for a
11808     modification
11809   @rtype: list
11810
11811   """
11812   if private_fn is None:
11813     fn = lambda: None
11814   else:
11815     fn = private_fn
11816
11817   return [(op, idx, params, fn()) for (op, idx, params) in mods]
11818
11819
11820 #: Type description for changes as returned by L{ApplyContainerMods}'s
11821 #: callbacks
11822 _TApplyContModsCbChanges = \
11823   ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
11824     ht.TNonEmptyString,
11825     ht.TAny,
11826     ])))
11827
11828
11829 def ApplyContainerMods(kind, container, chgdesc, mods,
11830                        create_fn, modify_fn, remove_fn):
11831   """Applies descriptions in C{mods} to C{container}.
11832
11833   @type kind: string
11834   @param kind: One-word item description
11835   @type container: list
11836   @param container: Container to modify
11837   @type chgdesc: None or list
11838   @param chgdesc: List of applied changes
11839   @type mods: list
11840   @param mods: Modifications as returned by L{PrepareContainerMods}
11841   @type create_fn: callable
11842   @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
11843     receives absolute item index, parameters and private data object as added
11844     by L{PrepareContainerMods}, returns tuple containing new item and changes
11845     as list
11846   @type modify_fn: callable
11847   @param modify_fn: Callback for modifying an existing item
11848     (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
11849     and private data object as added by L{PrepareContainerMods}, returns
11850     changes as list
11851   @type remove_fn: callable
11852   @param remove_fn: Callback on removing item; receives absolute item index,
11853     item and private data object as added by L{PrepareContainerMods}
11854
11855   """
11856   for (op, idx, params, private) in mods:
11857     if idx == -1:
11858       # Append
11859       absidx = len(container) - 1
11860     elif idx < 0:
11861       raise IndexError("Not accepting negative indices")
11862     else:
11863       absidx = idx
11864
11865     changes = None
11866
11867     if op == constants.DDM_ADD:
11868       if create_fn is None:
11869         item = params
11870       else:
11871         (item, changes) = create_fn(absidx + 1, params, private)
11872
11873       if idx == -1:
11874         container.append(item)
11875       else:
11876         assert idx >= 0
11877         # list.insert does so before the specified index
11878         container.insert(idx, item)
11879     else:
11880       # Retrieve existing item
11881       try:
11882         item = container[absidx]
11883       except IndexError:
11884         raise IndexError("Invalid %s index %s" % (kind, idx))
11885
11886       if op == constants.DDM_REMOVE:
11887         assert not params
11888
11889         if remove_fn is not None:
11890           remove_fn(absidx, item, private)
11891
11892         changes = [("%s/%s" % (kind, absidx), "remove")]
11893
11894         assert container[absidx] == item
11895         del container[absidx]
11896       elif op == constants.DDM_MODIFY:
11897         if modify_fn is not None:
11898           changes = modify_fn(absidx, item, params, private)
11899       else:
11900         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11901
11902     assert _TApplyContModsCbChanges(changes)
11903
11904     if not (chgdesc is None or changes is None):
11905       chgdesc.extend(changes)
11906
11907
11908 class _InstNicModPrivate:
11909   """Data structure for network interface modifications.
11910
11911   Used by L{LUInstanceSetParams}.
11912
11913   """
11914   def __init__(self):
11915     self.params = None
11916     self.filled = None
11917
11918
11919 class LUInstanceSetParams(LogicalUnit):
11920   """Modifies an instances's parameters.
11921
11922   """
11923   HPATH = "instance-modify"
11924   HTYPE = constants.HTYPE_INSTANCE
11925   REQ_BGL = False
11926
11927   @staticmethod
11928   def _UpgradeDiskNicMods(kind, mods, verify_fn):
11929     assert ht.TList(mods)
11930     assert not mods or len(mods[0]) in (2, 3)
11931
11932     if mods and len(mods[0]) == 2:
11933       result = []
11934
11935       addremove = 0
11936       for op, params in mods:
11937         if op in (constants.DDM_ADD, constants.DDM_REMOVE):
11938           result.append((op, -1, params))
11939           addremove += 1
11940
11941           if addremove > 1:
11942             raise errors.OpPrereqError("Only one %s add or remove operation is"
11943                                        " supported at a time" % kind,
11944                                        errors.ECODE_INVAL)
11945         else:
11946           result.append((constants.DDM_MODIFY, op, params))
11947
11948       assert verify_fn(result)
11949     else:
11950       result = mods
11951
11952     return result
11953
11954   @staticmethod
11955   def _CheckMods(kind, mods, key_types, item_fn):
11956     """Ensures requested disk/NIC modifications are valid.
11957
11958     """
11959     for (op, _, params) in mods:
11960       assert ht.TDict(params)
11961
11962       utils.ForceDictType(params, key_types)
11963
11964       if op == constants.DDM_REMOVE:
11965         if params:
11966           raise errors.OpPrereqError("No settings should be passed when"
11967                                      " removing a %s" % kind,
11968                                      errors.ECODE_INVAL)
11969       elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
11970         item_fn(op, params)
11971       else:
11972         raise errors.ProgrammerError("Unhandled operation '%s'" % op)
11973
11974   @staticmethod
11975   def _VerifyDiskModification(op, params):
11976     """Verifies a disk modification.
11977
11978     """
11979     if op == constants.DDM_ADD:
11980       mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11981       if mode not in constants.DISK_ACCESS_SET:
11982         raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11983                                    errors.ECODE_INVAL)
11984
11985       size = params.get(constants.IDISK_SIZE, None)
11986       if size is None:
11987         raise errors.OpPrereqError("Required disk parameter '%s' missing" %
11988                                    constants.IDISK_SIZE, errors.ECODE_INVAL)
11989
11990       try:
11991         size = int(size)
11992       except (TypeError, ValueError), err:
11993         raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
11994                                    errors.ECODE_INVAL)
11995
11996       params[constants.IDISK_SIZE] = size
11997
11998     elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
11999       raise errors.OpPrereqError("Disk size change not possible, use"
12000                                  " grow-disk", errors.ECODE_INVAL)
12001
12002   @staticmethod
12003   def _VerifyNicModification(op, params):
12004     """Verifies a network interface modification.
12005
12006     """
12007     if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12008       ip = params.get(constants.INIC_IP, None)
12009       if ip is None:
12010         pass
12011       elif ip.lower() == constants.VALUE_NONE:
12012         params[constants.INIC_IP] = None
12013       elif not netutils.IPAddress.IsValid(ip):
12014         raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12015                                    errors.ECODE_INVAL)
12016
12017       bridge = params.get("bridge", None)
12018       link = params.get(constants.INIC_LINK, None)
12019       if bridge and link:
12020         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12021                                    " at the same time", errors.ECODE_INVAL)
12022       elif bridge and bridge.lower() == constants.VALUE_NONE:
12023         params["bridge"] = None
12024       elif link and link.lower() == constants.VALUE_NONE:
12025         params[constants.INIC_LINK] = None
12026
12027       if op == constants.DDM_ADD:
12028         macaddr = params.get(constants.INIC_MAC, None)
12029         if macaddr is None:
12030           params[constants.INIC_MAC] = constants.VALUE_AUTO
12031
12032       if constants.INIC_MAC in params:
12033         macaddr = params[constants.INIC_MAC]
12034         if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12035           macaddr = utils.NormalizeAndValidateMac(macaddr)
12036
12037         if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
12038           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12039                                      " modifying an existing NIC",
12040                                      errors.ECODE_INVAL)
12041
12042   def CheckArguments(self):
12043     if not (self.op.nics or self.op.disks or self.op.disk_template or
12044             self.op.hvparams or self.op.beparams or self.op.os_name or
12045             self.op.offline is not None or self.op.runtime_mem):
12046       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12047
12048     if self.op.hvparams:
12049       _CheckGlobalHvParams(self.op.hvparams)
12050
12051     self.op.disks = \
12052       self._UpgradeDiskNicMods("disk", self.op.disks,
12053         opcodes.OpInstanceSetParams.TestDiskModifications)
12054     self.op.nics = \
12055       self._UpgradeDiskNicMods("NIC", self.op.nics,
12056         opcodes.OpInstanceSetParams.TestNicModifications)
12057
12058     # Check disk modifications
12059     self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12060                     self._VerifyDiskModification)
12061
12062     if self.op.disks and self.op.disk_template is not None:
12063       raise errors.OpPrereqError("Disk template conversion and other disk"
12064                                  " changes not supported at the same time",
12065                                  errors.ECODE_INVAL)
12066
12067     if (self.op.disk_template and
12068         self.op.disk_template in constants.DTS_INT_MIRROR and
12069         self.op.remote_node is None):
12070       raise errors.OpPrereqError("Changing the disk template to a mirrored"
12071                                  " one requires specifying a secondary node",
12072                                  errors.ECODE_INVAL)
12073
12074     # Check NIC modifications
12075     self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12076                     self._VerifyNicModification)
12077
12078   def ExpandNames(self):
12079     self._ExpandAndLockInstance()
12080     # Can't even acquire node locks in shared mode as upcoming changes in
12081     # Ganeti 2.6 will start to modify the node object on disk conversion
12082     self.needed_locks[locking.LEVEL_NODE] = []
12083     self.needed_locks[locking.LEVEL_NODE_RES] = []
12084     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
12085
12086   def DeclareLocks(self, level):
12087     # TODO: Acquire group lock in shared mode (disk parameters)
12088     if level == locking.LEVEL_NODE:
12089       self._LockInstancesNodes()
12090       if self.op.disk_template and self.op.remote_node:
12091         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12092         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12093     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
12094       # Copy node locks
12095       self.needed_locks[locking.LEVEL_NODE_RES] = \
12096         self.needed_locks[locking.LEVEL_NODE][:]
12097
12098   def BuildHooksEnv(self):
12099     """Build hooks env.
12100
12101     This runs on the master, primary and secondaries.
12102
12103     """
12104     args = dict()
12105     if constants.BE_MINMEM in self.be_new:
12106       args["minmem"] = self.be_new[constants.BE_MINMEM]
12107     if constants.BE_MAXMEM in self.be_new:
12108       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12109     if constants.BE_VCPUS in self.be_new:
12110       args["vcpus"] = self.be_new[constants.BE_VCPUS]
12111     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12112     # information at all.
12113
12114     if self._new_nics is not None:
12115       nics = []
12116
12117       for nic in self._new_nics:
12118         nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12119         mode = nicparams[constants.NIC_MODE]
12120         link = nicparams[constants.NIC_LINK]
12121         nics.append((nic.ip, nic.mac, mode, link))
12122
12123       args["nics"] = nics
12124
12125     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12126     if self.op.disk_template:
12127       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12128     if self.op.runtime_mem:
12129       env["RUNTIME_MEMORY"] = self.op.runtime_mem
12130
12131     return env
12132
12133   def BuildHooksNodes(self):
12134     """Build hooks nodes.
12135
12136     """
12137     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
12138     return (nl, nl)
12139
12140   def _PrepareNicModification(self, params, private, old_ip, old_params,
12141                               cluster, pnode):
12142     update_params_dict = dict([(key, params[key])
12143                                for key in constants.NICS_PARAMETERS
12144                                if key in params])
12145
12146     if "bridge" in params:
12147       update_params_dict[constants.NIC_LINK] = params["bridge"]
12148
12149     new_params = _GetUpdatedParams(old_params, update_params_dict)
12150     utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
12151
12152     new_filled_params = cluster.SimpleFillNIC(new_params)
12153     objects.NIC.CheckParameterSyntax(new_filled_params)
12154
12155     new_mode = new_filled_params[constants.NIC_MODE]
12156     if new_mode == constants.NIC_MODE_BRIDGED:
12157       bridge = new_filled_params[constants.NIC_LINK]
12158       msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12159       if msg:
12160         msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12161         if self.op.force:
12162           self.warn.append(msg)
12163         else:
12164           raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12165
12166     elif new_mode == constants.NIC_MODE_ROUTED:
12167       ip = params.get(constants.INIC_IP, old_ip)
12168       if ip is None:
12169         raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12170                                    " on a routed NIC", errors.ECODE_INVAL)
12171
12172     if constants.INIC_MAC in params:
12173       mac = params[constants.INIC_MAC]
12174       if mac is None:
12175         raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12176                                    errors.ECODE_INVAL)
12177       elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12178         # otherwise generate the MAC address
12179         params[constants.INIC_MAC] = \
12180           self.cfg.GenerateMAC(self.proc.GetECId())
12181       else:
12182         # or validate/reserve the current one
12183         try:
12184           self.cfg.ReserveMAC(mac, self.proc.GetECId())
12185         except errors.ReservationError:
12186           raise errors.OpPrereqError("MAC address '%s' already in use"
12187                                      " in cluster" % mac,
12188                                      errors.ECODE_NOTUNIQUE)
12189
12190     private.params = new_params
12191     private.filled = new_filled_params
12192
12193     return (None, None)
12194
12195   def CheckPrereq(self):
12196     """Check prerequisites.
12197
12198     This only checks the instance list against the existing names.
12199
12200     """
12201     # checking the new params on the primary/secondary nodes
12202
12203     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12204     cluster = self.cluster = self.cfg.GetClusterInfo()
12205     assert self.instance is not None, \
12206       "Cannot retrieve locked instance %s" % self.op.instance_name
12207     pnode = instance.primary_node
12208     nodelist = list(instance.all_nodes)
12209     pnode_info = self.cfg.GetNodeInfo(pnode)
12210     self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
12211
12212     # Prepare disk/NIC modifications
12213     self.diskmod = PrepareContainerMods(self.op.disks, None)
12214     self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
12215
12216     # OS change
12217     if self.op.os_name and not self.op.force:
12218       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12219                       self.op.force_variant)
12220       instance_os = self.op.os_name
12221     else:
12222       instance_os = instance.os
12223
12224     assert not (self.op.disk_template and self.op.disks), \
12225       "Can't modify disk template and apply disk changes at the same time"
12226
12227     if self.op.disk_template:
12228       if instance.disk_template == self.op.disk_template:
12229         raise errors.OpPrereqError("Instance already has disk template %s" %
12230                                    instance.disk_template, errors.ECODE_INVAL)
12231
12232       if (instance.disk_template,
12233           self.op.disk_template) not in self._DISK_CONVERSIONS:
12234         raise errors.OpPrereqError("Unsupported disk template conversion from"
12235                                    " %s to %s" % (instance.disk_template,
12236                                                   self.op.disk_template),
12237                                    errors.ECODE_INVAL)
12238       _CheckInstanceState(self, instance, INSTANCE_DOWN,
12239                           msg="cannot change disk template")
12240       if self.op.disk_template in constants.DTS_INT_MIRROR:
12241         if self.op.remote_node == pnode:
12242           raise errors.OpPrereqError("Given new secondary node %s is the same"
12243                                      " as the primary node of the instance" %
12244                                      self.op.remote_node, errors.ECODE_STATE)
12245         _CheckNodeOnline(self, self.op.remote_node)
12246         _CheckNodeNotDrained(self, self.op.remote_node)
12247         # FIXME: here we assume that the old instance type is DT_PLAIN
12248         assert instance.disk_template == constants.DT_PLAIN
12249         disks = [{constants.IDISK_SIZE: d.size,
12250                   constants.IDISK_VG: d.logical_id[0]}
12251                  for d in instance.disks]
12252         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12253         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
12254
12255         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12256         snode_group = self.cfg.GetNodeGroup(snode_info.group)
12257         ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
12258         _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12259                                 ignore=self.op.ignore_ipolicy)
12260         if pnode_info.group != snode_info.group:
12261           self.LogWarning("The primary and secondary nodes are in two"
12262                           " different node groups; the disk parameters"
12263                           " from the first disk's node group will be"
12264                           " used")
12265
12266     # hvparams processing
12267     if self.op.hvparams:
12268       hv_type = instance.hypervisor
12269       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12270       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12271       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
12272
12273       # local check
12274       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12275       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12276       self.hv_proposed = self.hv_new = hv_new # the new actual values
12277       self.hv_inst = i_hvdict # the new dict (without defaults)
12278     else:
12279       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12280                                               instance.hvparams)
12281       self.hv_new = self.hv_inst = {}
12282
12283     # beparams processing
12284     if self.op.beparams:
12285       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12286                                    use_none=True)
12287       objects.UpgradeBeParams(i_bedict)
12288       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12289       be_new = cluster.SimpleFillBE(i_bedict)
12290       self.be_proposed = self.be_new = be_new # the new actual values
12291       self.be_inst = i_bedict # the new dict (without defaults)
12292     else:
12293       self.be_new = self.be_inst = {}
12294       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12295     be_old = cluster.FillBE(instance)
12296
12297     # CPU param validation -- checking every time a paramtere is
12298     # changed to cover all cases where either CPU mask or vcpus have
12299     # changed
12300     if (constants.BE_VCPUS in self.be_proposed and
12301         constants.HV_CPU_MASK in self.hv_proposed):
12302       cpu_list = \
12303         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12304       # Verify mask is consistent with number of vCPUs. Can skip this
12305       # test if only 1 entry in the CPU mask, which means same mask
12306       # is applied to all vCPUs.
12307       if (len(cpu_list) > 1 and
12308           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12309         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12310                                    " CPU mask [%s]" %
12311                                    (self.be_proposed[constants.BE_VCPUS],
12312                                     self.hv_proposed[constants.HV_CPU_MASK]),
12313                                    errors.ECODE_INVAL)
12314
12315       # Only perform this test if a new CPU mask is given
12316       if constants.HV_CPU_MASK in self.hv_new:
12317         # Calculate the largest CPU number requested
12318         max_requested_cpu = max(map(max, cpu_list))
12319         # Check that all of the instance's nodes have enough physical CPUs to
12320         # satisfy the requested CPU mask
12321         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12322                                 max_requested_cpu + 1, instance.hypervisor)
12323
12324     # osparams processing
12325     if self.op.osparams:
12326       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12327       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12328       self.os_inst = i_osdict # the new dict (without defaults)
12329     else:
12330       self.os_inst = {}
12331
12332     self.warn = []
12333
12334     #TODO(dynmem): do the appropriate check involving MINMEM
12335     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12336         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12337       mem_check_list = [pnode]
12338       if be_new[constants.BE_AUTO_BALANCE]:
12339         # either we changed auto_balance to yes or it was from before
12340         mem_check_list.extend(instance.secondary_nodes)
12341       instance_info = self.rpc.call_instance_info(pnode, instance.name,
12342                                                   instance.hypervisor)
12343       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12344                                          [instance.hypervisor])
12345       pninfo = nodeinfo[pnode]
12346       msg = pninfo.fail_msg
12347       if msg:
12348         # Assume the primary node is unreachable and go ahead
12349         self.warn.append("Can't get info from primary node %s: %s" %
12350                          (pnode, msg))
12351       else:
12352         (_, _, (pnhvinfo, )) = pninfo.payload
12353         if not isinstance(pnhvinfo.get("memory_free", None), int):
12354           self.warn.append("Node data from primary node %s doesn't contain"
12355                            " free memory information" % pnode)
12356         elif instance_info.fail_msg:
12357           self.warn.append("Can't get instance runtime information: %s" %
12358                           instance_info.fail_msg)
12359         else:
12360           if instance_info.payload:
12361             current_mem = int(instance_info.payload["memory"])
12362           else:
12363             # Assume instance not running
12364             # (there is a slight race condition here, but it's not very
12365             # probable, and we have no other way to check)
12366             # TODO: Describe race condition
12367             current_mem = 0
12368           #TODO(dynmem): do the appropriate check involving MINMEM
12369           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12370                       pnhvinfo["memory_free"])
12371           if miss_mem > 0:
12372             raise errors.OpPrereqError("This change will prevent the instance"
12373                                        " from starting, due to %d MB of memory"
12374                                        " missing on its primary node" %
12375                                        miss_mem,
12376                                        errors.ECODE_NORES)
12377
12378       if be_new[constants.BE_AUTO_BALANCE]:
12379         for node, nres in nodeinfo.items():
12380           if node not in instance.secondary_nodes:
12381             continue
12382           nres.Raise("Can't get info from secondary node %s" % node,
12383                      prereq=True, ecode=errors.ECODE_STATE)
12384           (_, _, (nhvinfo, )) = nres.payload
12385           if not isinstance(nhvinfo.get("memory_free", None), int):
12386             raise errors.OpPrereqError("Secondary node %s didn't return free"
12387                                        " memory information" % node,
12388                                        errors.ECODE_STATE)
12389           #TODO(dynmem): do the appropriate check involving MINMEM
12390           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12391             raise errors.OpPrereqError("This change will prevent the instance"
12392                                        " from failover to its secondary node"
12393                                        " %s, due to not enough memory" % node,
12394                                        errors.ECODE_STATE)
12395
12396     if self.op.runtime_mem:
12397       remote_info = self.rpc.call_instance_info(instance.primary_node,
12398                                                 instance.name,
12399                                                 instance.hypervisor)
12400       remote_info.Raise("Error checking node %s" % instance.primary_node)
12401       if not remote_info.payload: # not running already
12402         raise errors.OpPrereqError("Instance %s is not running" % instance.name,
12403                                    errors.ECODE_STATE)
12404
12405       current_memory = remote_info.payload["memory"]
12406       if (not self.op.force and
12407            (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12408             self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12409         raise errors.OpPrereqError("Instance %s must have memory between %d"
12410                                    " and %d MB of memory unless --force is"
12411                                    " given" % (instance.name,
12412                                     self.be_proposed[constants.BE_MINMEM],
12413                                     self.be_proposed[constants.BE_MAXMEM]),
12414                                    errors.ECODE_INVAL)
12415
12416       if self.op.runtime_mem > current_memory:
12417         _CheckNodeFreeMemory(self, instance.primary_node,
12418                              "ballooning memory for instance %s" %
12419                              instance.name,
12420                              self.op.memory - current_memory,
12421                              instance.hypervisor)
12422
12423     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12424       raise errors.OpPrereqError("Disk operations not supported for"
12425                                  " diskless instances",
12426                                  errors.ECODE_INVAL)
12427
12428     def _PrepareNicCreate(_, params, private):
12429       return self._PrepareNicModification(params, private, None, {},
12430                                           cluster, pnode)
12431
12432     def _PrepareNicMod(_, nic, params, private):
12433       return self._PrepareNicModification(params, private, nic.ip,
12434                                           nic.nicparams, cluster, pnode)
12435
12436     # Verify NIC changes (operating on copy)
12437     nics = instance.nics[:]
12438     ApplyContainerMods("NIC", nics, None, self.nicmod,
12439                        _PrepareNicCreate, _PrepareNicMod, None)
12440     if len(nics) > constants.MAX_NICS:
12441       raise errors.OpPrereqError("Instance has too many network interfaces"
12442                                  " (%d), cannot add more" % constants.MAX_NICS,
12443                                  errors.ECODE_STATE)
12444
12445     # Verify disk changes (operating on a copy)
12446     disks = instance.disks[:]
12447     ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
12448     if len(disks) > constants.MAX_DISKS:
12449       raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
12450                                  " more" % constants.MAX_DISKS,
12451                                  errors.ECODE_STATE)
12452
12453     if self.op.offline is not None:
12454       if self.op.offline:
12455         msg = "can't change to offline"
12456       else:
12457         msg = "can't change to online"
12458       _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
12459
12460     # Pre-compute NIC changes (necessary to use result in hooks)
12461     self._nic_chgdesc = []
12462     if self.nicmod:
12463       # Operate on copies as this is still in prereq
12464       nics = [nic.Copy() for nic in instance.nics]
12465       ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
12466                          self._CreateNewNic, self._ApplyNicMods, None)
12467       self._new_nics = nics
12468     else:
12469       self._new_nics = None
12470
12471   def _ConvertPlainToDrbd(self, feedback_fn):
12472     """Converts an instance from plain to drbd.
12473
12474     """
12475     feedback_fn("Converting template to drbd")
12476     instance = self.instance
12477     pnode = instance.primary_node
12478     snode = self.op.remote_node
12479
12480     assert instance.disk_template == constants.DT_PLAIN
12481
12482     # create a fake disk info for _GenerateDiskTemplate
12483     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12484                   constants.IDISK_VG: d.logical_id[0]}
12485                  for d in instance.disks]
12486     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12487                                       instance.name, pnode, [snode],
12488                                       disk_info, None, None, 0, feedback_fn,
12489                                       self.diskparams)
12490     info = _GetInstanceInfoText(instance)
12491     feedback_fn("Creating aditional volumes...")
12492     # first, create the missing data and meta devices
12493     for disk in new_disks:
12494       # unfortunately this is... not too nice
12495       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12496                             info, True)
12497       for child in disk.children:
12498         _CreateSingleBlockDev(self, snode, instance, child, info, True)
12499     # at this stage, all new LVs have been created, we can rename the
12500     # old ones
12501     feedback_fn("Renaming original volumes...")
12502     rename_list = [(o, n.children[0].logical_id)
12503                    for (o, n) in zip(instance.disks, new_disks)]
12504     result = self.rpc.call_blockdev_rename(pnode, rename_list)
12505     result.Raise("Failed to rename original LVs")
12506
12507     feedback_fn("Initializing DRBD devices...")
12508     # all child devices are in place, we can now create the DRBD devices
12509     for disk in new_disks:
12510       for node in [pnode, snode]:
12511         f_create = node == pnode
12512         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12513
12514     # at this point, the instance has been modified
12515     instance.disk_template = constants.DT_DRBD8
12516     instance.disks = new_disks
12517     self.cfg.Update(instance, feedback_fn)
12518
12519     # Release node locks while waiting for sync
12520     _ReleaseLocks(self, locking.LEVEL_NODE)
12521
12522     # disks are created, waiting for sync
12523     disk_abort = not _WaitForSync(self, instance,
12524                                   oneshot=not self.op.wait_for_sync)
12525     if disk_abort:
12526       raise errors.OpExecError("There are some degraded disks for"
12527                                " this instance, please cleanup manually")
12528
12529     # Node resource locks will be released by caller
12530
12531   def _ConvertDrbdToPlain(self, feedback_fn):
12532     """Converts an instance from drbd to plain.
12533
12534     """
12535     instance = self.instance
12536
12537     assert len(instance.secondary_nodes) == 1
12538     assert instance.disk_template == constants.DT_DRBD8
12539
12540     pnode = instance.primary_node
12541     snode = instance.secondary_nodes[0]
12542     feedback_fn("Converting template to plain")
12543
12544     old_disks = instance.disks
12545     new_disks = [d.children[0] for d in old_disks]
12546
12547     # copy over size and mode
12548     for parent, child in zip(old_disks, new_disks):
12549       child.size = parent.size
12550       child.mode = parent.mode
12551
12552     # update instance structure
12553     instance.disks = new_disks
12554     instance.disk_template = constants.DT_PLAIN
12555     self.cfg.Update(instance, feedback_fn)
12556
12557     # Release locks in case removing disks takes a while
12558     _ReleaseLocks(self, locking.LEVEL_NODE)
12559
12560     feedback_fn("Removing volumes on the secondary node...")
12561     for disk in old_disks:
12562       self.cfg.SetDiskID(disk, snode)
12563       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12564       if msg:
12565         self.LogWarning("Could not remove block device %s on node %s,"
12566                         " continuing anyway: %s", disk.iv_name, snode, msg)
12567
12568     feedback_fn("Removing unneeded volumes on the primary node...")
12569     for idx, disk in enumerate(old_disks):
12570       meta = disk.children[1]
12571       self.cfg.SetDiskID(meta, pnode)
12572       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12573       if msg:
12574         self.LogWarning("Could not remove metadata for disk %d on node %s,"
12575                         " continuing anyway: %s", idx, pnode, msg)
12576
12577     # this is a DRBD disk, return its port to the pool
12578     for disk in old_disks:
12579       tcp_port = disk.logical_id[2]
12580       self.cfg.AddTcpUdpPort(tcp_port)
12581
12582     # Node resource locks will be released by caller
12583
12584   def _CreateNewDisk(self, idx, params, _):
12585     """Creates a new disk.
12586
12587     """
12588     instance = self.instance
12589
12590     # add a new disk
12591     if instance.disk_template in constants.DTS_FILEBASED:
12592       (file_driver, file_path) = instance.disks[0].logical_id
12593       file_path = os.path.dirname(file_path)
12594     else:
12595       file_driver = file_path = None
12596
12597     disk = \
12598       _GenerateDiskTemplate(self, instance.disk_template, instance.name,
12599                             instance.primary_node, instance.secondary_nodes,
12600                             [params], file_path, file_driver, idx,
12601                             self.Log, self.diskparams)[0]
12602
12603     info = _GetInstanceInfoText(instance)
12604
12605     logging.info("Creating volume %s for instance %s",
12606                  disk.iv_name, instance.name)
12607     # Note: this needs to be kept in sync with _CreateDisks
12608     #HARDCODE
12609     for node in instance.all_nodes:
12610       f_create = (node == instance.primary_node)
12611       try:
12612         _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
12613       except errors.OpExecError, err:
12614         self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
12615                         disk.iv_name, disk, node, err)
12616
12617     return (disk, [
12618       ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
12619       ])
12620
12621   @staticmethod
12622   def _ModifyDisk(idx, disk, params, _):
12623     """Modifies a disk.
12624
12625     """
12626     disk.mode = params[constants.IDISK_MODE]
12627
12628     return [
12629       ("disk.mode/%d" % idx, disk.mode),
12630       ]
12631
12632   def _RemoveDisk(self, idx, root, _):
12633     """Removes a disk.
12634
12635     """
12636     for node, disk in root.ComputeNodeTree(self.instance.primary_node):
12637       self.cfg.SetDiskID(disk, node)
12638       msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12639       if msg:
12640         self.LogWarning("Could not remove disk/%d on node '%s': %s,"
12641                         " continuing anyway", idx, node, msg)
12642
12643     # if this is a DRBD disk, return its port to the pool
12644     if root.dev_type in constants.LDS_DRBD:
12645       self.cfg.AddTcpUdpPort(root.logical_id[2])
12646
12647   @staticmethod
12648   def _CreateNewNic(idx, params, private):
12649     """Creates data structure for a new network interface.
12650
12651     """
12652     mac = params[constants.INIC_MAC]
12653     ip = params.get(constants.INIC_IP, None)
12654     nicparams = private.params
12655
12656     return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
12657       ("nic.%d" % idx,
12658        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12659        (mac, ip, private.filled[constants.NIC_MODE],
12660        private.filled[constants.NIC_LINK])),
12661       ])
12662
12663   @staticmethod
12664   def _ApplyNicMods(idx, nic, params, private):
12665     """Modifies a network interface.
12666
12667     """
12668     changes = []
12669
12670     for key in [constants.INIC_MAC, constants.INIC_IP]:
12671       if key in params:
12672         changes.append(("nic.%s/%d" % (key, idx), params[key]))
12673         setattr(nic, key, params[key])
12674
12675     if private.params:
12676       nic.nicparams = private.params
12677
12678       for (key, val) in params.items():
12679         changes.append(("nic.%s/%d" % (key, idx), val))
12680
12681     return changes
12682
12683   def Exec(self, feedback_fn):
12684     """Modifies an instance.
12685
12686     All parameters take effect only at the next restart of the instance.
12687
12688     """
12689     # Process here the warnings from CheckPrereq, as we don't have a
12690     # feedback_fn there.
12691     # TODO: Replace with self.LogWarning
12692     for warn in self.warn:
12693       feedback_fn("WARNING: %s" % warn)
12694
12695     assert ((self.op.disk_template is None) ^
12696             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12697       "Not owning any node resource locks"
12698
12699     result = []
12700     instance = self.instance
12701
12702     # runtime memory
12703     if self.op.runtime_mem:
12704       rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
12705                                                      instance,
12706                                                      self.op.runtime_mem)
12707       rpcres.Raise("Cannot modify instance runtime memory")
12708       result.append(("runtime_memory", self.op.runtime_mem))
12709
12710     # Apply disk changes
12711     ApplyContainerMods("disk", instance.disks, result, self.diskmod,
12712                        self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
12713
12714     if self.op.disk_template:
12715       if __debug__:
12716         check_nodes = set(instance.all_nodes)
12717         if self.op.remote_node:
12718           check_nodes.add(self.op.remote_node)
12719         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12720           owned = self.owned_locks(level)
12721           assert not (check_nodes - owned), \
12722             ("Not owning the correct locks, owning %r, expected at least %r" %
12723              (owned, check_nodes))
12724
12725       r_shut = _ShutdownInstanceDisks(self, instance)
12726       if not r_shut:
12727         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12728                                  " proceed with disk template conversion")
12729       mode = (instance.disk_template, self.op.disk_template)
12730       try:
12731         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12732       except:
12733         self.cfg.ReleaseDRBDMinors(instance.name)
12734         raise
12735       result.append(("disk_template", self.op.disk_template))
12736
12737       assert instance.disk_template == self.op.disk_template, \
12738         ("Expected disk template '%s', found '%s'" %
12739          (self.op.disk_template, instance.disk_template))
12740
12741     # Release node and resource locks if there are any (they might already have
12742     # been released during disk conversion)
12743     _ReleaseLocks(self, locking.LEVEL_NODE)
12744     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12745
12746     # Apply NIC changes
12747     if self._new_nics is not None:
12748       instance.nics = self._new_nics
12749       result.extend(self._nic_chgdesc)
12750
12751     # hvparams changes
12752     if self.op.hvparams:
12753       instance.hvparams = self.hv_inst
12754       for key, val in self.op.hvparams.iteritems():
12755         result.append(("hv/%s" % key, val))
12756
12757     # beparams changes
12758     if self.op.beparams:
12759       instance.beparams = self.be_inst
12760       for key, val in self.op.beparams.iteritems():
12761         result.append(("be/%s" % key, val))
12762
12763     # OS change
12764     if self.op.os_name:
12765       instance.os = self.op.os_name
12766
12767     # osparams changes
12768     if self.op.osparams:
12769       instance.osparams = self.os_inst
12770       for key, val in self.op.osparams.iteritems():
12771         result.append(("os/%s" % key, val))
12772
12773     if self.op.offline is None:
12774       # Ignore
12775       pass
12776     elif self.op.offline:
12777       # Mark instance as offline
12778       self.cfg.MarkInstanceOffline(instance.name)
12779       result.append(("admin_state", constants.ADMINST_OFFLINE))
12780     else:
12781       # Mark instance as online, but stopped
12782       self.cfg.MarkInstanceDown(instance.name)
12783       result.append(("admin_state", constants.ADMINST_DOWN))
12784
12785     self.cfg.Update(instance, feedback_fn)
12786
12787     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12788                 self.owned_locks(locking.LEVEL_NODE)), \
12789       "All node locks should have been released by now"
12790
12791     return result
12792
12793   _DISK_CONVERSIONS = {
12794     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12795     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12796     }
12797
12798
12799 class LUInstanceChangeGroup(LogicalUnit):
12800   HPATH = "instance-change-group"
12801   HTYPE = constants.HTYPE_INSTANCE
12802   REQ_BGL = False
12803
12804   def ExpandNames(self):
12805     self.share_locks = _ShareAll()
12806     self.needed_locks = {
12807       locking.LEVEL_NODEGROUP: [],
12808       locking.LEVEL_NODE: [],
12809       }
12810
12811     self._ExpandAndLockInstance()
12812
12813     if self.op.target_groups:
12814       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12815                                   self.op.target_groups)
12816     else:
12817       self.req_target_uuids = None
12818
12819     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12820
12821   def DeclareLocks(self, level):
12822     if level == locking.LEVEL_NODEGROUP:
12823       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12824
12825       if self.req_target_uuids:
12826         lock_groups = set(self.req_target_uuids)
12827
12828         # Lock all groups used by instance optimistically; this requires going
12829         # via the node before it's locked, requiring verification later on
12830         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12831         lock_groups.update(instance_groups)
12832       else:
12833         # No target groups, need to lock all of them
12834         lock_groups = locking.ALL_SET
12835
12836       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12837
12838     elif level == locking.LEVEL_NODE:
12839       if self.req_target_uuids:
12840         # Lock all nodes used by instances
12841         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12842         self._LockInstancesNodes()
12843
12844         # Lock all nodes in all potential target groups
12845         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12846                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12847         member_nodes = [node_name
12848                         for group in lock_groups
12849                         for node_name in self.cfg.GetNodeGroup(group).members]
12850         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12851       else:
12852         # Lock all nodes as all groups are potential targets
12853         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12854
12855   def CheckPrereq(self):
12856     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12857     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12858     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12859
12860     assert (self.req_target_uuids is None or
12861             owned_groups.issuperset(self.req_target_uuids))
12862     assert owned_instances == set([self.op.instance_name])
12863
12864     # Get instance information
12865     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12866
12867     # Check if node groups for locked instance are still correct
12868     assert owned_nodes.issuperset(self.instance.all_nodes), \
12869       ("Instance %s's nodes changed while we kept the lock" %
12870        self.op.instance_name)
12871
12872     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12873                                            owned_groups)
12874
12875     if self.req_target_uuids:
12876       # User requested specific target groups
12877       self.target_uuids = self.req_target_uuids
12878     else:
12879       # All groups except those used by the instance are potential targets
12880       self.target_uuids = owned_groups - inst_groups
12881
12882     conflicting_groups = self.target_uuids & inst_groups
12883     if conflicting_groups:
12884       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12885                                  " used by the instance '%s'" %
12886                                  (utils.CommaJoin(conflicting_groups),
12887                                   self.op.instance_name),
12888                                  errors.ECODE_INVAL)
12889
12890     if not self.target_uuids:
12891       raise errors.OpPrereqError("There are no possible target groups",
12892                                  errors.ECODE_INVAL)
12893
12894   def BuildHooksEnv(self):
12895     """Build hooks env.
12896
12897     """
12898     assert self.target_uuids
12899
12900     env = {
12901       "TARGET_GROUPS": " ".join(self.target_uuids),
12902       }
12903
12904     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12905
12906     return env
12907
12908   def BuildHooksNodes(self):
12909     """Build hooks nodes.
12910
12911     """
12912     mn = self.cfg.GetMasterNode()
12913     return ([mn], [mn])
12914
12915   def Exec(self, feedback_fn):
12916     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12917
12918     assert instances == [self.op.instance_name], "Instance not locked"
12919
12920     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12921                      instances=instances, target_groups=list(self.target_uuids))
12922
12923     ial.Run(self.op.iallocator)
12924
12925     if not ial.success:
12926       raise errors.OpPrereqError("Can't compute solution for changing group of"
12927                                  " instance '%s' using iallocator '%s': %s" %
12928                                  (self.op.instance_name, self.op.iallocator,
12929                                   ial.info),
12930                                  errors.ECODE_NORES)
12931
12932     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12933
12934     self.LogInfo("Iallocator returned %s job(s) for changing group of"
12935                  " instance '%s'", len(jobs), self.op.instance_name)
12936
12937     return ResultWithJobs(jobs)
12938
12939
12940 class LUBackupQuery(NoHooksLU):
12941   """Query the exports list
12942
12943   """
12944   REQ_BGL = False
12945
12946   def ExpandNames(self):
12947     self.needed_locks = {}
12948     self.share_locks[locking.LEVEL_NODE] = 1
12949     if not self.op.nodes:
12950       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12951     else:
12952       self.needed_locks[locking.LEVEL_NODE] = \
12953         _GetWantedNodes(self, self.op.nodes)
12954
12955   def Exec(self, feedback_fn):
12956     """Compute the list of all the exported system images.
12957
12958     @rtype: dict
12959     @return: a dictionary with the structure node->(export-list)
12960         where export-list is a list of the instances exported on
12961         that node.
12962
12963     """
12964     self.nodes = self.owned_locks(locking.LEVEL_NODE)
12965     rpcresult = self.rpc.call_export_list(self.nodes)
12966     result = {}
12967     for node in rpcresult:
12968       if rpcresult[node].fail_msg:
12969         result[node] = False
12970       else:
12971         result[node] = rpcresult[node].payload
12972
12973     return result
12974
12975
12976 class LUBackupPrepare(NoHooksLU):
12977   """Prepares an instance for an export and returns useful information.
12978
12979   """
12980   REQ_BGL = False
12981
12982   def ExpandNames(self):
12983     self._ExpandAndLockInstance()
12984
12985   def CheckPrereq(self):
12986     """Check prerequisites.
12987
12988     """
12989     instance_name = self.op.instance_name
12990
12991     self.instance = self.cfg.GetInstanceInfo(instance_name)
12992     assert self.instance is not None, \
12993           "Cannot retrieve locked instance %s" % self.op.instance_name
12994     _CheckNodeOnline(self, self.instance.primary_node)
12995
12996     self._cds = _GetClusterDomainSecret()
12997
12998   def Exec(self, feedback_fn):
12999     """Prepares an instance for an export.
13000
13001     """
13002     instance = self.instance
13003
13004     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13005       salt = utils.GenerateSecret(8)
13006
13007       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13008       result = self.rpc.call_x509_cert_create(instance.primary_node,
13009                                               constants.RIE_CERT_VALIDITY)
13010       result.Raise("Can't create X509 key and certificate on %s" % result.node)
13011
13012       (name, cert_pem) = result.payload
13013
13014       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
13015                                              cert_pem)
13016
13017       return {
13018         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13019         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13020                           salt),
13021         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
13022         }
13023
13024     return None
13025
13026
13027 class LUBackupExport(LogicalUnit):
13028   """Export an instance to an image in the cluster.
13029
13030   """
13031   HPATH = "instance-export"
13032   HTYPE = constants.HTYPE_INSTANCE
13033   REQ_BGL = False
13034
13035   def CheckArguments(self):
13036     """Check the arguments.
13037
13038     """
13039     self.x509_key_name = self.op.x509_key_name
13040     self.dest_x509_ca_pem = self.op.destination_x509_ca
13041
13042     if self.op.mode == constants.EXPORT_MODE_REMOTE:
13043       if not self.x509_key_name:
13044         raise errors.OpPrereqError("Missing X509 key name for encryption",
13045                                    errors.ECODE_INVAL)
13046
13047       if not self.dest_x509_ca_pem:
13048         raise errors.OpPrereqError("Missing destination X509 CA",
13049                                    errors.ECODE_INVAL)
13050
13051   def ExpandNames(self):
13052     self._ExpandAndLockInstance()
13053
13054     # Lock all nodes for local exports
13055     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13056       # FIXME: lock only instance primary and destination node
13057       #
13058       # Sad but true, for now we have do lock all nodes, as we don't know where
13059       # the previous export might be, and in this LU we search for it and
13060       # remove it from its current node. In the future we could fix this by:
13061       #  - making a tasklet to search (share-lock all), then create the
13062       #    new one, then one to remove, after
13063       #  - removing the removal operation altogether
13064       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13065
13066   def DeclareLocks(self, level):
13067     """Last minute lock declaration."""
13068     # All nodes are locked anyway, so nothing to do here.
13069
13070   def BuildHooksEnv(self):
13071     """Build hooks env.
13072
13073     This will run on the master, primary node and target node.
13074
13075     """
13076     env = {
13077       "EXPORT_MODE": self.op.mode,
13078       "EXPORT_NODE": self.op.target_node,
13079       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13080       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13081       # TODO: Generic function for boolean env variables
13082       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13083       }
13084
13085     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13086
13087     return env
13088
13089   def BuildHooksNodes(self):
13090     """Build hooks nodes.
13091
13092     """
13093     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
13094
13095     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13096       nl.append(self.op.target_node)
13097
13098     return (nl, nl)
13099
13100   def CheckPrereq(self):
13101     """Check prerequisites.
13102
13103     This checks that the instance and node names are valid.
13104
13105     """
13106     instance_name = self.op.instance_name
13107
13108     self.instance = self.cfg.GetInstanceInfo(instance_name)
13109     assert self.instance is not None, \
13110           "Cannot retrieve locked instance %s" % self.op.instance_name
13111     _CheckNodeOnline(self, self.instance.primary_node)
13112
13113     if (self.op.remove_instance and
13114         self.instance.admin_state == constants.ADMINST_UP and
13115         not self.op.shutdown):
13116       raise errors.OpPrereqError("Can not remove instance without shutting it"
13117                                  " down before")
13118
13119     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13120       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13121       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13122       assert self.dst_node is not None
13123
13124       _CheckNodeOnline(self, self.dst_node.name)
13125       _CheckNodeNotDrained(self, self.dst_node.name)
13126
13127       self._cds = None
13128       self.dest_disk_info = None
13129       self.dest_x509_ca = None
13130
13131     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13132       self.dst_node = None
13133
13134       if len(self.op.target_node) != len(self.instance.disks):
13135         raise errors.OpPrereqError(("Received destination information for %s"
13136                                     " disks, but instance %s has %s disks") %
13137                                    (len(self.op.target_node), instance_name,
13138                                     len(self.instance.disks)),
13139                                    errors.ECODE_INVAL)
13140
13141       cds = _GetClusterDomainSecret()
13142
13143       # Check X509 key name
13144       try:
13145         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13146       except (TypeError, ValueError), err:
13147         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
13148
13149       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13150         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13151                                    errors.ECODE_INVAL)
13152
13153       # Load and verify CA
13154       try:
13155         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13156       except OpenSSL.crypto.Error, err:
13157         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13158                                    (err, ), errors.ECODE_INVAL)
13159
13160       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13161       if errcode is not None:
13162         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13163                                    (msg, ), errors.ECODE_INVAL)
13164
13165       self.dest_x509_ca = cert
13166
13167       # Verify target information
13168       disk_info = []
13169       for idx, disk_data in enumerate(self.op.target_node):
13170         try:
13171           (host, port, magic) = \
13172             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13173         except errors.GenericError, err:
13174           raise errors.OpPrereqError("Target info for disk %s: %s" %
13175                                      (idx, err), errors.ECODE_INVAL)
13176
13177         disk_info.append((host, port, magic))
13178
13179       assert len(disk_info) == len(self.op.target_node)
13180       self.dest_disk_info = disk_info
13181
13182     else:
13183       raise errors.ProgrammerError("Unhandled export mode %r" %
13184                                    self.op.mode)
13185
13186     # instance disk type verification
13187     # TODO: Implement export support for file-based disks
13188     for disk in self.instance.disks:
13189       if disk.dev_type == constants.LD_FILE:
13190         raise errors.OpPrereqError("Export not supported for instances with"
13191                                    " file-based disks", errors.ECODE_INVAL)
13192
13193   def _CleanupExports(self, feedback_fn):
13194     """Removes exports of current instance from all other nodes.
13195
13196     If an instance in a cluster with nodes A..D was exported to node C, its
13197     exports will be removed from the nodes A, B and D.
13198
13199     """
13200     assert self.op.mode != constants.EXPORT_MODE_REMOTE
13201
13202     nodelist = self.cfg.GetNodeList()
13203     nodelist.remove(self.dst_node.name)
13204
13205     # on one-node clusters nodelist will be empty after the removal
13206     # if we proceed the backup would be removed because OpBackupQuery
13207     # substitutes an empty list with the full cluster node list.
13208     iname = self.instance.name
13209     if nodelist:
13210       feedback_fn("Removing old exports for instance %s" % iname)
13211       exportlist = self.rpc.call_export_list(nodelist)
13212       for node in exportlist:
13213         if exportlist[node].fail_msg:
13214           continue
13215         if iname in exportlist[node].payload:
13216           msg = self.rpc.call_export_remove(node, iname).fail_msg
13217           if msg:
13218             self.LogWarning("Could not remove older export for instance %s"
13219                             " on node %s: %s", iname, node, msg)
13220
13221   def Exec(self, feedback_fn):
13222     """Export an instance to an image in the cluster.
13223
13224     """
13225     assert self.op.mode in constants.EXPORT_MODES
13226
13227     instance = self.instance
13228     src_node = instance.primary_node
13229
13230     if self.op.shutdown:
13231       # shutdown the instance, but not the disks
13232       feedback_fn("Shutting down instance %s" % instance.name)
13233       result = self.rpc.call_instance_shutdown(src_node, instance,
13234                                                self.op.shutdown_timeout)
13235       # TODO: Maybe ignore failures if ignore_remove_failures is set
13236       result.Raise("Could not shutdown instance %s on"
13237                    " node %s" % (instance.name, src_node))
13238
13239     # set the disks ID correctly since call_instance_start needs the
13240     # correct drbd minor to create the symlinks
13241     for disk in instance.disks:
13242       self.cfg.SetDiskID(disk, src_node)
13243
13244     activate_disks = (instance.admin_state != constants.ADMINST_UP)
13245
13246     if activate_disks:
13247       # Activate the instance disks if we'exporting a stopped instance
13248       feedback_fn("Activating disks for %s" % instance.name)
13249       _StartInstanceDisks(self, instance, None)
13250
13251     try:
13252       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13253                                                      instance)
13254
13255       helper.CreateSnapshots()
13256       try:
13257         if (self.op.shutdown and
13258             instance.admin_state == constants.ADMINST_UP and
13259             not self.op.remove_instance):
13260           assert not activate_disks
13261           feedback_fn("Starting instance %s" % instance.name)
13262           result = self.rpc.call_instance_start(src_node,
13263                                                 (instance, None, None), False)
13264           msg = result.fail_msg
13265           if msg:
13266             feedback_fn("Failed to start instance: %s" % msg)
13267             _ShutdownInstanceDisks(self, instance)
13268             raise errors.OpExecError("Could not start instance: %s" % msg)
13269
13270         if self.op.mode == constants.EXPORT_MODE_LOCAL:
13271           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13272         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13273           connect_timeout = constants.RIE_CONNECT_TIMEOUT
13274           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13275
13276           (key_name, _, _) = self.x509_key_name
13277
13278           dest_ca_pem = \
13279             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13280                                             self.dest_x509_ca)
13281
13282           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13283                                                      key_name, dest_ca_pem,
13284                                                      timeouts)
13285       finally:
13286         helper.Cleanup()
13287
13288       # Check for backwards compatibility
13289       assert len(dresults) == len(instance.disks)
13290       assert compat.all(isinstance(i, bool) for i in dresults), \
13291              "Not all results are boolean: %r" % dresults
13292
13293     finally:
13294       if activate_disks:
13295         feedback_fn("Deactivating disks for %s" % instance.name)
13296         _ShutdownInstanceDisks(self, instance)
13297
13298     if not (compat.all(dresults) and fin_resu):
13299       failures = []
13300       if not fin_resu:
13301         failures.append("export finalization")
13302       if not compat.all(dresults):
13303         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13304                                if not dsk)
13305         failures.append("disk export: disk(s) %s" % fdsk)
13306
13307       raise errors.OpExecError("Export failed, errors in %s" %
13308                                utils.CommaJoin(failures))
13309
13310     # At this point, the export was successful, we can cleanup/finish
13311
13312     # Remove instance if requested
13313     if self.op.remove_instance:
13314       feedback_fn("Removing instance %s" % instance.name)
13315       _RemoveInstance(self, feedback_fn, instance,
13316                       self.op.ignore_remove_failures)
13317
13318     if self.op.mode == constants.EXPORT_MODE_LOCAL:
13319       self._CleanupExports(feedback_fn)
13320
13321     return fin_resu, dresults
13322
13323
13324 class LUBackupRemove(NoHooksLU):
13325   """Remove exports related to the named instance.
13326
13327   """
13328   REQ_BGL = False
13329
13330   def ExpandNames(self):
13331     self.needed_locks = {}
13332     # We need all nodes to be locked in order for RemoveExport to work, but we
13333     # don't need to lock the instance itself, as nothing will happen to it (and
13334     # we can remove exports also for a removed instance)
13335     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13336
13337   def Exec(self, feedback_fn):
13338     """Remove any export.
13339
13340     """
13341     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13342     # If the instance was not found we'll try with the name that was passed in.
13343     # This will only work if it was an FQDN, though.
13344     fqdn_warn = False
13345     if not instance_name:
13346       fqdn_warn = True
13347       instance_name = self.op.instance_name
13348
13349     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13350     exportlist = self.rpc.call_export_list(locked_nodes)
13351     found = False
13352     for node in exportlist:
13353       msg = exportlist[node].fail_msg
13354       if msg:
13355         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13356         continue
13357       if instance_name in exportlist[node].payload:
13358         found = True
13359         result = self.rpc.call_export_remove(node, instance_name)
13360         msg = result.fail_msg
13361         if msg:
13362           logging.error("Could not remove export for instance %s"
13363                         " on node %s: %s", instance_name, node, msg)
13364
13365     if fqdn_warn and not found:
13366       feedback_fn("Export not found. If trying to remove an export belonging"
13367                   " to a deleted instance please use its Fully Qualified"
13368                   " Domain Name.")
13369
13370
13371 class LUGroupAdd(LogicalUnit):
13372   """Logical unit for creating node groups.
13373
13374   """
13375   HPATH = "group-add"
13376   HTYPE = constants.HTYPE_GROUP
13377   REQ_BGL = False
13378
13379   def ExpandNames(self):
13380     # We need the new group's UUID here so that we can create and acquire the
13381     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
13382     # that it should not check whether the UUID exists in the configuration.
13383     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
13384     self.needed_locks = {}
13385     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13386
13387   def CheckPrereq(self):
13388     """Check prerequisites.
13389
13390     This checks that the given group name is not an existing node group
13391     already.
13392
13393     """
13394     try:
13395       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13396     except errors.OpPrereqError:
13397       pass
13398     else:
13399       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
13400                                  " node group (UUID: %s)" %
13401                                  (self.op.group_name, existing_uuid),
13402                                  errors.ECODE_EXISTS)
13403
13404     if self.op.ndparams:
13405       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13406
13407     if self.op.hv_state:
13408       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
13409     else:
13410       self.new_hv_state = None
13411
13412     if self.op.disk_state:
13413       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
13414     else:
13415       self.new_disk_state = None
13416
13417     if self.op.diskparams:
13418       for templ in constants.DISK_TEMPLATES:
13419         if templ not in self.op.diskparams:
13420           self.op.diskparams[templ] = {}
13421         utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
13422     else:
13423       self.op.diskparams = self.cfg.GetClusterInfo().diskparams
13424
13425     if self.op.ipolicy:
13426       cluster = self.cfg.GetClusterInfo()
13427       full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
13428       try:
13429         objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
13430       except errors.ConfigurationError, err:
13431         raise errors.OpPrereqError("Invalid instance policy: %s" % err,
13432                                    errors.ECODE_INVAL)
13433
13434   def BuildHooksEnv(self):
13435     """Build hooks env.
13436
13437     """
13438     return {
13439       "GROUP_NAME": self.op.group_name,
13440       }
13441
13442   def BuildHooksNodes(self):
13443     """Build hooks nodes.
13444
13445     """
13446     mn = self.cfg.GetMasterNode()
13447     return ([mn], [mn])
13448
13449   def Exec(self, feedback_fn):
13450     """Add the node group to the cluster.
13451
13452     """
13453     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13454                                   uuid=self.group_uuid,
13455                                   alloc_policy=self.op.alloc_policy,
13456                                   ndparams=self.op.ndparams,
13457                                   diskparams=self.op.diskparams,
13458                                   ipolicy=self.op.ipolicy,
13459                                   hv_state_static=self.new_hv_state,
13460                                   disk_state_static=self.new_disk_state)
13461
13462     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13463     del self.remove_locks[locking.LEVEL_NODEGROUP]
13464
13465
13466 class LUGroupAssignNodes(NoHooksLU):
13467   """Logical unit for assigning nodes to groups.
13468
13469   """
13470   REQ_BGL = False
13471
13472   def ExpandNames(self):
13473     # These raise errors.OpPrereqError on their own:
13474     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13475     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13476
13477     # We want to lock all the affected nodes and groups. We have readily
13478     # available the list of nodes, and the *destination* group. To gather the
13479     # list of "source" groups, we need to fetch node information later on.
13480     self.needed_locks = {
13481       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13482       locking.LEVEL_NODE: self.op.nodes,
13483       }
13484
13485   def DeclareLocks(self, level):
13486     if level == locking.LEVEL_NODEGROUP:
13487       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13488
13489       # Try to get all affected nodes' groups without having the group or node
13490       # lock yet. Needs verification later in the code flow.
13491       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13492
13493       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13494
13495   def CheckPrereq(self):
13496     """Check prerequisites.
13497
13498     """
13499     assert self.needed_locks[locking.LEVEL_NODEGROUP]
13500     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13501             frozenset(self.op.nodes))
13502
13503     expected_locks = (set([self.group_uuid]) |
13504                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13505     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13506     if actual_locks != expected_locks:
13507       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13508                                " current groups are '%s', used to be '%s'" %
13509                                (utils.CommaJoin(expected_locks),
13510                                 utils.CommaJoin(actual_locks)))
13511
13512     self.node_data = self.cfg.GetAllNodesInfo()
13513     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13514     instance_data = self.cfg.GetAllInstancesInfo()
13515
13516     if self.group is None:
13517       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13518                                (self.op.group_name, self.group_uuid))
13519
13520     (new_splits, previous_splits) = \
13521       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13522                                              for node in self.op.nodes],
13523                                             self.node_data, instance_data)
13524
13525     if new_splits:
13526       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13527
13528       if not self.op.force:
13529         raise errors.OpExecError("The following instances get split by this"
13530                                  " change and --force was not given: %s" %
13531                                  fmt_new_splits)
13532       else:
13533         self.LogWarning("This operation will split the following instances: %s",
13534                         fmt_new_splits)
13535
13536         if previous_splits:
13537           self.LogWarning("In addition, these already-split instances continue"
13538                           " to be split across groups: %s",
13539                           utils.CommaJoin(utils.NiceSort(previous_splits)))
13540
13541   def Exec(self, feedback_fn):
13542     """Assign nodes to a new group.
13543
13544     """
13545     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13546
13547     self.cfg.AssignGroupNodes(mods)
13548
13549   @staticmethod
13550   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13551     """Check for split instances after a node assignment.
13552
13553     This method considers a series of node assignments as an atomic operation,
13554     and returns information about split instances after applying the set of
13555     changes.
13556
13557     In particular, it returns information about newly split instances, and
13558     instances that were already split, and remain so after the change.
13559
13560     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13561     considered.
13562
13563     @type changes: list of (node_name, new_group_uuid) pairs.
13564     @param changes: list of node assignments to consider.
13565     @param node_data: a dict with data for all nodes
13566     @param instance_data: a dict with all instances to consider
13567     @rtype: a two-tuple
13568     @return: a list of instances that were previously okay and result split as a
13569       consequence of this change, and a list of instances that were previously
13570       split and this change does not fix.
13571
13572     """
13573     changed_nodes = dict((node, group) for node, group in changes
13574                          if node_data[node].group != group)
13575
13576     all_split_instances = set()
13577     previously_split_instances = set()
13578
13579     def InstanceNodes(instance):
13580       return [instance.primary_node] + list(instance.secondary_nodes)
13581
13582     for inst in instance_data.values():
13583       if inst.disk_template not in constants.DTS_INT_MIRROR:
13584         continue
13585
13586       instance_nodes = InstanceNodes(inst)
13587
13588       if len(set(node_data[node].group for node in instance_nodes)) > 1:
13589         previously_split_instances.add(inst.name)
13590
13591       if len(set(changed_nodes.get(node, node_data[node].group)
13592                  for node in instance_nodes)) > 1:
13593         all_split_instances.add(inst.name)
13594
13595     return (list(all_split_instances - previously_split_instances),
13596             list(previously_split_instances & all_split_instances))
13597
13598
13599 class _GroupQuery(_QueryBase):
13600   FIELDS = query.GROUP_FIELDS
13601
13602   def ExpandNames(self, lu):
13603     lu.needed_locks = {}
13604
13605     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13606     self._cluster = lu.cfg.GetClusterInfo()
13607     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13608
13609     if not self.names:
13610       self.wanted = [name_to_uuid[name]
13611                      for name in utils.NiceSort(name_to_uuid.keys())]
13612     else:
13613       # Accept names to be either names or UUIDs.
13614       missing = []
13615       self.wanted = []
13616       all_uuid = frozenset(self._all_groups.keys())
13617
13618       for name in self.names:
13619         if name in all_uuid:
13620           self.wanted.append(name)
13621         elif name in name_to_uuid:
13622           self.wanted.append(name_to_uuid[name])
13623         else:
13624           missing.append(name)
13625
13626       if missing:
13627         raise errors.OpPrereqError("Some groups do not exist: %s" %
13628                                    utils.CommaJoin(missing),
13629                                    errors.ECODE_NOENT)
13630
13631   def DeclareLocks(self, lu, level):
13632     pass
13633
13634   def _GetQueryData(self, lu):
13635     """Computes the list of node groups and their attributes.
13636
13637     """
13638     do_nodes = query.GQ_NODE in self.requested_data
13639     do_instances = query.GQ_INST in self.requested_data
13640
13641     group_to_nodes = None
13642     group_to_instances = None
13643
13644     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13645     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13646     # latter GetAllInstancesInfo() is not enough, for we have to go through
13647     # instance->node. Hence, we will need to process nodes even if we only need
13648     # instance information.
13649     if do_nodes or do_instances:
13650       all_nodes = lu.cfg.GetAllNodesInfo()
13651       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13652       node_to_group = {}
13653
13654       for node in all_nodes.values():
13655         if node.group in group_to_nodes:
13656           group_to_nodes[node.group].append(node.name)
13657           node_to_group[node.name] = node.group
13658
13659       if do_instances:
13660         all_instances = lu.cfg.GetAllInstancesInfo()
13661         group_to_instances = dict((uuid, []) for uuid in self.wanted)
13662
13663         for instance in all_instances.values():
13664           node = instance.primary_node
13665           if node in node_to_group:
13666             group_to_instances[node_to_group[node]].append(instance.name)
13667
13668         if not do_nodes:
13669           # Do not pass on node information if it was not requested.
13670           group_to_nodes = None
13671
13672     return query.GroupQueryData(self._cluster,
13673                                 [self._all_groups[uuid]
13674                                  for uuid in self.wanted],
13675                                 group_to_nodes, group_to_instances)
13676
13677
13678 class LUGroupQuery(NoHooksLU):
13679   """Logical unit for querying node groups.
13680
13681   """
13682   REQ_BGL = False
13683
13684   def CheckArguments(self):
13685     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13686                           self.op.output_fields, False)
13687
13688   def ExpandNames(self):
13689     self.gq.ExpandNames(self)
13690
13691   def DeclareLocks(self, level):
13692     self.gq.DeclareLocks(self, level)
13693
13694   def Exec(self, feedback_fn):
13695     return self.gq.OldStyleQuery(self)
13696
13697
13698 class LUGroupSetParams(LogicalUnit):
13699   """Modifies the parameters of a node group.
13700
13701   """
13702   HPATH = "group-modify"
13703   HTYPE = constants.HTYPE_GROUP
13704   REQ_BGL = False
13705
13706   def CheckArguments(self):
13707     all_changes = [
13708       self.op.ndparams,
13709       self.op.diskparams,
13710       self.op.alloc_policy,
13711       self.op.hv_state,
13712       self.op.disk_state,
13713       self.op.ipolicy,
13714       ]
13715
13716     if all_changes.count(None) == len(all_changes):
13717       raise errors.OpPrereqError("Please pass at least one modification",
13718                                  errors.ECODE_INVAL)
13719
13720   def ExpandNames(self):
13721     # This raises errors.OpPrereqError on its own:
13722     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13723
13724     self.needed_locks = {
13725       locking.LEVEL_INSTANCE: [],
13726       locking.LEVEL_NODEGROUP: [self.group_uuid],
13727       }
13728
13729     self.share_locks[locking.LEVEL_INSTANCE] = 1
13730
13731   def DeclareLocks(self, level):
13732     if level == locking.LEVEL_INSTANCE:
13733       assert not self.needed_locks[locking.LEVEL_INSTANCE]
13734
13735       # Lock instances optimistically, needs verification once group lock has
13736       # been acquired
13737       self.needed_locks[locking.LEVEL_INSTANCE] = \
13738           self.cfg.GetNodeGroupInstances(self.group_uuid)
13739
13740   def CheckPrereq(self):
13741     """Check prerequisites.
13742
13743     """
13744     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13745
13746     # Check if locked instances are still correct
13747     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13748
13749     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13750     cluster = self.cfg.GetClusterInfo()
13751
13752     if self.group is None:
13753       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13754                                (self.op.group_name, self.group_uuid))
13755
13756     if self.op.ndparams:
13757       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13758       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13759       self.new_ndparams = new_ndparams
13760
13761     if self.op.diskparams:
13762       self.new_diskparams = dict()
13763       for templ in constants.DISK_TEMPLATES:
13764         if templ not in self.op.diskparams:
13765           self.op.diskparams[templ] = {}
13766         new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13767                                              self.op.diskparams[templ])
13768         utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13769         self.new_diskparams[templ] = new_templ_params
13770
13771     if self.op.hv_state:
13772       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13773                                                  self.group.hv_state_static)
13774
13775     if self.op.disk_state:
13776       self.new_disk_state = \
13777         _MergeAndVerifyDiskState(self.op.disk_state,
13778                                  self.group.disk_state_static)
13779
13780     if self.op.ipolicy:
13781       self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
13782                                             self.op.ipolicy,
13783                                             group_policy=True)
13784
13785       new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
13786       inst_filter = lambda inst: inst.name in owned_instances
13787       instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
13788       violations = \
13789           _ComputeNewInstanceViolations(_CalculateGroupIPolicy(cluster,
13790                                                                self.group),
13791                                         new_ipolicy, instances)
13792
13793       if violations:
13794         self.LogWarning("After the ipolicy change the following instances"
13795                         " violate them: %s",
13796                         utils.CommaJoin(violations))
13797
13798   def BuildHooksEnv(self):
13799     """Build hooks env.
13800
13801     """
13802     return {
13803       "GROUP_NAME": self.op.group_name,
13804       "NEW_ALLOC_POLICY": self.op.alloc_policy,
13805       }
13806
13807   def BuildHooksNodes(self):
13808     """Build hooks nodes.
13809
13810     """
13811     mn = self.cfg.GetMasterNode()
13812     return ([mn], [mn])
13813
13814   def Exec(self, feedback_fn):
13815     """Modifies the node group.
13816
13817     """
13818     result = []
13819
13820     if self.op.ndparams:
13821       self.group.ndparams = self.new_ndparams
13822       result.append(("ndparams", str(self.group.ndparams)))
13823
13824     if self.op.diskparams:
13825       self.group.diskparams = self.new_diskparams
13826       result.append(("diskparams", str(self.group.diskparams)))
13827
13828     if self.op.alloc_policy:
13829       self.group.alloc_policy = self.op.alloc_policy
13830
13831     if self.op.hv_state:
13832       self.group.hv_state_static = self.new_hv_state
13833
13834     if self.op.disk_state:
13835       self.group.disk_state_static = self.new_disk_state
13836
13837     if self.op.ipolicy:
13838       self.group.ipolicy = self.new_ipolicy
13839
13840     self.cfg.Update(self.group, feedback_fn)
13841     return result
13842
13843
13844 class LUGroupRemove(LogicalUnit):
13845   HPATH = "group-remove"
13846   HTYPE = constants.HTYPE_GROUP
13847   REQ_BGL = False
13848
13849   def ExpandNames(self):
13850     # This will raises errors.OpPrereqError on its own:
13851     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13852     self.needed_locks = {
13853       locking.LEVEL_NODEGROUP: [self.group_uuid],
13854       }
13855
13856   def CheckPrereq(self):
13857     """Check prerequisites.
13858
13859     This checks that the given group name exists as a node group, that is
13860     empty (i.e., contains no nodes), and that is not the last group of the
13861     cluster.
13862
13863     """
13864     # Verify that the group is empty.
13865     group_nodes = [node.name
13866                    for node in self.cfg.GetAllNodesInfo().values()
13867                    if node.group == self.group_uuid]
13868
13869     if group_nodes:
13870       raise errors.OpPrereqError("Group '%s' not empty, has the following"
13871                                  " nodes: %s" %
13872                                  (self.op.group_name,
13873                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
13874                                  errors.ECODE_STATE)
13875
13876     # Verify the cluster would not be left group-less.
13877     if len(self.cfg.GetNodeGroupList()) == 1:
13878       raise errors.OpPrereqError("Group '%s' is the only group,"
13879                                  " cannot be removed" %
13880                                  self.op.group_name,
13881                                  errors.ECODE_STATE)
13882
13883   def BuildHooksEnv(self):
13884     """Build hooks env.
13885
13886     """
13887     return {
13888       "GROUP_NAME": self.op.group_name,
13889       }
13890
13891   def BuildHooksNodes(self):
13892     """Build hooks nodes.
13893
13894     """
13895     mn = self.cfg.GetMasterNode()
13896     return ([mn], [mn])
13897
13898   def Exec(self, feedback_fn):
13899     """Remove the node group.
13900
13901     """
13902     try:
13903       self.cfg.RemoveNodeGroup(self.group_uuid)
13904     except errors.ConfigurationError:
13905       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13906                                (self.op.group_name, self.group_uuid))
13907
13908     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13909
13910
13911 class LUGroupRename(LogicalUnit):
13912   HPATH = "group-rename"
13913   HTYPE = constants.HTYPE_GROUP
13914   REQ_BGL = False
13915
13916   def ExpandNames(self):
13917     # This raises errors.OpPrereqError on its own:
13918     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13919
13920     self.needed_locks = {
13921       locking.LEVEL_NODEGROUP: [self.group_uuid],
13922       }
13923
13924   def CheckPrereq(self):
13925     """Check prerequisites.
13926
13927     Ensures requested new name is not yet used.
13928
13929     """
13930     try:
13931       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13932     except errors.OpPrereqError:
13933       pass
13934     else:
13935       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13936                                  " node group (UUID: %s)" %
13937                                  (self.op.new_name, new_name_uuid),
13938                                  errors.ECODE_EXISTS)
13939
13940   def BuildHooksEnv(self):
13941     """Build hooks env.
13942
13943     """
13944     return {
13945       "OLD_NAME": self.op.group_name,
13946       "NEW_NAME": self.op.new_name,
13947       }
13948
13949   def BuildHooksNodes(self):
13950     """Build hooks nodes.
13951
13952     """
13953     mn = self.cfg.GetMasterNode()
13954
13955     all_nodes = self.cfg.GetAllNodesInfo()
13956     all_nodes.pop(mn, None)
13957
13958     run_nodes = [mn]
13959     run_nodes.extend(node.name for node in all_nodes.values()
13960                      if node.group == self.group_uuid)
13961
13962     return (run_nodes, run_nodes)
13963
13964   def Exec(self, feedback_fn):
13965     """Rename the node group.
13966
13967     """
13968     group = self.cfg.GetNodeGroup(self.group_uuid)
13969
13970     if group is None:
13971       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13972                                (self.op.group_name, self.group_uuid))
13973
13974     group.name = self.op.new_name
13975     self.cfg.Update(group, feedback_fn)
13976
13977     return self.op.new_name
13978
13979
13980 class LUGroupEvacuate(LogicalUnit):
13981   HPATH = "group-evacuate"
13982   HTYPE = constants.HTYPE_GROUP
13983   REQ_BGL = False
13984
13985   def ExpandNames(self):
13986     # This raises errors.OpPrereqError on its own:
13987     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13988
13989     if self.op.target_groups:
13990       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13991                                   self.op.target_groups)
13992     else:
13993       self.req_target_uuids = []
13994
13995     if self.group_uuid in self.req_target_uuids:
13996       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13997                                  " as a target group (targets are %s)" %
13998                                  (self.group_uuid,
13999                                   utils.CommaJoin(self.req_target_uuids)),
14000                                  errors.ECODE_INVAL)
14001
14002     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
14003
14004     self.share_locks = _ShareAll()
14005     self.needed_locks = {
14006       locking.LEVEL_INSTANCE: [],
14007       locking.LEVEL_NODEGROUP: [],
14008       locking.LEVEL_NODE: [],
14009       }
14010
14011   def DeclareLocks(self, level):
14012     if level == locking.LEVEL_INSTANCE:
14013       assert not self.needed_locks[locking.LEVEL_INSTANCE]
14014
14015       # Lock instances optimistically, needs verification once node and group
14016       # locks have been acquired
14017       self.needed_locks[locking.LEVEL_INSTANCE] = \
14018         self.cfg.GetNodeGroupInstances(self.group_uuid)
14019
14020     elif level == locking.LEVEL_NODEGROUP:
14021       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
14022
14023       if self.req_target_uuids:
14024         lock_groups = set([self.group_uuid] + self.req_target_uuids)
14025
14026         # Lock all groups used by instances optimistically; this requires going
14027         # via the node before it's locked, requiring verification later on
14028         lock_groups.update(group_uuid
14029                            for instance_name in
14030                              self.owned_locks(locking.LEVEL_INSTANCE)
14031                            for group_uuid in
14032                              self.cfg.GetInstanceNodeGroups(instance_name))
14033       else:
14034         # No target groups, need to lock all of them
14035         lock_groups = locking.ALL_SET
14036
14037       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
14038
14039     elif level == locking.LEVEL_NODE:
14040       # This will only lock the nodes in the group to be evacuated which
14041       # contain actual instances
14042       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
14043       self._LockInstancesNodes()
14044
14045       # Lock all nodes in group to be evacuated and target groups
14046       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14047       assert self.group_uuid in owned_groups
14048       member_nodes = [node_name
14049                       for group in owned_groups
14050                       for node_name in self.cfg.GetNodeGroup(group).members]
14051       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
14052
14053   def CheckPrereq(self):
14054     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
14055     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
14056     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
14057
14058     assert owned_groups.issuperset(self.req_target_uuids)
14059     assert self.group_uuid in owned_groups
14060
14061     # Check if locked instances are still correct
14062     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
14063
14064     # Get instance information
14065     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
14066
14067     # Check if node groups for locked instances are still correct
14068     for instance_name in owned_instances:
14069       inst = self.instances[instance_name]
14070       assert owned_nodes.issuperset(inst.all_nodes), \
14071         "Instance %s's nodes changed while we kept the lock" % instance_name
14072
14073       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
14074                                              owned_groups)
14075
14076       assert self.group_uuid in inst_groups, \
14077         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
14078
14079     if self.req_target_uuids:
14080       # User requested specific target groups
14081       self.target_uuids = self.req_target_uuids
14082     else:
14083       # All groups except the one to be evacuated are potential targets
14084       self.target_uuids = [group_uuid for group_uuid in owned_groups
14085                            if group_uuid != self.group_uuid]
14086
14087       if not self.target_uuids:
14088         raise errors.OpPrereqError("There are no possible target groups",
14089                                    errors.ECODE_INVAL)
14090
14091   def BuildHooksEnv(self):
14092     """Build hooks env.
14093
14094     """
14095     return {
14096       "GROUP_NAME": self.op.group_name,
14097       "TARGET_GROUPS": " ".join(self.target_uuids),
14098       }
14099
14100   def BuildHooksNodes(self):
14101     """Build hooks nodes.
14102
14103     """
14104     mn = self.cfg.GetMasterNode()
14105
14106     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
14107
14108     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
14109
14110     return (run_nodes, run_nodes)
14111
14112   def Exec(self, feedback_fn):
14113     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
14114
14115     assert self.group_uuid not in self.target_uuids
14116
14117     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
14118                      instances=instances, target_groups=self.target_uuids)
14119
14120     ial.Run(self.op.iallocator)
14121
14122     if not ial.success:
14123       raise errors.OpPrereqError("Can't compute group evacuation using"
14124                                  " iallocator '%s': %s" %
14125                                  (self.op.iallocator, ial.info),
14126                                  errors.ECODE_NORES)
14127
14128     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
14129
14130     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
14131                  len(jobs), self.op.group_name)
14132
14133     return ResultWithJobs(jobs)
14134
14135
14136 class TagsLU(NoHooksLU): # pylint: disable=W0223
14137   """Generic tags LU.
14138
14139   This is an abstract class which is the parent of all the other tags LUs.
14140
14141   """
14142   def ExpandNames(self):
14143     self.group_uuid = None
14144     self.needed_locks = {}
14145     if self.op.kind == constants.TAG_NODE:
14146       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
14147       self.needed_locks[locking.LEVEL_NODE] = self.op.name
14148     elif self.op.kind == constants.TAG_INSTANCE:
14149       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
14150       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
14151     elif self.op.kind == constants.TAG_NODEGROUP:
14152       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
14153
14154     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
14155     # not possible to acquire the BGL based on opcode parameters)
14156
14157   def CheckPrereq(self):
14158     """Check prerequisites.
14159
14160     """
14161     if self.op.kind == constants.TAG_CLUSTER:
14162       self.target = self.cfg.GetClusterInfo()
14163     elif self.op.kind == constants.TAG_NODE:
14164       self.target = self.cfg.GetNodeInfo(self.op.name)
14165     elif self.op.kind == constants.TAG_INSTANCE:
14166       self.target = self.cfg.GetInstanceInfo(self.op.name)
14167     elif self.op.kind == constants.TAG_NODEGROUP:
14168       self.target = self.cfg.GetNodeGroup(self.group_uuid)
14169     else:
14170       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
14171                                  str(self.op.kind), errors.ECODE_INVAL)
14172
14173
14174 class LUTagsGet(TagsLU):
14175   """Returns the tags of a given object.
14176
14177   """
14178   REQ_BGL = False
14179
14180   def ExpandNames(self):
14181     TagsLU.ExpandNames(self)
14182
14183     # Share locks as this is only a read operation
14184     self.share_locks = _ShareAll()
14185
14186   def Exec(self, feedback_fn):
14187     """Returns the tag list.
14188
14189     """
14190     return list(self.target.GetTags())
14191
14192
14193 class LUTagsSearch(NoHooksLU):
14194   """Searches the tags for a given pattern.
14195
14196   """
14197   REQ_BGL = False
14198
14199   def ExpandNames(self):
14200     self.needed_locks = {}
14201
14202   def CheckPrereq(self):
14203     """Check prerequisites.
14204
14205     This checks the pattern passed for validity by compiling it.
14206
14207     """
14208     try:
14209       self.re = re.compile(self.op.pattern)
14210     except re.error, err:
14211       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14212                                  (self.op.pattern, err), errors.ECODE_INVAL)
14213
14214   def Exec(self, feedback_fn):
14215     """Returns the tag list.
14216
14217     """
14218     cfg = self.cfg
14219     tgts = [("/cluster", cfg.GetClusterInfo())]
14220     ilist = cfg.GetAllInstancesInfo().values()
14221     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14222     nlist = cfg.GetAllNodesInfo().values()
14223     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14224     tgts.extend(("/nodegroup/%s" % n.name, n)
14225                 for n in cfg.GetAllNodeGroupsInfo().values())
14226     results = []
14227     for path, target in tgts:
14228       for tag in target.GetTags():
14229         if self.re.search(tag):
14230           results.append((path, tag))
14231     return results
14232
14233
14234 class LUTagsSet(TagsLU):
14235   """Sets a tag on a given object.
14236
14237   """
14238   REQ_BGL = False
14239
14240   def CheckPrereq(self):
14241     """Check prerequisites.
14242
14243     This checks the type and length of the tag name and value.
14244
14245     """
14246     TagsLU.CheckPrereq(self)
14247     for tag in self.op.tags:
14248       objects.TaggableObject.ValidateTag(tag)
14249
14250   def Exec(self, feedback_fn):
14251     """Sets the tag.
14252
14253     """
14254     try:
14255       for tag in self.op.tags:
14256         self.target.AddTag(tag)
14257     except errors.TagError, err:
14258       raise errors.OpExecError("Error while setting tag: %s" % str(err))
14259     self.cfg.Update(self.target, feedback_fn)
14260
14261
14262 class LUTagsDel(TagsLU):
14263   """Delete a list of tags from a given object.
14264
14265   """
14266   REQ_BGL = False
14267
14268   def CheckPrereq(self):
14269     """Check prerequisites.
14270
14271     This checks that we have the given tag.
14272
14273     """
14274     TagsLU.CheckPrereq(self)
14275     for tag in self.op.tags:
14276       objects.TaggableObject.ValidateTag(tag)
14277     del_tags = frozenset(self.op.tags)
14278     cur_tags = self.target.GetTags()
14279
14280     diff_tags = del_tags - cur_tags
14281     if diff_tags:
14282       diff_names = ("'%s'" % i for i in sorted(diff_tags))
14283       raise errors.OpPrereqError("Tag(s) %s not found" %
14284                                  (utils.CommaJoin(diff_names), ),
14285                                  errors.ECODE_NOENT)
14286
14287   def Exec(self, feedback_fn):
14288     """Remove the tag from the object.
14289
14290     """
14291     for tag in self.op.tags:
14292       self.target.RemoveTag(tag)
14293     self.cfg.Update(self.target, feedback_fn)
14294
14295
14296 class LUTestDelay(NoHooksLU):
14297   """Sleep for a specified amount of time.
14298
14299   This LU sleeps on the master and/or nodes for a specified amount of
14300   time.
14301
14302   """
14303   REQ_BGL = False
14304
14305   def ExpandNames(self):
14306     """Expand names and set required locks.
14307
14308     This expands the node list, if any.
14309
14310     """
14311     self.needed_locks = {}
14312     if self.op.on_nodes:
14313       # _GetWantedNodes can be used here, but is not always appropriate to use
14314       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
14315       # more information.
14316       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
14317       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
14318
14319   def _TestDelay(self):
14320     """Do the actual sleep.
14321
14322     """
14323     if self.op.on_master:
14324       if not utils.TestDelay(self.op.duration):
14325         raise errors.OpExecError("Error during master delay test")
14326     if self.op.on_nodes:
14327       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
14328       for node, node_result in result.items():
14329         node_result.Raise("Failure during rpc call to node %s" % node)
14330
14331   def Exec(self, feedback_fn):
14332     """Execute the test delay opcode, with the wanted repetitions.
14333
14334     """
14335     if self.op.repeat == 0:
14336       self._TestDelay()
14337     else:
14338       top_value = self.op.repeat - 1
14339       for i in range(self.op.repeat):
14340         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
14341         self._TestDelay()
14342
14343
14344 class LUTestJqueue(NoHooksLU):
14345   """Utility LU to test some aspects of the job queue.
14346
14347   """
14348   REQ_BGL = False
14349
14350   # Must be lower than default timeout for WaitForJobChange to see whether it
14351   # notices changed jobs
14352   _CLIENT_CONNECT_TIMEOUT = 20.0
14353   _CLIENT_CONFIRM_TIMEOUT = 60.0
14354
14355   @classmethod
14356   def _NotifyUsingSocket(cls, cb, errcls):
14357     """Opens a Unix socket and waits for another program to connect.
14358
14359     @type cb: callable
14360     @param cb: Callback to send socket name to client
14361     @type errcls: class
14362     @param errcls: Exception class to use for errors
14363
14364     """
14365     # Using a temporary directory as there's no easy way to create temporary
14366     # sockets without writing a custom loop around tempfile.mktemp and
14367     # socket.bind
14368     tmpdir = tempfile.mkdtemp()
14369     try:
14370       tmpsock = utils.PathJoin(tmpdir, "sock")
14371
14372       logging.debug("Creating temporary socket at %s", tmpsock)
14373       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
14374       try:
14375         sock.bind(tmpsock)
14376         sock.listen(1)
14377
14378         # Send details to client
14379         cb(tmpsock)
14380
14381         # Wait for client to connect before continuing
14382         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
14383         try:
14384           (conn, _) = sock.accept()
14385         except socket.error, err:
14386           raise errcls("Client didn't connect in time (%s)" % err)
14387       finally:
14388         sock.close()
14389     finally:
14390       # Remove as soon as client is connected
14391       shutil.rmtree(tmpdir)
14392
14393     # Wait for client to close
14394     try:
14395       try:
14396         # pylint: disable=E1101
14397         # Instance of '_socketobject' has no ... member
14398         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
14399         conn.recv(1)
14400       except socket.error, err:
14401         raise errcls("Client failed to confirm notification (%s)" % err)
14402     finally:
14403       conn.close()
14404
14405   def _SendNotification(self, test, arg, sockname):
14406     """Sends a notification to the client.
14407
14408     @type test: string
14409     @param test: Test name
14410     @param arg: Test argument (depends on test)
14411     @type sockname: string
14412     @param sockname: Socket path
14413
14414     """
14415     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
14416
14417   def _Notify(self, prereq, test, arg):
14418     """Notifies the client of a test.
14419
14420     @type prereq: bool
14421     @param prereq: Whether this is a prereq-phase test
14422     @type test: string
14423     @param test: Test name
14424     @param arg: Test argument (depends on test)
14425
14426     """
14427     if prereq:
14428       errcls = errors.OpPrereqError
14429     else:
14430       errcls = errors.OpExecError
14431
14432     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
14433                                                   test, arg),
14434                                    errcls)
14435
14436   def CheckArguments(self):
14437     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
14438     self.expandnames_calls = 0
14439
14440   def ExpandNames(self):
14441     checkargs_calls = getattr(self, "checkargs_calls", 0)
14442     if checkargs_calls < 1:
14443       raise errors.ProgrammerError("CheckArguments was not called")
14444
14445     self.expandnames_calls += 1
14446
14447     if self.op.notify_waitlock:
14448       self._Notify(True, constants.JQT_EXPANDNAMES, None)
14449
14450     self.LogInfo("Expanding names")
14451
14452     # Get lock on master node (just to get a lock, not for a particular reason)
14453     self.needed_locks = {
14454       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
14455       }
14456
14457   def Exec(self, feedback_fn):
14458     if self.expandnames_calls < 1:
14459       raise errors.ProgrammerError("ExpandNames was not called")
14460
14461     if self.op.notify_exec:
14462       self._Notify(False, constants.JQT_EXEC, None)
14463
14464     self.LogInfo("Executing")
14465
14466     if self.op.log_messages:
14467       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
14468       for idx, msg in enumerate(self.op.log_messages):
14469         self.LogInfo("Sending log message %s", idx + 1)
14470         feedback_fn(constants.JQT_MSGPREFIX + msg)
14471         # Report how many test messages have been sent
14472         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14473
14474     if self.op.fail:
14475       raise errors.OpExecError("Opcode failure was requested")
14476
14477     return True
14478
14479
14480 class IAllocator(object):
14481   """IAllocator framework.
14482
14483   An IAllocator instance has three sets of attributes:
14484     - cfg that is needed to query the cluster
14485     - input data (all members of the _KEYS class attribute are required)
14486     - four buffer attributes (in|out_data|text), that represent the
14487       input (to the external script) in text and data structure format,
14488       and the output from it, again in two formats
14489     - the result variables from the script (success, info, nodes) for
14490       easy usage
14491
14492   """
14493   # pylint: disable=R0902
14494   # lots of instance attributes
14495
14496   def __init__(self, cfg, rpc_runner, mode, **kwargs):
14497     self.cfg = cfg
14498     self.rpc = rpc_runner
14499     # init buffer variables
14500     self.in_text = self.out_text = self.in_data = self.out_data = None
14501     # init all input fields so that pylint is happy
14502     self.mode = mode
14503     self.memory = self.disks = self.disk_template = None
14504     self.os = self.tags = self.nics = self.vcpus = None
14505     self.hypervisor = None
14506     self.relocate_from = None
14507     self.name = None
14508     self.instances = None
14509     self.evac_mode = None
14510     self.target_groups = []
14511     # computed fields
14512     self.required_nodes = None
14513     # init result fields
14514     self.success = self.info = self.result = None
14515
14516     try:
14517       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14518     except KeyError:
14519       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14520                                    " IAllocator" % self.mode)
14521
14522     keyset = [n for (n, _) in keydata]
14523
14524     for key in kwargs:
14525       if key not in keyset:
14526         raise errors.ProgrammerError("Invalid input parameter '%s' to"
14527                                      " IAllocator" % key)
14528       setattr(self, key, kwargs[key])
14529
14530     for key in keyset:
14531       if key not in kwargs:
14532         raise errors.ProgrammerError("Missing input parameter '%s' to"
14533                                      " IAllocator" % key)
14534     self._BuildInputData(compat.partial(fn, self), keydata)
14535
14536   def _ComputeClusterData(self):
14537     """Compute the generic allocator input data.
14538
14539     This is the data that is independent of the actual operation.
14540
14541     """
14542     cfg = self.cfg
14543     cluster_info = cfg.GetClusterInfo()
14544     # cluster data
14545     data = {
14546       "version": constants.IALLOCATOR_VERSION,
14547       "cluster_name": cfg.GetClusterName(),
14548       "cluster_tags": list(cluster_info.GetTags()),
14549       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14550       "ipolicy": cluster_info.ipolicy,
14551       }
14552     ninfo = cfg.GetAllNodesInfo()
14553     iinfo = cfg.GetAllInstancesInfo().values()
14554     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14555
14556     # node data
14557     node_list = [n.name for n in ninfo.values() if n.vm_capable]
14558
14559     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14560       hypervisor_name = self.hypervisor
14561     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14562       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14563     else:
14564       hypervisor_name = cluster_info.primary_hypervisor
14565
14566     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14567                                         [hypervisor_name])
14568     node_iinfo = \
14569       self.rpc.call_all_instances_info(node_list,
14570                                        cluster_info.enabled_hypervisors)
14571
14572     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14573
14574     config_ndata = self._ComputeBasicNodeData(ninfo)
14575     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14576                                                  i_list, config_ndata)
14577     assert len(data["nodes"]) == len(ninfo), \
14578         "Incomplete node data computed"
14579
14580     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14581
14582     self.in_data = data
14583
14584   @staticmethod
14585   def _ComputeNodeGroupData(cfg):
14586     """Compute node groups data.
14587
14588     """
14589     cluster = cfg.GetClusterInfo()
14590     ng = dict((guuid, {
14591       "name": gdata.name,
14592       "alloc_policy": gdata.alloc_policy,
14593       "ipolicy": _CalculateGroupIPolicy(cluster, gdata),
14594       })
14595       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14596
14597     return ng
14598
14599   @staticmethod
14600   def _ComputeBasicNodeData(node_cfg):
14601     """Compute global node data.
14602
14603     @rtype: dict
14604     @returns: a dict of name: (node dict, node config)
14605
14606     """
14607     # fill in static (config-based) values
14608     node_results = dict((ninfo.name, {
14609       "tags": list(ninfo.GetTags()),
14610       "primary_ip": ninfo.primary_ip,
14611       "secondary_ip": ninfo.secondary_ip,
14612       "offline": ninfo.offline,
14613       "drained": ninfo.drained,
14614       "master_candidate": ninfo.master_candidate,
14615       "group": ninfo.group,
14616       "master_capable": ninfo.master_capable,
14617       "vm_capable": ninfo.vm_capable,
14618       })
14619       for ninfo in node_cfg.values())
14620
14621     return node_results
14622
14623   @staticmethod
14624   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14625                               node_results):
14626     """Compute global node data.
14627
14628     @param node_results: the basic node structures as filled from the config
14629
14630     """
14631     #TODO(dynmem): compute the right data on MAX and MIN memory
14632     # make a copy of the current dict
14633     node_results = dict(node_results)
14634     for nname, nresult in node_data.items():
14635       assert nname in node_results, "Missing basic data for node %s" % nname
14636       ninfo = node_cfg[nname]
14637
14638       if not (ninfo.offline or ninfo.drained):
14639         nresult.Raise("Can't get data for node %s" % nname)
14640         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14641                                 nname)
14642         remote_info = _MakeLegacyNodeInfo(nresult.payload)
14643
14644         for attr in ["memory_total", "memory_free", "memory_dom0",
14645                      "vg_size", "vg_free", "cpu_total"]:
14646           if attr not in remote_info:
14647             raise errors.OpExecError("Node '%s' didn't return attribute"
14648                                      " '%s'" % (nname, attr))
14649           if not isinstance(remote_info[attr], int):
14650             raise errors.OpExecError("Node '%s' returned invalid value"
14651                                      " for '%s': %s" %
14652                                      (nname, attr, remote_info[attr]))
14653         # compute memory used by primary instances
14654         i_p_mem = i_p_up_mem = 0
14655         for iinfo, beinfo in i_list:
14656           if iinfo.primary_node == nname:
14657             i_p_mem += beinfo[constants.BE_MAXMEM]
14658             if iinfo.name not in node_iinfo[nname].payload:
14659               i_used_mem = 0
14660             else:
14661               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14662             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14663             remote_info["memory_free"] -= max(0, i_mem_diff)
14664
14665             if iinfo.admin_state == constants.ADMINST_UP:
14666               i_p_up_mem += beinfo[constants.BE_MAXMEM]
14667
14668         # compute memory used by instances
14669         pnr_dyn = {
14670           "total_memory": remote_info["memory_total"],
14671           "reserved_memory": remote_info["memory_dom0"],
14672           "free_memory": remote_info["memory_free"],
14673           "total_disk": remote_info["vg_size"],
14674           "free_disk": remote_info["vg_free"],
14675           "total_cpus": remote_info["cpu_total"],
14676           "i_pri_memory": i_p_mem,
14677           "i_pri_up_memory": i_p_up_mem,
14678           }
14679         pnr_dyn.update(node_results[nname])
14680         node_results[nname] = pnr_dyn
14681
14682     return node_results
14683
14684   @staticmethod
14685   def _ComputeInstanceData(cluster_info, i_list):
14686     """Compute global instance data.
14687
14688     """
14689     instance_data = {}
14690     for iinfo, beinfo in i_list:
14691       nic_data = []
14692       for nic in iinfo.nics:
14693         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14694         nic_dict = {
14695           "mac": nic.mac,
14696           "ip": nic.ip,
14697           "mode": filled_params[constants.NIC_MODE],
14698           "link": filled_params[constants.NIC_LINK],
14699           }
14700         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14701           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14702         nic_data.append(nic_dict)
14703       pir = {
14704         "tags": list(iinfo.GetTags()),
14705         "admin_state": iinfo.admin_state,
14706         "vcpus": beinfo[constants.BE_VCPUS],
14707         "memory": beinfo[constants.BE_MAXMEM],
14708         "os": iinfo.os,
14709         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14710         "nics": nic_data,
14711         "disks": [{constants.IDISK_SIZE: dsk.size,
14712                    constants.IDISK_MODE: dsk.mode}
14713                   for dsk in iinfo.disks],
14714         "disk_template": iinfo.disk_template,
14715         "hypervisor": iinfo.hypervisor,
14716         }
14717       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14718                                                  pir["disks"])
14719       instance_data[iinfo.name] = pir
14720
14721     return instance_data
14722
14723   def _AddNewInstance(self):
14724     """Add new instance data to allocator structure.
14725
14726     This in combination with _AllocatorGetClusterData will create the
14727     correct structure needed as input for the allocator.
14728
14729     The checks for the completeness of the opcode must have already been
14730     done.
14731
14732     """
14733     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14734
14735     if self.disk_template in constants.DTS_INT_MIRROR:
14736       self.required_nodes = 2
14737     else:
14738       self.required_nodes = 1
14739
14740     request = {
14741       "name": self.name,
14742       "disk_template": self.disk_template,
14743       "tags": self.tags,
14744       "os": self.os,
14745       "vcpus": self.vcpus,
14746       "memory": self.memory,
14747       "disks": self.disks,
14748       "disk_space_total": disk_space,
14749       "nics": self.nics,
14750       "required_nodes": self.required_nodes,
14751       "hypervisor": self.hypervisor,
14752       }
14753
14754     return request
14755
14756   def _AddRelocateInstance(self):
14757     """Add relocate instance data to allocator structure.
14758
14759     This in combination with _IAllocatorGetClusterData will create the
14760     correct structure needed as input for the allocator.
14761
14762     The checks for the completeness of the opcode must have already been
14763     done.
14764
14765     """
14766     instance = self.cfg.GetInstanceInfo(self.name)
14767     if instance is None:
14768       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14769                                    " IAllocator" % self.name)
14770
14771     if instance.disk_template not in constants.DTS_MIRRORED:
14772       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14773                                  errors.ECODE_INVAL)
14774
14775     if instance.disk_template in constants.DTS_INT_MIRROR and \
14776         len(instance.secondary_nodes) != 1:
14777       raise errors.OpPrereqError("Instance has not exactly one secondary node",
14778                                  errors.ECODE_STATE)
14779
14780     self.required_nodes = 1
14781     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14782     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14783
14784     request = {
14785       "name": self.name,
14786       "disk_space_total": disk_space,
14787       "required_nodes": self.required_nodes,
14788       "relocate_from": self.relocate_from,
14789       }
14790     return request
14791
14792   def _AddNodeEvacuate(self):
14793     """Get data for node-evacuate requests.
14794
14795     """
14796     return {
14797       "instances": self.instances,
14798       "evac_mode": self.evac_mode,
14799       }
14800
14801   def _AddChangeGroup(self):
14802     """Get data for node-evacuate requests.
14803
14804     """
14805     return {
14806       "instances": self.instances,
14807       "target_groups": self.target_groups,
14808       }
14809
14810   def _BuildInputData(self, fn, keydata):
14811     """Build input data structures.
14812
14813     """
14814     self._ComputeClusterData()
14815
14816     request = fn()
14817     request["type"] = self.mode
14818     for keyname, keytype in keydata:
14819       if keyname not in request:
14820         raise errors.ProgrammerError("Request parameter %s is missing" %
14821                                      keyname)
14822       val = request[keyname]
14823       if not keytype(val):
14824         raise errors.ProgrammerError("Request parameter %s doesn't pass"
14825                                      " validation, value %s, expected"
14826                                      " type %s" % (keyname, val, keytype))
14827     self.in_data["request"] = request
14828
14829     self.in_text = serializer.Dump(self.in_data)
14830
14831   _STRING_LIST = ht.TListOf(ht.TString)
14832   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14833      # pylint: disable=E1101
14834      # Class '...' has no 'OP_ID' member
14835      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14836                           opcodes.OpInstanceMigrate.OP_ID,
14837                           opcodes.OpInstanceReplaceDisks.OP_ID])
14838      })))
14839
14840   _NEVAC_MOVED = \
14841     ht.TListOf(ht.TAnd(ht.TIsLength(3),
14842                        ht.TItems([ht.TNonEmptyString,
14843                                   ht.TNonEmptyString,
14844                                   ht.TListOf(ht.TNonEmptyString),
14845                                  ])))
14846   _NEVAC_FAILED = \
14847     ht.TListOf(ht.TAnd(ht.TIsLength(2),
14848                        ht.TItems([ht.TNonEmptyString,
14849                                   ht.TMaybeString,
14850                                  ])))
14851   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14852                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14853
14854   _MODE_DATA = {
14855     constants.IALLOCATOR_MODE_ALLOC:
14856       (_AddNewInstance,
14857        [
14858         ("name", ht.TString),
14859         ("memory", ht.TInt),
14860         ("disks", ht.TListOf(ht.TDict)),
14861         ("disk_template", ht.TString),
14862         ("os", ht.TString),
14863         ("tags", _STRING_LIST),
14864         ("nics", ht.TListOf(ht.TDict)),
14865         ("vcpus", ht.TInt),
14866         ("hypervisor", ht.TString),
14867         ], ht.TList),
14868     constants.IALLOCATOR_MODE_RELOC:
14869       (_AddRelocateInstance,
14870        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14871        ht.TList),
14872      constants.IALLOCATOR_MODE_NODE_EVAC:
14873       (_AddNodeEvacuate, [
14874         ("instances", _STRING_LIST),
14875         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14876         ], _NEVAC_RESULT),
14877      constants.IALLOCATOR_MODE_CHG_GROUP:
14878       (_AddChangeGroup, [
14879         ("instances", _STRING_LIST),
14880         ("target_groups", _STRING_LIST),
14881         ], _NEVAC_RESULT),
14882     }
14883
14884   def Run(self, name, validate=True, call_fn=None):
14885     """Run an instance allocator and return the results.
14886
14887     """
14888     if call_fn is None:
14889       call_fn = self.rpc.call_iallocator_runner
14890
14891     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14892     result.Raise("Failure while running the iallocator script")
14893
14894     self.out_text = result.payload
14895     if validate:
14896       self._ValidateResult()
14897
14898   def _ValidateResult(self):
14899     """Process the allocator results.
14900
14901     This will process and if successful save the result in
14902     self.out_data and the other parameters.
14903
14904     """
14905     try:
14906       rdict = serializer.Load(self.out_text)
14907     except Exception, err:
14908       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14909
14910     if not isinstance(rdict, dict):
14911       raise errors.OpExecError("Can't parse iallocator results: not a dict")
14912
14913     # TODO: remove backwards compatiblity in later versions
14914     if "nodes" in rdict and "result" not in rdict:
14915       rdict["result"] = rdict["nodes"]
14916       del rdict["nodes"]
14917
14918     for key in "success", "info", "result":
14919       if key not in rdict:
14920         raise errors.OpExecError("Can't parse iallocator results:"
14921                                  " missing key '%s'" % key)
14922       setattr(self, key, rdict[key])
14923
14924     if not self._result_check(self.result):
14925       raise errors.OpExecError("Iallocator returned invalid result,"
14926                                " expected %s, got %s" %
14927                                (self._result_check, self.result),
14928                                errors.ECODE_INVAL)
14929
14930     if self.mode == constants.IALLOCATOR_MODE_RELOC:
14931       assert self.relocate_from is not None
14932       assert self.required_nodes == 1
14933
14934       node2group = dict((name, ndata["group"])
14935                         for (name, ndata) in self.in_data["nodes"].items())
14936
14937       fn = compat.partial(self._NodesToGroups, node2group,
14938                           self.in_data["nodegroups"])
14939
14940       instance = self.cfg.GetInstanceInfo(self.name)
14941       request_groups = fn(self.relocate_from + [instance.primary_node])
14942       result_groups = fn(rdict["result"] + [instance.primary_node])
14943
14944       if self.success and not set(result_groups).issubset(request_groups):
14945         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14946                                  " differ from original groups (%s)" %
14947                                  (utils.CommaJoin(result_groups),
14948                                   utils.CommaJoin(request_groups)))
14949
14950     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14951       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14952
14953     self.out_data = rdict
14954
14955   @staticmethod
14956   def _NodesToGroups(node2group, groups, nodes):
14957     """Returns a list of unique group names for a list of nodes.
14958
14959     @type node2group: dict
14960     @param node2group: Map from node name to group UUID
14961     @type groups: dict
14962     @param groups: Group information
14963     @type nodes: list
14964     @param nodes: Node names
14965
14966     """
14967     result = set()
14968
14969     for node in nodes:
14970       try:
14971         group_uuid = node2group[node]
14972       except KeyError:
14973         # Ignore unknown node
14974         pass
14975       else:
14976         try:
14977           group = groups[group_uuid]
14978         except KeyError:
14979           # Can't find group, let's use UUID
14980           group_name = group_uuid
14981         else:
14982           group_name = group["name"]
14983
14984         result.add(group_name)
14985
14986     return sorted(result)
14987
14988
14989 class LUTestAllocator(NoHooksLU):
14990   """Run allocator tests.
14991
14992   This LU runs the allocator tests
14993
14994   """
14995   def CheckPrereq(self):
14996     """Check prerequisites.
14997
14998     This checks the opcode parameters depending on the director and mode test.
14999
15000     """
15001     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15002       for attr in ["memory", "disks", "disk_template",
15003                    "os", "tags", "nics", "vcpus"]:
15004         if not hasattr(self.op, attr):
15005           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
15006                                      attr, errors.ECODE_INVAL)
15007       iname = self.cfg.ExpandInstanceName(self.op.name)
15008       if iname is not None:
15009         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
15010                                    iname, errors.ECODE_EXISTS)
15011       if not isinstance(self.op.nics, list):
15012         raise errors.OpPrereqError("Invalid parameter 'nics'",
15013                                    errors.ECODE_INVAL)
15014       if not isinstance(self.op.disks, list):
15015         raise errors.OpPrereqError("Invalid parameter 'disks'",
15016                                    errors.ECODE_INVAL)
15017       for row in self.op.disks:
15018         if (not isinstance(row, dict) or
15019             constants.IDISK_SIZE not in row or
15020             not isinstance(row[constants.IDISK_SIZE], int) or
15021             constants.IDISK_MODE not in row or
15022             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
15023           raise errors.OpPrereqError("Invalid contents of the 'disks'"
15024                                      " parameter", errors.ECODE_INVAL)
15025       if self.op.hypervisor is None:
15026         self.op.hypervisor = self.cfg.GetHypervisorType()
15027     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15028       fname = _ExpandInstanceName(self.cfg, self.op.name)
15029       self.op.name = fname
15030       self.relocate_from = \
15031           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
15032     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
15033                           constants.IALLOCATOR_MODE_NODE_EVAC):
15034       if not self.op.instances:
15035         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
15036       self.op.instances = _GetWantedInstances(self, self.op.instances)
15037     else:
15038       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
15039                                  self.op.mode, errors.ECODE_INVAL)
15040
15041     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
15042       if self.op.allocator is None:
15043         raise errors.OpPrereqError("Missing allocator name",
15044                                    errors.ECODE_INVAL)
15045     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
15046       raise errors.OpPrereqError("Wrong allocator test '%s'" %
15047                                  self.op.direction, errors.ECODE_INVAL)
15048
15049   def Exec(self, feedback_fn):
15050     """Run the allocator test.
15051
15052     """
15053     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
15054       ial = IAllocator(self.cfg, self.rpc,
15055                        mode=self.op.mode,
15056                        name=self.op.name,
15057                        memory=self.op.memory,
15058                        disks=self.op.disks,
15059                        disk_template=self.op.disk_template,
15060                        os=self.op.os,
15061                        tags=self.op.tags,
15062                        nics=self.op.nics,
15063                        vcpus=self.op.vcpus,
15064                        hypervisor=self.op.hypervisor,
15065                        )
15066     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
15067       ial = IAllocator(self.cfg, self.rpc,
15068                        mode=self.op.mode,
15069                        name=self.op.name,
15070                        relocate_from=list(self.relocate_from),
15071                        )
15072     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
15073       ial = IAllocator(self.cfg, self.rpc,
15074                        mode=self.op.mode,
15075                        instances=self.op.instances,
15076                        target_groups=self.op.target_groups)
15077     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
15078       ial = IAllocator(self.cfg, self.rpc,
15079                        mode=self.op.mode,
15080                        instances=self.op.instances,
15081                        evac_mode=self.op.evac_mode)
15082     else:
15083       raise errors.ProgrammerError("Uncatched mode %s in"
15084                                    " LUTestAllocator.Exec", self.op.mode)
15085
15086     if self.op.direction == constants.IALLOCATOR_DIR_IN:
15087       result = ial.in_text
15088     else:
15089       ial.Run(self.op.allocator, validate=False)
15090       result = ial.out_text
15091     return result
15092
15093
15094 #: Query type implementations
15095 _QUERY_IMPL = {
15096   constants.QR_INSTANCE: _InstanceQuery,
15097   constants.QR_NODE: _NodeQuery,
15098   constants.QR_GROUP: _GroupQuery,
15099   constants.QR_OS: _OsQuery,
15100   }
15101
15102 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
15103
15104
15105 def _GetQueryImplementation(name):
15106   """Returns the implemtnation for a query type.
15107
15108   @param name: Query type, must be one of L{constants.QR_VIA_OP}
15109
15110   """
15111   try:
15112     return _QUERY_IMPL[name]
15113   except KeyError:
15114     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
15115                                errors.ECODE_INVAL)