code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62 from ganeti import rpc
  63
  64 import ganeti.masterd.instance # pylint: disable=W0611
  65
  66
  67 #: Size of DRBD meta block device
  68 DRBD_META_SIZE = 128
  69
  70 # States of instance
  71 INSTANCE_UP = [constants.ADMINST_UP]
  72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
  73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
  74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
  75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
  76
  77
  78 class ResultWithJobs:
  79   """Data container for LU results with jobs.
  80
  81   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  82   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  83   contained in the C{jobs} attribute and include the job IDs in the opcode
  84   result.
  85
  86   """
  87   def __init__(self, jobs, **kwargs):
  88     """Initializes this class.
  89
  90     Additional return values can be specified as keyword arguments.
  91
  92     @type jobs: list of lists of L{opcode.OpCode}
  93     @param jobs: A list of lists of opcode objects
  94
  95     """
  96     self.jobs = jobs
  97     self.other = kwargs
  98
  99
 100 class LogicalUnit(object):
 101   """Logical Unit base class.
 102
 103   Subclasses must follow these rules:
 104     - implement ExpandNames
 105     - implement CheckPrereq (except when tasklets are used)
 106     - implement Exec (except when tasklets are used)
 107     - implement BuildHooksEnv
 108     - implement BuildHooksNodes
 109     - redefine HPATH and HTYPE
 110     - optionally redefine their run requirements:
 111         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 112
 113   Note that all commands require root permissions.
 114
 115   @ivar dry_run_result: the value (if any) that will be returned to the caller
 116       in dry-run mode (signalled by opcode dry_run parameter)
 117
 118   """
 119   HPATH = None
 120   HTYPE = None
 121   REQ_BGL = True
 122
 123   def __init__(self, processor, op, context, rpc_runner):
 124     """Constructor for LogicalUnit.
 125
 126     This needs to be overridden in derived classes in order to check op
 127     validity.
 128
 129     """
 130     self.proc = processor
 131     self.op = op
 132     self.cfg = context.cfg
 133     self.glm = context.glm
 134     # readability alias
 135     self.owned_locks = context.glm.list_owned
 136     self.context = context
 137     self.rpc = rpc_runner
 138     # Dicts used to declare locking needs to mcpu
 139     self.needed_locks = None
 140     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 141     self.add_locks = {}
 142     self.remove_locks = {}
 143     # Used to force good behavior when calling helper functions
 144     self.recalculate_locks = {}
 145     # logging
 146     self.Log = processor.Log # pylint: disable=C0103
 147     self.LogWarning = processor.LogWarning # pylint: disable=C0103
 148     self.LogInfo = processor.LogInfo # pylint: disable=C0103
 149     self.LogStep = processor.LogStep # pylint: disable=C0103
 150     # support for dry-run
 151     self.dry_run_result = None
 152     # support for generic debug attribute
 153     if (not hasattr(self.op, "debug_level") or
 154         not isinstance(self.op.debug_level, int)):
 155       self.op.debug_level = 0
 156
 157     # Tasklets
 158     self.tasklets = None
 159
 160     # Validate opcode parameters and set defaults
 161     self.op.Validate(True)
 162
 163     self.CheckArguments()
 164
 165   def CheckArguments(self):
 166     """Check syntactic validity for the opcode arguments.
 167
 168     This method is for doing a simple syntactic check and ensure
 169     validity of opcode parameters, without any cluster-related
 170     checks. While the same can be accomplished in ExpandNames and/or
 171     CheckPrereq, doing these separate is better because:
 172
 173       - ExpandNames is left as as purely a lock-related function
 174       - CheckPrereq is run after we have acquired locks (and possible
 175         waited for them)
 176
 177     The function is allowed to change the self.op attribute so that
 178     later methods can no longer worry about missing parameters.
 179
 180     """
 181     pass
 182
 183   def ExpandNames(self):
 184     """Expand names for this LU.
 185
 186     This method is called before starting to execute the opcode, and it should
 187     update all the parameters of the opcode to their canonical form (e.g. a
 188     short node name must be fully expanded after this method has successfully
 189     completed). This way locking, hooks, logging, etc. can work correctly.
 190
 191     LUs which implement this method must also populate the self.needed_locks
 192     member, as a dict with lock levels as keys, and a list of needed lock names
 193     as values. Rules:
 194
 195       - use an empty dict if you don't need any lock
 196       - if you don't need any lock at a particular level omit that level
 197       - don't put anything for the BGL level
 198       - if you want all locks at a level use locking.ALL_SET as a value
 199
 200     If you need to share locks (rather than acquire them exclusively) at one
 201     level you can modify self.share_locks, setting a true value (usually 1) for
 202     that level. By default locks are not shared.
 203
 204     This function can also define a list of tasklets, which then will be
 205     executed in order instead of the usual LU-level CheckPrereq and Exec
 206     functions, if those are not defined by the LU.
 207
 208     Examples::
 209
 210       # Acquire all nodes and one instance
 211       self.needed_locks = {
 212         locking.LEVEL_NODE: locking.ALL_SET,
 213         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 214       }
 215       # Acquire just two nodes
 216       self.needed_locks = {
 217         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 218       }
 219       # Acquire no locks
 220       self.needed_locks = {} # No, you can't leave it to the default value None
 221
 222     """
 223     # The implementation of this method is mandatory only if the new LU is
 224     # concurrent, so that old LUs don't need to be changed all at the same
 225     # time.
 226     if self.REQ_BGL:
 227       self.needed_locks = {} # Exclusive LUs don't need locks.
 228     else:
 229       raise NotImplementedError
 230
 231   def DeclareLocks(self, level):
 232     """Declare LU locking needs for a level
 233
 234     While most LUs can just declare their locking needs at ExpandNames time,
 235     sometimes there's the need to calculate some locks after having acquired
 236     the ones before. This function is called just before acquiring locks at a
 237     particular level, but after acquiring the ones at lower levels, and permits
 238     such calculations. It can be used to modify self.needed_locks, and by
 239     default it does nothing.
 240
 241     This function is only called if you have something already set in
 242     self.needed_locks for the level.
 243
 244     @param level: Locking level which is going to be locked
 245     @type level: member of ganeti.locking.LEVELS
 246
 247     """
 248
 249   def CheckPrereq(self):
 250     """Check prerequisites for this LU.
 251
 252     This method should check that the prerequisites for the execution
 253     of this LU are fulfilled. It can do internode communication, but
 254     it should be idempotent - no cluster or system changes are
 255     allowed.
 256
 257     The method should raise errors.OpPrereqError in case something is
 258     not fulfilled. Its return value is ignored.
 259
 260     This method should also update all the parameters of the opcode to
 261     their canonical form if it hasn't been done by ExpandNames before.
 262
 263     """
 264     if self.tasklets is not None:
 265       for (idx, tl) in enumerate(self.tasklets):
 266         logging.debug("Checking prerequisites for tasklet %s/%s",
 267                       idx + 1, len(self.tasklets))
 268         tl.CheckPrereq()
 269     else:
 270       pass
 271
 272   def Exec(self, feedback_fn):
 273     """Execute the LU.
 274
 275     This method should implement the actual work. It should raise
 276     errors.OpExecError for failures that are somewhat dealt with in
 277     code, or expected.
 278
 279     """
 280     if self.tasklets is not None:
 281       for (idx, tl) in enumerate(self.tasklets):
 282         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 283         tl.Exec(feedback_fn)
 284     else:
 285       raise NotImplementedError
 286
 287   def BuildHooksEnv(self):
 288     """Build hooks environment for this LU.
 289
 290     @rtype: dict
 291     @return: Dictionary containing the environment that will be used for
 292       running the hooks for this LU. The keys of the dict must not be prefixed
 293       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 294       will extend the environment with additional variables. If no environment
 295       should be defined, an empty dictionary should be returned (not C{None}).
 296     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 297       will not be called.
 298
 299     """
 300     raise NotImplementedError
 301
 302   def BuildHooksNodes(self):
 303     """Build list of nodes to run LU's hooks.
 304
 305     @rtype: tuple; (list, list)
 306     @return: Tuple containing a list of node names on which the hook
 307       should run before the execution and a list of node names on which the
 308       hook should run after the execution. No nodes should be returned as an
 309       empty list (and not None).
 310     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 311       will not be called.
 312
 313     """
 314     raise NotImplementedError
 315
 316   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 317     """Notify the LU about the results of its hooks.
 318
 319     This method is called every time a hooks phase is executed, and notifies
 320     the Logical Unit about the hooks' result. The LU can then use it to alter
 321     its result based on the hooks.  By default the method does nothing and the
 322     previous result is passed back unchanged but any LU can define it if it
 323     wants to use the local cluster hook-scripts somehow.
 324
 325     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 326         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 327     @param hook_results: the results of the multi-node hooks rpc call
 328     @param feedback_fn: function used send feedback back to the caller
 329     @param lu_result: the previous Exec result this LU had, or None
 330         in the PRE phase
 331     @return: the new Exec result, based on the previous result
 332         and hook results
 333
 334     """
 335     # API must be kept, thus we ignore the unused argument and could
 336     # be a function warnings
 337     # pylint: disable=W0613,R0201
 338     return lu_result
 339
 340   def _ExpandAndLockInstance(self):
 341     """Helper function to expand and lock an instance.
 342
 343     Many LUs that work on an instance take its name in self.op.instance_name
 344     and need to expand it and then declare the expanded name for locking. This
 345     function does it, and then updates self.op.instance_name to the expanded
 346     name. It also initializes needed_locks as a dict, if this hasn't been done
 347     before.
 348
 349     """
 350     if self.needed_locks is None:
 351       self.needed_locks = {}
 352     else:
 353       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 354         "_ExpandAndLockInstance called with instance-level locks set"
 355     self.op.instance_name = _ExpandInstanceName(self.cfg,
 356                                                 self.op.instance_name)
 357     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 358
 359   def _LockInstancesNodes(self, primary_only=False,
 360                           level=locking.LEVEL_NODE):
 361     """Helper function to declare instances' nodes for locking.
 362
 363     This function should be called after locking one or more instances to lock
 364     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 365     with all primary or secondary nodes for instances already locked and
 366     present in self.needed_locks[locking.LEVEL_INSTANCE].
 367
 368     It should be called from DeclareLocks, and for safety only works if
 369     self.recalculate_locks[locking.LEVEL_NODE] is set.
 370
 371     In the future it may grow parameters to just lock some instance's nodes, or
 372     to just lock primaries or secondary nodes, if needed.
 373
 374     If should be called in DeclareLocks in a way similar to::
 375
 376       if level == locking.LEVEL_NODE:
 377         self._LockInstancesNodes()
 378
 379     @type primary_only: boolean
 380     @param primary_only: only lock primary nodes of locked instances
 381     @param level: Which lock level to use for locking nodes
 382
 383     """
 384     assert level in self.recalculate_locks, \
 385       "_LockInstancesNodes helper function called with no nodes to recalculate"
 386
 387     # TODO: check if we're really been called with the instance locks held
 388
 389     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 390     # future we might want to have different behaviors depending on the value
 391     # of self.recalculate_locks[locking.LEVEL_NODE]
 392     wanted_nodes = []
 393     locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
 394     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 395       wanted_nodes.append(instance.primary_node)
 396       if not primary_only:
 397         wanted_nodes.extend(instance.secondary_nodes)
 398
 399     if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
 400       self.needed_locks[level] = wanted_nodes
 401     elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
 402       self.needed_locks[level].extend(wanted_nodes)
 403     else:
 404       raise errors.ProgrammerError("Unknown recalculation mode")
 405
 406     del self.recalculate_locks[level]
 407
 408
 409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
 410   """Simple LU which runs no hooks.
 411
 412   This LU is intended as a parent for other LogicalUnits which will
 413   run no hooks, in order to reduce duplicate code.
 414
 415   """
 416   HPATH = None
 417   HTYPE = None
 418
 419   def BuildHooksEnv(self):
 420     """Empty BuildHooksEnv for NoHooksLu.
 421
 422     This just raises an error.
 423
 424     """
 425     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 426
 427   def BuildHooksNodes(self):
 428     """Empty BuildHooksNodes for NoHooksLU.
 429
 430     """
 431     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 432
 433
 434 class Tasklet:
 435   """Tasklet base class.
 436
 437   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 438   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 439   tasklets know nothing about locks.
 440
 441   Subclasses must follow these rules:
 442     - Implement CheckPrereq
 443     - Implement Exec
 444
 445   """
 446   def __init__(self, lu):
 447     self.lu = lu
 448
 449     # Shortcuts
 450     self.cfg = lu.cfg
 451     self.rpc = lu.rpc
 452
 453   def CheckPrereq(self):
 454     """Check prerequisites for this tasklets.
 455
 456     This method should check whether the prerequisites for the execution of
 457     this tasklet are fulfilled. It can do internode communication, but it
 458     should be idempotent - no cluster or system changes are allowed.
 459
 460     The method should raise errors.OpPrereqError in case something is not
 461     fulfilled. Its return value is ignored.
 462
 463     This method should also update all parameters to their canonical form if it
 464     hasn't been done before.
 465
 466     """
 467     pass
 468
 469   def Exec(self, feedback_fn):
 470     """Execute the tasklet.
 471
 472     This method should implement the actual work. It should raise
 473     errors.OpExecError for failures that are somewhat dealt with in code, or
 474     expected.
 475
 476     """
 477     raise NotImplementedError
 478
 479
 480 class _QueryBase:
 481   """Base for query utility classes.
 482
 483   """
 484   #: Attribute holding field definitions
 485   FIELDS = None
 486
 487   def __init__(self, qfilter, fields, use_locking):
 488     """Initializes this class.
 489
 490     """
 491     self.use_locking = use_locking
 492
 493     self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
 494                              namefield="name")
 495     self.requested_data = self.query.RequestedData()
 496     self.names = self.query.RequestedNames()
 497
 498     # Sort only if no names were requested
 499     self.sort_by_name = not self.names
 500
 501     self.do_locking = None
 502     self.wanted = None
 503
 504   def _GetNames(self, lu, all_names, lock_level):
 505     """Helper function to determine names asked for in the query.
 506
 507     """
 508     if self.do_locking:
 509       names = lu.owned_locks(lock_level)
 510     else:
 511       names = all_names
 512
 513     if self.wanted == locking.ALL_SET:
 514       assert not self.names
 515       # caller didn't specify names, so ordering is not important
 516       return utils.NiceSort(names)
 517
 518     # caller specified names and we must keep the same order
 519     assert self.names
 520     assert not self.do_locking or lu.glm.is_owned(lock_level)
 521
 522     missing = set(self.wanted).difference(names)
 523     if missing:
 524       raise errors.OpExecError("Some items were removed before retrieving"
 525                                " their data: %s" % missing)
 526
 527     # Return expanded names
 528     return self.wanted
 529
 530   def ExpandNames(self, lu):
 531     """Expand names for this query.
 532
 533     See L{LogicalUnit.ExpandNames}.
 534
 535     """
 536     raise NotImplementedError()
 537
 538   def DeclareLocks(self, lu, level):
 539     """Declare locks for this query.
 540
 541     See L{LogicalUnit.DeclareLocks}.
 542
 543     """
 544     raise NotImplementedError()
 545
 546   def _GetQueryData(self, lu):
 547     """Collects all data for this query.
 548
 549     @return: Query data object
 550
 551     """
 552     raise NotImplementedError()
 553
 554   def NewStyleQuery(self, lu):
 555     """Collect data and execute query.
 556
 557     """
 558     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 559                                   sort_by_name=self.sort_by_name)
 560
 561   def OldStyleQuery(self, lu):
 562     """Collect data and execute query.
 563
 564     """
 565     return self.query.OldStyleQuery(self._GetQueryData(lu),
 566                                     sort_by_name=self.sort_by_name)
 567
 568
 569 def _ShareAll():
 570   """Returns a dict declaring all lock levels shared.
 571
 572   """
 573   return dict.fromkeys(locking.LEVELS, 1)
 574
 575
 576 def _MakeLegacyNodeInfo(data):
 577   """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
 578
 579   Converts the data into a single dictionary. This is fine for most use cases,
 580   but some require information from more than one volume group or hypervisor.
 581
 582   """
 583   (bootid, (vg_info, ), (hv_info, )) = data
 584
 585   return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
 586     "bootid": bootid,
 587     })
 588
 589
 590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
 591   """Checks if the owned node groups are still correct for an instance.
 592
 593   @type cfg: L{config.ConfigWriter}
 594   @param cfg: The cluster configuration
 595   @type instance_name: string
 596   @param instance_name: Instance name
 597   @type owned_groups: set or frozenset
 598   @param owned_groups: List of currently owned node groups
 599
 600   """
 601   inst_groups = cfg.GetInstanceNodeGroups(instance_name)
 602
 603   if not owned_groups.issuperset(inst_groups):
 604     raise errors.OpPrereqError("Instance %s's node groups changed since"
 605                                " locks were acquired, current groups are"
 606                                " are '%s', owning groups '%s'; retry the"
 607                                " operation" %
 608                                (instance_name,
 609                                 utils.CommaJoin(inst_groups),
 610                                 utils.CommaJoin(owned_groups)),
 611                                errors.ECODE_STATE)
 612
 613   return inst_groups
 614
 615
 616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
 617   """Checks if the instances in a node group are still correct.
 618
 619   @type cfg: L{config.ConfigWriter}
 620   @param cfg: The cluster configuration
 621   @type group_uuid: string
 622   @param group_uuid: Node group UUID
 623   @type owned_instances: set or frozenset
 624   @param owned_instances: List of currently owned instances
 625
 626   """
 627   wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
 628   if owned_instances != wanted_instances:
 629     raise errors.OpPrereqError("Instances in node group '%s' changed since"
 630                                " locks were acquired, wanted '%s', have '%s';"
 631                                " retry the operation" %
 632                                (group_uuid,
 633                                 utils.CommaJoin(wanted_instances),
 634                                 utils.CommaJoin(owned_instances)),
 635                                errors.ECODE_STATE)
 636
 637   return wanted_instances
 638
 639
 640 def _SupportsOob(cfg, node):
 641   """Tells if node supports OOB.
 642
 643   @type cfg: L{config.ConfigWriter}
 644   @param cfg: The cluster configuration
 645   @type node: L{objects.Node}
 646   @param node: The node
 647   @return: The OOB script if supported or an empty string otherwise
 648
 649   """
 650   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 651
 652
 653 def _GetWantedNodes(lu, nodes):
 654   """Returns list of checked and expanded node names.
 655
 656   @type lu: L{LogicalUnit}
 657   @param lu: the logical unit on whose behalf we execute
 658   @type nodes: list
 659   @param nodes: list of node names or None for all nodes
 660   @rtype: list
 661   @return: the list of nodes, sorted
 662   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 663
 664   """
 665   if nodes:
 666     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 667
 668   return utils.NiceSort(lu.cfg.GetNodeList())
 669
 670
 671 def _GetWantedInstances(lu, instances):
 672   """Returns list of checked and expanded instance names.
 673
 674   @type lu: L{LogicalUnit}
 675   @param lu: the logical unit on whose behalf we execute
 676   @type instances: list
 677   @param instances: list of instance names or None for all instances
 678   @rtype: list
 679   @return: the list of instances, sorted
 680   @raise errors.OpPrereqError: if the instances parameter is wrong type
 681   @raise errors.OpPrereqError: if any of the passed instances is not found
 682
 683   """
 684   if instances:
 685     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 686   else:
 687     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 688   return wanted
 689
 690
 691 def _GetUpdatedParams(old_params, update_dict,
 692                       use_default=True, use_none=False):
 693   """Return the new version of a parameter dictionary.
 694
 695   @type old_params: dict
 696   @param old_params: old parameters
 697   @type update_dict: dict
 698   @param update_dict: dict containing new parameter values, or
 699       constants.VALUE_DEFAULT to reset the parameter to its default
 700       value
 701   @param use_default: boolean
 702   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 703       values as 'to be deleted' values
 704   @param use_none: boolean
 705   @type use_none: whether to recognise C{None} values as 'to be
 706       deleted' values
 707   @rtype: dict
 708   @return: the new parameter dictionary
 709
 710   """
 711   params_copy = copy.deepcopy(old_params)
 712   for key, val in update_dict.iteritems():
 713     if ((use_default and val == constants.VALUE_DEFAULT) or
 714         (use_none and val is None)):
 715       try:
 716         del params_copy[key]
 717       except KeyError:
 718         pass
 719     else:
 720       params_copy[key] = val
 721   return params_copy
 722
 723
 724 def _UpdateAndVerifySubDict(base, updates, type_check):
 725   """Updates and verifies a dict with sub dicts of the same type.
 726
 727   @param base: The dict with the old data
 728   @param updates: The dict with the new data
 729   @param type_check: Dict suitable to ForceDictType to verify correct types
 730   @returns: A new dict with updated and verified values
 731
 732   """
 733   def fn(old, value):
 734     new = _GetUpdatedParams(old, value)
 735     utils.ForceDictType(new, type_check)
 736     return new
 737
 738   ret = copy.deepcopy(base)
 739   ret.update(dict((key, fn(base.get(key, {}), value))
 740                   for key, value in updates.items()))
 741   return ret
 742
 743
 744 def _MergeAndVerifyHvState(op_input, obj_input):
 745   """Combines the hv state from an opcode with the one of the object
 746
 747   @param op_input: The input dict from the opcode
 748   @param obj_input: The input dict from the objects
 749   @return: The verified and updated dict
 750
 751   """
 752   if op_input:
 753     invalid_hvs = set(op_input) - constants.HYPER_TYPES
 754     if invalid_hvs:
 755       raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
 756                                  " %s" % utils.CommaJoin(invalid_hvs),
 757                                  errors.ECODE_INVAL)
 758     if obj_input is None:
 759       obj_input = {}
 760     type_check = constants.HVSTS_PARAMETER_TYPES
 761     return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
 762
 763   return None
 764
 765
 766 def _MergeAndVerifyDiskState(op_input, obj_input):
 767   """Combines the disk state from an opcode with the one of the object
 768
 769   @param op_input: The input dict from the opcode
 770   @param obj_input: The input dict from the objects
 771   @return: The verified and updated dict
 772   """
 773   if op_input:
 774     invalid_dst = set(op_input) - constants.DS_VALID_TYPES
 775     if invalid_dst:
 776       raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
 777                                  utils.CommaJoin(invalid_dst),
 778                                  errors.ECODE_INVAL)
 779     type_check = constants.DSS_PARAMETER_TYPES
 780     if obj_input is None:
 781       obj_input = {}
 782     return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
 783                                               type_check))
 784                 for key, value in op_input.items())
 785
 786   return None
 787
 788
 789 def _ReleaseLocks(lu, level, names=None, keep=None):
 790   """Releases locks owned by an LU.
 791
 792   @type lu: L{LogicalUnit}
 793   @param level: Lock level
 794   @type names: list or None
 795   @param names: Names of locks to release
 796   @type keep: list or None
 797   @param keep: Names of locks to retain
 798
 799   """
 800   assert not (keep is not None and names is not None), \
 801          "Only one of the 'names' and the 'keep' parameters can be given"
 802
 803   if names is not None:
 804     should_release = names.__contains__
 805   elif keep:
 806     should_release = lambda name: name not in keep
 807   else:
 808     should_release = None
 809
 810   owned = lu.owned_locks(level)
 811   if not owned:
 812     # Not owning any lock at this level, do nothing
 813     pass
 814
 815   elif should_release:
 816     retain = []
 817     release = []
 818
 819     # Determine which locks to release
 820     for name in owned:
 821       if should_release(name):
 822         release.append(name)
 823       else:
 824         retain.append(name)
 825
 826     assert len(lu.owned_locks(level)) == (len(retain) + len(release))
 827
 828     # Release just some locks
 829     lu.glm.release(level, names=release)
 830
 831     assert frozenset(lu.owned_locks(level)) == frozenset(retain)
 832   else:
 833     # Release everything
 834     lu.glm.release(level)
 835
 836     assert not lu.glm.is_owned(level), "No locks should be owned"
 837
 838
 839 def _MapInstanceDisksToNodes(instances):
 840   """Creates a map from (node, volume) to instance name.
 841
 842   @type instances: list of L{objects.Instance}
 843   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 844
 845   """
 846   return dict(((node, vol), inst.name)
 847               for inst in instances
 848               for (node, vols) in inst.MapLVsByNode().items()
 849               for vol in vols)
 850
 851
 852 def _RunPostHook(lu, node_name):
 853   """Runs the post-hook for an opcode on a single node.
 854
 855   """
 856   hm = lu.proc.BuildHooksManager(lu)
 857   try:
 858     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 859   except:
 860     # pylint: disable=W0702
 861     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 862
 863
 864 def _CheckOutputFields(static, dynamic, selected):
 865   """Checks whether all selected fields are valid.
 866
 867   @type static: L{utils.FieldSet}
 868   @param static: static fields set
 869   @type dynamic: L{utils.FieldSet}
 870   @param dynamic: dynamic fields set
 871
 872   """
 873   f = utils.FieldSet()
 874   f.Extend(static)
 875   f.Extend(dynamic)
 876
 877   delta = f.NonMatching(selected)
 878   if delta:
 879     raise errors.OpPrereqError("Unknown output fields selected: %s"
 880                                % ",".join(delta), errors.ECODE_INVAL)
 881
 882
 883 def _CheckGlobalHvParams(params):
 884   """Validates that given hypervisor params are not global ones.
 885
 886   This will ensure that instances don't get customised versions of
 887   global params.
 888
 889   """
 890   used_globals = constants.HVC_GLOBALS.intersection(params)
 891   if used_globals:
 892     msg = ("The following hypervisor parameters are global and cannot"
 893            " be customized at instance level, please modify them at"
 894            " cluster level: %s" % utils.CommaJoin(used_globals))
 895     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 896
 897
 898 def _CheckNodeOnline(lu, node, msg=None):
 899   """Ensure that a given node is online.
 900
 901   @param lu: the LU on behalf of which we make the check
 902   @param node: the node to check
 903   @param msg: if passed, should be a message to replace the default one
 904   @raise errors.OpPrereqError: if the node is offline
 905
 906   """
 907   if msg is None:
 908     msg = "Can't use offline node"
 909   if lu.cfg.GetNodeInfo(node).offline:
 910     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 911
 912
 913 def _CheckNodeNotDrained(lu, node):
 914   """Ensure that a given node is not drained.
 915
 916   @param lu: the LU on behalf of which we make the check
 917   @param node: the node to check
 918   @raise errors.OpPrereqError: if the node is drained
 919
 920   """
 921   if lu.cfg.GetNodeInfo(node).drained:
 922     raise errors.OpPrereqError("Can't use drained node %s" % node,
 923                                errors.ECODE_STATE)
 924
 925
 926 def _CheckNodeVmCapable(lu, node):
 927   """Ensure that a given node is vm capable.
 928
 929   @param lu: the LU on behalf of which we make the check
 930   @param node: the node to check
 931   @raise errors.OpPrereqError: if the node is not vm capable
 932
 933   """
 934   if not lu.cfg.GetNodeInfo(node).vm_capable:
 935     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 936                                errors.ECODE_STATE)
 937
 938
 939 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 940   """Ensure that a node supports a given OS.
 941
 942   @param lu: the LU on behalf of which we make the check
 943   @param node: the node to check
 944   @param os_name: the OS to query about
 945   @param force_variant: whether to ignore variant errors
 946   @raise errors.OpPrereqError: if the node is not supporting the OS
 947
 948   """
 949   result = lu.rpc.call_os_get(node, os_name)
 950   result.Raise("OS '%s' not in supported OS list for node %s" %
 951                (os_name, node),
 952                prereq=True, ecode=errors.ECODE_INVAL)
 953   if not force_variant:
 954     _CheckOSVariant(result.payload, os_name)
 955
 956
 957 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 958   """Ensure that a node has the given secondary ip.
 959
 960   @type lu: L{LogicalUnit}
 961   @param lu: the LU on behalf of which we make the check
 962   @type node: string
 963   @param node: the node to check
 964   @type secondary_ip: string
 965   @param secondary_ip: the ip to check
 966   @type prereq: boolean
 967   @param prereq: whether to throw a prerequisite or an execute error
 968   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 969   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 970
 971   """
 972   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 973   result.Raise("Failure checking secondary ip on node %s" % node,
 974                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 975   if not result.payload:
 976     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 977            " please fix and re-run this command" % secondary_ip)
 978     if prereq:
 979       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 980     else:
 981       raise errors.OpExecError(msg)
 982
 983
 984 def _GetClusterDomainSecret():
 985   """Reads the cluster domain secret.
 986
 987   """
 988   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 989                                strict=True)
 990
 991
 992 def _CheckInstanceState(lu, instance, req_states, msg=None):
 993   """Ensure that an instance is in one of the required states.
 994
 995   @param lu: the LU on behalf of which we make the check
 996   @param instance: the instance to check
 997   @param msg: if passed, should be a message to replace the default one
 998   @raise errors.OpPrereqError: if the instance is not in the required state
 999
1000   """
1001   if msg is None:
1002     msg = "can't use instance from outside %s states" % ", ".join(req_states)
1003   if instance.admin_state not in req_states:
1004     raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
1005                                (instance, instance.admin_state, msg),
1006                                errors.ECODE_STATE)
1007
1008   if constants.ADMINST_UP not in req_states:
1009     pnode = instance.primary_node
1010     ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1011     ins_l.Raise("Can't contact node %s for instance information" % pnode,
1012                 prereq=True, ecode=errors.ECODE_ENVIRON)
1013
1014     if instance.name in ins_l.payload:
1015       raise errors.OpPrereqError("Instance %s is running, %s" %
1016                                  (instance.name, msg), errors.ECODE_STATE)
1017
1018
1019 def _ExpandItemName(fn, name, kind):
1020   """Expand an item name.
1021
1022   @param fn: the function to use for expansion
1023   @param name: requested item name
1024   @param kind: text description ('Node' or 'Instance')
1025   @return: the resolved (full) name
1026   @raise errors.OpPrereqError: if the item is not found
1027
1028   """
1029   full_name = fn(name)
1030   if full_name is None:
1031     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1032                                errors.ECODE_NOENT)
1033   return full_name
1034
1035
1036 def _ExpandNodeName(cfg, name):
1037   """Wrapper over L{_ExpandItemName} for nodes."""
1038   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1039
1040
1041 def _ExpandInstanceName(cfg, name):
1042   """Wrapper over L{_ExpandItemName} for instance."""
1043   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1044
1045
1046 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1047                           minmem, maxmem, vcpus, nics, disk_template, disks,
1048                           bep, hvp, hypervisor_name, tags):
1049   """Builds instance related env variables for hooks
1050
1051   This builds the hook environment from individual variables.
1052
1053   @type name: string
1054   @param name: the name of the instance
1055   @type primary_node: string
1056   @param primary_node: the name of the instance's primary node
1057   @type secondary_nodes: list
1058   @param secondary_nodes: list of secondary nodes as strings
1059   @type os_type: string
1060   @param os_type: the name of the instance's OS
1061   @type status: string
1062   @param status: the desired status of the instance
1063   @type minmem: string
1064   @param minmem: the minimum memory size of the instance
1065   @type maxmem: string
1066   @param maxmem: the maximum memory size of the instance
1067   @type vcpus: string
1068   @param vcpus: the count of VCPUs the instance has
1069   @type nics: list
1070   @param nics: list of tuples (ip, mac, mode, link) representing
1071       the NICs the instance has
1072   @type disk_template: string
1073   @param disk_template: the disk template of the instance
1074   @type disks: list
1075   @param disks: the list of (size, mode) pairs
1076   @type bep: dict
1077   @param bep: the backend parameters for the instance
1078   @type hvp: dict
1079   @param hvp: the hypervisor parameters for the instance
1080   @type hypervisor_name: string
1081   @param hypervisor_name: the hypervisor for the instance
1082   @type tags: list
1083   @param tags: list of instance tags as strings
1084   @rtype: dict
1085   @return: the hook environment for this instance
1086
1087   """
1088   env = {
1089     "OP_TARGET": name,
1090     "INSTANCE_NAME": name,
1091     "INSTANCE_PRIMARY": primary_node,
1092     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1093     "INSTANCE_OS_TYPE": os_type,
1094     "INSTANCE_STATUS": status,
1095     "INSTANCE_MINMEM": minmem,
1096     "INSTANCE_MAXMEM": maxmem,
1097     # TODO(2.7) remove deprecated "memory" value
1098     "INSTANCE_MEMORY": maxmem,
1099     "INSTANCE_VCPUS": vcpus,
1100     "INSTANCE_DISK_TEMPLATE": disk_template,
1101     "INSTANCE_HYPERVISOR": hypervisor_name,
1102   }
1103   if nics:
1104     nic_count = len(nics)
1105     for idx, (ip, mac, mode, link) in enumerate(nics):
1106       if ip is None:
1107         ip = ""
1108       env["INSTANCE_NIC%d_IP" % idx] = ip
1109       env["INSTANCE_NIC%d_MAC" % idx] = mac
1110       env["INSTANCE_NIC%d_MODE" % idx] = mode
1111       env["INSTANCE_NIC%d_LINK" % idx] = link
1112       if mode == constants.NIC_MODE_BRIDGED:
1113         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1114   else:
1115     nic_count = 0
1116
1117   env["INSTANCE_NIC_COUNT"] = nic_count
1118
1119   if disks:
1120     disk_count = len(disks)
1121     for idx, (size, mode) in enumerate(disks):
1122       env["INSTANCE_DISK%d_SIZE" % idx] = size
1123       env["INSTANCE_DISK%d_MODE" % idx] = mode
1124   else:
1125     disk_count = 0
1126
1127   env["INSTANCE_DISK_COUNT"] = disk_count
1128
1129   if not tags:
1130     tags = []
1131
1132   env["INSTANCE_TAGS"] = " ".join(tags)
1133
1134   for source, kind in [(bep, "BE"), (hvp, "HV")]:
1135     for key, value in source.items():
1136       env["INSTANCE_%s_%s" % (kind, key)] = value
1137
1138   return env
1139
1140
1141 def _NICListToTuple(lu, nics):
1142   """Build a list of nic information tuples.
1143
1144   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1145   value in LUInstanceQueryData.
1146
1147   @type lu:  L{LogicalUnit}
1148   @param lu: the logical unit on whose behalf we execute
1149   @type nics: list of L{objects.NIC}
1150   @param nics: list of nics to convert to hooks tuples
1151
1152   """
1153   hooks_nics = []
1154   cluster = lu.cfg.GetClusterInfo()
1155   for nic in nics:
1156     ip = nic.ip
1157     mac = nic.mac
1158     filled_params = cluster.SimpleFillNIC(nic.nicparams)
1159     mode = filled_params[constants.NIC_MODE]
1160     link = filled_params[constants.NIC_LINK]
1161     hooks_nics.append((ip, mac, mode, link))
1162   return hooks_nics
1163
1164
1165 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1166   """Builds instance related env variables for hooks from an object.
1167
1168   @type lu: L{LogicalUnit}
1169   @param lu: the logical unit on whose behalf we execute
1170   @type instance: L{objects.Instance}
1171   @param instance: the instance for which we should build the
1172       environment
1173   @type override: dict
1174   @param override: dictionary with key/values that will override
1175       our values
1176   @rtype: dict
1177   @return: the hook environment dictionary
1178
1179   """
1180   cluster = lu.cfg.GetClusterInfo()
1181   bep = cluster.FillBE(instance)
1182   hvp = cluster.FillHV(instance)
1183   args = {
1184     "name": instance.name,
1185     "primary_node": instance.primary_node,
1186     "secondary_nodes": instance.secondary_nodes,
1187     "os_type": instance.os,
1188     "status": instance.admin_state,
1189     "maxmem": bep[constants.BE_MAXMEM],
1190     "minmem": bep[constants.BE_MINMEM],
1191     "vcpus": bep[constants.BE_VCPUS],
1192     "nics": _NICListToTuple(lu, instance.nics),
1193     "disk_template": instance.disk_template,
1194     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1195     "bep": bep,
1196     "hvp": hvp,
1197     "hypervisor_name": instance.hypervisor,
1198     "tags": instance.tags,
1199   }
1200   if override:
1201     args.update(override)
1202   return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1203
1204
1205 def _AdjustCandidatePool(lu, exceptions):
1206   """Adjust the candidate pool after node operations.
1207
1208   """
1209   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1210   if mod_list:
1211     lu.LogInfo("Promoted nodes to master candidate role: %s",
1212                utils.CommaJoin(node.name for node in mod_list))
1213     for name in mod_list:
1214       lu.context.ReaddNode(name)
1215   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1216   if mc_now > mc_max:
1217     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1218                (mc_now, mc_max))
1219
1220
1221 def _DecideSelfPromotion(lu, exceptions=None):
1222   """Decide whether I should promote myself as a master candidate.
1223
1224   """
1225   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1226   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1227   # the new node will increase mc_max with one, so:
1228   mc_should = min(mc_should + 1, cp_size)
1229   return mc_now < mc_should
1230
1231
1232 def _CalculateGroupIPolicy(cfg, group):
1233   """Calculate instance policy for group.
1234
1235   """
1236   cluster = cfg.GetClusterInfo()
1237   return cluster.SimpleFillIPolicy(group.ipolicy)
1238
1239
1240 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1241   """Check that the brigdes needed by a list of nics exist.
1242
1243   """
1244   cluster = lu.cfg.GetClusterInfo()
1245   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1246   brlist = [params[constants.NIC_LINK] for params in paramslist
1247             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1248   if brlist:
1249     result = lu.rpc.call_bridges_exist(target_node, brlist)
1250     result.Raise("Error checking bridges on destination node '%s'" %
1251                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1252
1253
1254 def _CheckInstanceBridgesExist(lu, instance, node=None):
1255   """Check that the brigdes needed by an instance exist.
1256
1257   """
1258   if node is None:
1259     node = instance.primary_node
1260   _CheckNicsBridgesExist(lu, instance.nics, node)
1261
1262
1263 def _CheckOSVariant(os_obj, name):
1264   """Check whether an OS name conforms to the os variants specification.
1265
1266   @type os_obj: L{objects.OS}
1267   @param os_obj: OS object to check
1268   @type name: string
1269   @param name: OS name passed by the user, to check for validity
1270
1271   """
1272   variant = objects.OS.GetVariant(name)
1273   if not os_obj.supported_variants:
1274     if variant:
1275       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1276                                  " passed)" % (os_obj.name, variant),
1277                                  errors.ECODE_INVAL)
1278     return
1279   if not variant:
1280     raise errors.OpPrereqError("OS name must include a variant",
1281                                errors.ECODE_INVAL)
1282
1283   if variant not in os_obj.supported_variants:
1284     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1285
1286
1287 def _GetNodeInstancesInner(cfg, fn):
1288   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1289
1290
1291 def _GetNodeInstances(cfg, node_name):
1292   """Returns a list of all primary and secondary instances on a node.
1293
1294   """
1295
1296   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1297
1298
1299 def _GetNodePrimaryInstances(cfg, node_name):
1300   """Returns primary instances on a node.
1301
1302   """
1303   return _GetNodeInstancesInner(cfg,
1304                                 lambda inst: node_name == inst.primary_node)
1305
1306
1307 def _GetNodeSecondaryInstances(cfg, node_name):
1308   """Returns secondary instances on a node.
1309
1310   """
1311   return _GetNodeInstancesInner(cfg,
1312                                 lambda inst: node_name in inst.secondary_nodes)
1313
1314
1315 def _GetStorageTypeArgs(cfg, storage_type):
1316   """Returns the arguments for a storage type.
1317
1318   """
1319   # Special case for file storage
1320   if storage_type == constants.ST_FILE:
1321     # storage.FileStorage wants a list of storage directories
1322     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1323
1324   return []
1325
1326
1327 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1328   faulty = []
1329
1330   for dev in instance.disks:
1331     cfg.SetDiskID(dev, node_name)
1332
1333   result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1334   result.Raise("Failed to get disk status from node %s" % node_name,
1335                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1336
1337   for idx, bdev_status in enumerate(result.payload):
1338     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1339       faulty.append(idx)
1340
1341   return faulty
1342
1343
1344 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1345   """Check the sanity of iallocator and node arguments and use the
1346   cluster-wide iallocator if appropriate.
1347
1348   Check that at most one of (iallocator, node) is specified. If none is
1349   specified, then the LU's opcode's iallocator slot is filled with the
1350   cluster-wide default iallocator.
1351
1352   @type iallocator_slot: string
1353   @param iallocator_slot: the name of the opcode iallocator slot
1354   @type node_slot: string
1355   @param node_slot: the name of the opcode target node slot
1356
1357   """
1358   node = getattr(lu.op, node_slot, None)
1359   iallocator = getattr(lu.op, iallocator_slot, None)
1360
1361   if node is not None and iallocator is not None:
1362     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1363                                errors.ECODE_INVAL)
1364   elif node is None and iallocator is None:
1365     default_iallocator = lu.cfg.GetDefaultIAllocator()
1366     if default_iallocator:
1367       setattr(lu.op, iallocator_slot, default_iallocator)
1368     else:
1369       raise errors.OpPrereqError("No iallocator or node given and no"
1370                                  " cluster-wide default iallocator found;"
1371                                  " please specify either an iallocator or a"
1372                                  " node, or set a cluster-wide default"
1373                                  " iallocator")
1374
1375
1376 def _GetDefaultIAllocator(cfg, iallocator):
1377   """Decides on which iallocator to use.
1378
1379   @type cfg: L{config.ConfigWriter}
1380   @param cfg: Cluster configuration object
1381   @type iallocator: string or None
1382   @param iallocator: Iallocator specified in opcode
1383   @rtype: string
1384   @return: Iallocator name
1385
1386   """
1387   if not iallocator:
1388     # Use default iallocator
1389     iallocator = cfg.GetDefaultIAllocator()
1390
1391   if not iallocator:
1392     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1393                                " opcode nor as a cluster-wide default",
1394                                errors.ECODE_INVAL)
1395
1396   return iallocator
1397
1398
1399 class LUClusterPostInit(LogicalUnit):
1400   """Logical unit for running hooks after cluster initialization.
1401
1402   """
1403   HPATH = "cluster-init"
1404   HTYPE = constants.HTYPE_CLUSTER
1405
1406   def BuildHooksEnv(self):
1407     """Build hooks env.
1408
1409     """
1410     return {
1411       "OP_TARGET": self.cfg.GetClusterName(),
1412       }
1413
1414   def BuildHooksNodes(self):
1415     """Build hooks nodes.
1416
1417     """
1418     return ([], [self.cfg.GetMasterNode()])
1419
1420   def Exec(self, feedback_fn):
1421     """Nothing to do.
1422
1423     """
1424     return True
1425
1426
1427 class LUClusterDestroy(LogicalUnit):
1428   """Logical unit for destroying the cluster.
1429
1430   """
1431   HPATH = "cluster-destroy"
1432   HTYPE = constants.HTYPE_CLUSTER
1433
1434   def BuildHooksEnv(self):
1435     """Build hooks env.
1436
1437     """
1438     return {
1439       "OP_TARGET": self.cfg.GetClusterName(),
1440       }
1441
1442   def BuildHooksNodes(self):
1443     """Build hooks nodes.
1444
1445     """
1446     return ([], [])
1447
1448   def CheckPrereq(self):
1449     """Check prerequisites.
1450
1451     This checks whether the cluster is empty.
1452
1453     Any errors are signaled by raising errors.OpPrereqError.
1454
1455     """
1456     master = self.cfg.GetMasterNode()
1457
1458     nodelist = self.cfg.GetNodeList()
1459     if len(nodelist) != 1 or nodelist[0] != master:
1460       raise errors.OpPrereqError("There are still %d node(s) in"
1461                                  " this cluster." % (len(nodelist) - 1),
1462                                  errors.ECODE_INVAL)
1463     instancelist = self.cfg.GetInstanceList()
1464     if instancelist:
1465       raise errors.OpPrereqError("There are still %d instance(s) in"
1466                                  " this cluster." % len(instancelist),
1467                                  errors.ECODE_INVAL)
1468
1469   def Exec(self, feedback_fn):
1470     """Destroys the cluster.
1471
1472     """
1473     master_params = self.cfg.GetMasterNetworkParameters()
1474
1475     # Run post hooks on master node before it's removed
1476     _RunPostHook(self, master_params.name)
1477
1478     ems = self.cfg.GetUseExternalMipScript()
1479     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1480                                                      master_params, ems)
1481     result.Raise("Could not disable the master role")
1482
1483     return master_params.name
1484
1485
1486 def _VerifyCertificate(filename):
1487   """Verifies a certificate for L{LUClusterVerifyConfig}.
1488
1489   @type filename: string
1490   @param filename: Path to PEM file
1491
1492   """
1493   try:
1494     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1495                                            utils.ReadFile(filename))
1496   except Exception, err: # pylint: disable=W0703
1497     return (LUClusterVerifyConfig.ETYPE_ERROR,
1498             "Failed to load X509 certificate %s: %s" % (filename, err))
1499
1500   (errcode, msg) = \
1501     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1502                                 constants.SSL_CERT_EXPIRATION_ERROR)
1503
1504   if msg:
1505     fnamemsg = "While verifying %s: %s" % (filename, msg)
1506   else:
1507     fnamemsg = None
1508
1509   if errcode is None:
1510     return (None, fnamemsg)
1511   elif errcode == utils.CERT_WARNING:
1512     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1513   elif errcode == utils.CERT_ERROR:
1514     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1515
1516   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1517
1518
1519 def _GetAllHypervisorParameters(cluster, instances):
1520   """Compute the set of all hypervisor parameters.
1521
1522   @type cluster: L{objects.Cluster}
1523   @param cluster: the cluster object
1524   @param instances: list of L{objects.Instance}
1525   @param instances: additional instances from which to obtain parameters
1526   @rtype: list of (origin, hypervisor, parameters)
1527   @return: a list with all parameters found, indicating the hypervisor they
1528        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1529
1530   """
1531   hvp_data = []
1532
1533   for hv_name in cluster.enabled_hypervisors:
1534     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1535
1536   for os_name, os_hvp in cluster.os_hvp.items():
1537     for hv_name, hv_params in os_hvp.items():
1538       if hv_params:
1539         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1540         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1541
1542   # TODO: collapse identical parameter values in a single one
1543   for instance in instances:
1544     if instance.hvparams:
1545       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1546                        cluster.FillHV(instance)))
1547
1548   return hvp_data
1549
1550
1551 class _VerifyErrors(object):
1552   """Mix-in for cluster/group verify LUs.
1553
1554   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1555   self.op and self._feedback_fn to be available.)
1556
1557   """
1558
1559   ETYPE_FIELD = "code"
1560   ETYPE_ERROR = "ERROR"
1561   ETYPE_WARNING = "WARNING"
1562
1563   def _Error(self, ecode, item, msg, *args, **kwargs):
1564     """Format an error message.
1565
1566     Based on the opcode's error_codes parameter, either format a
1567     parseable error code, or a simpler error string.
1568
1569     This must be called only from Exec and functions called from Exec.
1570
1571     """
1572     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1573     itype, etxt, _ = ecode
1574     # first complete the msg
1575     if args:
1576       msg = msg % args
1577     # then format the whole message
1578     if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1579       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1580     else:
1581       if item:
1582         item = " " + item
1583       else:
1584         item = ""
1585       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1586     # and finally report it via the feedback_fn
1587     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1588
1589   def _ErrorIf(self, cond, ecode, *args, **kwargs):
1590     """Log an error message if the passed condition is True.
1591
1592     """
1593     cond = (bool(cond)
1594             or self.op.debug_simulate_errors) # pylint: disable=E1101
1595
1596     # If the error code is in the list of ignored errors, demote the error to a
1597     # warning
1598     (_, etxt, _) = ecode
1599     if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1600       kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1601
1602     if cond:
1603       self._Error(ecode, *args, **kwargs)
1604
1605     # do not mark the operation as failed for WARN cases only
1606     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1607       self.bad = self.bad or cond
1608
1609
1610 class LUClusterVerify(NoHooksLU):
1611   """Submits all jobs necessary to verify the cluster.
1612
1613   """
1614   REQ_BGL = False
1615
1616   def ExpandNames(self):
1617     self.needed_locks = {}
1618
1619   def Exec(self, feedback_fn):
1620     jobs = []
1621
1622     if self.op.group_name:
1623       groups = [self.op.group_name]
1624       depends_fn = lambda: None
1625     else:
1626       groups = self.cfg.GetNodeGroupList()
1627
1628       # Verify global configuration
1629       jobs.append([
1630         opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1631         ])
1632
1633       # Always depend on global verification
1634       depends_fn = lambda: [(-len(jobs), [])]
1635
1636     jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1637                                             ignore_errors=self.op.ignore_errors,
1638                                             depends=depends_fn())]
1639                 for group in groups)
1640
1641     # Fix up all parameters
1642     for op in itertools.chain(*jobs): # pylint: disable=W0142
1643       op.debug_simulate_errors = self.op.debug_simulate_errors
1644       op.verbose = self.op.verbose
1645       op.error_codes = self.op.error_codes
1646       try:
1647         op.skip_checks = self.op.skip_checks
1648       except AttributeError:
1649         assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1650
1651     return ResultWithJobs(jobs)
1652
1653
1654 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1655   """Verifies the cluster config.
1656
1657   """
1658   REQ_BGL = True
1659
1660   def _VerifyHVP(self, hvp_data):
1661     """Verifies locally the syntax of the hypervisor parameters.
1662
1663     """
1664     for item, hv_name, hv_params in hvp_data:
1665       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1666              (item, hv_name))
1667       try:
1668         hv_class = hypervisor.GetHypervisor(hv_name)
1669         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1670         hv_class.CheckParameterSyntax(hv_params)
1671       except errors.GenericError, err:
1672         self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1673
1674   def ExpandNames(self):
1675     # Information can be safely retrieved as the BGL is acquired in exclusive
1676     # mode
1677     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1678     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1679     self.all_node_info = self.cfg.GetAllNodesInfo()
1680     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1681     self.needed_locks = {}
1682
1683   def Exec(self, feedback_fn):
1684     """Verify integrity of cluster, performing various test on nodes.
1685
1686     """
1687     self.bad = False
1688     self._feedback_fn = feedback_fn
1689
1690     feedback_fn("* Verifying cluster config")
1691
1692     for msg in self.cfg.VerifyConfig():
1693       self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1694
1695     feedback_fn("* Verifying cluster certificate files")
1696
1697     for cert_filename in constants.ALL_CERT_FILES:
1698       (errcode, msg) = _VerifyCertificate(cert_filename)
1699       self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1700
1701     feedback_fn("* Verifying hypervisor parameters")
1702
1703     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1704                                                 self.all_inst_info.values()))
1705
1706     feedback_fn("* Verifying all nodes belong to an existing group")
1707
1708     # We do this verification here because, should this bogus circumstance
1709     # occur, it would never be caught by VerifyGroup, which only acts on
1710     # nodes/instances reachable from existing node groups.
1711
1712     dangling_nodes = set(node.name for node in self.all_node_info.values()
1713                          if node.group not in self.all_group_info)
1714
1715     dangling_instances = {}
1716     no_node_instances = []
1717
1718     for inst in self.all_inst_info.values():
1719       if inst.primary_node in dangling_nodes:
1720         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1721       elif inst.primary_node not in self.all_node_info:
1722         no_node_instances.append(inst.name)
1723
1724     pretty_dangling = [
1725         "%s (%s)" %
1726         (node.name,
1727          utils.CommaJoin(dangling_instances.get(node.name,
1728                                                 ["no instances"])))
1729         for node in dangling_nodes]
1730
1731     self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1732                   None,
1733                   "the following nodes (and their instances) belong to a non"
1734                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1735
1736     self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1737                   None,
1738                   "the following instances have a non-existing primary-node:"
1739                   " %s", utils.CommaJoin(no_node_instances))
1740
1741     return not self.bad
1742
1743
1744 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1745   """Verifies the status of a node group.
1746
1747   """
1748   HPATH = "cluster-verify"
1749   HTYPE = constants.HTYPE_CLUSTER
1750   REQ_BGL = False
1751
1752   _HOOKS_INDENT_RE = re.compile("^", re.M)
1753
1754   class NodeImage(object):
1755     """A class representing the logical and physical status of a node.
1756
1757     @type name: string
1758     @ivar name: the node name to which this object refers
1759     @ivar volumes: a structure as returned from
1760         L{ganeti.backend.GetVolumeList} (runtime)
1761     @ivar instances: a list of running instances (runtime)
1762     @ivar pinst: list of configured primary instances (config)
1763     @ivar sinst: list of configured secondary instances (config)
1764     @ivar sbp: dictionary of {primary-node: list of instances} for all
1765         instances for which this node is secondary (config)
1766     @ivar mfree: free memory, as reported by hypervisor (runtime)
1767     @ivar dfree: free disk, as reported by the node (runtime)
1768     @ivar offline: the offline status (config)
1769     @type rpc_fail: boolean
1770     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1771         not whether the individual keys were correct) (runtime)
1772     @type lvm_fail: boolean
1773     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1774     @type hyp_fail: boolean
1775     @ivar hyp_fail: whether the RPC call didn't return the instance list
1776     @type ghost: boolean
1777     @ivar ghost: whether this is a known node or not (config)
1778     @type os_fail: boolean
1779     @ivar os_fail: whether the RPC call didn't return valid OS data
1780     @type oslist: list
1781     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1782     @type vm_capable: boolean
1783     @ivar vm_capable: whether the node can host instances
1784
1785     """
1786     def __init__(self, offline=False, name=None, vm_capable=True):
1787       self.name = name
1788       self.volumes = {}
1789       self.instances = []
1790       self.pinst = []
1791       self.sinst = []
1792       self.sbp = {}
1793       self.mfree = 0
1794       self.dfree = 0
1795       self.offline = offline
1796       self.vm_capable = vm_capable
1797       self.rpc_fail = False
1798       self.lvm_fail = False
1799       self.hyp_fail = False
1800       self.ghost = False
1801       self.os_fail = False
1802       self.oslist = {}
1803
1804   def ExpandNames(self):
1805     # This raises errors.OpPrereqError on its own:
1806     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1807
1808     # Get instances in node group; this is unsafe and needs verification later
1809     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1810
1811     self.needed_locks = {
1812       locking.LEVEL_INSTANCE: inst_names,
1813       locking.LEVEL_NODEGROUP: [self.group_uuid],
1814       locking.LEVEL_NODE: [],
1815       }
1816
1817     self.share_locks = _ShareAll()
1818
1819   def DeclareLocks(self, level):
1820     if level == locking.LEVEL_NODE:
1821       # Get members of node group; this is unsafe and needs verification later
1822       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1823
1824       all_inst_info = self.cfg.GetAllInstancesInfo()
1825
1826       # In Exec(), we warn about mirrored instances that have primary and
1827       # secondary living in separate node groups. To fully verify that
1828       # volumes for these instances are healthy, we will need to do an
1829       # extra call to their secondaries. We ensure here those nodes will
1830       # be locked.
1831       for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1832         # Important: access only the instances whose lock is owned
1833         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1834           nodes.update(all_inst_info[inst].secondary_nodes)
1835
1836       self.needed_locks[locking.LEVEL_NODE] = nodes
1837
1838   def CheckPrereq(self):
1839     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1840     self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1841
1842     group_nodes = set(self.group_info.members)
1843     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1844
1845     unlocked_nodes = \
1846         group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1847
1848     unlocked_instances = \
1849         group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1850
1851     if unlocked_nodes:
1852       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1853                                  utils.CommaJoin(unlocked_nodes))
1854
1855     if unlocked_instances:
1856       raise errors.OpPrereqError("Missing lock for instances: %s" %
1857                                  utils.CommaJoin(unlocked_instances))
1858
1859     self.all_node_info = self.cfg.GetAllNodesInfo()
1860     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1861
1862     self.my_node_names = utils.NiceSort(group_nodes)
1863     self.my_inst_names = utils.NiceSort(group_instances)
1864
1865     self.my_node_info = dict((name, self.all_node_info[name])
1866                              for name in self.my_node_names)
1867
1868     self.my_inst_info = dict((name, self.all_inst_info[name])
1869                              for name in self.my_inst_names)
1870
1871     # We detect here the nodes that will need the extra RPC calls for verifying
1872     # split LV volumes; they should be locked.
1873     extra_lv_nodes = set()
1874
1875     for inst in self.my_inst_info.values():
1876       if inst.disk_template in constants.DTS_INT_MIRROR:
1877         group = self.my_node_info[inst.primary_node].group
1878         for nname in inst.secondary_nodes:
1879           if self.all_node_info[nname].group != group:
1880             extra_lv_nodes.add(nname)
1881
1882     unlocked_lv_nodes = \
1883         extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1884
1885     if unlocked_lv_nodes:
1886       raise errors.OpPrereqError("these nodes could be locked: %s" %
1887                                  utils.CommaJoin(unlocked_lv_nodes))
1888     self.extra_lv_nodes = list(extra_lv_nodes)
1889
1890   def _VerifyNode(self, ninfo, nresult):
1891     """Perform some basic validation on data returned from a node.
1892
1893       - check the result data structure is well formed and has all the
1894         mandatory fields
1895       - check ganeti version
1896
1897     @type ninfo: L{objects.Node}
1898     @param ninfo: the node to check
1899     @param nresult: the results from the node
1900     @rtype: boolean
1901     @return: whether overall this call was successful (and we can expect
1902          reasonable values in the respose)
1903
1904     """
1905     node = ninfo.name
1906     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1907
1908     # main result, nresult should be a non-empty dict
1909     test = not nresult or not isinstance(nresult, dict)
1910     _ErrorIf(test, constants.CV_ENODERPC, node,
1911                   "unable to verify node: no data returned")
1912     if test:
1913       return False
1914
1915     # compares ganeti version
1916     local_version = constants.PROTOCOL_VERSION
1917     remote_version = nresult.get("version", None)
1918     test = not (remote_version and
1919                 isinstance(remote_version, (list, tuple)) and
1920                 len(remote_version) == 2)
1921     _ErrorIf(test, constants.CV_ENODERPC, node,
1922              "connection to node returned invalid data")
1923     if test:
1924       return False
1925
1926     test = local_version != remote_version[0]
1927     _ErrorIf(test, constants.CV_ENODEVERSION, node,
1928              "incompatible protocol versions: master %s,"
1929              " node %s", local_version, remote_version[0])
1930     if test:
1931       return False
1932
1933     # node seems compatible, we can actually try to look into its results
1934
1935     # full package version
1936     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1937                   constants.CV_ENODEVERSION, node,
1938                   "software version mismatch: master %s, node %s",
1939                   constants.RELEASE_VERSION, remote_version[1],
1940                   code=self.ETYPE_WARNING)
1941
1942     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1943     if ninfo.vm_capable and isinstance(hyp_result, dict):
1944       for hv_name, hv_result in hyp_result.iteritems():
1945         test = hv_result is not None
1946         _ErrorIf(test, constants.CV_ENODEHV, node,
1947                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1948
1949     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1950     if ninfo.vm_capable and isinstance(hvp_result, list):
1951       for item, hv_name, hv_result in hvp_result:
1952         _ErrorIf(True, constants.CV_ENODEHV, node,
1953                  "hypervisor %s parameter verify failure (source %s): %s",
1954                  hv_name, item, hv_result)
1955
1956     test = nresult.get(constants.NV_NODESETUP,
1957                        ["Missing NODESETUP results"])
1958     _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1959              "; ".join(test))
1960
1961     return True
1962
1963   def _VerifyNodeTime(self, ninfo, nresult,
1964                       nvinfo_starttime, nvinfo_endtime):
1965     """Check the node time.
1966
1967     @type ninfo: L{objects.Node}
1968     @param ninfo: the node to check
1969     @param nresult: the remote results for the node
1970     @param nvinfo_starttime: the start time of the RPC call
1971     @param nvinfo_endtime: the end time of the RPC call
1972
1973     """
1974     node = ninfo.name
1975     _ErrorIf = self._ErrorIf # pylint: disable=C0103
1976
1977     ntime = nresult.get(constants.NV_TIME, None)
1978     try:
1979       ntime_merged = utils.MergeTime(ntime)
1980     except (ValueError, TypeError):
1981       _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1982       return
1983
1984     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1985       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1986     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1987       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1988     else:
1989       ntime_diff = None
1990
1991     _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1992              "Node time diverges by at least %s from master node time",
1993              ntime_diff)
1994
1995   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1996     """Check the node LVM results.
1997
1998     @type ninfo: L{objects.Node}
1999     @param ninfo: the node to check
2000     @param nresult: the remote results for the node
2001     @param vg_name: the configured VG name
2002
2003     """
2004     if vg_name is None:
2005       return
2006
2007     node = ninfo.name
2008     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2009
2010     # checks vg existence and size > 20G
2011     vglist = nresult.get(constants.NV_VGLIST, None)
2012     test = not vglist
2013     _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2014     if not test:
2015       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2016                                             constants.MIN_VG_SIZE)
2017       _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2018
2019     # check pv names
2020     pvlist = nresult.get(constants.NV_PVLIST, None)
2021     test = pvlist is None
2022     _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2023     if not test:
2024       # check that ':' is not present in PV names, since it's a
2025       # special character for lvcreate (denotes the range of PEs to
2026       # use on the PV)
2027       for _, pvname, owner_vg in pvlist:
2028         test = ":" in pvname
2029         _ErrorIf(test, constants.CV_ENODELVM, node,
2030                  "Invalid character ':' in PV '%s' of VG '%s'",
2031                  pvname, owner_vg)
2032
2033   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2034     """Check the node bridges.
2035
2036     @type ninfo: L{objects.Node}
2037     @param ninfo: the node to check
2038     @param nresult: the remote results for the node
2039     @param bridges: the expected list of bridges
2040
2041     """
2042     if not bridges:
2043       return
2044
2045     node = ninfo.name
2046     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2047
2048     missing = nresult.get(constants.NV_BRIDGES, None)
2049     test = not isinstance(missing, list)
2050     _ErrorIf(test, constants.CV_ENODENET, node,
2051              "did not return valid bridge information")
2052     if not test:
2053       _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2054                "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2055
2056   def _VerifyNodeUserScripts(self, ninfo, nresult):
2057     """Check the results of user scripts presence and executability on the node
2058
2059     @type ninfo: L{objects.Node}
2060     @param ninfo: the node to check
2061     @param nresult: the remote results for the node
2062
2063     """
2064     node = ninfo.name
2065
2066     test = not constants.NV_USERSCRIPTS in nresult
2067     self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2068                   "did not return user scripts information")
2069
2070     broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2071     if not test:
2072       self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2073                     "user scripts not present or not executable: %s" %
2074                     utils.CommaJoin(sorted(broken_scripts)))
2075
2076   def _VerifyNodeNetwork(self, ninfo, nresult):
2077     """Check the node network connectivity results.
2078
2079     @type ninfo: L{objects.Node}
2080     @param ninfo: the node to check
2081     @param nresult: the remote results for the node
2082
2083     """
2084     node = ninfo.name
2085     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2086
2087     test = constants.NV_NODELIST not in nresult
2088     _ErrorIf(test, constants.CV_ENODESSH, node,
2089              "node hasn't returned node ssh connectivity data")
2090     if not test:
2091       if nresult[constants.NV_NODELIST]:
2092         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2093           _ErrorIf(True, constants.CV_ENODESSH, node,
2094                    "ssh communication with node '%s': %s", a_node, a_msg)
2095
2096     test = constants.NV_NODENETTEST not in nresult
2097     _ErrorIf(test, constants.CV_ENODENET, node,
2098              "node hasn't returned node tcp connectivity data")
2099     if not test:
2100       if nresult[constants.NV_NODENETTEST]:
2101         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2102         for anode in nlist:
2103           _ErrorIf(True, constants.CV_ENODENET, node,
2104                    "tcp communication with node '%s': %s",
2105                    anode, nresult[constants.NV_NODENETTEST][anode])
2106
2107     test = constants.NV_MASTERIP not in nresult
2108     _ErrorIf(test, constants.CV_ENODENET, node,
2109              "node hasn't returned node master IP reachability data")
2110     if not test:
2111       if not nresult[constants.NV_MASTERIP]:
2112         if node == self.master_node:
2113           msg = "the master node cannot reach the master IP (not configured?)"
2114         else:
2115           msg = "cannot reach the master IP"
2116         _ErrorIf(True, constants.CV_ENODENET, node, msg)
2117
2118   def _VerifyInstance(self, instance, instanceconfig, node_image,
2119                       diskstatus):
2120     """Verify an instance.
2121
2122     This function checks to see if the required block devices are
2123     available on the instance's node.
2124
2125     """
2126     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2127     node_current = instanceconfig.primary_node
2128
2129     node_vol_should = {}
2130     instanceconfig.MapLVsByNode(node_vol_should)
2131
2132     for node in node_vol_should:
2133       n_img = node_image[node]
2134       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2135         # ignore missing volumes on offline or broken nodes
2136         continue
2137       for volume in node_vol_should[node]:
2138         test = volume not in n_img.volumes
2139         _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2140                  "volume %s missing on node %s", volume, node)
2141
2142     if instanceconfig.admin_state == constants.ADMINST_UP:
2143       pri_img = node_image[node_current]
2144       test = instance not in pri_img.instances and not pri_img.offline
2145       _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2146                "instance not running on its primary node %s",
2147                node_current)
2148
2149     diskdata = [(nname, success, status, idx)
2150                 for (nname, disks) in diskstatus.items()
2151                 for idx, (success, status) in enumerate(disks)]
2152
2153     for nname, success, bdev_status, idx in diskdata:
2154       # the 'ghost node' construction in Exec() ensures that we have a
2155       # node here
2156       snode = node_image[nname]
2157       bad_snode = snode.ghost or snode.offline
2158       _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2159                not success and not bad_snode,
2160                constants.CV_EINSTANCEFAULTYDISK, instance,
2161                "couldn't retrieve status for disk/%s on %s: %s",
2162                idx, nname, bdev_status)
2163       _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2164                 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2165                constants.CV_EINSTANCEFAULTYDISK, instance,
2166                "disk/%s on %s is faulty", idx, nname)
2167
2168   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2169     """Verify if there are any unknown volumes in the cluster.
2170
2171     The .os, .swap and backup volumes are ignored. All other volumes are
2172     reported as unknown.
2173
2174     @type reserved: L{ganeti.utils.FieldSet}
2175     @param reserved: a FieldSet of reserved volume names
2176
2177     """
2178     for node, n_img in node_image.items():
2179       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2180         # skip non-healthy nodes
2181         continue
2182       for volume in n_img.volumes:
2183         test = ((node not in node_vol_should or
2184                 volume not in node_vol_should[node]) and
2185                 not reserved.Matches(volume))
2186         self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2187                       "volume %s is unknown", volume)
2188
2189   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2190     """Verify N+1 Memory Resilience.
2191
2192     Check that if one single node dies we can still start all the
2193     instances it was primary for.
2194
2195     """
2196     cluster_info = self.cfg.GetClusterInfo()
2197     for node, n_img in node_image.items():
2198       # This code checks that every node which is now listed as
2199       # secondary has enough memory to host all instances it is
2200       # supposed to should a single other node in the cluster fail.
2201       # FIXME: not ready for failover to an arbitrary node
2202       # FIXME: does not support file-backed instances
2203       # WARNING: we currently take into account down instances as well
2204       # as up ones, considering that even if they're down someone
2205       # might want to start them even in the event of a node failure.
2206       if n_img.offline:
2207         # we're skipping offline nodes from the N+1 warning, since
2208         # most likely we don't have good memory infromation from them;
2209         # we already list instances living on such nodes, and that's
2210         # enough warning
2211         continue
2212       #TODO(dynmem): use MINMEM for checking
2213       #TODO(dynmem): also consider ballooning out other instances
2214       for prinode, instances in n_img.sbp.items():
2215         needed_mem = 0
2216         for instance in instances:
2217           bep = cluster_info.FillBE(instance_cfg[instance])
2218           if bep[constants.BE_AUTO_BALANCE]:
2219             needed_mem += bep[constants.BE_MAXMEM]
2220         test = n_img.mfree < needed_mem
2221         self._ErrorIf(test, constants.CV_ENODEN1, node,
2222                       "not enough memory to accomodate instance failovers"
2223                       " should node %s fail (%dMiB needed, %dMiB available)",
2224                       prinode, needed_mem, n_img.mfree)
2225
2226   @classmethod
2227   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2228                    (files_all, files_opt, files_mc, files_vm)):
2229     """Verifies file checksums collected from all nodes.
2230
2231     @param errorif: Callback for reporting errors
2232     @param nodeinfo: List of L{objects.Node} objects
2233     @param master_node: Name of master node
2234     @param all_nvinfo: RPC results
2235
2236     """
2237     # Define functions determining which nodes to consider for a file
2238     files2nodefn = [
2239       (files_all, None),
2240       (files_mc, lambda node: (node.master_candidate or
2241                                node.name == master_node)),
2242       (files_vm, lambda node: node.vm_capable),
2243       ]
2244
2245     # Build mapping from filename to list of nodes which should have the file
2246     nodefiles = {}
2247     for (files, fn) in files2nodefn:
2248       if fn is None:
2249         filenodes = nodeinfo
2250       else:
2251         filenodes = filter(fn, nodeinfo)
2252       nodefiles.update((filename,
2253                         frozenset(map(operator.attrgetter("name"), filenodes)))
2254                        for filename in files)
2255
2256     assert set(nodefiles) == (files_all | files_mc | files_vm)
2257
2258     fileinfo = dict((filename, {}) for filename in nodefiles)
2259     ignore_nodes = set()
2260
2261     for node in nodeinfo:
2262       if node.offline:
2263         ignore_nodes.add(node.name)
2264         continue
2265
2266       nresult = all_nvinfo[node.name]
2267
2268       if nresult.fail_msg or not nresult.payload:
2269         node_files = None
2270       else:
2271         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2272
2273       test = not (node_files and isinstance(node_files, dict))
2274       errorif(test, constants.CV_ENODEFILECHECK, node.name,
2275               "Node did not return file checksum data")
2276       if test:
2277         ignore_nodes.add(node.name)
2278         continue
2279
2280       # Build per-checksum mapping from filename to nodes having it
2281       for (filename, checksum) in node_files.items():
2282         assert filename in nodefiles
2283         fileinfo[filename].setdefault(checksum, set()).add(node.name)
2284
2285     for (filename, checksums) in fileinfo.items():
2286       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2287
2288       # Nodes having the file
2289       with_file = frozenset(node_name
2290                             for nodes in fileinfo[filename].values()
2291                             for node_name in nodes) - ignore_nodes
2292
2293       expected_nodes = nodefiles[filename] - ignore_nodes
2294
2295       # Nodes missing file
2296       missing_file = expected_nodes - with_file
2297
2298       if filename in files_opt:
2299         # All or no nodes
2300         errorif(missing_file and missing_file != expected_nodes,
2301                 constants.CV_ECLUSTERFILECHECK, None,
2302                 "File %s is optional, but it must exist on all or no"
2303                 " nodes (not found on %s)",
2304                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2305       else:
2306         errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2307                 "File %s is missing from node(s) %s", filename,
2308                 utils.CommaJoin(utils.NiceSort(missing_file)))
2309
2310         # Warn if a node has a file it shouldn't
2311         unexpected = with_file - expected_nodes
2312         errorif(unexpected,
2313                 constants.CV_ECLUSTERFILECHECK, None,
2314                 "File %s should not exist on node(s) %s",
2315                 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2316
2317       # See if there are multiple versions of the file
2318       test = len(checksums) > 1
2319       if test:
2320         variants = ["variant %s on %s" %
2321                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2322                     for (idx, (checksum, nodes)) in
2323                       enumerate(sorted(checksums.items()))]
2324       else:
2325         variants = []
2326
2327       errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2328               "File %s found with %s different checksums (%s)",
2329               filename, len(checksums), "; ".join(variants))
2330
2331   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2332                       drbd_map):
2333     """Verifies and the node DRBD status.
2334
2335     @type ninfo: L{objects.Node}
2336     @param ninfo: the node to check
2337     @param nresult: the remote results for the node
2338     @param instanceinfo: the dict of instances
2339     @param drbd_helper: the configured DRBD usermode helper
2340     @param drbd_map: the DRBD map as returned by
2341         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2342
2343     """
2344     node = ninfo.name
2345     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2346
2347     if drbd_helper:
2348       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2349       test = (helper_result == None)
2350       _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2351                "no drbd usermode helper returned")
2352       if helper_result:
2353         status, payload = helper_result
2354         test = not status
2355         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2356                  "drbd usermode helper check unsuccessful: %s", payload)
2357         test = status and (payload != drbd_helper)
2358         _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2359                  "wrong drbd usermode helper: %s", payload)
2360
2361     # compute the DRBD minors
2362     node_drbd = {}
2363     for minor, instance in drbd_map[node].items():
2364       test = instance not in instanceinfo
2365       _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2366                "ghost instance '%s' in temporary DRBD map", instance)
2367         # ghost instance should not be running, but otherwise we
2368         # don't give double warnings (both ghost instance and
2369         # unallocated minor in use)
2370       if test:
2371         node_drbd[minor] = (instance, False)
2372       else:
2373         instance = instanceinfo[instance]
2374         node_drbd[minor] = (instance.name,
2375                             instance.admin_state == constants.ADMINST_UP)
2376
2377     # and now check them
2378     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2379     test = not isinstance(used_minors, (tuple, list))
2380     _ErrorIf(test, constants.CV_ENODEDRBD, node,
2381              "cannot parse drbd status file: %s", str(used_minors))
2382     if test:
2383       # we cannot check drbd status
2384       return
2385
2386     for minor, (iname, must_exist) in node_drbd.items():
2387       test = minor not in used_minors and must_exist
2388       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2389                "drbd minor %d of instance %s is not active", minor, iname)
2390     for minor in used_minors:
2391       test = minor not in node_drbd
2392       _ErrorIf(test, constants.CV_ENODEDRBD, node,
2393                "unallocated drbd minor %d is in use", minor)
2394
2395   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2396     """Builds the node OS structures.
2397
2398     @type ninfo: L{objects.Node}
2399     @param ninfo: the node to check
2400     @param nresult: the remote results for the node
2401     @param nimg: the node image object
2402
2403     """
2404     node = ninfo.name
2405     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2406
2407     remote_os = nresult.get(constants.NV_OSLIST, None)
2408     test = (not isinstance(remote_os, list) or
2409             not compat.all(isinstance(v, list) and len(v) == 7
2410                            for v in remote_os))
2411
2412     _ErrorIf(test, constants.CV_ENODEOS, node,
2413              "node hasn't returned valid OS data")
2414
2415     nimg.os_fail = test
2416
2417     if test:
2418       return
2419
2420     os_dict = {}
2421
2422     for (name, os_path, status, diagnose,
2423          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2424
2425       if name not in os_dict:
2426         os_dict[name] = []
2427
2428       # parameters is a list of lists instead of list of tuples due to
2429       # JSON lacking a real tuple type, fix it:
2430       parameters = [tuple(v) for v in parameters]
2431       os_dict[name].append((os_path, status, diagnose,
2432                             set(variants), set(parameters), set(api_ver)))
2433
2434     nimg.oslist = os_dict
2435
2436   def _VerifyNodeOS(self, ninfo, nimg, base):
2437     """Verifies the node OS list.
2438
2439     @type ninfo: L{objects.Node}
2440     @param ninfo: the node to check
2441     @param nimg: the node image object
2442     @param base: the 'template' node we match against (e.g. from the master)
2443
2444     """
2445     node = ninfo.name
2446     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2447
2448     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2449
2450     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2451     for os_name, os_data in nimg.oslist.items():
2452       assert os_data, "Empty OS status for OS %s?!" % os_name
2453       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2454       _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2455                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2456       _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2457                "OS '%s' has multiple entries (first one shadows the rest): %s",
2458                os_name, utils.CommaJoin([v[0] for v in os_data]))
2459       # comparisons with the 'base' image
2460       test = os_name not in base.oslist
2461       _ErrorIf(test, constants.CV_ENODEOS, node,
2462                "Extra OS %s not present on reference node (%s)",
2463                os_name, base.name)
2464       if test:
2465         continue
2466       assert base.oslist[os_name], "Base node has empty OS status?"
2467       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2468       if not b_status:
2469         # base OS is invalid, skipping
2470         continue
2471       for kind, a, b in [("API version", f_api, b_api),
2472                          ("variants list", f_var, b_var),
2473                          ("parameters", beautify_params(f_param),
2474                           beautify_params(b_param))]:
2475         _ErrorIf(a != b, constants.CV_ENODEOS, node,
2476                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2477                  kind, os_name, base.name,
2478                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2479
2480     # check any missing OSes
2481     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2482     _ErrorIf(missing, constants.CV_ENODEOS, node,
2483              "OSes present on reference node %s but missing on this node: %s",
2484              base.name, utils.CommaJoin(missing))
2485
2486   def _VerifyOob(self, ninfo, nresult):
2487     """Verifies out of band functionality of a node.
2488
2489     @type ninfo: L{objects.Node}
2490     @param ninfo: the node to check
2491     @param nresult: the remote results for the node
2492
2493     """
2494     node = ninfo.name
2495     # We just have to verify the paths on master and/or master candidates
2496     # as the oob helper is invoked on the master
2497     if ((ninfo.master_candidate or ninfo.master_capable) and
2498         constants.NV_OOB_PATHS in nresult):
2499       for path_result in nresult[constants.NV_OOB_PATHS]:
2500         self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2501
2502   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2503     """Verifies and updates the node volume data.
2504
2505     This function will update a L{NodeImage}'s internal structures
2506     with data from the remote call.
2507
2508     @type ninfo: L{objects.Node}
2509     @param ninfo: the node to check
2510     @param nresult: the remote results for the node
2511     @param nimg: the node image object
2512     @param vg_name: the configured VG name
2513
2514     """
2515     node = ninfo.name
2516     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2517
2518     nimg.lvm_fail = True
2519     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2520     if vg_name is None:
2521       pass
2522     elif isinstance(lvdata, basestring):
2523       _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2524                utils.SafeEncode(lvdata))
2525     elif not isinstance(lvdata, dict):
2526       _ErrorIf(True, constants.CV_ENODELVM, node,
2527                "rpc call to node failed (lvlist)")
2528     else:
2529       nimg.volumes = lvdata
2530       nimg.lvm_fail = False
2531
2532   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2533     """Verifies and updates the node instance list.
2534
2535     If the listing was successful, then updates this node's instance
2536     list. Otherwise, it marks the RPC call as failed for the instance
2537     list key.
2538
2539     @type ninfo: L{objects.Node}
2540     @param ninfo: the node to check
2541     @param nresult: the remote results for the node
2542     @param nimg: the node image object
2543
2544     """
2545     idata = nresult.get(constants.NV_INSTANCELIST, None)
2546     test = not isinstance(idata, list)
2547     self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2548                   "rpc call to node failed (instancelist): %s",
2549                   utils.SafeEncode(str(idata)))
2550     if test:
2551       nimg.hyp_fail = True
2552     else:
2553       nimg.instances = idata
2554
2555   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2556     """Verifies and computes a node information map
2557
2558     @type ninfo: L{objects.Node}
2559     @param ninfo: the node to check
2560     @param nresult: the remote results for the node
2561     @param nimg: the node image object
2562     @param vg_name: the configured VG name
2563
2564     """
2565     node = ninfo.name
2566     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2567
2568     # try to read free memory (from the hypervisor)
2569     hv_info = nresult.get(constants.NV_HVINFO, None)
2570     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2571     _ErrorIf(test, constants.CV_ENODEHV, node,
2572              "rpc call to node failed (hvinfo)")
2573     if not test:
2574       try:
2575         nimg.mfree = int(hv_info["memory_free"])
2576       except (ValueError, TypeError):
2577         _ErrorIf(True, constants.CV_ENODERPC, node,
2578                  "node returned invalid nodeinfo, check hypervisor")
2579
2580     # FIXME: devise a free space model for file based instances as well
2581     if vg_name is not None:
2582       test = (constants.NV_VGLIST not in nresult or
2583               vg_name not in nresult[constants.NV_VGLIST])
2584       _ErrorIf(test, constants.CV_ENODELVM, node,
2585                "node didn't return data for the volume group '%s'"
2586                " - it is either missing or broken", vg_name)
2587       if not test:
2588         try:
2589           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2590         except (ValueError, TypeError):
2591           _ErrorIf(True, constants.CV_ENODERPC, node,
2592                    "node returned invalid LVM info, check LVM status")
2593
2594   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2595     """Gets per-disk status information for all instances.
2596
2597     @type nodelist: list of strings
2598     @param nodelist: Node names
2599     @type node_image: dict of (name, L{objects.Node})
2600     @param node_image: Node objects
2601     @type instanceinfo: dict of (name, L{objects.Instance})
2602     @param instanceinfo: Instance objects
2603     @rtype: {instance: {node: [(succes, payload)]}}
2604     @return: a dictionary of per-instance dictionaries with nodes as
2605         keys and disk information as values; the disk information is a
2606         list of tuples (success, payload)
2607
2608     """
2609     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2610
2611     node_disks = {}
2612     node_disks_devonly = {}
2613     diskless_instances = set()
2614     diskless = constants.DT_DISKLESS
2615
2616     for nname in nodelist:
2617       node_instances = list(itertools.chain(node_image[nname].pinst,
2618                                             node_image[nname].sinst))
2619       diskless_instances.update(inst for inst in node_instances
2620                                 if instanceinfo[inst].disk_template == diskless)
2621       disks = [(inst, disk)
2622                for inst in node_instances
2623                for disk in instanceinfo[inst].disks]
2624
2625       if not disks:
2626         # No need to collect data
2627         continue
2628
2629       node_disks[nname] = disks
2630
2631       # Creating copies as SetDiskID below will modify the objects and that can
2632       # lead to incorrect data returned from nodes
2633       devonly = [dev.Copy() for (_, dev) in disks]
2634
2635       for dev in devonly:
2636         self.cfg.SetDiskID(dev, nname)
2637
2638       node_disks_devonly[nname] = devonly
2639
2640     assert len(node_disks) == len(node_disks_devonly)
2641
2642     # Collect data from all nodes with disks
2643     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2644                                                           node_disks_devonly)
2645
2646     assert len(result) == len(node_disks)
2647
2648     instdisk = {}
2649
2650     for (nname, nres) in result.items():
2651       disks = node_disks[nname]
2652
2653       if nres.offline:
2654         # No data from this node
2655         data = len(disks) * [(False, "node offline")]
2656       else:
2657         msg = nres.fail_msg
2658         _ErrorIf(msg, constants.CV_ENODERPC, nname,
2659                  "while getting disk information: %s", msg)
2660         if msg:
2661           # No data from this node
2662           data = len(disks) * [(False, msg)]
2663         else:
2664           data = []
2665           for idx, i in enumerate(nres.payload):
2666             if isinstance(i, (tuple, list)) and len(i) == 2:
2667               data.append(i)
2668             else:
2669               logging.warning("Invalid result from node %s, entry %d: %s",
2670                               nname, idx, i)
2671               data.append((False, "Invalid result from the remote node"))
2672
2673       for ((inst, _), status) in zip(disks, data):
2674         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2675
2676     # Add empty entries for diskless instances.
2677     for inst in diskless_instances:
2678       assert inst not in instdisk
2679       instdisk[inst] = {}
2680
2681     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2682                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2683                       compat.all(isinstance(s, (tuple, list)) and
2684                                  len(s) == 2 for s in statuses)
2685                       for inst, nnames in instdisk.items()
2686                       for nname, statuses in nnames.items())
2687     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2688
2689     return instdisk
2690
2691   @staticmethod
2692   def _SshNodeSelector(group_uuid, all_nodes):
2693     """Create endless iterators for all potential SSH check hosts.
2694
2695     """
2696     nodes = [node for node in all_nodes
2697              if (node.group != group_uuid and
2698                  not node.offline)]
2699     keyfunc = operator.attrgetter("group")
2700
2701     return map(itertools.cycle,
2702                [sorted(map(operator.attrgetter("name"), names))
2703                 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2704                                                   keyfunc)])
2705
2706   @classmethod
2707   def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2708     """Choose which nodes should talk to which other nodes.
2709
2710     We will make nodes contact all nodes in their group, and one node from
2711     every other group.
2712
2713     @warning: This algorithm has a known issue if one node group is much
2714       smaller than others (e.g. just one node). In such a case all other
2715       nodes will talk to the single node.
2716
2717     """
2718     online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2719     sel = cls._SshNodeSelector(group_uuid, all_nodes)
2720
2721     return (online_nodes,
2722             dict((name, sorted([i.next() for i in sel]))
2723                  for name in online_nodes))
2724
2725   def BuildHooksEnv(self):
2726     """Build hooks env.
2727
2728     Cluster-Verify hooks just ran in the post phase and their failure makes
2729     the output be logged in the verify output and the verification to fail.
2730
2731     """
2732     env = {
2733       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2734       }
2735
2736     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2737                for node in self.my_node_info.values())
2738
2739     return env
2740
2741   def BuildHooksNodes(self):
2742     """Build hooks nodes.
2743
2744     """
2745     return ([], self.my_node_names)
2746
2747   def Exec(self, feedback_fn):
2748     """Verify integrity of the node group, performing various test on nodes.
2749
2750     """
2751     # This method has too many local variables. pylint: disable=R0914
2752     feedback_fn("* Verifying group '%s'" % self.group_info.name)
2753
2754     if not self.my_node_names:
2755       # empty node group
2756       feedback_fn("* Empty node group, skipping verification")
2757       return True
2758
2759     self.bad = False
2760     _ErrorIf = self._ErrorIf # pylint: disable=C0103
2761     verbose = self.op.verbose
2762     self._feedback_fn = feedback_fn
2763
2764     vg_name = self.cfg.GetVGName()
2765     drbd_helper = self.cfg.GetDRBDHelper()
2766     cluster = self.cfg.GetClusterInfo()
2767     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2768     hypervisors = cluster.enabled_hypervisors
2769     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2770
2771     i_non_redundant = [] # Non redundant instances
2772     i_non_a_balanced = [] # Non auto-balanced instances
2773     i_offline = 0 # Count of offline instances
2774     n_offline = 0 # Count of offline nodes
2775     n_drained = 0 # Count of nodes being drained
2776     node_vol_should = {}
2777
2778     # FIXME: verify OS list
2779
2780     # File verification
2781     filemap = _ComputeAncillaryFiles(cluster, False)
2782
2783     # do local checksums
2784     master_node = self.master_node = self.cfg.GetMasterNode()
2785     master_ip = self.cfg.GetMasterIP()
2786
2787     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2788
2789     user_scripts = []
2790     if self.cfg.GetUseExternalMipScript():
2791       user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2792
2793     node_verify_param = {
2794       constants.NV_FILELIST:
2795         utils.UniqueSequence(filename
2796                              for files in filemap
2797                              for filename in files),
2798       constants.NV_NODELIST:
2799         self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2800                                   self.all_node_info.values()),
2801       constants.NV_HYPERVISOR: hypervisors,
2802       constants.NV_HVPARAMS:
2803         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2804       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2805                                  for node in node_data_list
2806                                  if not node.offline],
2807       constants.NV_INSTANCELIST: hypervisors,
2808       constants.NV_VERSION: None,
2809       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2810       constants.NV_NODESETUP: None,
2811       constants.NV_TIME: None,
2812       constants.NV_MASTERIP: (master_node, master_ip),
2813       constants.NV_OSLIST: None,
2814       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2815       constants.NV_USERSCRIPTS: user_scripts,
2816       }
2817
2818     if vg_name is not None:
2819       node_verify_param[constants.NV_VGLIST] = None
2820       node_verify_param[constants.NV_LVLIST] = vg_name
2821       node_verify_param[constants.NV_PVLIST] = [vg_name]
2822       node_verify_param[constants.NV_DRBDLIST] = None
2823
2824     if drbd_helper:
2825       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2826
2827     # bridge checks
2828     # FIXME: this needs to be changed per node-group, not cluster-wide
2829     bridges = set()
2830     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2831     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2832       bridges.add(default_nicpp[constants.NIC_LINK])
2833     for instance in self.my_inst_info.values():
2834       for nic in instance.nics:
2835         full_nic = cluster.SimpleFillNIC(nic.nicparams)
2836         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2837           bridges.add(full_nic[constants.NIC_LINK])
2838
2839     if bridges:
2840       node_verify_param[constants.NV_BRIDGES] = list(bridges)
2841
2842     # Build our expected cluster state
2843     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2844                                                  name=node.name,
2845                                                  vm_capable=node.vm_capable))
2846                       for node in node_data_list)
2847
2848     # Gather OOB paths
2849     oob_paths = []
2850     for node in self.all_node_info.values():
2851       path = _SupportsOob(self.cfg, node)
2852       if path and path not in oob_paths:
2853         oob_paths.append(path)
2854
2855     if oob_paths:
2856       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2857
2858     for instance in self.my_inst_names:
2859       inst_config = self.my_inst_info[instance]
2860
2861       for nname in inst_config.all_nodes:
2862         if nname not in node_image:
2863           gnode = self.NodeImage(name=nname)
2864           gnode.ghost = (nname not in self.all_node_info)
2865           node_image[nname] = gnode
2866
2867       inst_config.MapLVsByNode(node_vol_should)
2868
2869       pnode = inst_config.primary_node
2870       node_image[pnode].pinst.append(instance)
2871
2872       for snode in inst_config.secondary_nodes:
2873         nimg = node_image[snode]
2874         nimg.sinst.append(instance)
2875         if pnode not in nimg.sbp:
2876           nimg.sbp[pnode] = []
2877         nimg.sbp[pnode].append(instance)
2878
2879     # At this point, we have the in-memory data structures complete,
2880     # except for the runtime information, which we'll gather next
2881
2882     # Due to the way our RPC system works, exact response times cannot be
2883     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2884     # time before and after executing the request, we can at least have a time
2885     # window.
2886     nvinfo_starttime = time.time()
2887     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2888                                            node_verify_param,
2889                                            self.cfg.GetClusterName())
2890     nvinfo_endtime = time.time()
2891
2892     if self.extra_lv_nodes and vg_name is not None:
2893       extra_lv_nvinfo = \
2894           self.rpc.call_node_verify(self.extra_lv_nodes,
2895                                     {constants.NV_LVLIST: vg_name},
2896                                     self.cfg.GetClusterName())
2897     else:
2898       extra_lv_nvinfo = {}
2899
2900     all_drbd_map = self.cfg.ComputeDRBDMap()
2901
2902     feedback_fn("* Gathering disk information (%s nodes)" %
2903                 len(self.my_node_names))
2904     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2905                                      self.my_inst_info)
2906
2907     feedback_fn("* Verifying configuration file consistency")
2908
2909     # If not all nodes are being checked, we need to make sure the master node
2910     # and a non-checked vm_capable node are in the list.
2911     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2912     if absent_nodes:
2913       vf_nvinfo = all_nvinfo.copy()
2914       vf_node_info = list(self.my_node_info.values())
2915       additional_nodes = []
2916       if master_node not in self.my_node_info:
2917         additional_nodes.append(master_node)
2918         vf_node_info.append(self.all_node_info[master_node])
2919       # Add the first vm_capable node we find which is not included
2920       for node in absent_nodes:
2921         nodeinfo = self.all_node_info[node]
2922         if nodeinfo.vm_capable and not nodeinfo.offline:
2923           additional_nodes.append(node)
2924           vf_node_info.append(self.all_node_info[node])
2925           break
2926       key = constants.NV_FILELIST
2927       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2928                                                  {key: node_verify_param[key]},
2929                                                  self.cfg.GetClusterName()))
2930     else:
2931       vf_nvinfo = all_nvinfo
2932       vf_node_info = self.my_node_info.values()
2933
2934     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2935
2936     feedback_fn("* Verifying node status")
2937
2938     refos_img = None
2939
2940     for node_i in node_data_list:
2941       node = node_i.name
2942       nimg = node_image[node]
2943
2944       if node_i.offline:
2945         if verbose:
2946           feedback_fn("* Skipping offline node %s" % (node,))
2947         n_offline += 1
2948         continue
2949
2950       if node == master_node:
2951         ntype = "master"
2952       elif node_i.master_candidate:
2953         ntype = "master candidate"
2954       elif node_i.drained:
2955         ntype = "drained"
2956         n_drained += 1
2957       else:
2958         ntype = "regular"
2959       if verbose:
2960         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2961
2962       msg = all_nvinfo[node].fail_msg
2963       _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2964                msg)
2965       if msg:
2966         nimg.rpc_fail = True
2967         continue
2968
2969       nresult = all_nvinfo[node].payload
2970
2971       nimg.call_ok = self._VerifyNode(node_i, nresult)
2972       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2973       self._VerifyNodeNetwork(node_i, nresult)
2974       self._VerifyNodeUserScripts(node_i, nresult)
2975       self._VerifyOob(node_i, nresult)
2976
2977       if nimg.vm_capable:
2978         self._VerifyNodeLVM(node_i, nresult, vg_name)
2979         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2980                              all_drbd_map)
2981
2982         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2983         self._UpdateNodeInstances(node_i, nresult, nimg)
2984         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2985         self._UpdateNodeOS(node_i, nresult, nimg)
2986
2987         if not nimg.os_fail:
2988           if refos_img is None:
2989             refos_img = nimg
2990           self._VerifyNodeOS(node_i, nimg, refos_img)
2991         self._VerifyNodeBridges(node_i, nresult, bridges)
2992
2993         # Check whether all running instancies are primary for the node. (This
2994         # can no longer be done from _VerifyInstance below, since some of the
2995         # wrong instances could be from other node groups.)
2996         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2997
2998         for inst in non_primary_inst:
2999           # FIXME: investigate best way to handle offline insts
3000           if inst.admin_state == constants.ADMINST_OFFLINE:
3001             if verbose:
3002               feedback_fn("* Skipping offline instance %s" % inst.name)
3003             i_offline += 1
3004             continue
3005           test = inst in self.all_inst_info
3006           _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3007                    "instance should not run on node %s", node_i.name)
3008           _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3009                    "node is running unknown instance %s", inst)
3010
3011     for node, result in extra_lv_nvinfo.items():
3012       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3013                               node_image[node], vg_name)
3014
3015     feedback_fn("* Verifying instance status")
3016     for instance in self.my_inst_names:
3017       if verbose:
3018         feedback_fn("* Verifying instance %s" % instance)
3019       inst_config = self.my_inst_info[instance]
3020       self._VerifyInstance(instance, inst_config, node_image,
3021                            instdisk[instance])
3022       inst_nodes_offline = []
3023
3024       pnode = inst_config.primary_node
3025       pnode_img = node_image[pnode]
3026       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3027                constants.CV_ENODERPC, pnode, "instance %s, connection to"
3028                " primary node failed", instance)
3029
3030       _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3031                pnode_img.offline,
3032                constants.CV_EINSTANCEBADNODE, instance,
3033                "instance is marked as running and lives on offline node %s",
3034                inst_config.primary_node)
3035
3036       # If the instance is non-redundant we cannot survive losing its primary
3037       # node, so we are not N+1 compliant. On the other hand we have no disk
3038       # templates with more than one secondary so that situation is not well
3039       # supported either.
3040       # FIXME: does not support file-backed instances
3041       if not inst_config.secondary_nodes:
3042         i_non_redundant.append(instance)
3043
3044       _ErrorIf(len(inst_config.secondary_nodes) > 1,
3045                constants.CV_EINSTANCELAYOUT,
3046                instance, "instance has multiple secondary nodes: %s",
3047                utils.CommaJoin(inst_config.secondary_nodes),
3048                code=self.ETYPE_WARNING)
3049
3050       if inst_config.disk_template in constants.DTS_INT_MIRROR:
3051         pnode = inst_config.primary_node
3052         instance_nodes = utils.NiceSort(inst_config.all_nodes)
3053         instance_groups = {}
3054
3055         for node in instance_nodes:
3056           instance_groups.setdefault(self.all_node_info[node].group,
3057                                      []).append(node)
3058
3059         pretty_list = [
3060           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3061           # Sort so that we always list the primary node first.
3062           for group, nodes in sorted(instance_groups.items(),
3063                                      key=lambda (_, nodes): pnode in nodes,
3064                                      reverse=True)]
3065
3066         self._ErrorIf(len(instance_groups) > 1,
3067                       constants.CV_EINSTANCESPLITGROUPS,
3068                       instance, "instance has primary and secondary nodes in"
3069                       " different groups: %s", utils.CommaJoin(pretty_list),
3070                       code=self.ETYPE_WARNING)
3071
3072       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3073         i_non_a_balanced.append(instance)
3074
3075       for snode in inst_config.secondary_nodes:
3076         s_img = node_image[snode]
3077         _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3078                  snode, "instance %s, connection to secondary node failed",
3079                  instance)
3080
3081         if s_img.offline:
3082           inst_nodes_offline.append(snode)
3083
3084       # warn that the instance lives on offline nodes
3085       _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3086                "instance has offline secondary node(s) %s",
3087                utils.CommaJoin(inst_nodes_offline))
3088       # ... or ghost/non-vm_capable nodes
3089       for node in inst_config.all_nodes:
3090         _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3091                  instance, "instance lives on ghost node %s", node)
3092         _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3093                  instance, "instance lives on non-vm_capable node %s", node)
3094
3095     feedback_fn("* Verifying orphan volumes")
3096     reserved = utils.FieldSet(*cluster.reserved_lvs)
3097
3098     # We will get spurious "unknown volume" warnings if any node of this group
3099     # is secondary for an instance whose primary is in another group. To avoid
3100     # them, we find these instances and add their volumes to node_vol_should.
3101     for inst in self.all_inst_info.values():
3102       for secondary in inst.secondary_nodes:
3103         if (secondary in self.my_node_info
3104             and inst.name not in self.my_inst_info):
3105           inst.MapLVsByNode(node_vol_should)
3106           break
3107
3108     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3109
3110     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3111       feedback_fn("* Verifying N+1 Memory redundancy")
3112       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3113
3114     feedback_fn("* Other Notes")
3115     if i_non_redundant:
3116       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3117                   % len(i_non_redundant))
3118
3119     if i_non_a_balanced:
3120       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3121                   % len(i_non_a_balanced))
3122
3123     if i_offline:
3124       feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3125
3126     if n_offline:
3127       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3128
3129     if n_drained:
3130       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3131
3132     return not self.bad
3133
3134   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3135     """Analyze the post-hooks' result
3136
3137     This method analyses the hook result, handles it, and sends some
3138     nicely-formatted feedback back to the user.
3139
3140     @param phase: one of L{constants.HOOKS_PHASE_POST} or
3141         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3142     @param hooks_results: the results of the multi-node hooks rpc call
3143     @param feedback_fn: function used send feedback back to the caller
3144     @param lu_result: previous Exec result
3145     @return: the new Exec result, based on the previous result
3146         and hook results
3147
3148     """
3149     # We only really run POST phase hooks, only for non-empty groups,
3150     # and are only interested in their results
3151     if not self.my_node_names:
3152       # empty node group
3153       pass
3154     elif phase == constants.HOOKS_PHASE_POST:
3155       # Used to change hooks' output to proper indentation
3156       feedback_fn("* Hooks Results")
3157       assert hooks_results, "invalid result from hooks"
3158
3159       for node_name in hooks_results:
3160         res = hooks_results[node_name]
3161         msg = res.fail_msg
3162         test = msg and not res.offline
3163         self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3164                       "Communication failure in hooks execution: %s", msg)
3165         if res.offline or msg:
3166           # No need to investigate payload if node is offline or gave
3167           # an error.
3168           continue
3169         for script, hkr, output in res.payload:
3170           test = hkr == constants.HKR_FAIL
3171           self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3172                         "Script %s failed, output:", script)
3173           if test:
3174             output = self._HOOKS_INDENT_RE.sub("      ", output)
3175             feedback_fn("%s" % output)
3176             lu_result = False
3177
3178     return lu_result
3179
3180
3181 class LUClusterVerifyDisks(NoHooksLU):
3182   """Verifies the cluster disks status.
3183
3184   """
3185   REQ_BGL = False
3186
3187   def ExpandNames(self):
3188     self.share_locks = _ShareAll()
3189     self.needed_locks = {
3190       locking.LEVEL_NODEGROUP: locking.ALL_SET,
3191       }
3192
3193   def Exec(self, feedback_fn):
3194     group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3195
3196     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3197     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3198                            for group in group_names])
3199
3200
3201 class LUGroupVerifyDisks(NoHooksLU):
3202   """Verifies the status of all disks in a node group.
3203
3204   """
3205   REQ_BGL = False
3206
3207   def ExpandNames(self):
3208     # Raises errors.OpPrereqError on its own if group can't be found
3209     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3210
3211     self.share_locks = _ShareAll()
3212     self.needed_locks = {
3213       locking.LEVEL_INSTANCE: [],
3214       locking.LEVEL_NODEGROUP: [],
3215       locking.LEVEL_NODE: [],
3216       }
3217
3218   def DeclareLocks(self, level):
3219     if level == locking.LEVEL_INSTANCE:
3220       assert not self.needed_locks[locking.LEVEL_INSTANCE]
3221
3222       # Lock instances optimistically, needs verification once node and group
3223       # locks have been acquired
3224       self.needed_locks[locking.LEVEL_INSTANCE] = \
3225         self.cfg.GetNodeGroupInstances(self.group_uuid)
3226
3227     elif level == locking.LEVEL_NODEGROUP:
3228       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3229
3230       self.needed_locks[locking.LEVEL_NODEGROUP] = \
3231         set([self.group_uuid] +
3232             # Lock all groups used by instances optimistically; this requires
3233             # going via the node before it's locked, requiring verification
3234             # later on
3235             [group_uuid
3236              for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3237              for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3238
3239     elif level == locking.LEVEL_NODE:
3240       # This will only lock the nodes in the group to be verified which contain
3241       # actual instances
3242       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3243       self._LockInstancesNodes()
3244
3245       # Lock all nodes in group to be verified
3246       assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3247       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3248       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3249
3250   def CheckPrereq(self):
3251     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3252     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3253     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3254
3255     assert self.group_uuid in owned_groups
3256
3257     # Check if locked instances are still correct
3258     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3259
3260     # Get instance information
3261     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3262
3263     # Check if node groups for locked instances are still correct
3264     for (instance_name, inst) in self.instances.items():
3265       assert owned_nodes.issuperset(inst.all_nodes), \
3266         "Instance %s's nodes changed while we kept the lock" % instance_name
3267
3268       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3269                                              owned_groups)
3270
3271       assert self.group_uuid in inst_groups, \
3272         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3273
3274   def Exec(self, feedback_fn):
3275     """Verify integrity of cluster disks.
3276
3277     @rtype: tuple of three items
3278     @return: a tuple of (dict of node-to-node_error, list of instances
3279         which need activate-disks, dict of instance: (node, volume) for
3280         missing volumes
3281
3282     """
3283     res_nodes = {}
3284     res_instances = set()
3285     res_missing = {}
3286
3287     nv_dict = _MapInstanceDisksToNodes([inst
3288             for inst in self.instances.values()
3289             if inst.admin_state == constants.ADMINST_UP])
3290
3291     if nv_dict:
3292       nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3293                              set(self.cfg.GetVmCapableNodeList()))
3294
3295       node_lvs = self.rpc.call_lv_list(nodes, [])
3296
3297       for (node, node_res) in node_lvs.items():
3298         if node_res.offline:
3299           continue
3300
3301         msg = node_res.fail_msg
3302         if msg:
3303           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3304           res_nodes[node] = msg
3305           continue
3306
3307         for lv_name, (_, _, lv_online) in node_res.payload.items():
3308           inst = nv_dict.pop((node, lv_name), None)
3309           if not (lv_online or inst is None):
3310             res_instances.add(inst)
3311
3312       # any leftover items in nv_dict are missing LVs, let's arrange the data
3313       # better
3314       for key, inst in nv_dict.iteritems():
3315         res_missing.setdefault(inst, []).append(list(key))
3316
3317     return (res_nodes, list(res_instances), res_missing)
3318
3319
3320 class LUClusterRepairDiskSizes(NoHooksLU):
3321   """Verifies the cluster disks sizes.
3322
3323   """
3324   REQ_BGL = False
3325
3326   def ExpandNames(self):
3327     if self.op.instances:
3328       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3329       self.needed_locks = {
3330         locking.LEVEL_NODE_RES: [],
3331         locking.LEVEL_INSTANCE: self.wanted_names,
3332         }
3333       self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3334     else:
3335       self.wanted_names = None
3336       self.needed_locks = {
3337         locking.LEVEL_NODE_RES: locking.ALL_SET,
3338         locking.LEVEL_INSTANCE: locking.ALL_SET,
3339         }
3340     self.share_locks = {
3341       locking.LEVEL_NODE_RES: 1,
3342       locking.LEVEL_INSTANCE: 0,
3343       }
3344
3345   def DeclareLocks(self, level):
3346     if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3347       self._LockInstancesNodes(primary_only=True, level=level)
3348
3349   def CheckPrereq(self):
3350     """Check prerequisites.
3351
3352     This only checks the optional instance list against the existing names.
3353
3354     """
3355     if self.wanted_names is None:
3356       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3357
3358     self.wanted_instances = \
3359         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3360
3361   def _EnsureChildSizes(self, disk):
3362     """Ensure children of the disk have the needed disk size.
3363
3364     This is valid mainly for DRBD8 and fixes an issue where the
3365     children have smaller disk size.
3366
3367     @param disk: an L{ganeti.objects.Disk} object
3368
3369     """
3370     if disk.dev_type == constants.LD_DRBD8:
3371       assert disk.children, "Empty children for DRBD8?"
3372       fchild = disk.children[0]
3373       mismatch = fchild.size < disk.size
3374       if mismatch:
3375         self.LogInfo("Child disk has size %d, parent %d, fixing",
3376                      fchild.size, disk.size)
3377         fchild.size = disk.size
3378
3379       # and we recurse on this child only, not on the metadev
3380       return self._EnsureChildSizes(fchild) or mismatch
3381     else:
3382       return False
3383
3384   def Exec(self, feedback_fn):
3385     """Verify the size of cluster disks.
3386
3387     """
3388     # TODO: check child disks too
3389     # TODO: check differences in size between primary/secondary nodes
3390     per_node_disks = {}
3391     for instance in self.wanted_instances:
3392       pnode = instance.primary_node
3393       if pnode not in per_node_disks:
3394         per_node_disks[pnode] = []
3395       for idx, disk in enumerate(instance.disks):
3396         per_node_disks[pnode].append((instance, idx, disk))
3397
3398     assert not (frozenset(per_node_disks.keys()) -
3399                 self.owned_locks(locking.LEVEL_NODE_RES)), \
3400       "Not owning correct locks"
3401     assert not self.owned_locks(locking.LEVEL_NODE)
3402
3403     changed = []
3404     for node, dskl in per_node_disks.items():
3405       newl = [v[2].Copy() for v in dskl]
3406       for dsk in newl:
3407         self.cfg.SetDiskID(dsk, node)
3408       result = self.rpc.call_blockdev_getsize(node, newl)
3409       if result.fail_msg:
3410         self.LogWarning("Failure in blockdev_getsize call to node"
3411                         " %s, ignoring", node)
3412         continue
3413       if len(result.payload) != len(dskl):
3414         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3415                         " result.payload=%s", node, len(dskl), result.payload)
3416         self.LogWarning("Invalid result from node %s, ignoring node results",
3417                         node)
3418         continue
3419       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3420         if size is None:
3421           self.LogWarning("Disk %d of instance %s did not return size"
3422                           " information, ignoring", idx, instance.name)
3423           continue
3424         if not isinstance(size, (int, long)):
3425           self.LogWarning("Disk %d of instance %s did not return valid"
3426                           " size information, ignoring", idx, instance.name)
3427           continue
3428         size = size >> 20
3429         if size != disk.size:
3430           self.LogInfo("Disk %d of instance %s has mismatched size,"
3431                        " correcting: recorded %d, actual %d", idx,
3432                        instance.name, disk.size, size)
3433           disk.size = size
3434           self.cfg.Update(instance, feedback_fn)
3435           changed.append((instance.name, idx, size))
3436         if self._EnsureChildSizes(disk):
3437           self.cfg.Update(instance, feedback_fn)
3438           changed.append((instance.name, idx, disk.size))
3439     return changed
3440
3441
3442 class LUClusterRename(LogicalUnit):
3443   """Rename the cluster.
3444
3445   """
3446   HPATH = "cluster-rename"
3447   HTYPE = constants.HTYPE_CLUSTER
3448
3449   def BuildHooksEnv(self):
3450     """Build hooks env.
3451
3452     """
3453     return {
3454       "OP_TARGET": self.cfg.GetClusterName(),
3455       "NEW_NAME": self.op.name,
3456       }
3457
3458   def BuildHooksNodes(self):
3459     """Build hooks nodes.
3460
3461     """
3462     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3463
3464   def CheckPrereq(self):
3465     """Verify that the passed name is a valid one.
3466
3467     """
3468     hostname = netutils.GetHostname(name=self.op.name,
3469                                     family=self.cfg.GetPrimaryIPFamily())
3470
3471     new_name = hostname.name
3472     self.ip = new_ip = hostname.ip
3473     old_name = self.cfg.GetClusterName()
3474     old_ip = self.cfg.GetMasterIP()
3475     if new_name == old_name and new_ip == old_ip:
3476       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3477                                  " cluster has changed",
3478                                  errors.ECODE_INVAL)
3479     if new_ip != old_ip:
3480       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3481         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3482                                    " reachable on the network" %
3483                                    new_ip, errors.ECODE_NOTUNIQUE)
3484
3485     self.op.name = new_name
3486
3487   def Exec(self, feedback_fn):
3488     """Rename the cluster.
3489
3490     """
3491     clustername = self.op.name
3492     new_ip = self.ip
3493
3494     # shutdown the master IP
3495     master_params = self.cfg.GetMasterNetworkParameters()
3496     ems = self.cfg.GetUseExternalMipScript()
3497     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3498                                                      master_params, ems)
3499     result.Raise("Could not disable the master role")
3500
3501     try:
3502       cluster = self.cfg.GetClusterInfo()
3503       cluster.cluster_name = clustername
3504       cluster.master_ip = new_ip
3505       self.cfg.Update(cluster, feedback_fn)
3506
3507       # update the known hosts file
3508       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3509       node_list = self.cfg.GetOnlineNodeList()
3510       try:
3511         node_list.remove(master_params.name)
3512       except ValueError:
3513         pass
3514       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3515     finally:
3516       master_params.ip = new_ip
3517       result = self.rpc.call_node_activate_master_ip(master_params.name,
3518                                                      master_params, ems)
3519       msg = result.fail_msg
3520       if msg:
3521         self.LogWarning("Could not re-enable the master role on"
3522                         " the master, please restart manually: %s", msg)
3523
3524     return clustername
3525
3526
3527 def _ValidateNetmask(cfg, netmask):
3528   """Checks if a netmask is valid.
3529
3530   @type cfg: L{config.ConfigWriter}
3531   @param cfg: The cluster configuration
3532   @type netmask: int
3533   @param netmask: the netmask to be verified
3534   @raise errors.OpPrereqError: if the validation fails
3535
3536   """
3537   ip_family = cfg.GetPrimaryIPFamily()
3538   try:
3539     ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3540   except errors.ProgrammerError:
3541     raise errors.OpPrereqError("Invalid primary ip family: %s." %
3542                                ip_family)
3543   if not ipcls.ValidateNetmask(netmask):
3544     raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3545                                 (netmask))
3546
3547
3548 class LUClusterSetParams(LogicalUnit):
3549   """Change the parameters of the cluster.
3550
3551   """
3552   HPATH = "cluster-modify"
3553   HTYPE = constants.HTYPE_CLUSTER
3554   REQ_BGL = False
3555
3556   def CheckArguments(self):
3557     """Check parameters
3558
3559     """
3560     if self.op.uid_pool:
3561       uidpool.CheckUidPool(self.op.uid_pool)
3562
3563     if self.op.add_uids:
3564       uidpool.CheckUidPool(self.op.add_uids)
3565
3566     if self.op.remove_uids:
3567       uidpool.CheckUidPool(self.op.remove_uids)
3568
3569     if self.op.master_netmask is not None:
3570       _ValidateNetmask(self.cfg, self.op.master_netmask)
3571
3572     if self.op.diskparams:
3573       for dt_params in self.op.diskparams.values():
3574         utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3575
3576   def ExpandNames(self):
3577     # FIXME: in the future maybe other cluster params won't require checking on
3578     # all nodes to be modified.
3579     self.needed_locks = {
3580       locking.LEVEL_NODE: locking.ALL_SET,
3581     }
3582     self.share_locks[locking.LEVEL_NODE] = 1
3583
3584   def BuildHooksEnv(self):
3585     """Build hooks env.
3586
3587     """
3588     return {
3589       "OP_TARGET": self.cfg.GetClusterName(),
3590       "NEW_VG_NAME": self.op.vg_name,
3591       }
3592
3593   def BuildHooksNodes(self):
3594     """Build hooks nodes.
3595
3596     """
3597     mn = self.cfg.GetMasterNode()
3598     return ([mn], [mn])
3599
3600   def CheckPrereq(self):
3601     """Check prerequisites.
3602
3603     This checks whether the given params don't conflict and
3604     if the given volume group is valid.
3605
3606     """
3607     if self.op.vg_name is not None and not self.op.vg_name:
3608       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3609         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3610                                    " instances exist", errors.ECODE_INVAL)
3611
3612     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3613       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3614         raise errors.OpPrereqError("Cannot disable drbd helper while"
3615                                    " drbd-based instances exist",
3616                                    errors.ECODE_INVAL)
3617
3618     node_list = self.owned_locks(locking.LEVEL_NODE)
3619
3620     # if vg_name not None, checks given volume group on all nodes
3621     if self.op.vg_name:
3622       vglist = self.rpc.call_vg_list(node_list)
3623       for node in node_list:
3624         msg = vglist[node].fail_msg
3625         if msg:
3626           # ignoring down node
3627           self.LogWarning("Error while gathering data on node %s"
3628                           " (ignoring node): %s", node, msg)
3629           continue
3630         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3631                                               self.op.vg_name,
3632                                               constants.MIN_VG_SIZE)
3633         if vgstatus:
3634           raise errors.OpPrereqError("Error on node '%s': %s" %
3635                                      (node, vgstatus), errors.ECODE_ENVIRON)
3636
3637     if self.op.drbd_helper:
3638       # checks given drbd helper on all nodes
3639       helpers = self.rpc.call_drbd_helper(node_list)
3640       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3641         if ninfo.offline:
3642           self.LogInfo("Not checking drbd helper on offline node %s", node)
3643           continue
3644         msg = helpers[node].fail_msg
3645         if msg:
3646           raise errors.OpPrereqError("Error checking drbd helper on node"
3647                                      " '%s': %s" % (node, msg),
3648                                      errors.ECODE_ENVIRON)
3649         node_helper = helpers[node].payload
3650         if node_helper != self.op.drbd_helper:
3651           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3652                                      (node, node_helper), errors.ECODE_ENVIRON)
3653
3654     self.cluster = cluster = self.cfg.GetClusterInfo()
3655     # validate params changes
3656     if self.op.beparams:
3657       objects.UpgradeBeParams(self.op.beparams)
3658       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3659       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3660
3661     if self.op.ndparams:
3662       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3663       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3664
3665       # TODO: we need a more general way to handle resetting
3666       # cluster-level parameters to default values
3667       if self.new_ndparams["oob_program"] == "":
3668         self.new_ndparams["oob_program"] = \
3669             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3670
3671     if self.op.hv_state:
3672       new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3673                                             self.cluster.hv_state_static)
3674       self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3675                                for hv, values in new_hv_state.items())
3676
3677     if self.op.disk_state:
3678       new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3679                                                 self.cluster.disk_state_static)
3680       self.new_disk_state = \
3681         dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3682                             for name, values in svalues.items()))
3683              for storage, svalues in new_disk_state.items())
3684
3685     if self.op.ipolicy:
3686       ipolicy = {}
3687       for key, value in self.op.ipolicy.items():
3688         utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
3689         ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
3690                                           value)
3691       objects.InstancePolicy.CheckParameterSyntax(ipolicy)
3692       self.new_ipolicy = ipolicy
3693
3694     if self.op.nicparams:
3695       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3696       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3697       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3698       nic_errors = []
3699
3700       # check all instances for consistency
3701       for instance in self.cfg.GetAllInstancesInfo().values():
3702         for nic_idx, nic in enumerate(instance.nics):
3703           params_copy = copy.deepcopy(nic.nicparams)
3704           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3705
3706           # check parameter syntax
3707           try:
3708             objects.NIC.CheckParameterSyntax(params_filled)
3709           except errors.ConfigurationError, err:
3710             nic_errors.append("Instance %s, nic/%d: %s" %
3711                               (instance.name, nic_idx, err))
3712
3713           # if we're moving instances to routed, check that they have an ip
3714           target_mode = params_filled[constants.NIC_MODE]
3715           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3716             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3717                               " address" % (instance.name, nic_idx))
3718       if nic_errors:
3719         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3720                                    "\n".join(nic_errors))
3721
3722     # hypervisor list/parameters
3723     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3724     if self.op.hvparams:
3725       for hv_name, hv_dict in self.op.hvparams.items():
3726         if hv_name not in self.new_hvparams:
3727           self.new_hvparams[hv_name] = hv_dict
3728         else:
3729           self.new_hvparams[hv_name].update(hv_dict)
3730
3731     # disk template parameters
3732     self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3733     if self.op.diskparams:
3734       for dt_name, dt_params in self.op.diskparams.items():
3735         if dt_name not in self.op.diskparams:
3736           self.new_diskparams[dt_name] = dt_params
3737         else:
3738           self.new_diskparams[dt_name].update(dt_params)
3739
3740     # os hypervisor parameters
3741     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3742     if self.op.os_hvp:
3743       for os_name, hvs in self.op.os_hvp.items():
3744         if os_name not in self.new_os_hvp:
3745           self.new_os_hvp[os_name] = hvs
3746         else:
3747           for hv_name, hv_dict in hvs.items():
3748             if hv_name not in self.new_os_hvp[os_name]:
3749               self.new_os_hvp[os_name][hv_name] = hv_dict
3750             else:
3751               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3752
3753     # os parameters
3754     self.new_osp = objects.FillDict(cluster.osparams, {})
3755     if self.op.osparams:
3756       for os_name, osp in self.op.osparams.items():
3757         if os_name not in self.new_osp:
3758           self.new_osp[os_name] = {}
3759
3760         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3761                                                   use_none=True)
3762
3763         if not self.new_osp[os_name]:
3764           # we removed all parameters
3765           del self.new_osp[os_name]
3766         else:
3767           # check the parameter validity (remote check)
3768           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3769                          os_name, self.new_osp[os_name])
3770
3771     # changes to the hypervisor list
3772     if self.op.enabled_hypervisors is not None:
3773       self.hv_list = self.op.enabled_hypervisors
3774       for hv in self.hv_list:
3775         # if the hypervisor doesn't already exist in the cluster
3776         # hvparams, we initialize it to empty, and then (in both
3777         # cases) we make sure to fill the defaults, as we might not
3778         # have a complete defaults list if the hypervisor wasn't
3779         # enabled before
3780         if hv not in new_hvp:
3781           new_hvp[hv] = {}
3782         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3783         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3784     else:
3785       self.hv_list = cluster.enabled_hypervisors
3786
3787     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3788       # either the enabled list has changed, or the parameters have, validate
3789       for hv_name, hv_params in self.new_hvparams.items():
3790         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3791             (self.op.enabled_hypervisors and
3792              hv_name in self.op.enabled_hypervisors)):
3793           # either this is a new hypervisor, or its parameters have changed
3794           hv_class = hypervisor.GetHypervisor(hv_name)
3795           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3796           hv_class.CheckParameterSyntax(hv_params)
3797           _CheckHVParams(self, node_list, hv_name, hv_params)
3798
3799     if self.op.os_hvp:
3800       # no need to check any newly-enabled hypervisors, since the
3801       # defaults have already been checked in the above code-block
3802       for os_name, os_hvp in self.new_os_hvp.items():
3803         for hv_name, hv_params in os_hvp.items():
3804           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3805           # we need to fill in the new os_hvp on top of the actual hv_p
3806           cluster_defaults = self.new_hvparams.get(hv_name, {})
3807           new_osp = objects.FillDict(cluster_defaults, hv_params)
3808           hv_class = hypervisor.GetHypervisor(hv_name)
3809           hv_class.CheckParameterSyntax(new_osp)
3810           _CheckHVParams(self, node_list, hv_name, new_osp)
3811
3812     if self.op.default_iallocator:
3813       alloc_script = utils.FindFile(self.op.default_iallocator,
3814                                     constants.IALLOCATOR_SEARCH_PATH,
3815                                     os.path.isfile)
3816       if alloc_script is None:
3817         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3818                                    " specified" % self.op.default_iallocator,
3819                                    errors.ECODE_INVAL)
3820
3821   def Exec(self, feedback_fn):
3822     """Change the parameters of the cluster.
3823
3824     """
3825     if self.op.vg_name is not None:
3826       new_volume = self.op.vg_name
3827       if not new_volume:
3828         new_volume = None
3829       if new_volume != self.cfg.GetVGName():
3830         self.cfg.SetVGName(new_volume)
3831       else:
3832         feedback_fn("Cluster LVM configuration already in desired"
3833                     " state, not changing")
3834     if self.op.drbd_helper is not None:
3835       new_helper = self.op.drbd_helper
3836       if not new_helper:
3837         new_helper = None
3838       if new_helper != self.cfg.GetDRBDHelper():
3839         self.cfg.SetDRBDHelper(new_helper)
3840       else:
3841         feedback_fn("Cluster DRBD helper already in desired state,"
3842                     " not changing")
3843     if self.op.hvparams:
3844       self.cluster.hvparams = self.new_hvparams
3845     if self.op.os_hvp:
3846       self.cluster.os_hvp = self.new_os_hvp
3847     if self.op.enabled_hypervisors is not None:
3848       self.cluster.hvparams = self.new_hvparams
3849       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3850     if self.op.beparams:
3851       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3852     if self.op.nicparams:
3853       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3854     if self.op.ipolicy:
3855       self.cluster.ipolicy = self.new_ipolicy
3856     if self.op.osparams:
3857       self.cluster.osparams = self.new_osp
3858     if self.op.ndparams:
3859       self.cluster.ndparams = self.new_ndparams
3860     if self.op.diskparams:
3861       self.cluster.diskparams = self.new_diskparams
3862     if self.op.hv_state:
3863       self.cluster.hv_state_static = self.new_hv_state
3864     if self.op.disk_state:
3865       self.cluster.disk_state_static = self.new_disk_state
3866
3867     if self.op.candidate_pool_size is not None:
3868       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3869       # we need to update the pool size here, otherwise the save will fail
3870       _AdjustCandidatePool(self, [])
3871
3872     if self.op.maintain_node_health is not None:
3873       if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3874         feedback_fn("Note: CONFD was disabled at build time, node health"
3875                     " maintenance is not useful (still enabling it)")
3876       self.cluster.maintain_node_health = self.op.maintain_node_health
3877
3878     if self.op.prealloc_wipe_disks is not None:
3879       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3880
3881     if self.op.add_uids is not None:
3882       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3883
3884     if self.op.remove_uids is not None:
3885       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3886
3887     if self.op.uid_pool is not None:
3888       self.cluster.uid_pool = self.op.uid_pool
3889
3890     if self.op.default_iallocator is not None:
3891       self.cluster.default_iallocator = self.op.default_iallocator
3892
3893     if self.op.reserved_lvs is not None:
3894       self.cluster.reserved_lvs = self.op.reserved_lvs
3895
3896     if self.op.use_external_mip_script is not None:
3897       self.cluster.use_external_mip_script = self.op.use_external_mip_script
3898
3899     def helper_os(aname, mods, desc):
3900       desc += " OS list"
3901       lst = getattr(self.cluster, aname)
3902       for key, val in mods:
3903         if key == constants.DDM_ADD:
3904           if val in lst:
3905             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3906           else:
3907             lst.append(val)
3908         elif key == constants.DDM_REMOVE:
3909           if val in lst:
3910             lst.remove(val)
3911           else:
3912             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3913         else:
3914           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3915
3916     if self.op.hidden_os:
3917       helper_os("hidden_os", self.op.hidden_os, "hidden")
3918
3919     if self.op.blacklisted_os:
3920       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3921
3922     if self.op.master_netdev:
3923       master_params = self.cfg.GetMasterNetworkParameters()
3924       ems = self.cfg.GetUseExternalMipScript()
3925       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3926                   self.cluster.master_netdev)
3927       result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3928                                                        master_params, ems)
3929       result.Raise("Could not disable the master ip")
3930       feedback_fn("Changing master_netdev from %s to %s" %
3931                   (master_params.netdev, self.op.master_netdev))
3932       self.cluster.master_netdev = self.op.master_netdev
3933
3934     if self.op.master_netmask:
3935       master_params = self.cfg.GetMasterNetworkParameters()
3936       feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3937       result = self.rpc.call_node_change_master_netmask(master_params.name,
3938                                                         master_params.netmask,
3939                                                         self.op.master_netmask,
3940                                                         master_params.ip,
3941                                                         master_params.netdev)
3942       if result.fail_msg:
3943         msg = "Could not change the master IP netmask: %s" % result.fail_msg
3944         feedback_fn(msg)
3945
3946       self.cluster.master_netmask = self.op.master_netmask
3947
3948     self.cfg.Update(self.cluster, feedback_fn)
3949
3950     if self.op.master_netdev:
3951       master_params = self.cfg.GetMasterNetworkParameters()
3952       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3953                   self.op.master_netdev)
3954       ems = self.cfg.GetUseExternalMipScript()
3955       result = self.rpc.call_node_activate_master_ip(master_params.name,
3956                                                      master_params, ems)
3957       if result.fail_msg:
3958         self.LogWarning("Could not re-enable the master ip on"
3959                         " the master, please restart manually: %s",
3960                         result.fail_msg)
3961
3962
3963 def _UploadHelper(lu, nodes, fname):
3964   """Helper for uploading a file and showing warnings.
3965
3966   """
3967   if os.path.exists(fname):
3968     result = lu.rpc.call_upload_file(nodes, fname)
3969     for to_node, to_result in result.items():
3970       msg = to_result.fail_msg
3971       if msg:
3972         msg = ("Copy of file %s to node %s failed: %s" %
3973                (fname, to_node, msg))
3974         lu.proc.LogWarning(msg)
3975
3976
3977 def _ComputeAncillaryFiles(cluster, redist):
3978   """Compute files external to Ganeti which need to be consistent.
3979
3980   @type redist: boolean
3981   @param redist: Whether to include files which need to be redistributed
3982
3983   """
3984   # Compute files for all nodes
3985   files_all = set([
3986     constants.SSH_KNOWN_HOSTS_FILE,
3987     constants.CONFD_HMAC_KEY,
3988     constants.CLUSTER_DOMAIN_SECRET_FILE,
3989     constants.SPICE_CERT_FILE,
3990     constants.SPICE_CACERT_FILE,
3991     constants.RAPI_USERS_FILE,
3992     ])
3993
3994   if not redist:
3995     files_all.update(constants.ALL_CERT_FILES)
3996     files_all.update(ssconf.SimpleStore().GetFileList())
3997   else:
3998     # we need to ship at least the RAPI certificate
3999     files_all.add(constants.RAPI_CERT_FILE)
4000
4001   if cluster.modify_etc_hosts:
4002     files_all.add(constants.ETC_HOSTS)
4003
4004   # Files which are optional, these must:
4005   # - be present in one other category as well
4006   # - either exist or not exist on all nodes of that category (mc, vm all)
4007   files_opt = set([
4008     constants.RAPI_USERS_FILE,
4009     ])
4010
4011   # Files which should only be on master candidates
4012   files_mc = set()
4013
4014   if not redist:
4015     files_mc.add(constants.CLUSTER_CONF_FILE)
4016
4017     # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4018     # replication
4019     files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4020
4021   # Files which should only be on VM-capable nodes
4022   files_vm = set(filename
4023     for hv_name in cluster.enabled_hypervisors
4024     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4025
4026   files_opt |= set(filename
4027     for hv_name in cluster.enabled_hypervisors
4028     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4029
4030   # Filenames in each category must be unique
4031   all_files_set = files_all | files_mc | files_vm
4032   assert (len(all_files_set) ==
4033           sum(map(len, [files_all, files_mc, files_vm]))), \
4034          "Found file listed in more than one file list"
4035
4036   # Optional files must be present in one other category
4037   assert all_files_set.issuperset(files_opt), \
4038          "Optional file not in a different required list"
4039
4040   return (files_all, files_opt, files_mc, files_vm)
4041
4042
4043 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4044   """Distribute additional files which are part of the cluster configuration.
4045
4046   ConfigWriter takes care of distributing the config and ssconf files, but
4047   there are more files which should be distributed to all nodes. This function
4048   makes sure those are copied.
4049
4050   @param lu: calling logical unit
4051   @param additional_nodes: list of nodes not in the config to distribute to
4052   @type additional_vm: boolean
4053   @param additional_vm: whether the additional nodes are vm-capable or not
4054
4055   """
4056   # Gather target nodes
4057   cluster = lu.cfg.GetClusterInfo()
4058   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4059
4060   online_nodes = lu.cfg.GetOnlineNodeList()
4061   vm_nodes = lu.cfg.GetVmCapableNodeList()
4062
4063   if additional_nodes is not None:
4064     online_nodes.extend(additional_nodes)
4065     if additional_vm:
4066       vm_nodes.extend(additional_nodes)
4067
4068   # Never distribute to master node
4069   for nodelist in [online_nodes, vm_nodes]:
4070     if master_info.name in nodelist:
4071       nodelist.remove(master_info.name)
4072
4073   # Gather file lists
4074   (files_all, _, files_mc, files_vm) = \
4075     _ComputeAncillaryFiles(cluster, True)
4076
4077   # Never re-distribute configuration file from here
4078   assert not (constants.CLUSTER_CONF_FILE in files_all or
4079               constants.CLUSTER_CONF_FILE in files_vm)
4080   assert not files_mc, "Master candidates not handled in this function"
4081
4082   filemap = [
4083     (online_nodes, files_all),
4084     (vm_nodes, files_vm),
4085     ]
4086
4087   # Upload the files
4088   for (node_list, files) in filemap:
4089     for fname in files:
4090       _UploadHelper(lu, node_list, fname)
4091
4092
4093 class LUClusterRedistConf(NoHooksLU):
4094   """Force the redistribution of cluster configuration.
4095
4096   This is a very simple LU.
4097
4098   """
4099   REQ_BGL = False
4100
4101   def ExpandNames(self):
4102     self.needed_locks = {
4103       locking.LEVEL_NODE: locking.ALL_SET,
4104     }
4105     self.share_locks[locking.LEVEL_NODE] = 1
4106
4107   def Exec(self, feedback_fn):
4108     """Redistribute the configuration.
4109
4110     """
4111     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4112     _RedistributeAncillaryFiles(self)
4113
4114
4115 class LUClusterActivateMasterIp(NoHooksLU):
4116   """Activate the master IP on the master node.
4117
4118   """
4119   def Exec(self, feedback_fn):
4120     """Activate the master IP.
4121
4122     """
4123     master_params = self.cfg.GetMasterNetworkParameters()
4124     ems = self.cfg.GetUseExternalMipScript()
4125     result = self.rpc.call_node_activate_master_ip(master_params.name,
4126                                                    master_params, ems)
4127     result.Raise("Could not activate the master IP")
4128
4129
4130 class LUClusterDeactivateMasterIp(NoHooksLU):
4131   """Deactivate the master IP on the master node.
4132
4133   """
4134   def Exec(self, feedback_fn):
4135     """Deactivate the master IP.
4136
4137     """
4138     master_params = self.cfg.GetMasterNetworkParameters()
4139     ems = self.cfg.GetUseExternalMipScript()
4140     result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4141                                                      master_params, ems)
4142     result.Raise("Could not deactivate the master IP")
4143
4144
4145 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4146   """Sleep and poll for an instance's disk to sync.
4147
4148   """
4149   if not instance.disks or disks is not None and not disks:
4150     return True
4151
4152   disks = _ExpandCheckDisks(instance, disks)
4153
4154   if not oneshot:
4155     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4156
4157   node = instance.primary_node
4158
4159   for dev in disks:
4160     lu.cfg.SetDiskID(dev, node)
4161
4162   # TODO: Convert to utils.Retry
4163
4164   retries = 0
4165   degr_retries = 10 # in seconds, as we sleep 1 second each time
4166   while True:
4167     max_time = 0
4168     done = True
4169     cumul_degraded = False
4170     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4171     msg = rstats.fail_msg
4172     if msg:
4173       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4174       retries += 1
4175       if retries >= 10:
4176         raise errors.RemoteError("Can't contact node %s for mirror data,"
4177                                  " aborting." % node)
4178       time.sleep(6)
4179       continue
4180     rstats = rstats.payload
4181     retries = 0
4182     for i, mstat in enumerate(rstats):
4183       if mstat is None:
4184         lu.LogWarning("Can't compute data for node %s/%s",
4185                            node, disks[i].iv_name)
4186         continue
4187
4188       cumul_degraded = (cumul_degraded or
4189                         (mstat.is_degraded and mstat.sync_percent is None))
4190       if mstat.sync_percent is not None:
4191         done = False
4192         if mstat.estimated_time is not None:
4193           rem_time = ("%s remaining (estimated)" %
4194                       utils.FormatSeconds(mstat.estimated_time))
4195           max_time = mstat.estimated_time
4196         else:
4197           rem_time = "no time estimate"
4198         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4199                         (disks[i].iv_name, mstat.sync_percent, rem_time))
4200
4201     # if we're done but degraded, let's do a few small retries, to
4202     # make sure we see a stable and not transient situation; therefore
4203     # we force restart of the loop
4204     if (done or oneshot) and cumul_degraded and degr_retries > 0:
4205       logging.info("Degraded disks found, %d retries left", degr_retries)
4206       degr_retries -= 1
4207       time.sleep(1)
4208       continue
4209
4210     if done or oneshot:
4211       break
4212
4213     time.sleep(min(60, max_time))
4214
4215   if done:
4216     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4217   return not cumul_degraded
4218
4219
4220 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4221   """Check that mirrors are not degraded.
4222
4223   The ldisk parameter, if True, will change the test from the
4224   is_degraded attribute (which represents overall non-ok status for
4225   the device(s)) to the ldisk (representing the local storage status).
4226
4227   """
4228   lu.cfg.SetDiskID(dev, node)
4229
4230   result = True
4231
4232   if on_primary or dev.AssembleOnSecondary():
4233     rstats = lu.rpc.call_blockdev_find(node, dev)
4234     msg = rstats.fail_msg
4235     if msg:
4236       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4237       result = False
4238     elif not rstats.payload:
4239       lu.LogWarning("Can't find disk on node %s", node)
4240       result = False
4241     else:
4242       if ldisk:
4243         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4244       else:
4245         result = result and not rstats.payload.is_degraded
4246
4247   if dev.children:
4248     for child in dev.children:
4249       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4250
4251   return result
4252
4253
4254 class LUOobCommand(NoHooksLU):
4255   """Logical unit for OOB handling.
4256
4257   """
4258   REG_BGL = False
4259   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4260
4261   def ExpandNames(self):
4262     """Gather locks we need.
4263
4264     """
4265     if self.op.node_names:
4266       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4267       lock_names = self.op.node_names
4268     else:
4269       lock_names = locking.ALL_SET
4270
4271     self.needed_locks = {
4272       locking.LEVEL_NODE: lock_names,
4273       }
4274
4275   def CheckPrereq(self):
4276     """Check prerequisites.
4277
4278     This checks:
4279      - the node exists in the configuration
4280      - OOB is supported
4281
4282     Any errors are signaled by raising errors.OpPrereqError.
4283
4284     """
4285     self.nodes = []
4286     self.master_node = self.cfg.GetMasterNode()
4287
4288     assert self.op.power_delay >= 0.0
4289
4290     if self.op.node_names:
4291       if (self.op.command in self._SKIP_MASTER and
4292           self.master_node in self.op.node_names):
4293         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4294         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4295
4296         if master_oob_handler:
4297           additional_text = ("run '%s %s %s' if you want to operate on the"
4298                              " master regardless") % (master_oob_handler,
4299                                                       self.op.command,
4300                                                       self.master_node)
4301         else:
4302           additional_text = "it does not support out-of-band operations"
4303
4304         raise errors.OpPrereqError(("Operating on the master node %s is not"
4305                                     " allowed for %s; %s") %
4306                                    (self.master_node, self.op.command,
4307                                     additional_text), errors.ECODE_INVAL)
4308     else:
4309       self.op.node_names = self.cfg.GetNodeList()
4310       if self.op.command in self._SKIP_MASTER:
4311         self.op.node_names.remove(self.master_node)
4312
4313     if self.op.command in self._SKIP_MASTER:
4314       assert self.master_node not in self.op.node_names
4315
4316     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4317       if node is None:
4318         raise errors.OpPrereqError("Node %s not found" % node_name,
4319                                    errors.ECODE_NOENT)
4320       else:
4321         self.nodes.append(node)
4322
4323       if (not self.op.ignore_status and
4324           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4325         raise errors.OpPrereqError(("Cannot power off node %s because it is"
4326                                     " not marked offline") % node_name,
4327                                    errors.ECODE_STATE)
4328
4329   def Exec(self, feedback_fn):
4330     """Execute OOB and return result if we expect any.
4331
4332     """
4333     master_node = self.master_node
4334     ret = []
4335
4336     for idx, node in enumerate(utils.NiceSort(self.nodes,
4337                                               key=lambda node: node.name)):
4338       node_entry = [(constants.RS_NORMAL, node.name)]
4339       ret.append(node_entry)
4340
4341       oob_program = _SupportsOob(self.cfg, node)
4342
4343       if not oob_program:
4344         node_entry.append((constants.RS_UNAVAIL, None))
4345         continue
4346
4347       logging.info("Executing out-of-band command '%s' using '%s' on %s",
4348                    self.op.command, oob_program, node.name)
4349       result = self.rpc.call_run_oob(master_node, oob_program,
4350                                      self.op.command, node.name,
4351                                      self.op.timeout)
4352
4353       if result.fail_msg:
4354         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4355                         node.name, result.fail_msg)
4356         node_entry.append((constants.RS_NODATA, None))
4357       else:
4358         try:
4359           self._CheckPayload(result)
4360         except errors.OpExecError, err:
4361           self.LogWarning("Payload returned by node '%s' is not valid: %s",
4362                           node.name, err)
4363           node_entry.append((constants.RS_NODATA, None))
4364         else:
4365           if self.op.command == constants.OOB_HEALTH:
4366             # For health we should log important events
4367             for item, status in result.payload:
4368               if status in [constants.OOB_STATUS_WARNING,
4369                             constants.OOB_STATUS_CRITICAL]:
4370                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4371                                 item, node.name, status)
4372
4373           if self.op.command == constants.OOB_POWER_ON:
4374             node.powered = True
4375           elif self.op.command == constants.OOB_POWER_OFF:
4376             node.powered = False
4377           elif self.op.command == constants.OOB_POWER_STATUS:
4378             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4379             if powered != node.powered:
4380               logging.warning(("Recorded power state (%s) of node '%s' does not"
4381                                " match actual power state (%s)"), node.powered,
4382                               node.name, powered)
4383
4384           # For configuration changing commands we should update the node
4385           if self.op.command in (constants.OOB_POWER_ON,
4386                                  constants.OOB_POWER_OFF):
4387             self.cfg.Update(node, feedback_fn)
4388
4389           node_entry.append((constants.RS_NORMAL, result.payload))
4390
4391           if (self.op.command == constants.OOB_POWER_ON and
4392               idx < len(self.nodes) - 1):
4393             time.sleep(self.op.power_delay)
4394
4395     return ret
4396
4397   def _CheckPayload(self, result):
4398     """Checks if the payload is valid.
4399
4400     @param result: RPC result
4401     @raises errors.OpExecError: If payload is not valid
4402
4403     """
4404     errs = []
4405     if self.op.command == constants.OOB_HEALTH:
4406       if not isinstance(result.payload, list):
4407         errs.append("command 'health' is expected to return a list but got %s" %
4408                     type(result.payload))
4409       else:
4410         for item, status in result.payload:
4411           if status not in constants.OOB_STATUSES:
4412             errs.append("health item '%s' has invalid status '%s'" %
4413                         (item, status))
4414
4415     if self.op.command == constants.OOB_POWER_STATUS:
4416       if not isinstance(result.payload, dict):
4417         errs.append("power-status is expected to return a dict but got %s" %
4418                     type(result.payload))
4419
4420     if self.op.command in [
4421         constants.OOB_POWER_ON,
4422         constants.OOB_POWER_OFF,
4423         constants.OOB_POWER_CYCLE,
4424         ]:
4425       if result.payload is not None:
4426         errs.append("%s is expected to not return payload but got '%s'" %
4427                     (self.op.command, result.payload))
4428
4429     if errs:
4430       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4431                                utils.CommaJoin(errs))
4432
4433
4434 class _OsQuery(_QueryBase):
4435   FIELDS = query.OS_FIELDS
4436
4437   def ExpandNames(self, lu):
4438     # Lock all nodes in shared mode
4439     # Temporary removal of locks, should be reverted later
4440     # TODO: reintroduce locks when they are lighter-weight
4441     lu.needed_locks = {}
4442     #self.share_locks[locking.LEVEL_NODE] = 1
4443     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4444
4445     # The following variables interact with _QueryBase._GetNames
4446     if self.names:
4447       self.wanted = self.names
4448     else:
4449       self.wanted = locking.ALL_SET
4450
4451     self.do_locking = self.use_locking
4452
4453   def DeclareLocks(self, lu, level):
4454     pass
4455
4456   @staticmethod
4457   def _DiagnoseByOS(rlist):
4458     """Remaps a per-node return list into an a per-os per-node dictionary
4459
4460     @param rlist: a map with node names as keys and OS objects as values
4461
4462     @rtype: dict
4463     @return: a dictionary with osnames as keys and as value another
4464         map, with nodes as keys and tuples of (path, status, diagnose,
4465         variants, parameters, api_versions) as values, eg::
4466
4467           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4468                                      (/srv/..., False, "invalid api")],
4469                            "node2": [(/srv/..., True, "", [], [])]}
4470           }
4471
4472     """
4473     all_os = {}
4474     # we build here the list of nodes that didn't fail the RPC (at RPC
4475     # level), so that nodes with a non-responding node daemon don't
4476     # make all OSes invalid
4477     good_nodes = [node_name for node_name in rlist
4478                   if not rlist[node_name].fail_msg]
4479     for node_name, nr in rlist.items():
4480       if nr.fail_msg or not nr.payload:
4481         continue
4482       for (name, path, status, diagnose, variants,
4483            params, api_versions) in nr.payload:
4484         if name not in all_os:
4485           # build a list of nodes for this os containing empty lists
4486           # for each node in node_list
4487           all_os[name] = {}
4488           for nname in good_nodes:
4489             all_os[name][nname] = []
4490         # convert params from [name, help] to (name, help)
4491         params = [tuple(v) for v in params]
4492         all_os[name][node_name].append((path, status, diagnose,
4493                                         variants, params, api_versions))
4494     return all_os
4495
4496   def _GetQueryData(self, lu):
4497     """Computes the list of nodes and their attributes.
4498
4499     """
4500     # Locking is not used
4501     assert not (compat.any(lu.glm.is_owned(level)
4502                            for level in locking.LEVELS
4503                            if level != locking.LEVEL_CLUSTER) or
4504                 self.do_locking or self.use_locking)
4505
4506     valid_nodes = [node.name
4507                    for node in lu.cfg.GetAllNodesInfo().values()
4508                    if not node.offline and node.vm_capable]
4509     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4510     cluster = lu.cfg.GetClusterInfo()
4511
4512     data = {}
4513
4514     for (os_name, os_data) in pol.items():
4515       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4516                           hidden=(os_name in cluster.hidden_os),
4517                           blacklisted=(os_name in cluster.blacklisted_os))
4518
4519       variants = set()
4520       parameters = set()
4521       api_versions = set()
4522
4523       for idx, osl in enumerate(os_data.values()):
4524         info.valid = bool(info.valid and osl and osl[0][1])
4525         if not info.valid:
4526           break
4527
4528         (node_variants, node_params, node_api) = osl[0][3:6]
4529         if idx == 0:
4530           # First entry
4531           variants.update(node_variants)
4532           parameters.update(node_params)
4533           api_versions.update(node_api)
4534         else:
4535           # Filter out inconsistent values
4536           variants.intersection_update(node_variants)
4537           parameters.intersection_update(node_params)
4538           api_versions.intersection_update(node_api)
4539
4540       info.variants = list(variants)
4541       info.parameters = list(parameters)
4542       info.api_versions = list(api_versions)
4543
4544       data[os_name] = info
4545
4546     # Prepare data in requested order
4547     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4548             if name in data]
4549
4550
4551 class LUOsDiagnose(NoHooksLU):
4552   """Logical unit for OS diagnose/query.
4553
4554   """
4555   REQ_BGL = False
4556
4557   @staticmethod
4558   def _BuildFilter(fields, names):
4559     """Builds a filter for querying OSes.
4560
4561     """
4562     name_filter = qlang.MakeSimpleFilter("name", names)
4563
4564     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4565     # respective field is not requested
4566     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4567                      for fname in ["hidden", "blacklisted"]
4568                      if fname not in fields]
4569     if "valid" not in fields:
4570       status_filter.append([qlang.OP_TRUE, "valid"])
4571
4572     if status_filter:
4573       status_filter.insert(0, qlang.OP_AND)
4574     else:
4575       status_filter = None
4576
4577     if name_filter and status_filter:
4578       return [qlang.OP_AND, name_filter, status_filter]
4579     elif name_filter:
4580       return name_filter
4581     else:
4582       return status_filter
4583
4584   def CheckArguments(self):
4585     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4586                        self.op.output_fields, False)
4587
4588   def ExpandNames(self):
4589     self.oq.ExpandNames(self)
4590
4591   def Exec(self, feedback_fn):
4592     return self.oq.OldStyleQuery(self)
4593
4594
4595 class LUNodeRemove(LogicalUnit):
4596   """Logical unit for removing a node.
4597
4598   """
4599   HPATH = "node-remove"
4600   HTYPE = constants.HTYPE_NODE
4601
4602   def BuildHooksEnv(self):
4603     """Build hooks env.
4604
4605     This doesn't run on the target node in the pre phase as a failed
4606     node would then be impossible to remove.
4607
4608     """
4609     return {
4610       "OP_TARGET": self.op.node_name,
4611       "NODE_NAME": self.op.node_name,
4612       }
4613
4614   def BuildHooksNodes(self):
4615     """Build hooks nodes.
4616
4617     """
4618     all_nodes = self.cfg.GetNodeList()
4619     try:
4620       all_nodes.remove(self.op.node_name)
4621     except ValueError:
4622       logging.warning("Node '%s', which is about to be removed, was not found"
4623                       " in the list of all nodes", self.op.node_name)
4624     return (all_nodes, all_nodes)
4625
4626   def CheckPrereq(self):
4627     """Check prerequisites.
4628
4629     This checks:
4630      - the node exists in the configuration
4631      - it does not have primary or secondary instances
4632      - it's not the master
4633
4634     Any errors are signaled by raising errors.OpPrereqError.
4635
4636     """
4637     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4638     node = self.cfg.GetNodeInfo(self.op.node_name)
4639     assert node is not None
4640
4641     masternode = self.cfg.GetMasterNode()
4642     if node.name == masternode:
4643       raise errors.OpPrereqError("Node is the master node, failover to another"
4644                                  " node is required", errors.ECODE_INVAL)
4645
4646     for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4647       if node.name in instance.all_nodes:
4648         raise errors.OpPrereqError("Instance %s is still running on the node,"
4649                                    " please remove first" % instance_name,
4650                                    errors.ECODE_INVAL)
4651     self.op.node_name = node.name
4652     self.node = node
4653
4654   def Exec(self, feedback_fn):
4655     """Removes the node from the cluster.
4656
4657     """
4658     node = self.node
4659     logging.info("Stopping the node daemon and removing configs from node %s",
4660                  node.name)
4661
4662     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4663
4664     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4665       "Not owning BGL"
4666
4667     # Promote nodes to master candidate as needed
4668     _AdjustCandidatePool(self, exceptions=[node.name])
4669     self.context.RemoveNode(node.name)
4670
4671     # Run post hooks on the node before it's removed
4672     _RunPostHook(self, node.name)
4673
4674     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4675     msg = result.fail_msg
4676     if msg:
4677       self.LogWarning("Errors encountered on the remote node while leaving"
4678                       " the cluster: %s", msg)
4679
4680     # Remove node from our /etc/hosts
4681     if self.cfg.GetClusterInfo().modify_etc_hosts:
4682       master_node = self.cfg.GetMasterNode()
4683       result = self.rpc.call_etc_hosts_modify(master_node,
4684                                               constants.ETC_HOSTS_REMOVE,
4685                                               node.name, None)
4686       result.Raise("Can't update hosts file with new host data")
4687       _RedistributeAncillaryFiles(self)
4688
4689
4690 class _NodeQuery(_QueryBase):
4691   FIELDS = query.NODE_FIELDS
4692
4693   def ExpandNames(self, lu):
4694     lu.needed_locks = {}
4695     lu.share_locks = _ShareAll()
4696
4697     if self.names:
4698       self.wanted = _GetWantedNodes(lu, self.names)
4699     else:
4700       self.wanted = locking.ALL_SET
4701
4702     self.do_locking = (self.use_locking and
4703                        query.NQ_LIVE in self.requested_data)
4704
4705     if self.do_locking:
4706       # If any non-static field is requested we need to lock the nodes
4707       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4708
4709   def DeclareLocks(self, lu, level):
4710     pass
4711
4712   def _GetQueryData(self, lu):
4713     """Computes the list of nodes and their attributes.
4714
4715     """
4716     all_info = lu.cfg.GetAllNodesInfo()
4717
4718     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4719
4720     # Gather data as requested
4721     if query.NQ_LIVE in self.requested_data:
4722       # filter out non-vm_capable nodes
4723       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4724
4725       node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4726                                         [lu.cfg.GetHypervisorType()])
4727       live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4728                        for (name, nresult) in node_data.items()
4729                        if not nresult.fail_msg and nresult.payload)
4730     else:
4731       live_data = None
4732
4733     if query.NQ_INST in self.requested_data:
4734       node_to_primary = dict([(name, set()) for name in nodenames])
4735       node_to_secondary = dict([(name, set()) for name in nodenames])
4736
4737       inst_data = lu.cfg.GetAllInstancesInfo()
4738
4739       for inst in inst_data.values():
4740         if inst.primary_node in node_to_primary:
4741           node_to_primary[inst.primary_node].add(inst.name)
4742         for secnode in inst.secondary_nodes:
4743           if secnode in node_to_secondary:
4744             node_to_secondary[secnode].add(inst.name)
4745     else:
4746       node_to_primary = None
4747       node_to_secondary = None
4748
4749     if query.NQ_OOB in self.requested_data:
4750       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4751                          for name, node in all_info.iteritems())
4752     else:
4753       oob_support = None
4754
4755     if query.NQ_GROUP in self.requested_data:
4756       groups = lu.cfg.GetAllNodeGroupsInfo()
4757     else:
4758       groups = {}
4759
4760     return query.NodeQueryData([all_info[name] for name in nodenames],
4761                                live_data, lu.cfg.GetMasterNode(),
4762                                node_to_primary, node_to_secondary, groups,
4763                                oob_support, lu.cfg.GetClusterInfo())
4764
4765
4766 class LUNodeQuery(NoHooksLU):
4767   """Logical unit for querying nodes.
4768
4769   """
4770   # pylint: disable=W0142
4771   REQ_BGL = False
4772
4773   def CheckArguments(self):
4774     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4775                          self.op.output_fields, self.op.use_locking)
4776
4777   def ExpandNames(self):
4778     self.nq.ExpandNames(self)
4779
4780   def DeclareLocks(self, level):
4781     self.nq.DeclareLocks(self, level)
4782
4783   def Exec(self, feedback_fn):
4784     return self.nq.OldStyleQuery(self)
4785
4786
4787 class LUNodeQueryvols(NoHooksLU):
4788   """Logical unit for getting volumes on node(s).
4789
4790   """
4791   REQ_BGL = False
4792   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4793   _FIELDS_STATIC = utils.FieldSet("node")
4794
4795   def CheckArguments(self):
4796     _CheckOutputFields(static=self._FIELDS_STATIC,
4797                        dynamic=self._FIELDS_DYNAMIC,
4798                        selected=self.op.output_fields)
4799
4800   def ExpandNames(self):
4801     self.share_locks = _ShareAll()
4802     self.needed_locks = {}
4803
4804     if not self.op.nodes:
4805       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4806     else:
4807       self.needed_locks[locking.LEVEL_NODE] = \
4808         _GetWantedNodes(self, self.op.nodes)
4809
4810   def Exec(self, feedback_fn):
4811     """Computes the list of nodes and their attributes.
4812
4813     """
4814     nodenames = self.owned_locks(locking.LEVEL_NODE)
4815     volumes = self.rpc.call_node_volumes(nodenames)
4816
4817     ilist = self.cfg.GetAllInstancesInfo()
4818     vol2inst = _MapInstanceDisksToNodes(ilist.values())
4819
4820     output = []
4821     for node in nodenames:
4822       nresult = volumes[node]
4823       if nresult.offline:
4824         continue
4825       msg = nresult.fail_msg
4826       if msg:
4827         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4828         continue
4829
4830       node_vols = sorted(nresult.payload,
4831                          key=operator.itemgetter("dev"))
4832
4833       for vol in node_vols:
4834         node_output = []
4835         for field in self.op.output_fields:
4836           if field == "node":
4837             val = node
4838           elif field == "phys":
4839             val = vol["dev"]
4840           elif field == "vg":
4841             val = vol["vg"]
4842           elif field == "name":
4843             val = vol["name"]
4844           elif field == "size":
4845             val = int(float(vol["size"]))
4846           elif field == "instance":
4847             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4848           else:
4849             raise errors.ParameterError(field)
4850           node_output.append(str(val))
4851
4852         output.append(node_output)
4853
4854     return output
4855
4856
4857 class LUNodeQueryStorage(NoHooksLU):
4858   """Logical unit for getting information on storage units on node(s).
4859
4860   """
4861   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4862   REQ_BGL = False
4863
4864   def CheckArguments(self):
4865     _CheckOutputFields(static=self._FIELDS_STATIC,
4866                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4867                        selected=self.op.output_fields)
4868
4869   def ExpandNames(self):
4870     self.share_locks = _ShareAll()
4871     self.needed_locks = {}
4872
4873     if self.op.nodes:
4874       self.needed_locks[locking.LEVEL_NODE] = \
4875         _GetWantedNodes(self, self.op.nodes)
4876     else:
4877       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4878
4879   def Exec(self, feedback_fn):
4880     """Computes the list of nodes and their attributes.
4881
4882     """
4883     self.nodes = self.owned_locks(locking.LEVEL_NODE)
4884
4885     # Always get name to sort by
4886     if constants.SF_NAME in self.op.output_fields:
4887       fields = self.op.output_fields[:]
4888     else:
4889       fields = [constants.SF_NAME] + self.op.output_fields
4890
4891     # Never ask for node or type as it's only known to the LU
4892     for extra in [constants.SF_NODE, constants.SF_TYPE]:
4893       while extra in fields:
4894         fields.remove(extra)
4895
4896     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4897     name_idx = field_idx[constants.SF_NAME]
4898
4899     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4900     data = self.rpc.call_storage_list(self.nodes,
4901                                       self.op.storage_type, st_args,
4902                                       self.op.name, fields)
4903
4904     result = []
4905
4906     for node in utils.NiceSort(self.nodes):
4907       nresult = data[node]
4908       if nresult.offline:
4909         continue
4910
4911       msg = nresult.fail_msg
4912       if msg:
4913         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4914         continue
4915
4916       rows = dict([(row[name_idx], row) for row in nresult.payload])
4917
4918       for name in utils.NiceSort(rows.keys()):
4919         row = rows[name]
4920
4921         out = []
4922
4923         for field in self.op.output_fields:
4924           if field == constants.SF_NODE:
4925             val = node
4926           elif field == constants.SF_TYPE:
4927             val = self.op.storage_type
4928           elif field in field_idx:
4929             val = row[field_idx[field]]
4930           else:
4931             raise errors.ParameterError(field)
4932
4933           out.append(val)
4934
4935         result.append(out)
4936
4937     return result
4938
4939
4940 class _InstanceQuery(_QueryBase):
4941   FIELDS = query.INSTANCE_FIELDS
4942
4943   def ExpandNames(self, lu):
4944     lu.needed_locks = {}
4945     lu.share_locks = _ShareAll()
4946
4947     if self.names:
4948       self.wanted = _GetWantedInstances(lu, self.names)
4949     else:
4950       self.wanted = locking.ALL_SET
4951
4952     self.do_locking = (self.use_locking and
4953                        query.IQ_LIVE in self.requested_data)
4954     if self.do_locking:
4955       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4956       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4957       lu.needed_locks[locking.LEVEL_NODE] = []
4958       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4959
4960     self.do_grouplocks = (self.do_locking and
4961                           query.IQ_NODES in self.requested_data)
4962
4963   def DeclareLocks(self, lu, level):
4964     if self.do_locking:
4965       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4966         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4967
4968         # Lock all groups used by instances optimistically; this requires going
4969         # via the node before it's locked, requiring verification later on
4970         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4971           set(group_uuid
4972               for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4973               for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4974       elif level == locking.LEVEL_NODE:
4975         lu._LockInstancesNodes() # pylint: disable=W0212
4976
4977   @staticmethod
4978   def _CheckGroupLocks(lu):
4979     owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4980     owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4981
4982     # Check if node groups for locked instances are still correct
4983     for instance_name in owned_instances:
4984       _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4985
4986   def _GetQueryData(self, lu):
4987     """Computes the list of instances and their attributes.
4988
4989     """
4990     if self.do_grouplocks:
4991       self._CheckGroupLocks(lu)
4992
4993     cluster = lu.cfg.GetClusterInfo()
4994     all_info = lu.cfg.GetAllInstancesInfo()
4995
4996     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4997
4998     instance_list = [all_info[name] for name in instance_names]
4999     nodes = frozenset(itertools.chain(*(inst.all_nodes
5000                                         for inst in instance_list)))
5001     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5002     bad_nodes = []
5003     offline_nodes = []
5004     wrongnode_inst = set()
5005
5006     # Gather data as requested
5007     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5008       live_data = {}
5009       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5010       for name in nodes:
5011         result = node_data[name]
5012         if result.offline:
5013           # offline nodes will be in both lists
5014           assert result.fail_msg
5015           offline_nodes.append(name)
5016         if result.fail_msg:
5017           bad_nodes.append(name)
5018         elif result.payload:
5019           for inst in result.payload:
5020             if inst in all_info:
5021               if all_info[inst].primary_node == name:
5022                 live_data.update(result.payload)
5023               else:
5024                 wrongnode_inst.add(inst)
5025             else:
5026               # orphan instance; we don't list it here as we don't
5027               # handle this case yet in the output of instance listing
5028               logging.warning("Orphan instance '%s' found on node %s",
5029                               inst, name)
5030         # else no instance is alive
5031     else:
5032       live_data = {}
5033
5034     if query.IQ_DISKUSAGE in self.requested_data:
5035       disk_usage = dict((inst.name,
5036                          _ComputeDiskSize(inst.disk_template,
5037                                           [{constants.IDISK_SIZE: disk.size}
5038                                            for disk in inst.disks]))
5039                         for inst in instance_list)
5040     else:
5041       disk_usage = None
5042
5043     if query.IQ_CONSOLE in self.requested_data:
5044       consinfo = {}
5045       for inst in instance_list:
5046         if inst.name in live_data:
5047           # Instance is running
5048           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5049         else:
5050           consinfo[inst.name] = None
5051       assert set(consinfo.keys()) == set(instance_names)
5052     else:
5053       consinfo = None
5054
5055     if query.IQ_NODES in self.requested_data:
5056       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5057                                             instance_list)))
5058       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5059       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5060                     for uuid in set(map(operator.attrgetter("group"),
5061                                         nodes.values())))
5062     else:
5063       nodes = None
5064       groups = None
5065
5066     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5067                                    disk_usage, offline_nodes, bad_nodes,
5068                                    live_data, wrongnode_inst, consinfo,
5069                                    nodes, groups)
5070
5071
5072 class LUQuery(NoHooksLU):
5073   """Query for resources/items of a certain kind.
5074
5075   """
5076   # pylint: disable=W0142
5077   REQ_BGL = False
5078
5079   def CheckArguments(self):
5080     qcls = _GetQueryImplementation(self.op.what)
5081
5082     self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5083
5084   def ExpandNames(self):
5085     self.impl.ExpandNames(self)
5086
5087   def DeclareLocks(self, level):
5088     self.impl.DeclareLocks(self, level)
5089
5090   def Exec(self, feedback_fn):
5091     return self.impl.NewStyleQuery(self)
5092
5093
5094 class LUQueryFields(NoHooksLU):
5095   """Query for resources/items of a certain kind.
5096
5097   """
5098   # pylint: disable=W0142
5099   REQ_BGL = False
5100
5101   def CheckArguments(self):
5102     self.qcls = _GetQueryImplementation(self.op.what)
5103
5104   def ExpandNames(self):
5105     self.needed_locks = {}
5106
5107   def Exec(self, feedback_fn):
5108     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5109
5110
5111 class LUNodeModifyStorage(NoHooksLU):
5112   """Logical unit for modifying a storage volume on a node.
5113
5114   """
5115   REQ_BGL = False
5116
5117   def CheckArguments(self):
5118     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5119
5120     storage_type = self.op.storage_type
5121
5122     try:
5123       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5124     except KeyError:
5125       raise errors.OpPrereqError("Storage units of type '%s' can not be"
5126                                  " modified" % storage_type,
5127                                  errors.ECODE_INVAL)
5128
5129     diff = set(self.op.changes.keys()) - modifiable
5130     if diff:
5131       raise errors.OpPrereqError("The following fields can not be modified for"
5132                                  " storage units of type '%s': %r" %
5133                                  (storage_type, list(diff)),
5134                                  errors.ECODE_INVAL)
5135
5136   def ExpandNames(self):
5137     self.needed_locks = {
5138       locking.LEVEL_NODE: self.op.node_name,
5139       }
5140
5141   def Exec(self, feedback_fn):
5142     """Computes the list of nodes and their attributes.
5143
5144     """
5145     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5146     result = self.rpc.call_storage_modify(self.op.node_name,
5147                                           self.op.storage_type, st_args,
5148                                           self.op.name, self.op.changes)
5149     result.Raise("Failed to modify storage unit '%s' on %s" %
5150                  (self.op.name, self.op.node_name))
5151
5152
5153 class LUNodeAdd(LogicalUnit):
5154   """Logical unit for adding node to the cluster.
5155
5156   """
5157   HPATH = "node-add"
5158   HTYPE = constants.HTYPE_NODE
5159   _NFLAGS = ["master_capable", "vm_capable"]
5160
5161   def CheckArguments(self):
5162     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5163     # validate/normalize the node name
5164     self.hostname = netutils.GetHostname(name=self.op.node_name,
5165                                          family=self.primary_ip_family)
5166     self.op.node_name = self.hostname.name
5167
5168     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5169       raise errors.OpPrereqError("Cannot readd the master node",
5170                                  errors.ECODE_STATE)
5171
5172     if self.op.readd and self.op.group:
5173       raise errors.OpPrereqError("Cannot pass a node group when a node is"
5174                                  " being readded", errors.ECODE_INVAL)
5175
5176   def BuildHooksEnv(self):
5177     """Build hooks env.
5178
5179     This will run on all nodes before, and on all nodes + the new node after.
5180
5181     """
5182     return {
5183       "OP_TARGET": self.op.node_name,
5184       "NODE_NAME": self.op.node_name,
5185       "NODE_PIP": self.op.primary_ip,
5186       "NODE_SIP": self.op.secondary_ip,
5187       "MASTER_CAPABLE": str(self.op.master_capable),
5188       "VM_CAPABLE": str(self.op.vm_capable),
5189       }
5190
5191   def BuildHooksNodes(self):
5192     """Build hooks nodes.
5193
5194     """
5195     # Exclude added node
5196     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5197     post_nodes = pre_nodes + [self.op.node_name, ]
5198
5199     return (pre_nodes, post_nodes)
5200
5201   def CheckPrereq(self):
5202     """Check prerequisites.
5203
5204     This checks:
5205      - the new node is not already in the config
5206      - it is resolvable
5207      - its parameters (single/dual homed) matches the cluster
5208
5209     Any errors are signaled by raising errors.OpPrereqError.
5210
5211     """
5212     cfg = self.cfg
5213     hostname = self.hostname
5214     node = hostname.name
5215     primary_ip = self.op.primary_ip = hostname.ip
5216     if self.op.secondary_ip is None:
5217       if self.primary_ip_family == netutils.IP6Address.family:
5218         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5219                                    " IPv4 address must be given as secondary",
5220                                    errors.ECODE_INVAL)
5221       self.op.secondary_ip = primary_ip
5222
5223     secondary_ip = self.op.secondary_ip
5224     if not netutils.IP4Address.IsValid(secondary_ip):
5225       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5226                                  " address" % secondary_ip, errors.ECODE_INVAL)
5227
5228     node_list = cfg.GetNodeList()
5229     if not self.op.readd and node in node_list:
5230       raise errors.OpPrereqError("Node %s is already in the configuration" %
5231                                  node, errors.ECODE_EXISTS)
5232     elif self.op.readd and node not in node_list:
5233       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5234                                  errors.ECODE_NOENT)
5235
5236     self.changed_primary_ip = False
5237
5238     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5239       if self.op.readd and node == existing_node_name:
5240         if existing_node.secondary_ip != secondary_ip:
5241           raise errors.OpPrereqError("Readded node doesn't have the same IP"
5242                                      " address configuration as before",
5243                                      errors.ECODE_INVAL)
5244         if existing_node.primary_ip != primary_ip:
5245           self.changed_primary_ip = True
5246
5247         continue
5248
5249       if (existing_node.primary_ip == primary_ip or
5250           existing_node.secondary_ip == primary_ip or
5251           existing_node.primary_ip == secondary_ip or
5252           existing_node.secondary_ip == secondary_ip):
5253         raise errors.OpPrereqError("New node ip address(es) conflict with"
5254                                    " existing node %s" % existing_node.name,
5255                                    errors.ECODE_NOTUNIQUE)
5256
5257     # After this 'if' block, None is no longer a valid value for the
5258     # _capable op attributes
5259     if self.op.readd:
5260       old_node = self.cfg.GetNodeInfo(node)
5261       assert old_node is not None, "Can't retrieve locked node %s" % node
5262       for attr in self._NFLAGS:
5263         if getattr(self.op, attr) is None:
5264           setattr(self.op, attr, getattr(old_node, attr))
5265     else:
5266       for attr in self._NFLAGS:
5267         if getattr(self.op, attr) is None:
5268           setattr(self.op, attr, True)
5269
5270     if self.op.readd and not self.op.vm_capable:
5271       pri, sec = cfg.GetNodeInstances(node)
5272       if pri or sec:
5273         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5274                                    " flag set to false, but it already holds"
5275                                    " instances" % node,
5276                                    errors.ECODE_STATE)
5277
5278     # check that the type of the node (single versus dual homed) is the
5279     # same as for the master
5280     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5281     master_singlehomed = myself.secondary_ip == myself.primary_ip
5282     newbie_singlehomed = secondary_ip == primary_ip
5283     if master_singlehomed != newbie_singlehomed:
5284       if master_singlehomed:
5285         raise errors.OpPrereqError("The master has no secondary ip but the"
5286                                    " new node has one",
5287                                    errors.ECODE_INVAL)
5288       else:
5289         raise errors.OpPrereqError("The master has a secondary ip but the"
5290                                    " new node doesn't have one",
5291                                    errors.ECODE_INVAL)
5292
5293     # checks reachability
5294     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5295       raise errors.OpPrereqError("Node not reachable by ping",
5296                                  errors.ECODE_ENVIRON)
5297
5298     if not newbie_singlehomed:
5299       # check reachability from my secondary ip to newbie's secondary ip
5300       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5301                            source=myself.secondary_ip):
5302         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5303                                    " based ping to node daemon port",
5304                                    errors.ECODE_ENVIRON)
5305
5306     if self.op.readd:
5307       exceptions = [node]
5308     else:
5309       exceptions = []
5310
5311     if self.op.master_capable:
5312       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5313     else:
5314       self.master_candidate = False
5315
5316     if self.op.readd:
5317       self.new_node = old_node
5318     else:
5319       node_group = cfg.LookupNodeGroup(self.op.group)
5320       self.new_node = objects.Node(name=node,
5321                                    primary_ip=primary_ip,
5322                                    secondary_ip=secondary_ip,
5323                                    master_candidate=self.master_candidate,
5324                                    offline=False, drained=False,
5325                                    group=node_group)
5326
5327     if self.op.ndparams:
5328       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5329
5330   def Exec(self, feedback_fn):
5331     """Adds the new node to the cluster.
5332
5333     """
5334     new_node = self.new_node
5335     node = new_node.name
5336
5337     assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5338       "Not owning BGL"
5339
5340     # We adding a new node so we assume it's powered
5341     new_node.powered = True
5342
5343     # for re-adds, reset the offline/drained/master-candidate flags;
5344     # we need to reset here, otherwise offline would prevent RPC calls
5345     # later in the procedure; this also means that if the re-add
5346     # fails, we are left with a non-offlined, broken node
5347     if self.op.readd:
5348       new_node.drained = new_node.offline = False # pylint: disable=W0201
5349       self.LogInfo("Readding a node, the offline/drained flags were reset")
5350       # if we demote the node, we do cleanup later in the procedure
5351       new_node.master_candidate = self.master_candidate
5352       if self.changed_primary_ip:
5353         new_node.primary_ip = self.op.primary_ip
5354
5355     # copy the master/vm_capable flags
5356     for attr in self._NFLAGS:
5357       setattr(new_node, attr, getattr(self.op, attr))
5358
5359     # notify the user about any possible mc promotion
5360     if new_node.master_candidate:
5361       self.LogInfo("Node will be a master candidate")
5362
5363     if self.op.ndparams:
5364       new_node.ndparams = self.op.ndparams
5365     else:
5366       new_node.ndparams = {}
5367
5368     # check connectivity
5369     result = self.rpc.call_version([node])[node]
5370     result.Raise("Can't get version information from node %s" % node)
5371     if constants.PROTOCOL_VERSION == result.payload:
5372       logging.info("Communication to node %s fine, sw version %s match",
5373                    node, result.payload)
5374     else:
5375       raise errors.OpExecError("Version mismatch master version %s,"
5376                                " node version %s" %
5377                                (constants.PROTOCOL_VERSION, result.payload))
5378
5379     # Add node to our /etc/hosts, and add key to known_hosts
5380     if self.cfg.GetClusterInfo().modify_etc_hosts:
5381       master_node = self.cfg.GetMasterNode()
5382       result = self.rpc.call_etc_hosts_modify(master_node,
5383                                               constants.ETC_HOSTS_ADD,
5384                                               self.hostname.name,
5385                                               self.hostname.ip)
5386       result.Raise("Can't update hosts file with new host data")
5387
5388     if new_node.secondary_ip != new_node.primary_ip:
5389       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5390                                False)
5391
5392     node_verify_list = [self.cfg.GetMasterNode()]
5393     node_verify_param = {
5394       constants.NV_NODELIST: ([node], {}),
5395       # TODO: do a node-net-test as well?
5396     }
5397
5398     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5399                                        self.cfg.GetClusterName())
5400     for verifier in node_verify_list:
5401       result[verifier].Raise("Cannot communicate with node %s" % verifier)
5402       nl_payload = result[verifier].payload[constants.NV_NODELIST]
5403       if nl_payload:
5404         for failed in nl_payload:
5405           feedback_fn("ssh/hostname verification failed"
5406                       " (checking from %s): %s" %
5407                       (verifier, nl_payload[failed]))
5408         raise errors.OpExecError("ssh/hostname verification failed")
5409
5410     if self.op.readd:
5411       _RedistributeAncillaryFiles(self)
5412       self.context.ReaddNode(new_node)
5413       # make sure we redistribute the config
5414       self.cfg.Update(new_node, feedback_fn)
5415       # and make sure the new node will not have old files around
5416       if not new_node.master_candidate:
5417         result = self.rpc.call_node_demote_from_mc(new_node.name)
5418         msg = result.fail_msg
5419         if msg:
5420           self.LogWarning("Node failed to demote itself from master"
5421                           " candidate status: %s" % msg)
5422     else:
5423       _RedistributeAncillaryFiles(self, additional_nodes=[node],
5424                                   additional_vm=self.op.vm_capable)
5425       self.context.AddNode(new_node, self.proc.GetECId())
5426
5427
5428 class LUNodeSetParams(LogicalUnit):
5429   """Modifies the parameters of a node.
5430
5431   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5432       to the node role (as _ROLE_*)
5433   @cvar _R2F: a dictionary from node role to tuples of flags
5434   @cvar _FLAGS: a list of attribute names corresponding to the flags
5435
5436   """
5437   HPATH = "node-modify"
5438   HTYPE = constants.HTYPE_NODE
5439   REQ_BGL = False
5440   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5441   _F2R = {
5442     (True, False, False): _ROLE_CANDIDATE,
5443     (False, True, False): _ROLE_DRAINED,
5444     (False, False, True): _ROLE_OFFLINE,
5445     (False, False, False): _ROLE_REGULAR,
5446     }
5447   _R2F = dict((v, k) for k, v in _F2R.items())
5448   _FLAGS = ["master_candidate", "drained", "offline"]
5449
5450   def CheckArguments(self):
5451     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5452     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5453                 self.op.master_capable, self.op.vm_capable,
5454                 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5455                 self.op.disk_state]
5456     if all_mods.count(None) == len(all_mods):
5457       raise errors.OpPrereqError("Please pass at least one modification",
5458                                  errors.ECODE_INVAL)
5459     if all_mods.count(True) > 1:
5460       raise errors.OpPrereqError("Can't set the node into more than one"
5461                                  " state at the same time",
5462                                  errors.ECODE_INVAL)
5463
5464     # Boolean value that tells us whether we might be demoting from MC
5465     self.might_demote = (self.op.master_candidate == False or
5466                          self.op.offline == True or
5467                          self.op.drained == True or
5468                          self.op.master_capable == False)
5469
5470     if self.op.secondary_ip:
5471       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5472         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5473                                    " address" % self.op.secondary_ip,
5474                                    errors.ECODE_INVAL)
5475
5476     self.lock_all = self.op.auto_promote and self.might_demote
5477     self.lock_instances = self.op.secondary_ip is not None
5478
5479   def _InstanceFilter(self, instance):
5480     """Filter for getting affected instances.
5481
5482     """
5483     return (instance.disk_template in constants.DTS_INT_MIRROR and
5484             self.op.node_name in instance.all_nodes)
5485
5486   def ExpandNames(self):
5487     if self.lock_all:
5488       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5489     else:
5490       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5491
5492     # Since modifying a node can have severe effects on currently running
5493     # operations the resource lock is at least acquired in shared mode
5494     self.needed_locks[locking.LEVEL_NODE_RES] = \
5495       self.needed_locks[locking.LEVEL_NODE]
5496
5497     # Get node resource and instance locks in shared mode; they are not used
5498     # for anything but read-only access
5499     self.share_locks[locking.LEVEL_NODE_RES] = 1
5500     self.share_locks[locking.LEVEL_INSTANCE] = 1
5501
5502     if self.lock_instances:
5503       self.needed_locks[locking.LEVEL_INSTANCE] = \
5504         frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5505
5506   def BuildHooksEnv(self):
5507     """Build hooks env.
5508
5509     This runs on the master node.
5510
5511     """
5512     return {
5513       "OP_TARGET": self.op.node_name,
5514       "MASTER_CANDIDATE": str(self.op.master_candidate),
5515       "OFFLINE": str(self.op.offline),
5516       "DRAINED": str(self.op.drained),
5517       "MASTER_CAPABLE": str(self.op.master_capable),
5518       "VM_CAPABLE": str(self.op.vm_capable),
5519       }
5520
5521   def BuildHooksNodes(self):
5522     """Build hooks nodes.
5523
5524     """
5525     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5526     return (nl, nl)
5527
5528   def CheckPrereq(self):
5529     """Check prerequisites.
5530
5531     This only checks the instance list against the existing names.
5532
5533     """
5534     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5535
5536     if self.lock_instances:
5537       affected_instances = \
5538         self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5539
5540       # Verify instance locks
5541       owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5542       wanted_instances = frozenset(affected_instances.keys())
5543       if wanted_instances - owned_instances:
5544         raise errors.OpPrereqError("Instances affected by changing node %s's"
5545                                    " secondary IP address have changed since"
5546                                    " locks were acquired, wanted '%s', have"
5547                                    " '%s'; retry the operation" %
5548                                    (self.op.node_name,
5549                                     utils.CommaJoin(wanted_instances),
5550                                     utils.CommaJoin(owned_instances)),
5551                                    errors.ECODE_STATE)
5552     else:
5553       affected_instances = None
5554
5555     if (self.op.master_candidate is not None or
5556         self.op.drained is not None or
5557         self.op.offline is not None):
5558       # we can't change the master's node flags
5559       if self.op.node_name == self.cfg.GetMasterNode():
5560         raise errors.OpPrereqError("The master role can be changed"
5561                                    " only via master-failover",
5562                                    errors.ECODE_INVAL)
5563
5564     if self.op.master_candidate and not node.master_capable:
5565       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5566                                  " it a master candidate" % node.name,
5567                                  errors.ECODE_STATE)
5568
5569     if self.op.vm_capable == False:
5570       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5571       if ipri or isec:
5572         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5573                                    " the vm_capable flag" % node.name,
5574                                    errors.ECODE_STATE)
5575
5576     if node.master_candidate and self.might_demote and not self.lock_all:
5577       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5578       # check if after removing the current node, we're missing master
5579       # candidates
5580       (mc_remaining, mc_should, _) = \
5581           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5582       if mc_remaining < mc_should:
5583         raise errors.OpPrereqError("Not enough master candidates, please"
5584                                    " pass auto promote option to allow"
5585                                    " promotion", errors.ECODE_STATE)
5586
5587     self.old_flags = old_flags = (node.master_candidate,
5588                                   node.drained, node.offline)
5589     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5590     self.old_role = old_role = self._F2R[old_flags]
5591
5592     # Check for ineffective changes
5593     for attr in self._FLAGS:
5594       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5595         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5596         setattr(self.op, attr, None)
5597
5598     # Past this point, any flag change to False means a transition
5599     # away from the respective state, as only real changes are kept
5600
5601     # TODO: We might query the real power state if it supports OOB
5602     if _SupportsOob(self.cfg, node):
5603       if self.op.offline is False and not (node.powered or
5604                                            self.op.powered == True):
5605         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5606                                     " offline status can be reset") %
5607                                    self.op.node_name)
5608     elif self.op.powered is not None:
5609       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5610                                   " as it does not support out-of-band"
5611                                   " handling") % self.op.node_name)
5612
5613     # If we're being deofflined/drained, we'll MC ourself if needed
5614     if (self.op.drained == False or self.op.offline == False or
5615         (self.op.master_capable and not node.master_capable)):
5616       if _DecideSelfPromotion(self):
5617         self.op.master_candidate = True
5618         self.LogInfo("Auto-promoting node to master candidate")
5619
5620     # If we're no longer master capable, we'll demote ourselves from MC
5621     if self.op.master_capable == False and node.master_candidate:
5622       self.LogInfo("Demoting from master candidate")
5623       self.op.master_candidate = False
5624
5625     # Compute new role
5626     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5627     if self.op.master_candidate:
5628       new_role = self._ROLE_CANDIDATE
5629     elif self.op.drained:
5630       new_role = self._ROLE_DRAINED
5631     elif self.op.offline:
5632       new_role = self._ROLE_OFFLINE
5633     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5634       # False is still in new flags, which means we're un-setting (the
5635       # only) True flag
5636       new_role = self._ROLE_REGULAR
5637     else: # no new flags, nothing, keep old role
5638       new_role = old_role
5639
5640     self.new_role = new_role
5641
5642     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5643       # Trying to transition out of offline status
5644       # TODO: Use standard RPC runner, but make sure it works when the node is
5645       # still marked offline
5646       result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5647       if result.fail_msg:
5648         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5649                                    " to report its version: %s" %
5650                                    (node.name, result.fail_msg),
5651                                    errors.ECODE_STATE)
5652       else:
5653         self.LogWarning("Transitioning node from offline to online state"
5654                         " without using re-add. Please make sure the node"
5655                         " is healthy!")
5656
5657     if self.op.secondary_ip:
5658       # Ok even without locking, because this can't be changed by any LU
5659       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5660       master_singlehomed = master.secondary_ip == master.primary_ip
5661       if master_singlehomed and self.op.secondary_ip:
5662         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5663                                    " homed cluster", errors.ECODE_INVAL)
5664
5665       assert not (frozenset(affected_instances) -
5666                   self.owned_locks(locking.LEVEL_INSTANCE))
5667
5668       if node.offline:
5669         if affected_instances:
5670           raise errors.OpPrereqError("Cannot change secondary IP address:"
5671                                      " offline node has instances (%s)"
5672                                      " configured to use it" %
5673                                      utils.CommaJoin(affected_instances.keys()))
5674       else:
5675         # On online nodes, check that no instances are running, and that
5676         # the node has the new ip and we can reach it.
5677         for instance in affected_instances.values():
5678           _CheckInstanceState(self, instance, INSTANCE_DOWN,
5679                               msg="cannot change secondary ip")
5680
5681         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5682         if master.name != node.name:
5683           # check reachability from master secondary ip to new secondary ip
5684           if not netutils.TcpPing(self.op.secondary_ip,
5685                                   constants.DEFAULT_NODED_PORT,
5686                                   source=master.secondary_ip):
5687             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5688                                        " based ping to node daemon port",
5689                                        errors.ECODE_ENVIRON)
5690
5691     if self.op.ndparams:
5692       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5693       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5694       self.new_ndparams = new_ndparams
5695
5696     if self.op.hv_state:
5697       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5698                                                  self.node.hv_state_static)
5699
5700     if self.op.disk_state:
5701       self.new_disk_state = \
5702         _MergeAndVerifyDiskState(self.op.disk_state,
5703                                  self.node.disk_state_static)
5704
5705   def Exec(self, feedback_fn):
5706     """Modifies a node.
5707
5708     """
5709     node = self.node
5710     old_role = self.old_role
5711     new_role = self.new_role
5712
5713     result = []
5714
5715     if self.op.ndparams:
5716       node.ndparams = self.new_ndparams
5717
5718     if self.op.powered is not None:
5719       node.powered = self.op.powered
5720
5721     if self.op.hv_state:
5722       node.hv_state_static = self.new_hv_state
5723
5724     if self.op.disk_state:
5725       node.disk_state_static = self.new_disk_state
5726
5727     for attr in ["master_capable", "vm_capable"]:
5728       val = getattr(self.op, attr)
5729       if val is not None:
5730         setattr(node, attr, val)
5731         result.append((attr, str(val)))
5732
5733     if new_role != old_role:
5734       # Tell the node to demote itself, if no longer MC and not offline
5735       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5736         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5737         if msg:
5738           self.LogWarning("Node failed to demote itself: %s", msg)
5739
5740       new_flags = self._R2F[new_role]
5741       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5742         if of != nf:
5743           result.append((desc, str(nf)))
5744       (node.master_candidate, node.drained, node.offline) = new_flags
5745
5746       # we locked all nodes, we adjust the CP before updating this node
5747       if self.lock_all:
5748         _AdjustCandidatePool(self, [node.name])
5749
5750     if self.op.secondary_ip:
5751       node.secondary_ip = self.op.secondary_ip
5752       result.append(("secondary_ip", self.op.secondary_ip))
5753
5754     # this will trigger configuration file update, if needed
5755     self.cfg.Update(node, feedback_fn)
5756
5757     # this will trigger job queue propagation or cleanup if the mc
5758     # flag changed
5759     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5760       self.context.ReaddNode(node)
5761
5762     return result
5763
5764
5765 class LUNodePowercycle(NoHooksLU):
5766   """Powercycles a node.
5767
5768   """
5769   REQ_BGL = False
5770
5771   def CheckArguments(self):
5772     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5773     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5774       raise errors.OpPrereqError("The node is the master and the force"
5775                                  " parameter was not set",
5776                                  errors.ECODE_INVAL)
5777
5778   def ExpandNames(self):
5779     """Locking for PowercycleNode.
5780
5781     This is a last-resort option and shouldn't block on other
5782     jobs. Therefore, we grab no locks.
5783
5784     """
5785     self.needed_locks = {}
5786
5787   def Exec(self, feedback_fn):
5788     """Reboots a node.
5789
5790     """
5791     result = self.rpc.call_node_powercycle(self.op.node_name,
5792                                            self.cfg.GetHypervisorType())
5793     result.Raise("Failed to schedule the reboot")
5794     return result.payload
5795
5796
5797 class LUClusterQuery(NoHooksLU):
5798   """Query cluster configuration.
5799
5800   """
5801   REQ_BGL = False
5802
5803   def ExpandNames(self):
5804     self.needed_locks = {}
5805
5806   def Exec(self, feedback_fn):
5807     """Return cluster config.
5808
5809     """
5810     cluster = self.cfg.GetClusterInfo()
5811     os_hvp = {}
5812
5813     # Filter just for enabled hypervisors
5814     for os_name, hv_dict in cluster.os_hvp.items():
5815       os_hvp[os_name] = {}
5816       for hv_name, hv_params in hv_dict.items():
5817         if hv_name in cluster.enabled_hypervisors:
5818           os_hvp[os_name][hv_name] = hv_params
5819
5820     # Convert ip_family to ip_version
5821     primary_ip_version = constants.IP4_VERSION
5822     if cluster.primary_ip_family == netutils.IP6Address.family:
5823       primary_ip_version = constants.IP6_VERSION
5824
5825     result = {
5826       "software_version": constants.RELEASE_VERSION,
5827       "protocol_version": constants.PROTOCOL_VERSION,
5828       "config_version": constants.CONFIG_VERSION,
5829       "os_api_version": max(constants.OS_API_VERSIONS),
5830       "export_version": constants.EXPORT_VERSION,
5831       "architecture": (platform.architecture()[0], platform.machine()),
5832       "name": cluster.cluster_name,
5833       "master": cluster.master_node,
5834       "default_hypervisor": cluster.primary_hypervisor,
5835       "enabled_hypervisors": cluster.enabled_hypervisors,
5836       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5837                         for hypervisor_name in cluster.enabled_hypervisors]),
5838       "os_hvp": os_hvp,
5839       "beparams": cluster.beparams,
5840       "osparams": cluster.osparams,
5841       "ipolicy": cluster.ipolicy,
5842       "nicparams": cluster.nicparams,
5843       "ndparams": cluster.ndparams,
5844       "candidate_pool_size": cluster.candidate_pool_size,
5845       "master_netdev": cluster.master_netdev,
5846       "master_netmask": cluster.master_netmask,
5847       "use_external_mip_script": cluster.use_external_mip_script,
5848       "volume_group_name": cluster.volume_group_name,
5849       "drbd_usermode_helper": cluster.drbd_usermode_helper,
5850       "file_storage_dir": cluster.file_storage_dir,
5851       "shared_file_storage_dir": cluster.shared_file_storage_dir,
5852       "maintain_node_health": cluster.maintain_node_health,
5853       "ctime": cluster.ctime,
5854       "mtime": cluster.mtime,
5855       "uuid": cluster.uuid,
5856       "tags": list(cluster.GetTags()),
5857       "uid_pool": cluster.uid_pool,
5858       "default_iallocator": cluster.default_iallocator,
5859       "reserved_lvs": cluster.reserved_lvs,
5860       "primary_ip_version": primary_ip_version,
5861       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5862       "hidden_os": cluster.hidden_os,
5863       "blacklisted_os": cluster.blacklisted_os,
5864       }
5865
5866     return result
5867
5868
5869 class LUClusterConfigQuery(NoHooksLU):
5870   """Return configuration values.
5871
5872   """
5873   REQ_BGL = False
5874   _FIELDS_DYNAMIC = utils.FieldSet()
5875   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5876                                   "watcher_pause", "volume_group_name")
5877
5878   def CheckArguments(self):
5879     _CheckOutputFields(static=self._FIELDS_STATIC,
5880                        dynamic=self._FIELDS_DYNAMIC,
5881                        selected=self.op.output_fields)
5882
5883   def ExpandNames(self):
5884     self.needed_locks = {}
5885
5886   def Exec(self, feedback_fn):
5887     """Dump a representation of the cluster config to the standard output.
5888
5889     """
5890     values = []
5891     for field in self.op.output_fields:
5892       if field == "cluster_name":
5893         entry = self.cfg.GetClusterName()
5894       elif field == "master_node":
5895         entry = self.cfg.GetMasterNode()
5896       elif field == "drain_flag":
5897         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5898       elif field == "watcher_pause":
5899         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5900       elif field == "volume_group_name":
5901         entry = self.cfg.GetVGName()
5902       else:
5903         raise errors.ParameterError(field)
5904       values.append(entry)
5905     return values
5906
5907
5908 class LUInstanceActivateDisks(NoHooksLU):
5909   """Bring up an instance's disks.
5910
5911   """
5912   REQ_BGL = False
5913
5914   def ExpandNames(self):
5915     self._ExpandAndLockInstance()
5916     self.needed_locks[locking.LEVEL_NODE] = []
5917     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5918
5919   def DeclareLocks(self, level):
5920     if level == locking.LEVEL_NODE:
5921       self._LockInstancesNodes()
5922
5923   def CheckPrereq(self):
5924     """Check prerequisites.
5925
5926     This checks that the instance is in the cluster.
5927
5928     """
5929     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5930     assert self.instance is not None, \
5931       "Cannot retrieve locked instance %s" % self.op.instance_name
5932     _CheckNodeOnline(self, self.instance.primary_node)
5933
5934   def Exec(self, feedback_fn):
5935     """Activate the disks.
5936
5937     """
5938     disks_ok, disks_info = \
5939               _AssembleInstanceDisks(self, self.instance,
5940                                      ignore_size=self.op.ignore_size)
5941     if not disks_ok:
5942       raise errors.OpExecError("Cannot activate block devices")
5943
5944     return disks_info
5945
5946
5947 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5948                            ignore_size=False):
5949   """Prepare the block devices for an instance.
5950
5951   This sets up the block devices on all nodes.
5952
5953   @type lu: L{LogicalUnit}
5954   @param lu: the logical unit on whose behalf we execute
5955   @type instance: L{objects.Instance}
5956   @param instance: the instance for whose disks we assemble
5957   @type disks: list of L{objects.Disk} or None
5958   @param disks: which disks to assemble (or all, if None)
5959   @type ignore_secondaries: boolean
5960   @param ignore_secondaries: if true, errors on secondary nodes
5961       won't result in an error return from the function
5962   @type ignore_size: boolean
5963   @param ignore_size: if true, the current known size of the disk
5964       will not be used during the disk activation, useful for cases
5965       when the size is wrong
5966   @return: False if the operation failed, otherwise a list of
5967       (host, instance_visible_name, node_visible_name)
5968       with the mapping from node devices to instance devices
5969
5970   """
5971   device_info = []
5972   disks_ok = True
5973   iname = instance.name
5974   disks = _ExpandCheckDisks(instance, disks)
5975
5976   # With the two passes mechanism we try to reduce the window of
5977   # opportunity for the race condition of switching DRBD to primary
5978   # before handshaking occured, but we do not eliminate it
5979
5980   # The proper fix would be to wait (with some limits) until the
5981   # connection has been made and drbd transitions from WFConnection
5982   # into any other network-connected state (Connected, SyncTarget,
5983   # SyncSource, etc.)
5984
5985   # 1st pass, assemble on all nodes in secondary mode
5986   for idx, inst_disk in enumerate(disks):
5987     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5988       if ignore_size:
5989         node_disk = node_disk.Copy()
5990         node_disk.UnsetSize()
5991       lu.cfg.SetDiskID(node_disk, node)
5992       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5993       msg = result.fail_msg
5994       if msg:
5995         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5996                            " (is_primary=False, pass=1): %s",
5997                            inst_disk.iv_name, node, msg)
5998         if not ignore_secondaries:
5999           disks_ok = False
6000
6001   # FIXME: race condition on drbd migration to primary
6002
6003   # 2nd pass, do only the primary node
6004   for idx, inst_disk in enumerate(disks):
6005     dev_path = None
6006
6007     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6008       if node != instance.primary_node:
6009         continue
6010       if ignore_size:
6011         node_disk = node_disk.Copy()
6012         node_disk.UnsetSize()
6013       lu.cfg.SetDiskID(node_disk, node)
6014       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6015       msg = result.fail_msg
6016       if msg:
6017         lu.proc.LogWarning("Could not prepare block device %s on node %s"
6018                            " (is_primary=True, pass=2): %s",
6019                            inst_disk.iv_name, node, msg)
6020         disks_ok = False
6021       else:
6022         dev_path = result.payload
6023
6024     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6025
6026   # leave the disks configured for the primary node
6027   # this is a workaround that would be fixed better by
6028   # improving the logical/physical id handling
6029   for disk in disks:
6030     lu.cfg.SetDiskID(disk, instance.primary_node)
6031
6032   return disks_ok, device_info
6033
6034
6035 def _StartInstanceDisks(lu, instance, force):
6036   """Start the disks of an instance.
6037
6038   """
6039   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6040                                            ignore_secondaries=force)
6041   if not disks_ok:
6042     _ShutdownInstanceDisks(lu, instance)
6043     if force is not None and not force:
6044       lu.proc.LogWarning("", hint="If the message above refers to a"
6045                          " secondary node,"
6046                          " you can retry the operation using '--force'.")
6047     raise errors.OpExecError("Disk consistency error")
6048
6049
6050 class LUInstanceDeactivateDisks(NoHooksLU):
6051   """Shutdown an instance's disks.
6052
6053   """
6054   REQ_BGL = False
6055
6056   def ExpandNames(self):
6057     self._ExpandAndLockInstance()
6058     self.needed_locks[locking.LEVEL_NODE] = []
6059     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6060
6061   def DeclareLocks(self, level):
6062     if level == locking.LEVEL_NODE:
6063       self._LockInstancesNodes()
6064
6065   def CheckPrereq(self):
6066     """Check prerequisites.
6067
6068     This checks that the instance is in the cluster.
6069
6070     """
6071     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6072     assert self.instance is not None, \
6073       "Cannot retrieve locked instance %s" % self.op.instance_name
6074
6075   def Exec(self, feedback_fn):
6076     """Deactivate the disks
6077
6078     """
6079     instance = self.instance
6080     if self.op.force:
6081       _ShutdownInstanceDisks(self, instance)
6082     else:
6083       _SafeShutdownInstanceDisks(self, instance)
6084
6085
6086 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6087   """Shutdown block devices of an instance.
6088
6089   This function checks if an instance is running, before calling
6090   _ShutdownInstanceDisks.
6091
6092   """
6093   _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6094   _ShutdownInstanceDisks(lu, instance, disks=disks)
6095
6096
6097 def _ExpandCheckDisks(instance, disks):
6098   """Return the instance disks selected by the disks list
6099
6100   @type disks: list of L{objects.Disk} or None
6101   @param disks: selected disks
6102   @rtype: list of L{objects.Disk}
6103   @return: selected instance disks to act on
6104
6105   """
6106   if disks is None:
6107     return instance.disks
6108   else:
6109     if not set(disks).issubset(instance.disks):
6110       raise errors.ProgrammerError("Can only act on disks belonging to the"
6111                                    " target instance")
6112     return disks
6113
6114
6115 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6116   """Shutdown block devices of an instance.
6117
6118   This does the shutdown on all nodes of the instance.
6119
6120   If the ignore_primary is false, errors on the primary node are
6121   ignored.
6122
6123   """
6124   all_result = True
6125   disks = _ExpandCheckDisks(instance, disks)
6126
6127   for disk in disks:
6128     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6129       lu.cfg.SetDiskID(top_disk, node)
6130       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6131       msg = result.fail_msg
6132       if msg:
6133         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6134                       disk.iv_name, node, msg)
6135         if ((node == instance.primary_node and not ignore_primary) or
6136             (node != instance.primary_node and not result.offline)):
6137           all_result = False
6138   return all_result
6139
6140
6141 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6142   """Checks if a node has enough free memory.
6143
6144   This function check if a given node has the needed amount of free
6145   memory. In case the node has less memory or we cannot get the
6146   information from the node, this function raise an OpPrereqError
6147   exception.
6148
6149   @type lu: C{LogicalUnit}
6150   @param lu: a logical unit from which we get configuration data
6151   @type node: C{str}
6152   @param node: the node to check
6153   @type reason: C{str}
6154   @param reason: string to use in the error message
6155   @type requested: C{int}
6156   @param requested: the amount of memory in MiB to check for
6157   @type hypervisor_name: C{str}
6158   @param hypervisor_name: the hypervisor to ask for memory stats
6159   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6160       we cannot check the node
6161
6162   """
6163   nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6164   nodeinfo[node].Raise("Can't get data from node %s" % node,
6165                        prereq=True, ecode=errors.ECODE_ENVIRON)
6166   (_, _, (hv_info, )) = nodeinfo[node].payload
6167
6168   free_mem = hv_info.get("memory_free", None)
6169   if not isinstance(free_mem, int):
6170     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6171                                " was '%s'" % (node, free_mem),
6172                                errors.ECODE_ENVIRON)
6173   if requested > free_mem:
6174     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6175                                " needed %s MiB, available %s MiB" %
6176                                (node, reason, requested, free_mem),
6177                                errors.ECODE_NORES)
6178
6179
6180 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6181   """Checks if nodes have enough free disk space in the all VGs.
6182
6183   This function check if all given nodes have the needed amount of
6184   free disk. In case any node has less disk or we cannot get the
6185   information from the node, this function raise an OpPrereqError
6186   exception.
6187
6188   @type lu: C{LogicalUnit}
6189   @param lu: a logical unit from which we get configuration data
6190   @type nodenames: C{list}
6191   @param nodenames: the list of node names to check
6192   @type req_sizes: C{dict}
6193   @param req_sizes: the hash of vg and corresponding amount of disk in
6194       MiB to check for
6195   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6196       or we cannot check the node
6197
6198   """
6199   for vg, req_size in req_sizes.items():
6200     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6201
6202
6203 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6204   """Checks if nodes have enough free disk space in the specified VG.
6205
6206   This function check if all given nodes have the needed amount of
6207   free disk. In case any node has less disk or we cannot get the
6208   information from the node, this function raise an OpPrereqError
6209   exception.
6210
6211   @type lu: C{LogicalUnit}
6212   @param lu: a logical unit from which we get configuration data
6213   @type nodenames: C{list}
6214   @param nodenames: the list of node names to check
6215   @type vg: C{str}
6216   @param vg: the volume group to check
6217   @type requested: C{int}
6218   @param requested: the amount of disk in MiB to check for
6219   @raise errors.OpPrereqError: if the node doesn't have enough disk,
6220       or we cannot check the node
6221
6222   """
6223   nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6224   for node in nodenames:
6225     info = nodeinfo[node]
6226     info.Raise("Cannot get current information from node %s" % node,
6227                prereq=True, ecode=errors.ECODE_ENVIRON)
6228     (_, (vg_info, ), _) = info.payload
6229     vg_free = vg_info.get("vg_free", None)
6230     if not isinstance(vg_free, int):
6231       raise errors.OpPrereqError("Can't compute free disk space on node"
6232                                  " %s for vg %s, result was '%s'" %
6233                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
6234     if requested > vg_free:
6235       raise errors.OpPrereqError("Not enough disk space on target node %s"
6236                                  " vg %s: required %d MiB, available %d MiB" %
6237                                  (node, vg, requested, vg_free),
6238                                  errors.ECODE_NORES)
6239
6240
6241 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6242   """Checks if nodes have enough physical CPUs
6243
6244   This function checks if all given nodes have the needed number of
6245   physical CPUs. In case any node has less CPUs or we cannot get the
6246   information from the node, this function raises an OpPrereqError
6247   exception.
6248
6249   @type lu: C{LogicalUnit}
6250   @param lu: a logical unit from which we get configuration data
6251   @type nodenames: C{list}
6252   @param nodenames: the list of node names to check
6253   @type requested: C{int}
6254   @param requested: the minimum acceptable number of physical CPUs
6255   @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6256       or we cannot check the node
6257
6258   """
6259   nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6260   for node in nodenames:
6261     info = nodeinfo[node]
6262     info.Raise("Cannot get current information from node %s" % node,
6263                prereq=True, ecode=errors.ECODE_ENVIRON)
6264     (_, _, (hv_info, )) = info.payload
6265     num_cpus = hv_info.get("cpu_total", None)
6266     if not isinstance(num_cpus, int):
6267       raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6268                                  " on node %s, result was '%s'" %
6269                                  (node, num_cpus), errors.ECODE_ENVIRON)
6270     if requested > num_cpus:
6271       raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6272                                  "required" % (node, num_cpus, requested),
6273                                  errors.ECODE_NORES)
6274
6275
6276 class LUInstanceStartup(LogicalUnit):
6277   """Starts an instance.
6278
6279   """
6280   HPATH = "instance-start"
6281   HTYPE = constants.HTYPE_INSTANCE
6282   REQ_BGL = False
6283
6284   def CheckArguments(self):
6285     # extra beparams
6286     if self.op.beparams:
6287       # fill the beparams dict
6288       objects.UpgradeBeParams(self.op.beparams)
6289       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6290
6291   def ExpandNames(self):
6292     self._ExpandAndLockInstance()
6293
6294   def BuildHooksEnv(self):
6295     """Build hooks env.
6296
6297     This runs on master, primary and secondary nodes of the instance.
6298
6299     """
6300     env = {
6301       "FORCE": self.op.force,
6302       }
6303
6304     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6305
6306     return env
6307
6308   def BuildHooksNodes(self):
6309     """Build hooks nodes.
6310
6311     """
6312     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6313     return (nl, nl)
6314
6315   def CheckPrereq(self):
6316     """Check prerequisites.
6317
6318     This checks that the instance is in the cluster.
6319
6320     """
6321     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6322     assert self.instance is not None, \
6323       "Cannot retrieve locked instance %s" % self.op.instance_name
6324
6325     # extra hvparams
6326     if self.op.hvparams:
6327       # check hypervisor parameter syntax (locally)
6328       cluster = self.cfg.GetClusterInfo()
6329       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6330       filled_hvp = cluster.FillHV(instance)
6331       filled_hvp.update(self.op.hvparams)
6332       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6333       hv_type.CheckParameterSyntax(filled_hvp)
6334       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6335
6336     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6337
6338     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6339
6340     if self.primary_offline and self.op.ignore_offline_nodes:
6341       self.proc.LogWarning("Ignoring offline primary node")
6342
6343       if self.op.hvparams or self.op.beparams:
6344         self.proc.LogWarning("Overridden parameters are ignored")
6345     else:
6346       _CheckNodeOnline(self, instance.primary_node)
6347
6348       bep = self.cfg.GetClusterInfo().FillBE(instance)
6349
6350       # check bridges existence
6351       _CheckInstanceBridgesExist(self, instance)
6352
6353       remote_info = self.rpc.call_instance_info(instance.primary_node,
6354                                                 instance.name,
6355                                                 instance.hypervisor)
6356       remote_info.Raise("Error checking node %s" % instance.primary_node,
6357                         prereq=True, ecode=errors.ECODE_ENVIRON)
6358       if not remote_info.payload: # not running already
6359         _CheckNodeFreeMemory(self, instance.primary_node,
6360                              "starting instance %s" % instance.name,
6361                              bep[constants.BE_MAXMEM], instance.hypervisor)
6362
6363   def Exec(self, feedback_fn):
6364     """Start the instance.
6365
6366     """
6367     instance = self.instance
6368     force = self.op.force
6369
6370     if not self.op.no_remember:
6371       self.cfg.MarkInstanceUp(instance.name)
6372
6373     if self.primary_offline:
6374       assert self.op.ignore_offline_nodes
6375       self.proc.LogInfo("Primary node offline, marked instance as started")
6376     else:
6377       node_current = instance.primary_node
6378
6379       _StartInstanceDisks(self, instance, force)
6380
6381       result = \
6382         self.rpc.call_instance_start(node_current,
6383                                      (instance, self.op.hvparams,
6384                                       self.op.beparams),
6385                                      self.op.startup_paused)
6386       msg = result.fail_msg
6387       if msg:
6388         _ShutdownInstanceDisks(self, instance)
6389         raise errors.OpExecError("Could not start instance: %s" % msg)
6390
6391
6392 class LUInstanceReboot(LogicalUnit):
6393   """Reboot an instance.
6394
6395   """
6396   HPATH = "instance-reboot"
6397   HTYPE = constants.HTYPE_INSTANCE
6398   REQ_BGL = False
6399
6400   def ExpandNames(self):
6401     self._ExpandAndLockInstance()
6402
6403   def BuildHooksEnv(self):
6404     """Build hooks env.
6405
6406     This runs on master, primary and secondary nodes of the instance.
6407
6408     """
6409     env = {
6410       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6411       "REBOOT_TYPE": self.op.reboot_type,
6412       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6413       }
6414
6415     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6416
6417     return env
6418
6419   def BuildHooksNodes(self):
6420     """Build hooks nodes.
6421
6422     """
6423     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6424     return (nl, nl)
6425
6426   def CheckPrereq(self):
6427     """Check prerequisites.
6428
6429     This checks that the instance is in the cluster.
6430
6431     """
6432     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6433     assert self.instance is not None, \
6434       "Cannot retrieve locked instance %s" % self.op.instance_name
6435     _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6436     _CheckNodeOnline(self, instance.primary_node)
6437
6438     # check bridges existence
6439     _CheckInstanceBridgesExist(self, instance)
6440
6441   def Exec(self, feedback_fn):
6442     """Reboot the instance.
6443
6444     """
6445     instance = self.instance
6446     ignore_secondaries = self.op.ignore_secondaries
6447     reboot_type = self.op.reboot_type
6448
6449     remote_info = self.rpc.call_instance_info(instance.primary_node,
6450                                               instance.name,
6451                                               instance.hypervisor)
6452     remote_info.Raise("Error checking node %s" % instance.primary_node)
6453     instance_running = bool(remote_info.payload)
6454
6455     node_current = instance.primary_node
6456
6457     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6458                                             constants.INSTANCE_REBOOT_HARD]:
6459       for disk in instance.disks:
6460         self.cfg.SetDiskID(disk, node_current)
6461       result = self.rpc.call_instance_reboot(node_current, instance,
6462                                              reboot_type,
6463                                              self.op.shutdown_timeout)
6464       result.Raise("Could not reboot instance")
6465     else:
6466       if instance_running:
6467         result = self.rpc.call_instance_shutdown(node_current, instance,
6468                                                  self.op.shutdown_timeout)
6469         result.Raise("Could not shutdown instance for full reboot")
6470         _ShutdownInstanceDisks(self, instance)
6471       else:
6472         self.LogInfo("Instance %s was already stopped, starting now",
6473                      instance.name)
6474       _StartInstanceDisks(self, instance, ignore_secondaries)
6475       result = self.rpc.call_instance_start(node_current,
6476                                             (instance, None, None), False)
6477       msg = result.fail_msg
6478       if msg:
6479         _ShutdownInstanceDisks(self, instance)
6480         raise errors.OpExecError("Could not start instance for"
6481                                  " full reboot: %s" % msg)
6482
6483     self.cfg.MarkInstanceUp(instance.name)
6484
6485
6486 class LUInstanceShutdown(LogicalUnit):
6487   """Shutdown an instance.
6488
6489   """
6490   HPATH = "instance-stop"
6491   HTYPE = constants.HTYPE_INSTANCE
6492   REQ_BGL = False
6493
6494   def ExpandNames(self):
6495     self._ExpandAndLockInstance()
6496
6497   def BuildHooksEnv(self):
6498     """Build hooks env.
6499
6500     This runs on master, primary and secondary nodes of the instance.
6501
6502     """
6503     env = _BuildInstanceHookEnvByObject(self, self.instance)
6504     env["TIMEOUT"] = self.op.timeout
6505     return env
6506
6507   def BuildHooksNodes(self):
6508     """Build hooks nodes.
6509
6510     """
6511     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6512     return (nl, nl)
6513
6514   def CheckPrereq(self):
6515     """Check prerequisites.
6516
6517     This checks that the instance is in the cluster.
6518
6519     """
6520     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6521     assert self.instance is not None, \
6522       "Cannot retrieve locked instance %s" % self.op.instance_name
6523
6524     _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6525
6526     self.primary_offline = \
6527       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6528
6529     if self.primary_offline and self.op.ignore_offline_nodes:
6530       self.proc.LogWarning("Ignoring offline primary node")
6531     else:
6532       _CheckNodeOnline(self, self.instance.primary_node)
6533
6534   def Exec(self, feedback_fn):
6535     """Shutdown the instance.
6536
6537     """
6538     instance = self.instance
6539     node_current = instance.primary_node
6540     timeout = self.op.timeout
6541
6542     if not self.op.no_remember:
6543       self.cfg.MarkInstanceDown(instance.name)
6544
6545     if self.primary_offline:
6546       assert self.op.ignore_offline_nodes
6547       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6548     else:
6549       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6550       msg = result.fail_msg
6551       if msg:
6552         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6553
6554       _ShutdownInstanceDisks(self, instance)
6555
6556
6557 class LUInstanceReinstall(LogicalUnit):
6558   """Reinstall an instance.
6559
6560   """
6561   HPATH = "instance-reinstall"
6562   HTYPE = constants.HTYPE_INSTANCE
6563   REQ_BGL = False
6564
6565   def ExpandNames(self):
6566     self._ExpandAndLockInstance()
6567
6568   def BuildHooksEnv(self):
6569     """Build hooks env.
6570
6571     This runs on master, primary and secondary nodes of the instance.
6572
6573     """
6574     return _BuildInstanceHookEnvByObject(self, self.instance)
6575
6576   def BuildHooksNodes(self):
6577     """Build hooks nodes.
6578
6579     """
6580     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6581     return (nl, nl)
6582
6583   def CheckPrereq(self):
6584     """Check prerequisites.
6585
6586     This checks that the instance is in the cluster and is not running.
6587
6588     """
6589     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6590     assert instance is not None, \
6591       "Cannot retrieve locked instance %s" % self.op.instance_name
6592     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6593                      " offline, cannot reinstall")
6594     for node in instance.secondary_nodes:
6595       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6596                        " cannot reinstall")
6597
6598     if instance.disk_template == constants.DT_DISKLESS:
6599       raise errors.OpPrereqError("Instance '%s' has no disks" %
6600                                  self.op.instance_name,
6601                                  errors.ECODE_INVAL)
6602     _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6603
6604     if self.op.os_type is not None:
6605       # OS verification
6606       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6607       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6608       instance_os = self.op.os_type
6609     else:
6610       instance_os = instance.os
6611
6612     nodelist = list(instance.all_nodes)
6613
6614     if self.op.osparams:
6615       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6616       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6617       self.os_inst = i_osdict # the new dict (without defaults)
6618     else:
6619       self.os_inst = None
6620
6621     self.instance = instance
6622
6623   def Exec(self, feedback_fn):
6624     """Reinstall the instance.
6625
6626     """
6627     inst = self.instance
6628
6629     if self.op.os_type is not None:
6630       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6631       inst.os = self.op.os_type
6632       # Write to configuration
6633       self.cfg.Update(inst, feedback_fn)
6634
6635     _StartInstanceDisks(self, inst, None)
6636     try:
6637       feedback_fn("Running the instance OS create scripts...")
6638       # FIXME: pass debug option from opcode to backend
6639       result = self.rpc.call_instance_os_add(inst.primary_node,
6640                                              (inst, self.os_inst), True,
6641                                              self.op.debug_level)
6642       result.Raise("Could not install OS for instance %s on node %s" %
6643                    (inst.name, inst.primary_node))
6644     finally:
6645       _ShutdownInstanceDisks(self, inst)
6646
6647
6648 class LUInstanceRecreateDisks(LogicalUnit):
6649   """Recreate an instance's missing disks.
6650
6651   """
6652   HPATH = "instance-recreate-disks"
6653   HTYPE = constants.HTYPE_INSTANCE
6654   REQ_BGL = False
6655
6656   def CheckArguments(self):
6657     # normalise the disk list
6658     self.op.disks = sorted(frozenset(self.op.disks))
6659
6660   def ExpandNames(self):
6661     self._ExpandAndLockInstance()
6662     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6663     if self.op.nodes:
6664       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6665       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6666     else:
6667       self.needed_locks[locking.LEVEL_NODE] = []
6668
6669   def DeclareLocks(self, level):
6670     if level == locking.LEVEL_NODE:
6671       # if we replace the nodes, we only need to lock the old primary,
6672       # otherwise we need to lock all nodes for disk re-creation
6673       primary_only = bool(self.op.nodes)
6674       self._LockInstancesNodes(primary_only=primary_only)
6675     elif level == locking.LEVEL_NODE_RES:
6676       # Copy node locks
6677       self.needed_locks[locking.LEVEL_NODE_RES] = \
6678         self.needed_locks[locking.LEVEL_NODE][:]
6679
6680   def BuildHooksEnv(self):
6681     """Build hooks env.
6682
6683     This runs on master, primary and secondary nodes of the instance.
6684
6685     """
6686     return _BuildInstanceHookEnvByObject(self, self.instance)
6687
6688   def BuildHooksNodes(self):
6689     """Build hooks nodes.
6690
6691     """
6692     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6693     return (nl, nl)
6694
6695   def CheckPrereq(self):
6696     """Check prerequisites.
6697
6698     This checks that the instance is in the cluster and is not running.
6699
6700     """
6701     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6702     assert instance is not None, \
6703       "Cannot retrieve locked instance %s" % self.op.instance_name
6704     if self.op.nodes:
6705       if len(self.op.nodes) != len(instance.all_nodes):
6706         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6707                                    " %d replacement nodes were specified" %
6708                                    (instance.name, len(instance.all_nodes),
6709                                     len(self.op.nodes)),
6710                                    errors.ECODE_INVAL)
6711       assert instance.disk_template != constants.DT_DRBD8 or \
6712           len(self.op.nodes) == 2
6713       assert instance.disk_template != constants.DT_PLAIN or \
6714           len(self.op.nodes) == 1
6715       primary_node = self.op.nodes[0]
6716     else:
6717       primary_node = instance.primary_node
6718     _CheckNodeOnline(self, primary_node)
6719
6720     if instance.disk_template == constants.DT_DISKLESS:
6721       raise errors.OpPrereqError("Instance '%s' has no disks" %
6722                                  self.op.instance_name, errors.ECODE_INVAL)
6723     # if we replace nodes *and* the old primary is offline, we don't
6724     # check
6725     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6726     assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6727     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6728     if not (self.op.nodes and old_pnode.offline):
6729       _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6730                           msg="cannot recreate disks")
6731
6732     if not self.op.disks:
6733       self.op.disks = range(len(instance.disks))
6734     else:
6735       for idx in self.op.disks:
6736         if idx >= len(instance.disks):
6737           raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6738                                      errors.ECODE_INVAL)
6739     if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6740       raise errors.OpPrereqError("Can't recreate disks partially and"
6741                                  " change the nodes at the same time",
6742                                  errors.ECODE_INVAL)
6743     self.instance = instance
6744
6745   def Exec(self, feedback_fn):
6746     """Recreate the disks.
6747
6748     """
6749     instance = self.instance
6750
6751     assert (self.owned_locks(locking.LEVEL_NODE) ==
6752             self.owned_locks(locking.LEVEL_NODE_RES))
6753
6754     to_skip = []
6755     mods = [] # keeps track of needed logical_id changes
6756
6757     for idx, disk in enumerate(instance.disks):
6758       if idx not in self.op.disks: # disk idx has not been passed in
6759         to_skip.append(idx)
6760         continue
6761       # update secondaries for disks, if needed
6762       if self.op.nodes:
6763         if disk.dev_type == constants.LD_DRBD8:
6764           # need to update the nodes and minors
6765           assert len(self.op.nodes) == 2
6766           assert len(disk.logical_id) == 6 # otherwise disk internals
6767                                            # have changed
6768           (_, _, old_port, _, _, old_secret) = disk.logical_id
6769           new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6770           new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6771                     new_minors[0], new_minors[1], old_secret)
6772           assert len(disk.logical_id) == len(new_id)
6773           mods.append((idx, new_id))
6774
6775     # now that we have passed all asserts above, we can apply the mods
6776     # in a single run (to avoid partial changes)
6777     for idx, new_id in mods:
6778       instance.disks[idx].logical_id = new_id
6779
6780     # change primary node, if needed
6781     if self.op.nodes:
6782       instance.primary_node = self.op.nodes[0]
6783       self.LogWarning("Changing the instance's nodes, you will have to"
6784                       " remove any disks left on the older nodes manually")
6785
6786     if self.op.nodes:
6787       self.cfg.Update(instance, feedback_fn)
6788
6789     _CreateDisks(self, instance, to_skip=to_skip)
6790
6791
6792 class LUInstanceRename(LogicalUnit):
6793   """Rename an instance.
6794
6795   """
6796   HPATH = "instance-rename"
6797   HTYPE = constants.HTYPE_INSTANCE
6798
6799   def CheckArguments(self):
6800     """Check arguments.
6801
6802     """
6803     if self.op.ip_check and not self.op.name_check:
6804       # TODO: make the ip check more flexible and not depend on the name check
6805       raise errors.OpPrereqError("IP address check requires a name check",
6806                                  errors.ECODE_INVAL)
6807
6808   def BuildHooksEnv(self):
6809     """Build hooks env.
6810
6811     This runs on master, primary and secondary nodes of the instance.
6812
6813     """
6814     env = _BuildInstanceHookEnvByObject(self, self.instance)
6815     env["INSTANCE_NEW_NAME"] = self.op.new_name
6816     return env
6817
6818   def BuildHooksNodes(self):
6819     """Build hooks nodes.
6820
6821     """
6822     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6823     return (nl, nl)
6824
6825   def CheckPrereq(self):
6826     """Check prerequisites.
6827
6828     This checks that the instance is in the cluster and is not running.
6829
6830     """
6831     self.op.instance_name = _ExpandInstanceName(self.cfg,
6832                                                 self.op.instance_name)
6833     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6834     assert instance is not None
6835     _CheckNodeOnline(self, instance.primary_node)
6836     _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6837                         msg="cannot rename")
6838     self.instance = instance
6839
6840     new_name = self.op.new_name
6841     if self.op.name_check:
6842       hostname = netutils.GetHostname(name=new_name)
6843       if hostname.name != new_name:
6844         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6845                      hostname.name)
6846       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6847         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6848                                     " same as given hostname '%s'") %
6849                                     (hostname.name, self.op.new_name),
6850                                     errors.ECODE_INVAL)
6851       new_name = self.op.new_name = hostname.name
6852       if (self.op.ip_check and
6853           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6854         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6855                                    (hostname.ip, new_name),
6856                                    errors.ECODE_NOTUNIQUE)
6857
6858     instance_list = self.cfg.GetInstanceList()
6859     if new_name in instance_list and new_name != instance.name:
6860       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6861                                  new_name, errors.ECODE_EXISTS)
6862
6863   def Exec(self, feedback_fn):
6864     """Rename the instance.
6865
6866     """
6867     inst = self.instance
6868     old_name = inst.name
6869
6870     rename_file_storage = False
6871     if (inst.disk_template in constants.DTS_FILEBASED and
6872         self.op.new_name != inst.name):
6873       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6874       rename_file_storage = True
6875
6876     self.cfg.RenameInstance(inst.name, self.op.new_name)
6877     # Change the instance lock. This is definitely safe while we hold the BGL.
6878     # Otherwise the new lock would have to be added in acquired mode.
6879     assert self.REQ_BGL
6880     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6881     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6882
6883     # re-read the instance from the configuration after rename
6884     inst = self.cfg.GetInstanceInfo(self.op.new_name)
6885
6886     if rename_file_storage:
6887       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6888       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6889                                                      old_file_storage_dir,
6890                                                      new_file_storage_dir)
6891       result.Raise("Could not rename on node %s directory '%s' to '%s'"
6892                    " (but the instance has been renamed in Ganeti)" %
6893                    (inst.primary_node, old_file_storage_dir,
6894                     new_file_storage_dir))
6895
6896     _StartInstanceDisks(self, inst, None)
6897     try:
6898       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6899                                                  old_name, self.op.debug_level)
6900       msg = result.fail_msg
6901       if msg:
6902         msg = ("Could not run OS rename script for instance %s on node %s"
6903                " (but the instance has been renamed in Ganeti): %s" %
6904                (inst.name, inst.primary_node, msg))
6905         self.proc.LogWarning(msg)
6906     finally:
6907       _ShutdownInstanceDisks(self, inst)
6908
6909     return inst.name
6910
6911
6912 class LUInstanceRemove(LogicalUnit):
6913   """Remove an instance.
6914
6915   """
6916   HPATH = "instance-remove"
6917   HTYPE = constants.HTYPE_INSTANCE
6918   REQ_BGL = False
6919
6920   def ExpandNames(self):
6921     self._ExpandAndLockInstance()
6922     self.needed_locks[locking.LEVEL_NODE] = []
6923     self.needed_locks[locking.LEVEL_NODE_RES] = []
6924     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6925
6926   def DeclareLocks(self, level):
6927     if level == locking.LEVEL_NODE:
6928       self._LockInstancesNodes()
6929     elif level == locking.LEVEL_NODE_RES:
6930       # Copy node locks
6931       self.needed_locks[locking.LEVEL_NODE_RES] = \
6932         self.needed_locks[locking.LEVEL_NODE][:]
6933
6934   def BuildHooksEnv(self):
6935     """Build hooks env.
6936
6937     This runs on master, primary and secondary nodes of the instance.
6938
6939     """
6940     env = _BuildInstanceHookEnvByObject(self, self.instance)
6941     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6942     return env
6943
6944   def BuildHooksNodes(self):
6945     """Build hooks nodes.
6946
6947     """
6948     nl = [self.cfg.GetMasterNode()]
6949     nl_post = list(self.instance.all_nodes) + nl
6950     return (nl, nl_post)
6951
6952   def CheckPrereq(self):
6953     """Check prerequisites.
6954
6955     This checks that the instance is in the cluster.
6956
6957     """
6958     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6959     assert self.instance is not None, \
6960       "Cannot retrieve locked instance %s" % self.op.instance_name
6961
6962   def Exec(self, feedback_fn):
6963     """Remove the instance.
6964
6965     """
6966     instance = self.instance
6967     logging.info("Shutting down instance %s on node %s",
6968                  instance.name, instance.primary_node)
6969
6970     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6971                                              self.op.shutdown_timeout)
6972     msg = result.fail_msg
6973     if msg:
6974       if self.op.ignore_failures:
6975         feedback_fn("Warning: can't shutdown instance: %s" % msg)
6976       else:
6977         raise errors.OpExecError("Could not shutdown instance %s on"
6978                                  " node %s: %s" %
6979                                  (instance.name, instance.primary_node, msg))
6980
6981     assert (self.owned_locks(locking.LEVEL_NODE) ==
6982             self.owned_locks(locking.LEVEL_NODE_RES))
6983     assert not (set(instance.all_nodes) -
6984                 self.owned_locks(locking.LEVEL_NODE)), \
6985       "Not owning correct locks"
6986
6987     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6988
6989
6990 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6991   """Utility function to remove an instance.
6992
6993   """
6994   logging.info("Removing block devices for instance %s", instance.name)
6995
6996   if not _RemoveDisks(lu, instance):
6997     if not ignore_failures:
6998       raise errors.OpExecError("Can't remove instance's disks")
6999     feedback_fn("Warning: can't remove instance's disks")
7000
7001   logging.info("Removing instance %s out of cluster config", instance.name)
7002
7003   lu.cfg.RemoveInstance(instance.name)
7004
7005   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7006     "Instance lock removal conflict"
7007
7008   # Remove lock for the instance
7009   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7010
7011
7012 class LUInstanceQuery(NoHooksLU):
7013   """Logical unit for querying instances.
7014
7015   """
7016   # pylint: disable=W0142
7017   REQ_BGL = False
7018
7019   def CheckArguments(self):
7020     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7021                              self.op.output_fields, self.op.use_locking)
7022
7023   def ExpandNames(self):
7024     self.iq.ExpandNames(self)
7025
7026   def DeclareLocks(self, level):
7027     self.iq.DeclareLocks(self, level)
7028
7029   def Exec(self, feedback_fn):
7030     return self.iq.OldStyleQuery(self)
7031
7032
7033 class LUInstanceFailover(LogicalUnit):
7034   """Failover an instance.
7035
7036   """
7037   HPATH = "instance-failover"
7038   HTYPE = constants.HTYPE_INSTANCE
7039   REQ_BGL = False
7040
7041   def CheckArguments(self):
7042     """Check the arguments.
7043
7044     """
7045     self.iallocator = getattr(self.op, "iallocator", None)
7046     self.target_node = getattr(self.op, "target_node", None)
7047
7048   def ExpandNames(self):
7049     self._ExpandAndLockInstance()
7050
7051     if self.op.target_node is not None:
7052       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7053
7054     self.needed_locks[locking.LEVEL_NODE] = []
7055     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7056
7057     ignore_consistency = self.op.ignore_consistency
7058     shutdown_timeout = self.op.shutdown_timeout
7059     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7060                                        cleanup=False,
7061                                        failover=True,
7062                                        ignore_consistency=ignore_consistency,
7063                                        shutdown_timeout=shutdown_timeout)
7064     self.tasklets = [self._migrater]
7065
7066   def DeclareLocks(self, level):
7067     if level == locking.LEVEL_NODE:
7068       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7069       if instance.disk_template in constants.DTS_EXT_MIRROR:
7070         if self.op.target_node is None:
7071           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7072         else:
7073           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7074                                                    self.op.target_node]
7075         del self.recalculate_locks[locking.LEVEL_NODE]
7076       else:
7077         self._LockInstancesNodes()
7078
7079   def BuildHooksEnv(self):
7080     """Build hooks env.
7081
7082     This runs on master, primary and secondary nodes of the instance.
7083
7084     """
7085     instance = self._migrater.instance
7086     source_node = instance.primary_node
7087     target_node = self.op.target_node
7088     env = {
7089       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7090       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7091       "OLD_PRIMARY": source_node,
7092       "NEW_PRIMARY": target_node,
7093       }
7094
7095     if instance.disk_template in constants.DTS_INT_MIRROR:
7096       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7097       env["NEW_SECONDARY"] = source_node
7098     else:
7099       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7100
7101     env.update(_BuildInstanceHookEnvByObject(self, instance))
7102
7103     return env
7104
7105   def BuildHooksNodes(self):
7106     """Build hooks nodes.
7107
7108     """
7109     instance = self._migrater.instance
7110     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7111     return (nl, nl + [instance.primary_node])
7112
7113
7114 class LUInstanceMigrate(LogicalUnit):
7115   """Migrate an instance.
7116
7117   This is migration without shutting down, compared to the failover,
7118   which is done with shutdown.
7119
7120   """
7121   HPATH = "instance-migrate"
7122   HTYPE = constants.HTYPE_INSTANCE
7123   REQ_BGL = False
7124
7125   def ExpandNames(self):
7126     self._ExpandAndLockInstance()
7127
7128     if self.op.target_node is not None:
7129       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7130
7131     self.needed_locks[locking.LEVEL_NODE] = []
7132     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7133
7134     self._migrater = TLMigrateInstance(self, self.op.instance_name,
7135                                        cleanup=self.op.cleanup,
7136                                        failover=False,
7137                                        fallback=self.op.allow_failover)
7138     self.tasklets = [self._migrater]
7139
7140   def DeclareLocks(self, level):
7141     if level == locking.LEVEL_NODE:
7142       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7143       if instance.disk_template in constants.DTS_EXT_MIRROR:
7144         if self.op.target_node is None:
7145           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7146         else:
7147           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7148                                                    self.op.target_node]
7149         del self.recalculate_locks[locking.LEVEL_NODE]
7150       else:
7151         self._LockInstancesNodes()
7152
7153   def BuildHooksEnv(self):
7154     """Build hooks env.
7155
7156     This runs on master, primary and secondary nodes of the instance.
7157
7158     """
7159     instance = self._migrater.instance
7160     source_node = instance.primary_node
7161     target_node = self.op.target_node
7162     env = _BuildInstanceHookEnvByObject(self, instance)
7163     env.update({
7164       "MIGRATE_LIVE": self._migrater.live,
7165       "MIGRATE_CLEANUP": self.op.cleanup,
7166       "OLD_PRIMARY": source_node,
7167       "NEW_PRIMARY": target_node,
7168       })
7169
7170     if instance.disk_template in constants.DTS_INT_MIRROR:
7171       env["OLD_SECONDARY"] = target_node
7172       env["NEW_SECONDARY"] = source_node
7173     else:
7174       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7175
7176     return env
7177
7178   def BuildHooksNodes(self):
7179     """Build hooks nodes.
7180
7181     """
7182     instance = self._migrater.instance
7183     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7184     return (nl, nl + [instance.primary_node])
7185
7186
7187 class LUInstanceMove(LogicalUnit):
7188   """Move an instance by data-copying.
7189
7190   """
7191   HPATH = "instance-move"
7192   HTYPE = constants.HTYPE_INSTANCE
7193   REQ_BGL = False
7194
7195   def ExpandNames(self):
7196     self._ExpandAndLockInstance()
7197     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7198     self.op.target_node = target_node
7199     self.needed_locks[locking.LEVEL_NODE] = [target_node]
7200     self.needed_locks[locking.LEVEL_NODE_RES] = []
7201     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7202
7203   def DeclareLocks(self, level):
7204     if level == locking.LEVEL_NODE:
7205       self._LockInstancesNodes(primary_only=True)
7206     elif level == locking.LEVEL_NODE_RES:
7207       # Copy node locks
7208       self.needed_locks[locking.LEVEL_NODE_RES] = \
7209         self.needed_locks[locking.LEVEL_NODE][:]
7210
7211   def BuildHooksEnv(self):
7212     """Build hooks env.
7213
7214     This runs on master, primary and secondary nodes of the instance.
7215
7216     """
7217     env = {
7218       "TARGET_NODE": self.op.target_node,
7219       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7220       }
7221     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7222     return env
7223
7224   def BuildHooksNodes(self):
7225     """Build hooks nodes.
7226
7227     """
7228     nl = [
7229       self.cfg.GetMasterNode(),
7230       self.instance.primary_node,
7231       self.op.target_node,
7232       ]
7233     return (nl, nl)
7234
7235   def CheckPrereq(self):
7236     """Check prerequisites.
7237
7238     This checks that the instance is in the cluster.
7239
7240     """
7241     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7242     assert self.instance is not None, \
7243       "Cannot retrieve locked instance %s" % self.op.instance_name
7244
7245     node = self.cfg.GetNodeInfo(self.op.target_node)
7246     assert node is not None, \
7247       "Cannot retrieve locked node %s" % self.op.target_node
7248
7249     self.target_node = target_node = node.name
7250
7251     if target_node == instance.primary_node:
7252       raise errors.OpPrereqError("Instance %s is already on the node %s" %
7253                                  (instance.name, target_node),
7254                                  errors.ECODE_STATE)
7255
7256     bep = self.cfg.GetClusterInfo().FillBE(instance)
7257
7258     for idx, dsk in enumerate(instance.disks):
7259       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7260         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7261                                    " cannot copy" % idx, errors.ECODE_STATE)
7262
7263     _CheckNodeOnline(self, target_node)
7264     _CheckNodeNotDrained(self, target_node)
7265     _CheckNodeVmCapable(self, target_node)
7266
7267     if instance.admin_state == constants.ADMINST_UP:
7268       # check memory requirements on the secondary node
7269       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7270                            instance.name, bep[constants.BE_MAXMEM],
7271                            instance.hypervisor)
7272     else:
7273       self.LogInfo("Not checking memory on the secondary node as"
7274                    " instance will not be started")
7275
7276     # check bridge existance
7277     _CheckInstanceBridgesExist(self, instance, node=target_node)
7278
7279   def Exec(self, feedback_fn):
7280     """Move an instance.
7281
7282     The move is done by shutting it down on its present node, copying
7283     the data over (slow) and starting it on the new node.
7284
7285     """
7286     instance = self.instance
7287
7288     source_node = instance.primary_node
7289     target_node = self.target_node
7290
7291     self.LogInfo("Shutting down instance %s on source node %s",
7292                  instance.name, source_node)
7293
7294     assert (self.owned_locks(locking.LEVEL_NODE) ==
7295             self.owned_locks(locking.LEVEL_NODE_RES))
7296
7297     result = self.rpc.call_instance_shutdown(source_node, instance,
7298                                              self.op.shutdown_timeout)
7299     msg = result.fail_msg
7300     if msg:
7301       if self.op.ignore_consistency:
7302         self.proc.LogWarning("Could not shutdown instance %s on node %s."
7303                              " Proceeding anyway. Please make sure node"
7304                              " %s is down. Error details: %s",
7305                              instance.name, source_node, source_node, msg)
7306       else:
7307         raise errors.OpExecError("Could not shutdown instance %s on"
7308                                  " node %s: %s" %
7309                                  (instance.name, source_node, msg))
7310
7311     # create the target disks
7312     try:
7313       _CreateDisks(self, instance, target_node=target_node)
7314     except errors.OpExecError:
7315       self.LogWarning("Device creation failed, reverting...")
7316       try:
7317         _RemoveDisks(self, instance, target_node=target_node)
7318       finally:
7319         self.cfg.ReleaseDRBDMinors(instance.name)
7320         raise
7321
7322     cluster_name = self.cfg.GetClusterInfo().cluster_name
7323
7324     errs = []
7325     # activate, get path, copy the data over
7326     for idx, disk in enumerate(instance.disks):
7327       self.LogInfo("Copying data for disk %d", idx)
7328       result = self.rpc.call_blockdev_assemble(target_node, disk,
7329                                                instance.name, True, idx)
7330       if result.fail_msg:
7331         self.LogWarning("Can't assemble newly created disk %d: %s",
7332                         idx, result.fail_msg)
7333         errs.append(result.fail_msg)
7334         break
7335       dev_path = result.payload
7336       result = self.rpc.call_blockdev_export(source_node, disk,
7337                                              target_node, dev_path,
7338                                              cluster_name)
7339       if result.fail_msg:
7340         self.LogWarning("Can't copy data over for disk %d: %s",
7341                         idx, result.fail_msg)
7342         errs.append(result.fail_msg)
7343         break
7344
7345     if errs:
7346       self.LogWarning("Some disks failed to copy, aborting")
7347       try:
7348         _RemoveDisks(self, instance, target_node=target_node)
7349       finally:
7350         self.cfg.ReleaseDRBDMinors(instance.name)
7351         raise errors.OpExecError("Errors during disk copy: %s" %
7352                                  (",".join(errs),))
7353
7354     instance.primary_node = target_node
7355     self.cfg.Update(instance, feedback_fn)
7356
7357     self.LogInfo("Removing the disks on the original node")
7358     _RemoveDisks(self, instance, target_node=source_node)
7359
7360     # Only start the instance if it's marked as up
7361     if instance.admin_state == constants.ADMINST_UP:
7362       self.LogInfo("Starting instance %s on node %s",
7363                    instance.name, target_node)
7364
7365       disks_ok, _ = _AssembleInstanceDisks(self, instance,
7366                                            ignore_secondaries=True)
7367       if not disks_ok:
7368         _ShutdownInstanceDisks(self, instance)
7369         raise errors.OpExecError("Can't activate the instance's disks")
7370
7371       result = self.rpc.call_instance_start(target_node,
7372                                             (instance, None, None), False)
7373       msg = result.fail_msg
7374       if msg:
7375         _ShutdownInstanceDisks(self, instance)
7376         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7377                                  (instance.name, target_node, msg))
7378
7379
7380 class LUNodeMigrate(LogicalUnit):
7381   """Migrate all instances from a node.
7382
7383   """
7384   HPATH = "node-migrate"
7385   HTYPE = constants.HTYPE_NODE
7386   REQ_BGL = False
7387
7388   def CheckArguments(self):
7389     pass
7390
7391   def ExpandNames(self):
7392     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7393
7394     self.share_locks = _ShareAll()
7395     self.needed_locks = {
7396       locking.LEVEL_NODE: [self.op.node_name],
7397       }
7398
7399   def BuildHooksEnv(self):
7400     """Build hooks env.
7401
7402     This runs on the master, the primary and all the secondaries.
7403
7404     """
7405     return {
7406       "NODE_NAME": self.op.node_name,
7407       }
7408
7409   def BuildHooksNodes(self):
7410     """Build hooks nodes.
7411
7412     """
7413     nl = [self.cfg.GetMasterNode()]
7414     return (nl, nl)
7415
7416   def CheckPrereq(self):
7417     pass
7418
7419   def Exec(self, feedback_fn):
7420     # Prepare jobs for migration instances
7421     jobs = [
7422       [opcodes.OpInstanceMigrate(instance_name=inst.name,
7423                                  mode=self.op.mode,
7424                                  live=self.op.live,
7425                                  iallocator=self.op.iallocator,
7426                                  target_node=self.op.target_node)]
7427       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7428       ]
7429
7430     # TODO: Run iallocator in this opcode and pass correct placement options to
7431     # OpInstanceMigrate. Since other jobs can modify the cluster between
7432     # running the iallocator and the actual migration, a good consistency model
7433     # will have to be found.
7434
7435     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7436             frozenset([self.op.node_name]))
7437
7438     return ResultWithJobs(jobs)
7439
7440
7441 class TLMigrateInstance(Tasklet):
7442   """Tasklet class for instance migration.
7443
7444   @type live: boolean
7445   @ivar live: whether the migration will be done live or non-live;
7446       this variable is initalized only after CheckPrereq has run
7447   @type cleanup: boolean
7448   @ivar cleanup: Wheater we cleanup from a failed migration
7449   @type iallocator: string
7450   @ivar iallocator: The iallocator used to determine target_node
7451   @type target_node: string
7452   @ivar target_node: If given, the target_node to reallocate the instance to
7453   @type failover: boolean
7454   @ivar failover: Whether operation results in failover or migration
7455   @type fallback: boolean
7456   @ivar fallback: Whether fallback to failover is allowed if migration not
7457                   possible
7458   @type ignore_consistency: boolean
7459   @ivar ignore_consistency: Wheter we should ignore consistency between source
7460                             and target node
7461   @type shutdown_timeout: int
7462   @ivar shutdown_timeout: In case of failover timeout of the shutdown
7463
7464   """
7465
7466   # Constants
7467   _MIGRATION_POLL_INTERVAL = 1      # seconds
7468   _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7469
7470   def __init__(self, lu, instance_name, cleanup=False,
7471                failover=False, fallback=False,
7472                ignore_consistency=False,
7473                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7474     """Initializes this class.
7475
7476     """
7477     Tasklet.__init__(self, lu)
7478
7479     # Parameters
7480     self.instance_name = instance_name
7481     self.cleanup = cleanup
7482     self.live = False # will be overridden later
7483     self.failover = failover
7484     self.fallback = fallback
7485     self.ignore_consistency = ignore_consistency
7486     self.shutdown_timeout = shutdown_timeout
7487
7488   def CheckPrereq(self):
7489     """Check prerequisites.
7490
7491     This checks that the instance is in the cluster.
7492
7493     """
7494     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7495     instance = self.cfg.GetInstanceInfo(instance_name)
7496     assert instance is not None
7497     self.instance = instance
7498
7499     if (not self.cleanup and
7500         not instance.admin_state == constants.ADMINST_UP and
7501         not self.failover and self.fallback):
7502       self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7503                       " switching to failover")
7504       self.failover = True
7505
7506     if instance.disk_template not in constants.DTS_MIRRORED:
7507       if self.failover:
7508         text = "failovers"
7509       else:
7510         text = "migrations"
7511       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7512                                  " %s" % (instance.disk_template, text),
7513                                  errors.ECODE_STATE)
7514
7515     if instance.disk_template in constants.DTS_EXT_MIRROR:
7516       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7517
7518       if self.lu.op.iallocator:
7519         self._RunAllocator()
7520       else:
7521         # We set set self.target_node as it is required by
7522         # BuildHooksEnv
7523         self.target_node = self.lu.op.target_node
7524
7525       # self.target_node is already populated, either directly or by the
7526       # iallocator run
7527       target_node = self.target_node
7528       if self.target_node == instance.primary_node:
7529         raise errors.OpPrereqError("Cannot migrate instance %s"
7530                                    " to its primary (%s)" %
7531                                    (instance.name, instance.primary_node))
7532
7533       if len(self.lu.tasklets) == 1:
7534         # It is safe to release locks only when we're the only tasklet
7535         # in the LU
7536         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7537                       keep=[instance.primary_node, self.target_node])
7538
7539     else:
7540       secondary_nodes = instance.secondary_nodes
7541       if not secondary_nodes:
7542         raise errors.ConfigurationError("No secondary node but using"
7543                                         " %s disk template" %
7544                                         instance.disk_template)
7545       target_node = secondary_nodes[0]
7546       if self.lu.op.iallocator or (self.lu.op.target_node and
7547                                    self.lu.op.target_node != target_node):
7548         if self.failover:
7549           text = "failed over"
7550         else:
7551           text = "migrated"
7552         raise errors.OpPrereqError("Instances with disk template %s cannot"
7553                                    " be %s to arbitrary nodes"
7554                                    " (neither an iallocator nor a target"
7555                                    " node can be passed)" %
7556                                    (instance.disk_template, text),
7557                                    errors.ECODE_INVAL)
7558
7559     i_be = self.cfg.GetClusterInfo().FillBE(instance)
7560
7561     # check memory requirements on the secondary node
7562     if not self.failover or instance.admin_state == constants.ADMINST_UP:
7563       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7564                            instance.name, i_be[constants.BE_MAXMEM],
7565                            instance.hypervisor)
7566     else:
7567       self.lu.LogInfo("Not checking memory on the secondary node as"
7568                       " instance will not be started")
7569
7570     # check if failover must be forced instead of migration
7571     if (not self.cleanup and not self.failover and
7572         i_be[constants.BE_ALWAYS_FAILOVER]):
7573       if self.fallback:
7574         self.lu.LogInfo("Instance configured to always failover; fallback"
7575                         " to failover")
7576         self.failover = True
7577       else:
7578         raise errors.OpPrereqError("This instance has been configured to"
7579                                    " always failover, please allow failover",
7580                                    errors.ECODE_STATE)
7581
7582     # check bridge existance
7583     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7584
7585     if not self.cleanup:
7586       _CheckNodeNotDrained(self.lu, target_node)
7587       if not self.failover:
7588         result = self.rpc.call_instance_migratable(instance.primary_node,
7589                                                    instance)
7590         if result.fail_msg and self.fallback:
7591           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7592                           " failover")
7593           self.failover = True
7594         else:
7595           result.Raise("Can't migrate, please use failover",
7596                        prereq=True, ecode=errors.ECODE_STATE)
7597
7598     assert not (self.failover and self.cleanup)
7599
7600     if not self.failover:
7601       if self.lu.op.live is not None and self.lu.op.mode is not None:
7602         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7603                                    " parameters are accepted",
7604                                    errors.ECODE_INVAL)
7605       if self.lu.op.live is not None:
7606         if self.lu.op.live:
7607           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7608         else:
7609           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7610         # reset the 'live' parameter to None so that repeated
7611         # invocations of CheckPrereq do not raise an exception
7612         self.lu.op.live = None
7613       elif self.lu.op.mode is None:
7614         # read the default value from the hypervisor
7615         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7616                                                 skip_globals=False)
7617         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7618
7619       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7620     else:
7621       # Failover is never live
7622       self.live = False
7623
7624   def _RunAllocator(self):
7625     """Run the allocator based on input opcode.
7626
7627     """
7628     ial = IAllocator(self.cfg, self.rpc,
7629                      mode=constants.IALLOCATOR_MODE_RELOC,
7630                      name=self.instance_name,
7631                      # TODO See why hail breaks with a single node below
7632                      relocate_from=[self.instance.primary_node,
7633                                     self.instance.primary_node],
7634                      )
7635
7636     ial.Run(self.lu.op.iallocator)
7637
7638     if not ial.success:
7639       raise errors.OpPrereqError("Can't compute nodes using"
7640                                  " iallocator '%s': %s" %
7641                                  (self.lu.op.iallocator, ial.info),
7642                                  errors.ECODE_NORES)
7643     if len(ial.result) != ial.required_nodes:
7644       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7645                                  " of nodes (%s), required %s" %
7646                                  (self.lu.op.iallocator, len(ial.result),
7647                                   ial.required_nodes), errors.ECODE_FAULT)
7648     self.target_node = ial.result[0]
7649     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7650                  self.instance_name, self.lu.op.iallocator,
7651                  utils.CommaJoin(ial.result))
7652
7653   def _WaitUntilSync(self):
7654     """Poll with custom rpc for disk sync.
7655
7656     This uses our own step-based rpc call.
7657
7658     """
7659     self.feedback_fn("* wait until resync is done")
7660     all_done = False
7661     while not all_done:
7662       all_done = True
7663       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7664                                             self.nodes_ip,
7665                                             self.instance.disks)
7666       min_percent = 100
7667       for node, nres in result.items():
7668         nres.Raise("Cannot resync disks on node %s" % node)
7669         node_done, node_percent = nres.payload
7670         all_done = all_done and node_done
7671         if node_percent is not None:
7672           min_percent = min(min_percent, node_percent)
7673       if not all_done:
7674         if min_percent < 100:
7675           self.feedback_fn("   - progress: %.1f%%" % min_percent)
7676         time.sleep(2)
7677
7678   def _EnsureSecondary(self, node):
7679     """Demote a node to secondary.
7680
7681     """
7682     self.feedback_fn("* switching node %s to secondary mode" % node)
7683
7684     for dev in self.instance.disks:
7685       self.cfg.SetDiskID(dev, node)
7686
7687     result = self.rpc.call_blockdev_close(node, self.instance.name,
7688                                           self.instance.disks)
7689     result.Raise("Cannot change disk to secondary on node %s" % node)
7690
7691   def _GoStandalone(self):
7692     """Disconnect from the network.
7693
7694     """
7695     self.feedback_fn("* changing into standalone mode")
7696     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7697                                                self.instance.disks)
7698     for node, nres in result.items():
7699       nres.Raise("Cannot disconnect disks node %s" % node)
7700
7701   def _GoReconnect(self, multimaster):
7702     """Reconnect to the network.
7703
7704     """
7705     if multimaster:
7706       msg = "dual-master"
7707     else:
7708       msg = "single-master"
7709     self.feedback_fn("* changing disks into %s mode" % msg)
7710     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7711                                            self.instance.disks,
7712                                            self.instance.name, multimaster)
7713     for node, nres in result.items():
7714       nres.Raise("Cannot change disks config on node %s" % node)
7715
7716   def _ExecCleanup(self):
7717     """Try to cleanup after a failed migration.
7718
7719     The cleanup is done by:
7720       - check that the instance is running only on one node
7721         (and update the config if needed)
7722       - change disks on its secondary node to secondary
7723       - wait until disks are fully synchronized
7724       - disconnect from the network
7725       - change disks into single-master mode
7726       - wait again until disks are fully synchronized
7727
7728     """
7729     instance = self.instance
7730     target_node = self.target_node
7731     source_node = self.source_node
7732
7733     # check running on only one node
7734     self.feedback_fn("* checking where the instance actually runs"
7735                      " (if this hangs, the hypervisor might be in"
7736                      " a bad state)")
7737     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7738     for node, result in ins_l.items():
7739       result.Raise("Can't contact node %s" % node)
7740
7741     runningon_source = instance.name in ins_l[source_node].payload
7742     runningon_target = instance.name in ins_l[target_node].payload
7743
7744     if runningon_source and runningon_target:
7745       raise errors.OpExecError("Instance seems to be running on two nodes,"
7746                                " or the hypervisor is confused; you will have"
7747                                " to ensure manually that it runs only on one"
7748                                " and restart this operation")
7749
7750     if not (runningon_source or runningon_target):
7751       raise errors.OpExecError("Instance does not seem to be running at all;"
7752                                " in this case it's safer to repair by"
7753                                " running 'gnt-instance stop' to ensure disk"
7754                                " shutdown, and then restarting it")
7755
7756     if runningon_target:
7757       # the migration has actually succeeded, we need to update the config
7758       self.feedback_fn("* instance running on secondary node (%s),"
7759                        " updating config" % target_node)
7760       instance.primary_node = target_node
7761       self.cfg.Update(instance, self.feedback_fn)
7762       demoted_node = source_node
7763     else:
7764       self.feedback_fn("* instance confirmed to be running on its"
7765                        " primary node (%s)" % source_node)
7766       demoted_node = target_node
7767
7768     if instance.disk_template in constants.DTS_INT_MIRROR:
7769       self._EnsureSecondary(demoted_node)
7770       try:
7771         self._WaitUntilSync()
7772       except errors.OpExecError:
7773         # we ignore here errors, since if the device is standalone, it
7774         # won't be able to sync
7775         pass
7776       self._GoStandalone()
7777       self._GoReconnect(False)
7778       self._WaitUntilSync()
7779
7780     self.feedback_fn("* done")
7781
7782   def _RevertDiskStatus(self):
7783     """Try to revert the disk status after a failed migration.
7784
7785     """
7786     target_node = self.target_node
7787     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7788       return
7789
7790     try:
7791       self._EnsureSecondary(target_node)
7792       self._GoStandalone()
7793       self._GoReconnect(False)
7794       self._WaitUntilSync()
7795     except errors.OpExecError, err:
7796       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7797                          " please try to recover the instance manually;"
7798                          " error '%s'" % str(err))
7799
7800   def _AbortMigration(self):
7801     """Call the hypervisor code to abort a started migration.
7802
7803     """
7804     instance = self.instance
7805     target_node = self.target_node
7806     source_node = self.source_node
7807     migration_info = self.migration_info
7808
7809     abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7810                                                                  instance,
7811                                                                  migration_info,
7812                                                                  False)
7813     abort_msg = abort_result.fail_msg
7814     if abort_msg:
7815       logging.error("Aborting migration failed on target node %s: %s",
7816                     target_node, abort_msg)
7817       # Don't raise an exception here, as we stil have to try to revert the
7818       # disk status, even if this step failed.
7819
7820     abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7821         instance, False, self.live)
7822     abort_msg = abort_result.fail_msg
7823     if abort_msg:
7824       logging.error("Aborting migration failed on source node %s: %s",
7825                     source_node, abort_msg)
7826
7827   def _ExecMigration(self):
7828     """Migrate an instance.
7829
7830     The migrate is done by:
7831       - change the disks into dual-master mode
7832       - wait until disks are fully synchronized again
7833       - migrate the instance
7834       - change disks on the new secondary node (the old primary) to secondary
7835       - wait until disks are fully synchronized
7836       - change disks into single-master mode
7837
7838     """
7839     instance = self.instance
7840     target_node = self.target_node
7841     source_node = self.source_node
7842
7843     # Check for hypervisor version mismatch and warn the user.
7844     nodeinfo = self.rpc.call_node_info([source_node, target_node],
7845                                        None, [self.instance.hypervisor])
7846     for ninfo in nodeinfo.values():
7847       ninfo.Raise("Unable to retrieve node information from node '%s'" %
7848                   ninfo.node)
7849     (_, _, (src_info, )) = nodeinfo[source_node].payload
7850     (_, _, (dst_info, )) = nodeinfo[target_node].payload
7851
7852     if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7853         (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7854       src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7855       dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7856       if src_version != dst_version:
7857         self.feedback_fn("* warning: hypervisor version mismatch between"
7858                          " source (%s) and target (%s) node" %
7859                          (src_version, dst_version))
7860
7861     self.feedback_fn("* checking disk consistency between source and target")
7862     for dev in instance.disks:
7863       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7864         raise errors.OpExecError("Disk %s is degraded or not fully"
7865                                  " synchronized on target node,"
7866                                  " aborting migration" % dev.iv_name)
7867
7868     # First get the migration information from the remote node
7869     result = self.rpc.call_migration_info(source_node, instance)
7870     msg = result.fail_msg
7871     if msg:
7872       log_err = ("Failed fetching source migration information from %s: %s" %
7873                  (source_node, msg))
7874       logging.error(log_err)
7875       raise errors.OpExecError(log_err)
7876
7877     self.migration_info = migration_info = result.payload
7878
7879     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7880       # Then switch the disks to master/master mode
7881       self._EnsureSecondary(target_node)
7882       self._GoStandalone()
7883       self._GoReconnect(True)
7884       self._WaitUntilSync()
7885
7886     self.feedback_fn("* preparing %s to accept the instance" % target_node)
7887     result = self.rpc.call_accept_instance(target_node,
7888                                            instance,
7889                                            migration_info,
7890                                            self.nodes_ip[target_node])
7891
7892     msg = result.fail_msg
7893     if msg:
7894       logging.error("Instance pre-migration failed, trying to revert"
7895                     " disk status: %s", msg)
7896       self.feedback_fn("Pre-migration failed, aborting")
7897       self._AbortMigration()
7898       self._RevertDiskStatus()
7899       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7900                                (instance.name, msg))
7901
7902     self.feedback_fn("* migrating instance to %s" % target_node)
7903     result = self.rpc.call_instance_migrate(source_node, instance,
7904                                             self.nodes_ip[target_node],
7905                                             self.live)
7906     msg = result.fail_msg
7907     if msg:
7908       logging.error("Instance migration failed, trying to revert"
7909                     " disk status: %s", msg)
7910       self.feedback_fn("Migration failed, aborting")
7911       self._AbortMigration()
7912       self._RevertDiskStatus()
7913       raise errors.OpExecError("Could not migrate instance %s: %s" %
7914                                (instance.name, msg))
7915
7916     self.feedback_fn("* starting memory transfer")
7917     last_feedback = time.time()
7918     while True:
7919       result = self.rpc.call_instance_get_migration_status(source_node,
7920                                                            instance)
7921       msg = result.fail_msg
7922       ms = result.payload   # MigrationStatus instance
7923       if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7924         logging.error("Instance migration failed, trying to revert"
7925                       " disk status: %s", msg)
7926         self.feedback_fn("Migration failed, aborting")
7927         self._AbortMigration()
7928         self._RevertDiskStatus()
7929         raise errors.OpExecError("Could not migrate instance %s: %s" %
7930                                  (instance.name, msg))
7931
7932       if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7933         self.feedback_fn("* memory transfer complete")
7934         break
7935
7936       if (utils.TimeoutExpired(last_feedback,
7937                                self._MIGRATION_FEEDBACK_INTERVAL) and
7938           ms.transferred_ram is not None):
7939         mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7940         self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7941         last_feedback = time.time()
7942
7943       time.sleep(self._MIGRATION_POLL_INTERVAL)
7944
7945     result = self.rpc.call_instance_finalize_migration_src(source_node,
7946                                                            instance,
7947                                                            True,
7948                                                            self.live)
7949     msg = result.fail_msg
7950     if msg:
7951       logging.error("Instance migration succeeded, but finalization failed"
7952                     " on the source node: %s", msg)
7953       raise errors.OpExecError("Could not finalize instance migration: %s" %
7954                                msg)
7955
7956     instance.primary_node = target_node
7957
7958     # distribute new instance config to the other nodes
7959     self.cfg.Update(instance, self.feedback_fn)
7960
7961     result = self.rpc.call_instance_finalize_migration_dst(target_node,
7962                                                            instance,
7963                                                            migration_info,
7964                                                            True)
7965     msg = result.fail_msg
7966     if msg:
7967       logging.error("Instance migration succeeded, but finalization failed"
7968                     " on the target node: %s", msg)
7969       raise errors.OpExecError("Could not finalize instance migration: %s" %
7970                                msg)
7971
7972     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7973       self._EnsureSecondary(source_node)
7974       self._WaitUntilSync()
7975       self._GoStandalone()
7976       self._GoReconnect(False)
7977       self._WaitUntilSync()
7978
7979     self.feedback_fn("* done")
7980
7981   def _ExecFailover(self):
7982     """Failover an instance.
7983
7984     The failover is done by shutting it down on its present node and
7985     starting it on the secondary.
7986
7987     """
7988     instance = self.instance
7989     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7990
7991     source_node = instance.primary_node
7992     target_node = self.target_node
7993
7994     if instance.admin_state == constants.ADMINST_UP:
7995       self.feedback_fn("* checking disk consistency between source and target")
7996       for dev in instance.disks:
7997         # for drbd, these are drbd over lvm
7998         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7999           if primary_node.offline:
8000             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8001                              " target node %s" %
8002                              (primary_node.name, dev.iv_name, target_node))
8003           elif not self.ignore_consistency:
8004             raise errors.OpExecError("Disk %s is degraded on target node,"
8005                                      " aborting failover" % dev.iv_name)
8006     else:
8007       self.feedback_fn("* not checking disk consistency as instance is not"
8008                        " running")
8009
8010     self.feedback_fn("* shutting down instance on source node")
8011     logging.info("Shutting down instance %s on node %s",
8012                  instance.name, source_node)
8013
8014     result = self.rpc.call_instance_shutdown(source_node, instance,
8015                                              self.shutdown_timeout)
8016     msg = result.fail_msg
8017     if msg:
8018       if self.ignore_consistency or primary_node.offline:
8019         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8020                            " proceeding anyway; please make sure node"
8021                            " %s is down; error details: %s",
8022                            instance.name, source_node, source_node, msg)
8023       else:
8024         raise errors.OpExecError("Could not shutdown instance %s on"
8025                                  " node %s: %s" %
8026                                  (instance.name, source_node, msg))
8027
8028     self.feedback_fn("* deactivating the instance's disks on source node")
8029     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8030       raise errors.OpExecError("Can't shut down the instance's disks")
8031
8032     instance.primary_node = target_node
8033     # distribute new instance config to the other nodes
8034     self.cfg.Update(instance, self.feedback_fn)
8035
8036     # Only start the instance if it's marked as up
8037     if instance.admin_state == constants.ADMINST_UP:
8038       self.feedback_fn("* activating the instance's disks on target node %s" %
8039                        target_node)
8040       logging.info("Starting instance %s on node %s",
8041                    instance.name, target_node)
8042
8043       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8044                                            ignore_secondaries=True)
8045       if not disks_ok:
8046         _ShutdownInstanceDisks(self.lu, instance)
8047         raise errors.OpExecError("Can't activate the instance's disks")
8048
8049       self.feedback_fn("* starting the instance on the target node %s" %
8050                        target_node)
8051       result = self.rpc.call_instance_start(target_node, (instance, None, None),
8052                                             False)
8053       msg = result.fail_msg
8054       if msg:
8055         _ShutdownInstanceDisks(self.lu, instance)
8056         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8057                                  (instance.name, target_node, msg))
8058
8059   def Exec(self, feedback_fn):
8060     """Perform the migration.
8061
8062     """
8063     self.feedback_fn = feedback_fn
8064     self.source_node = self.instance.primary_node
8065
8066     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8067     if self.instance.disk_template in constants.DTS_INT_MIRROR:
8068       self.target_node = self.instance.secondary_nodes[0]
8069       # Otherwise self.target_node has been populated either
8070       # directly, or through an iallocator.
8071
8072     self.all_nodes = [self.source_node, self.target_node]
8073     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8074                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
8075
8076     if self.failover:
8077       feedback_fn("Failover instance %s" % self.instance.name)
8078       self._ExecFailover()
8079     else:
8080       feedback_fn("Migrating instance %s" % self.instance.name)
8081
8082       if self.cleanup:
8083         return self._ExecCleanup()
8084       else:
8085         return self._ExecMigration()
8086
8087
8088 def _CreateBlockDev(lu, node, instance, device, force_create,
8089                     info, force_open):
8090   """Create a tree of block devices on a given node.
8091
8092   If this device type has to be created on secondaries, create it and
8093   all its children.
8094
8095   If not, just recurse to children keeping the same 'force' value.
8096
8097   @param lu: the lu on whose behalf we execute
8098   @param node: the node on which to create the device
8099   @type instance: L{objects.Instance}
8100   @param instance: the instance which owns the device
8101   @type device: L{objects.Disk}
8102   @param device: the device to create
8103   @type force_create: boolean
8104   @param force_create: whether to force creation of this device; this
8105       will be change to True whenever we find a device which has
8106       CreateOnSecondary() attribute
8107   @param info: the extra 'metadata' we should attach to the device
8108       (this will be represented as a LVM tag)
8109   @type force_open: boolean
8110   @param force_open: this parameter will be passes to the
8111       L{backend.BlockdevCreate} function where it specifies
8112       whether we run on primary or not, and it affects both
8113       the child assembly and the device own Open() execution
8114
8115   """
8116   if device.CreateOnSecondary():
8117     force_create = True
8118
8119   if device.children:
8120     for child in device.children:
8121       _CreateBlockDev(lu, node, instance, child, force_create,
8122                       info, force_open)
8123
8124   if not force_create:
8125     return
8126
8127   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8128
8129
8130 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8131   """Create a single block device on a given node.
8132
8133   This will not recurse over children of the device, so they must be
8134   created in advance.
8135
8136   @param lu: the lu on whose behalf we execute
8137   @param node: the node on which to create the device
8138   @type instance: L{objects.Instance}
8139   @param instance: the instance which owns the device
8140   @type device: L{objects.Disk}
8141   @param device: the device to create
8142   @param info: the extra 'metadata' we should attach to the device
8143       (this will be represented as a LVM tag)
8144   @type force_open: boolean
8145   @param force_open: this parameter will be passes to the
8146       L{backend.BlockdevCreate} function where it specifies
8147       whether we run on primary or not, and it affects both
8148       the child assembly and the device own Open() execution
8149
8150   """
8151   lu.cfg.SetDiskID(device, node)
8152   result = lu.rpc.call_blockdev_create(node, device, device.size,
8153                                        instance.name, force_open, info)
8154   result.Raise("Can't create block device %s on"
8155                " node %s for instance %s" % (device, node, instance.name))
8156   if device.physical_id is None:
8157     device.physical_id = result.payload
8158
8159
8160 def _GenerateUniqueNames(lu, exts):
8161   """Generate a suitable LV name.
8162
8163   This will generate a logical volume name for the given instance.
8164
8165   """
8166   results = []
8167   for val in exts:
8168     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8169     results.append("%s%s" % (new_id, val))
8170   return results
8171
8172
8173 def _ComputeLDParams(disk_template, disk_params):
8174   """Computes Logical Disk parameters from Disk Template parameters.
8175
8176   @type disk_template: string
8177   @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8178   @type disk_params: dict
8179   @param disk_params: disk template parameters; dict(template_name -> parameters
8180   @rtype: list(dict)
8181   @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8182     contains the LD parameters of the node. The tree is flattened in-order.
8183
8184   """
8185   if disk_template not in constants.DISK_TEMPLATES:
8186     raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8187
8188   result = list()
8189   dt_params = disk_params[disk_template]
8190   if disk_template == constants.DT_DRBD8:
8191     drbd_params = {
8192       constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8193       constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8194       constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8195       constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8196       constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8197       constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8198       }
8199
8200     drbd_params = \
8201       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8202                        drbd_params)
8203
8204     result.append(drbd_params)
8205
8206     # data LV
8207     data_params = {
8208       constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8209       }
8210     data_params = \
8211       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8212                        data_params)
8213     result.append(data_params)
8214
8215     # metadata LV
8216     meta_params = {
8217       constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8218       }
8219     meta_params = \
8220       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8221                        meta_params)
8222     result.append(meta_params)
8223
8224   elif (disk_template == constants.DT_FILE or
8225         disk_template == constants.DT_SHARED_FILE):
8226     result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8227
8228   elif disk_template == constants.DT_PLAIN:
8229     params = {
8230       constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8231       }
8232     params = \
8233       objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8234                        params)
8235     result.append(params)
8236
8237   elif disk_template == constants.DT_BLOCK:
8238     result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8239
8240   return result
8241
8242
8243 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8244                          iv_name, p_minor, s_minor, drbd_params, data_params,
8245                          meta_params):
8246   """Generate a drbd8 device complete with its children.
8247
8248   """
8249   assert len(vgnames) == len(names) == 2
8250   port = lu.cfg.AllocatePort()
8251   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8252
8253   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8254                           logical_id=(vgnames[0], names[0]),
8255                           params=data_params)
8256   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8257                           logical_id=(vgnames[1], names[1]),
8258                           params=meta_params)
8259   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8260                           logical_id=(primary, secondary, port,
8261                                       p_minor, s_minor,
8262                                       shared_secret),
8263                           children=[dev_data, dev_meta],
8264                           iv_name=iv_name, params=drbd_params)
8265   return drbd_dev
8266
8267
8268 def _GenerateDiskTemplate(lu, template_name,
8269                           instance_name, primary_node,
8270                           secondary_nodes, disk_info,
8271                           file_storage_dir, file_driver,
8272                           base_index, feedback_fn, disk_params):
8273   """Generate the entire disk layout for a given template type.
8274
8275   """
8276   #TODO: compute space requirements
8277
8278   vgname = lu.cfg.GetVGName()
8279   disk_count = len(disk_info)
8280   disks = []
8281   ld_params = _ComputeLDParams(template_name, disk_params)
8282   if template_name == constants.DT_DISKLESS:
8283     pass
8284   elif template_name == constants.DT_PLAIN:
8285     if len(secondary_nodes) != 0:
8286       raise errors.ProgrammerError("Wrong template configuration")
8287
8288     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8289                                       for i in range(disk_count)])
8290     for idx, disk in enumerate(disk_info):
8291       disk_index = idx + base_index
8292       vg = disk.get(constants.IDISK_VG, vgname)
8293       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8294       disk_dev = objects.Disk(dev_type=constants.LD_LV,
8295                               size=disk[constants.IDISK_SIZE],
8296                               logical_id=(vg, names[idx]),
8297                               iv_name="disk/%d" % disk_index,
8298                               mode=disk[constants.IDISK_MODE],
8299                               params=ld_params[0])
8300       disks.append(disk_dev)
8301   elif template_name == constants.DT_DRBD8:
8302     drbd_params, data_params, meta_params = ld_params
8303     if len(secondary_nodes) != 1:
8304       raise errors.ProgrammerError("Wrong template configuration")
8305     remote_node = secondary_nodes[0]
8306     minors = lu.cfg.AllocateDRBDMinor(
8307       [primary_node, remote_node] * len(disk_info), instance_name)
8308
8309     names = []
8310     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8311                                                for i in range(disk_count)]):
8312       names.append(lv_prefix + "_data")
8313       names.append(lv_prefix + "_meta")
8314     for idx, disk in enumerate(disk_info):
8315       disk_index = idx + base_index
8316       drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8317       data_vg = disk.get(constants.IDISK_VG, vgname)
8318       meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8319       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8320                                       disk[constants.IDISK_SIZE],
8321                                       [data_vg, meta_vg],
8322                                       names[idx * 2:idx * 2 + 2],
8323                                       "disk/%d" % disk_index,
8324                                       minors[idx * 2], minors[idx * 2 + 1],
8325                                       drbd_params, data_params, meta_params)
8326       disk_dev.mode = disk[constants.IDISK_MODE]
8327       disks.append(disk_dev)
8328   elif template_name == constants.DT_FILE:
8329     if len(secondary_nodes) != 0:
8330       raise errors.ProgrammerError("Wrong template configuration")
8331
8332     opcodes.RequireFileStorage()
8333
8334     for idx, disk in enumerate(disk_info):
8335       disk_index = idx + base_index
8336       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8337                               size=disk[constants.IDISK_SIZE],
8338                               iv_name="disk/%d" % disk_index,
8339                               logical_id=(file_driver,
8340                                           "%s/disk%d" % (file_storage_dir,
8341                                                          disk_index)),
8342                               mode=disk[constants.IDISK_MODE],
8343                               params=ld_params[0])
8344       disks.append(disk_dev)
8345   elif template_name == constants.DT_SHARED_FILE:
8346     if len(secondary_nodes) != 0:
8347       raise errors.ProgrammerError("Wrong template configuration")
8348
8349     opcodes.RequireSharedFileStorage()
8350
8351     for idx, disk in enumerate(disk_info):
8352       disk_index = idx + base_index
8353       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8354                               size=disk[constants.IDISK_SIZE],
8355                               iv_name="disk/%d" % disk_index,
8356                               logical_id=(file_driver,
8357                                           "%s/disk%d" % (file_storage_dir,
8358                                                          disk_index)),
8359                               mode=disk[constants.IDISK_MODE],
8360                               params=ld_params[0])
8361       disks.append(disk_dev)
8362   elif template_name == constants.DT_BLOCK:
8363     if len(secondary_nodes) != 0:
8364       raise errors.ProgrammerError("Wrong template configuration")
8365
8366     for idx, disk in enumerate(disk_info):
8367       disk_index = idx + base_index
8368       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8369                               size=disk[constants.IDISK_SIZE],
8370                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8371                                           disk[constants.IDISK_ADOPT]),
8372                               iv_name="disk/%d" % disk_index,
8373                               mode=disk[constants.IDISK_MODE],
8374                               params=ld_params[0])
8375       disks.append(disk_dev)
8376
8377   else:
8378     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8379   return disks
8380
8381
8382 def _GetInstanceInfoText(instance):
8383   """Compute that text that should be added to the disk's metadata.
8384
8385   """
8386   return "originstname+%s" % instance.name
8387
8388
8389 def _CalcEta(time_taken, written, total_size):
8390   """Calculates the ETA based on size written and total size.
8391
8392   @param time_taken: The time taken so far
8393   @param written: amount written so far
8394   @param total_size: The total size of data to be written
8395   @return: The remaining time in seconds
8396
8397   """
8398   avg_time = time_taken / float(written)
8399   return (total_size - written) * avg_time
8400
8401
8402 def _WipeDisks(lu, instance):
8403   """Wipes instance disks.
8404
8405   @type lu: L{LogicalUnit}
8406   @param lu: the logical unit on whose behalf we execute
8407   @type instance: L{objects.Instance}
8408   @param instance: the instance whose disks we should create
8409   @return: the success of the wipe
8410
8411   """
8412   node = instance.primary_node
8413
8414   for device in instance.disks:
8415     lu.cfg.SetDiskID(device, node)
8416
8417   logging.info("Pause sync of instance %s disks", instance.name)
8418   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8419
8420   for idx, success in enumerate(result.payload):
8421     if not success:
8422       logging.warn("pause-sync of instance %s for disks %d failed",
8423                    instance.name, idx)
8424
8425   try:
8426     for idx, device in enumerate(instance.disks):
8427       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8428       # MAX_WIPE_CHUNK at max
8429       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8430                             constants.MIN_WIPE_CHUNK_PERCENT)
8431       # we _must_ make this an int, otherwise rounding errors will
8432       # occur
8433       wipe_chunk_size = int(wipe_chunk_size)
8434
8435       lu.LogInfo("* Wiping disk %d", idx)
8436       logging.info("Wiping disk %d for instance %s, node %s using"
8437                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8438
8439       offset = 0
8440       size = device.size
8441       last_output = 0
8442       start_time = time.time()
8443
8444       while offset < size:
8445         wipe_size = min(wipe_chunk_size, size - offset)
8446         logging.debug("Wiping disk %d, offset %s, chunk %s",
8447                       idx, offset, wipe_size)
8448         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8449         result.Raise("Could not wipe disk %d at offset %d for size %d" %
8450                      (idx, offset, wipe_size))
8451         now = time.time()
8452         offset += wipe_size
8453         if now - last_output >= 60:
8454           eta = _CalcEta(now - start_time, offset, size)
8455           lu.LogInfo(" - done: %.1f%% ETA: %s" %
8456                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
8457           last_output = now
8458   finally:
8459     logging.info("Resume sync of instance %s disks", instance.name)
8460
8461     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8462
8463     for idx, success in enumerate(result.payload):
8464       if not success:
8465         lu.LogWarning("Resume sync of disk %d failed, please have a"
8466                       " look at the status and troubleshoot the issue", idx)
8467         logging.warn("resume-sync of instance %s for disks %d failed",
8468                      instance.name, idx)
8469
8470
8471 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8472   """Create all disks for an instance.
8473
8474   This abstracts away some work from AddInstance.
8475
8476   @type lu: L{LogicalUnit}
8477   @param lu: the logical unit on whose behalf we execute
8478   @type instance: L{objects.Instance}
8479   @param instance: the instance whose disks we should create
8480   @type to_skip: list
8481   @param to_skip: list of indices to skip
8482   @type target_node: string
8483   @param target_node: if passed, overrides the target node for creation
8484   @rtype: boolean
8485   @return: the success of the creation
8486
8487   """
8488   info = _GetInstanceInfoText(instance)
8489   if target_node is None:
8490     pnode = instance.primary_node
8491     all_nodes = instance.all_nodes
8492   else:
8493     pnode = target_node
8494     all_nodes = [pnode]
8495
8496   if instance.disk_template in constants.DTS_FILEBASED:
8497     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8498     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8499
8500     result.Raise("Failed to create directory '%s' on"
8501                  " node %s" % (file_storage_dir, pnode))
8502
8503   # Note: this needs to be kept in sync with adding of disks in
8504   # LUInstanceSetParams
8505   for idx, device in enumerate(instance.disks):
8506     if to_skip and idx in to_skip:
8507       continue
8508     logging.info("Creating volume %s for instance %s",
8509                  device.iv_name, instance.name)
8510     #HARDCODE
8511     for node in all_nodes:
8512       f_create = node == pnode
8513       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8514
8515
8516 def _RemoveDisks(lu, instance, target_node=None):
8517   """Remove all disks for an instance.
8518
8519   This abstracts away some work from `AddInstance()` and
8520   `RemoveInstance()`. Note that in case some of the devices couldn't
8521   be removed, the removal will continue with the other ones (compare
8522   with `_CreateDisks()`).
8523
8524   @type lu: L{LogicalUnit}
8525   @param lu: the logical unit on whose behalf we execute
8526   @type instance: L{objects.Instance}
8527   @param instance: the instance whose disks we should remove
8528   @type target_node: string
8529   @param target_node: used to override the node on which to remove the disks
8530   @rtype: boolean
8531   @return: the success of the removal
8532
8533   """
8534   logging.info("Removing block devices for instance %s", instance.name)
8535
8536   all_result = True
8537   for device in instance.disks:
8538     if target_node:
8539       edata = [(target_node, device)]
8540     else:
8541       edata = device.ComputeNodeTree(instance.primary_node)
8542     for node, disk in edata:
8543       lu.cfg.SetDiskID(disk, node)
8544       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8545       if msg:
8546         lu.LogWarning("Could not remove block device %s on node %s,"
8547                       " continuing anyway: %s", device.iv_name, node, msg)
8548         all_result = False
8549
8550     # if this is a DRBD disk, return its port to the pool
8551     if device.dev_type in constants.LDS_DRBD:
8552       tcp_port = device.logical_id[2]
8553       lu.cfg.AddTcpUdpPort(tcp_port)
8554
8555   if instance.disk_template == constants.DT_FILE:
8556     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8557     if target_node:
8558       tgt = target_node
8559     else:
8560       tgt = instance.primary_node
8561     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8562     if result.fail_msg:
8563       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8564                     file_storage_dir, instance.primary_node, result.fail_msg)
8565       all_result = False
8566
8567   return all_result
8568
8569
8570 def _ComputeDiskSizePerVG(disk_template, disks):
8571   """Compute disk size requirements in the volume group
8572
8573   """
8574   def _compute(disks, payload):
8575     """Universal algorithm.
8576
8577     """
8578     vgs = {}
8579     for disk in disks:
8580       vgs[disk[constants.IDISK_VG]] = \
8581         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8582
8583     return vgs
8584
8585   # Required free disk space as a function of disk and swap space
8586   req_size_dict = {
8587     constants.DT_DISKLESS: {},
8588     constants.DT_PLAIN: _compute(disks, 0),
8589     # 128 MB are added for drbd metadata for each disk
8590     constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8591     constants.DT_FILE: {},
8592     constants.DT_SHARED_FILE: {},
8593   }
8594
8595   if disk_template not in req_size_dict:
8596     raise errors.ProgrammerError("Disk template '%s' size requirement"
8597                                  " is unknown" % disk_template)
8598
8599   return req_size_dict[disk_template]
8600
8601
8602 def _ComputeDiskSize(disk_template, disks):
8603   """Compute disk size requirements in the volume group
8604
8605   """
8606   # Required free disk space as a function of disk and swap space
8607   req_size_dict = {
8608     constants.DT_DISKLESS: None,
8609     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8610     # 128 MB are added for drbd metadata for each disk
8611     constants.DT_DRBD8:
8612       sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8613     constants.DT_FILE: None,
8614     constants.DT_SHARED_FILE: 0,
8615     constants.DT_BLOCK: 0,
8616   }
8617
8618   if disk_template not in req_size_dict:
8619     raise errors.ProgrammerError("Disk template '%s' size requirement"
8620                                  " is unknown" % disk_template)
8621
8622   return req_size_dict[disk_template]
8623
8624
8625 def _FilterVmNodes(lu, nodenames):
8626   """Filters out non-vm_capable nodes from a list.
8627
8628   @type lu: L{LogicalUnit}
8629   @param lu: the logical unit for which we check
8630   @type nodenames: list
8631   @param nodenames: the list of nodes on which we should check
8632   @rtype: list
8633   @return: the list of vm-capable nodes
8634
8635   """
8636   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8637   return [name for name in nodenames if name not in vm_nodes]
8638
8639
8640 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8641   """Hypervisor parameter validation.
8642
8643   This function abstract the hypervisor parameter validation to be
8644   used in both instance create and instance modify.
8645
8646   @type lu: L{LogicalUnit}
8647   @param lu: the logical unit for which we check
8648   @type nodenames: list
8649   @param nodenames: the list of nodes on which we should check
8650   @type hvname: string
8651   @param hvname: the name of the hypervisor we should use
8652   @type hvparams: dict
8653   @param hvparams: the parameters which we need to check
8654   @raise errors.OpPrereqError: if the parameters are not valid
8655
8656   """
8657   nodenames = _FilterVmNodes(lu, nodenames)
8658
8659   cluster = lu.cfg.GetClusterInfo()
8660   hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8661
8662   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8663   for node in nodenames:
8664     info = hvinfo[node]
8665     if info.offline:
8666       continue
8667     info.Raise("Hypervisor parameter validation failed on node %s" % node)
8668
8669
8670 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8671   """OS parameters validation.
8672
8673   @type lu: L{LogicalUnit}
8674   @param lu: the logical unit for which we check
8675   @type required: boolean
8676   @param required: whether the validation should fail if the OS is not
8677       found
8678   @type nodenames: list
8679   @param nodenames: the list of nodes on which we should check
8680   @type osname: string
8681   @param osname: the name of the hypervisor we should use
8682   @type osparams: dict
8683   @param osparams: the parameters which we need to check
8684   @raise errors.OpPrereqError: if the parameters are not valid
8685
8686   """
8687   nodenames = _FilterVmNodes(lu, nodenames)
8688   result = lu.rpc.call_os_validate(nodenames, required, osname,
8689                                    [constants.OS_VALIDATE_PARAMETERS],
8690                                    osparams)
8691   for node, nres in result.items():
8692     # we don't check for offline cases since this should be run only
8693     # against the master node and/or an instance's nodes
8694     nres.Raise("OS Parameters validation failed on node %s" % node)
8695     if not nres.payload:
8696       lu.LogInfo("OS %s not found on node %s, validation skipped",
8697                  osname, node)
8698
8699
8700 class LUInstanceCreate(LogicalUnit):
8701   """Create an instance.
8702
8703   """
8704   HPATH = "instance-add"
8705   HTYPE = constants.HTYPE_INSTANCE
8706   REQ_BGL = False
8707
8708   def CheckArguments(self):
8709     """Check arguments.
8710
8711     """
8712     # do not require name_check to ease forward/backward compatibility
8713     # for tools
8714     if self.op.no_install and self.op.start:
8715       self.LogInfo("No-installation mode selected, disabling startup")
8716       self.op.start = False
8717     # validate/normalize the instance name
8718     self.op.instance_name = \
8719       netutils.Hostname.GetNormalizedName(self.op.instance_name)
8720
8721     if self.op.ip_check and not self.op.name_check:
8722       # TODO: make the ip check more flexible and not depend on the name check
8723       raise errors.OpPrereqError("Cannot do IP address check without a name"
8724                                  " check", errors.ECODE_INVAL)
8725
8726     # check nics' parameter names
8727     for nic in self.op.nics:
8728       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8729
8730     # check disks. parameter names and consistent adopt/no-adopt strategy
8731     has_adopt = has_no_adopt = False
8732     for disk in self.op.disks:
8733       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8734       if constants.IDISK_ADOPT in disk:
8735         has_adopt = True
8736       else:
8737         has_no_adopt = True
8738     if has_adopt and has_no_adopt:
8739       raise errors.OpPrereqError("Either all disks are adopted or none is",
8740                                  errors.ECODE_INVAL)
8741     if has_adopt:
8742       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8743         raise errors.OpPrereqError("Disk adoption is not supported for the"
8744                                    " '%s' disk template" %
8745                                    self.op.disk_template,
8746                                    errors.ECODE_INVAL)
8747       if self.op.iallocator is not None:
8748         raise errors.OpPrereqError("Disk adoption not allowed with an"
8749                                    " iallocator script", errors.ECODE_INVAL)
8750       if self.op.mode == constants.INSTANCE_IMPORT:
8751         raise errors.OpPrereqError("Disk adoption not allowed for"
8752                                    " instance import", errors.ECODE_INVAL)
8753     else:
8754       if self.op.disk_template in constants.DTS_MUST_ADOPT:
8755         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8756                                    " but no 'adopt' parameter given" %
8757                                    self.op.disk_template,
8758                                    errors.ECODE_INVAL)
8759
8760     self.adopt_disks = has_adopt
8761
8762     # instance name verification
8763     if self.op.name_check:
8764       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8765       self.op.instance_name = self.hostname1.name
8766       # used in CheckPrereq for ip ping check
8767       self.check_ip = self.hostname1.ip
8768     else:
8769       self.check_ip = None
8770
8771     # file storage checks
8772     if (self.op.file_driver and
8773         not self.op.file_driver in constants.FILE_DRIVER):
8774       raise errors.OpPrereqError("Invalid file driver name '%s'" %
8775                                  self.op.file_driver, errors.ECODE_INVAL)
8776
8777     if self.op.disk_template == constants.DT_FILE:
8778       opcodes.RequireFileStorage()
8779     elif self.op.disk_template == constants.DT_SHARED_FILE:
8780       opcodes.RequireSharedFileStorage()
8781
8782     ### Node/iallocator related checks
8783     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8784
8785     if self.op.pnode is not None:
8786       if self.op.disk_template in constants.DTS_INT_MIRROR:
8787         if self.op.snode is None:
8788           raise errors.OpPrereqError("The networked disk templates need"
8789                                      " a mirror node", errors.ECODE_INVAL)
8790       elif self.op.snode:
8791         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8792                         " template")
8793         self.op.snode = None
8794
8795     self._cds = _GetClusterDomainSecret()
8796
8797     if self.op.mode == constants.INSTANCE_IMPORT:
8798       # On import force_variant must be True, because if we forced it at
8799       # initial install, our only chance when importing it back is that it
8800       # works again!
8801       self.op.force_variant = True
8802
8803       if self.op.no_install:
8804         self.LogInfo("No-installation mode has no effect during import")
8805
8806     elif self.op.mode == constants.INSTANCE_CREATE:
8807       if self.op.os_type is None:
8808         raise errors.OpPrereqError("No guest OS specified",
8809                                    errors.ECODE_INVAL)
8810       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8811         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8812                                    " installation" % self.op.os_type,
8813                                    errors.ECODE_STATE)
8814       if self.op.disk_template is None:
8815         raise errors.OpPrereqError("No disk template specified",
8816                                    errors.ECODE_INVAL)
8817
8818     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8819       # Check handshake to ensure both clusters have the same domain secret
8820       src_handshake = self.op.source_handshake
8821       if not src_handshake:
8822         raise errors.OpPrereqError("Missing source handshake",
8823                                    errors.ECODE_INVAL)
8824
8825       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8826                                                            src_handshake)
8827       if errmsg:
8828         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8829                                    errors.ECODE_INVAL)
8830
8831       # Load and check source CA
8832       self.source_x509_ca_pem = self.op.source_x509_ca
8833       if not self.source_x509_ca_pem:
8834         raise errors.OpPrereqError("Missing source X509 CA",
8835                                    errors.ECODE_INVAL)
8836
8837       try:
8838         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8839                                                     self._cds)
8840       except OpenSSL.crypto.Error, err:
8841         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8842                                    (err, ), errors.ECODE_INVAL)
8843
8844       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8845       if errcode is not None:
8846         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8847                                    errors.ECODE_INVAL)
8848
8849       self.source_x509_ca = cert
8850
8851       src_instance_name = self.op.source_instance_name
8852       if not src_instance_name:
8853         raise errors.OpPrereqError("Missing source instance name",
8854                                    errors.ECODE_INVAL)
8855
8856       self.source_instance_name = \
8857           netutils.GetHostname(name=src_instance_name).name
8858
8859     else:
8860       raise errors.OpPrereqError("Invalid instance creation mode %r" %
8861                                  self.op.mode, errors.ECODE_INVAL)
8862
8863   def ExpandNames(self):
8864     """ExpandNames for CreateInstance.
8865
8866     Figure out the right locks for instance creation.
8867
8868     """
8869     self.needed_locks = {}
8870
8871     instance_name = self.op.instance_name
8872     # this is just a preventive check, but someone might still add this
8873     # instance in the meantime, and creation will fail at lock-add time
8874     if instance_name in self.cfg.GetInstanceList():
8875       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8876                                  instance_name, errors.ECODE_EXISTS)
8877
8878     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8879
8880     if self.op.iallocator:
8881       # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8882       # specifying a group on instance creation and then selecting nodes from
8883       # that group
8884       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8885       self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8886     else:
8887       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8888       nodelist = [self.op.pnode]
8889       if self.op.snode is not None:
8890         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8891         nodelist.append(self.op.snode)
8892       self.needed_locks[locking.LEVEL_NODE] = nodelist
8893       # Lock resources of instance's primary and secondary nodes (copy to
8894       # prevent accidential modification)
8895       self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8896
8897     # in case of import lock the source node too
8898     if self.op.mode == constants.INSTANCE_IMPORT:
8899       src_node = self.op.src_node
8900       src_path = self.op.src_path
8901
8902       if src_path is None:
8903         self.op.src_path = src_path = self.op.instance_name
8904
8905       if src_node is None:
8906         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8907         self.op.src_node = None
8908         if os.path.isabs(src_path):
8909           raise errors.OpPrereqError("Importing an instance from a path"
8910                                      " requires a source node option",
8911                                      errors.ECODE_INVAL)
8912       else:
8913         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8914         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8915           self.needed_locks[locking.LEVEL_NODE].append(src_node)
8916         if not os.path.isabs(src_path):
8917           self.op.src_path = src_path = \
8918             utils.PathJoin(constants.EXPORT_DIR, src_path)
8919
8920   def _RunAllocator(self):
8921     """Run the allocator based on input opcode.
8922
8923     """
8924     nics = [n.ToDict() for n in self.nics]
8925     ial = IAllocator(self.cfg, self.rpc,
8926                      mode=constants.IALLOCATOR_MODE_ALLOC,
8927                      name=self.op.instance_name,
8928                      disk_template=self.op.disk_template,
8929                      tags=self.op.tags,
8930                      os=self.op.os_type,
8931                      vcpus=self.be_full[constants.BE_VCPUS],
8932                      memory=self.be_full[constants.BE_MAXMEM],
8933                      disks=self.disks,
8934                      nics=nics,
8935                      hypervisor=self.op.hypervisor,
8936                      )
8937
8938     ial.Run(self.op.iallocator)
8939
8940     if not ial.success:
8941       raise errors.OpPrereqError("Can't compute nodes using"
8942                                  " iallocator '%s': %s" %
8943                                  (self.op.iallocator, ial.info),
8944                                  errors.ECODE_NORES)
8945     if len(ial.result) != ial.required_nodes:
8946       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8947                                  " of nodes (%s), required %s" %
8948                                  (self.op.iallocator, len(ial.result),
8949                                   ial.required_nodes), errors.ECODE_FAULT)
8950     self.op.pnode = ial.result[0]
8951     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8952                  self.op.instance_name, self.op.iallocator,
8953                  utils.CommaJoin(ial.result))
8954     if ial.required_nodes == 2:
8955       self.op.snode = ial.result[1]
8956
8957   def BuildHooksEnv(self):
8958     """Build hooks env.
8959
8960     This runs on master, primary and secondary nodes of the instance.
8961
8962     """
8963     env = {
8964       "ADD_MODE": self.op.mode,
8965       }
8966     if self.op.mode == constants.INSTANCE_IMPORT:
8967       env["SRC_NODE"] = self.op.src_node
8968       env["SRC_PATH"] = self.op.src_path
8969       env["SRC_IMAGES"] = self.src_images
8970
8971     env.update(_BuildInstanceHookEnv(
8972       name=self.op.instance_name,
8973       primary_node=self.op.pnode,
8974       secondary_nodes=self.secondaries,
8975       status=self.op.start,
8976       os_type=self.op.os_type,
8977       minmem=self.be_full[constants.BE_MINMEM],
8978       maxmem=self.be_full[constants.BE_MAXMEM],
8979       vcpus=self.be_full[constants.BE_VCPUS],
8980       nics=_NICListToTuple(self, self.nics),
8981       disk_template=self.op.disk_template,
8982       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8983              for d in self.disks],
8984       bep=self.be_full,
8985       hvp=self.hv_full,
8986       hypervisor_name=self.op.hypervisor,
8987       tags=self.op.tags,
8988     ))
8989
8990     return env
8991
8992   def BuildHooksNodes(self):
8993     """Build hooks nodes.
8994
8995     """
8996     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8997     return nl, nl
8998
8999   def _ReadExportInfo(self):
9000     """Reads the export information from disk.
9001
9002     It will override the opcode source node and path with the actual
9003     information, if these two were not specified before.
9004
9005     @return: the export information
9006
9007     """
9008     assert self.op.mode == constants.INSTANCE_IMPORT
9009
9010     src_node = self.op.src_node
9011     src_path = self.op.src_path
9012
9013     if src_node is None:
9014       locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9015       exp_list = self.rpc.call_export_list(locked_nodes)
9016       found = False
9017       for node in exp_list:
9018         if exp_list[node].fail_msg:
9019           continue
9020         if src_path in exp_list[node].payload:
9021           found = True
9022           self.op.src_node = src_node = node
9023           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9024                                                        src_path)
9025           break
9026       if not found:
9027         raise errors.OpPrereqError("No export found for relative path %s" %
9028                                     src_path, errors.ECODE_INVAL)
9029
9030     _CheckNodeOnline(self, src_node)
9031     result = self.rpc.call_export_info(src_node, src_path)
9032     result.Raise("No export or invalid export found in dir %s" % src_path)
9033
9034     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9035     if not export_info.has_section(constants.INISECT_EXP):
9036       raise errors.ProgrammerError("Corrupted export config",
9037                                    errors.ECODE_ENVIRON)
9038
9039     ei_version = export_info.get(constants.INISECT_EXP, "version")
9040     if (int(ei_version) != constants.EXPORT_VERSION):
9041       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9042                                  (ei_version, constants.EXPORT_VERSION),
9043                                  errors.ECODE_ENVIRON)
9044     return export_info
9045
9046   def _ReadExportParams(self, einfo):
9047     """Use export parameters as defaults.
9048
9049     In case the opcode doesn't specify (as in override) some instance
9050     parameters, then try to use them from the export information, if
9051     that declares them.
9052
9053     """
9054     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9055
9056     if self.op.disk_template is None:
9057       if einfo.has_option(constants.INISECT_INS, "disk_template"):
9058         self.op.disk_template = einfo.get(constants.INISECT_INS,
9059                                           "disk_template")
9060         if self.op.disk_template not in constants.DISK_TEMPLATES:
9061           raise errors.OpPrereqError("Disk template specified in configuration"
9062                                      " file is not one of the allowed values:"
9063                                      " %s" % " ".join(constants.DISK_TEMPLATES))
9064       else:
9065         raise errors.OpPrereqError("No disk template specified and the export"
9066                                    " is missing the disk_template information",
9067                                    errors.ECODE_INVAL)
9068
9069     if not self.op.disks:
9070       disks = []
9071       # TODO: import the disk iv_name too
9072       for idx in range(constants.MAX_DISKS):
9073         if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9074           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9075           disks.append({constants.IDISK_SIZE: disk_sz})
9076       self.op.disks = disks
9077       if not disks and self.op.disk_template != constants.DT_DISKLESS:
9078         raise errors.OpPrereqError("No disk info specified and the export"
9079                                    " is missing the disk information",
9080                                    errors.ECODE_INVAL)
9081
9082     if not self.op.nics:
9083       nics = []
9084       for idx in range(constants.MAX_NICS):
9085         if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9086           ndict = {}
9087           for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9088             v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9089             ndict[name] = v
9090           nics.append(ndict)
9091         else:
9092           break
9093       self.op.nics = nics
9094
9095     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9096       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9097
9098     if (self.op.hypervisor is None and
9099         einfo.has_option(constants.INISECT_INS, "hypervisor")):
9100       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9101
9102     if einfo.has_section(constants.INISECT_HYP):
9103       # use the export parameters but do not override the ones
9104       # specified by the user
9105       for name, value in einfo.items(constants.INISECT_HYP):
9106         if name not in self.op.hvparams:
9107           self.op.hvparams[name] = value
9108
9109     if einfo.has_section(constants.INISECT_BEP):
9110       # use the parameters, without overriding
9111       for name, value in einfo.items(constants.INISECT_BEP):
9112         if name not in self.op.beparams:
9113           self.op.beparams[name] = value
9114         # Compatibility for the old "memory" be param
9115         if name == constants.BE_MEMORY:
9116           if constants.BE_MAXMEM not in self.op.beparams:
9117             self.op.beparams[constants.BE_MAXMEM] = value
9118           if constants.BE_MINMEM not in self.op.beparams:
9119             self.op.beparams[constants.BE_MINMEM] = value
9120     else:
9121       # try to read the parameters old style, from the main section
9122       for name in constants.BES_PARAMETERS:
9123         if (name not in self.op.beparams and
9124             einfo.has_option(constants.INISECT_INS, name)):
9125           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9126
9127     if einfo.has_section(constants.INISECT_OSP):
9128       # use the parameters, without overriding
9129       for name, value in einfo.items(constants.INISECT_OSP):
9130         if name not in self.op.osparams:
9131           self.op.osparams[name] = value
9132
9133   def _RevertToDefaults(self, cluster):
9134     """Revert the instance parameters to the default values.
9135
9136     """
9137     # hvparams
9138     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9139     for name in self.op.hvparams.keys():
9140       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9141         del self.op.hvparams[name]
9142     # beparams
9143     be_defs = cluster.SimpleFillBE({})
9144     for name in self.op.beparams.keys():
9145       if name in be_defs and be_defs[name] == self.op.beparams[name]:
9146         del self.op.beparams[name]
9147     # nic params
9148     nic_defs = cluster.SimpleFillNIC({})
9149     for nic in self.op.nics:
9150       for name in constants.NICS_PARAMETERS:
9151         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9152           del nic[name]
9153     # osparams
9154     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9155     for name in self.op.osparams.keys():
9156       if name in os_defs and os_defs[name] == self.op.osparams[name]:
9157         del self.op.osparams[name]
9158
9159   def _CalculateFileStorageDir(self):
9160     """Calculate final instance file storage dir.
9161
9162     """
9163     # file storage dir calculation/check
9164     self.instance_file_storage_dir = None
9165     if self.op.disk_template in constants.DTS_FILEBASED:
9166       # build the full file storage dir path
9167       joinargs = []
9168
9169       if self.op.disk_template == constants.DT_SHARED_FILE:
9170         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9171       else:
9172         get_fsd_fn = self.cfg.GetFileStorageDir
9173
9174       cfg_storagedir = get_fsd_fn()
9175       if not cfg_storagedir:
9176         raise errors.OpPrereqError("Cluster file storage dir not defined")
9177       joinargs.append(cfg_storagedir)
9178
9179       if self.op.file_storage_dir is not None:
9180         joinargs.append(self.op.file_storage_dir)
9181
9182       joinargs.append(self.op.instance_name)
9183
9184       # pylint: disable=W0142
9185       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9186
9187   def CheckPrereq(self):
9188     """Check prerequisites.
9189
9190     """
9191     self._CalculateFileStorageDir()
9192
9193     if self.op.mode == constants.INSTANCE_IMPORT:
9194       export_info = self._ReadExportInfo()
9195       self._ReadExportParams(export_info)
9196
9197     if (not self.cfg.GetVGName() and
9198         self.op.disk_template not in constants.DTS_NOT_LVM):
9199       raise errors.OpPrereqError("Cluster does not support lvm-based"
9200                                  " instances", errors.ECODE_STATE)
9201
9202     if (self.op.hypervisor is None or
9203         self.op.hypervisor == constants.VALUE_AUTO):
9204       self.op.hypervisor = self.cfg.GetHypervisorType()
9205
9206     cluster = self.cfg.GetClusterInfo()
9207     enabled_hvs = cluster.enabled_hypervisors
9208     if self.op.hypervisor not in enabled_hvs:
9209       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9210                                  " cluster (%s)" % (self.op.hypervisor,
9211                                   ",".join(enabled_hvs)),
9212                                  errors.ECODE_STATE)
9213
9214     # Check tag validity
9215     for tag in self.op.tags:
9216       objects.TaggableObject.ValidateTag(tag)
9217
9218     # check hypervisor parameter syntax (locally)
9219     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9220     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9221                                       self.op.hvparams)
9222     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9223     hv_type.CheckParameterSyntax(filled_hvp)
9224     self.hv_full = filled_hvp
9225     # check that we don't specify global parameters on an instance
9226     _CheckGlobalHvParams(self.op.hvparams)
9227
9228     # fill and remember the beparams dict
9229     default_beparams = cluster.beparams[constants.PP_DEFAULT]
9230     for param, value in self.op.beparams.iteritems():
9231       if value == constants.VALUE_AUTO:
9232         self.op.beparams[param] = default_beparams[param]
9233     objects.UpgradeBeParams(self.op.beparams)
9234     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9235     self.be_full = cluster.SimpleFillBE(self.op.beparams)
9236
9237     # build os parameters
9238     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9239
9240     # now that hvp/bep are in final format, let's reset to defaults,
9241     # if told to do so
9242     if self.op.identify_defaults:
9243       self._RevertToDefaults(cluster)
9244
9245     # NIC buildup
9246     self.nics = []
9247     for idx, nic in enumerate(self.op.nics):
9248       nic_mode_req = nic.get(constants.INIC_MODE, None)
9249       nic_mode = nic_mode_req
9250       if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9251         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9252
9253       # in routed mode, for the first nic, the default ip is 'auto'
9254       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9255         default_ip_mode = constants.VALUE_AUTO
9256       else:
9257         default_ip_mode = constants.VALUE_NONE
9258
9259       # ip validity checks
9260       ip = nic.get(constants.INIC_IP, default_ip_mode)
9261       if ip is None or ip.lower() == constants.VALUE_NONE:
9262         nic_ip = None
9263       elif ip.lower() == constants.VALUE_AUTO:
9264         if not self.op.name_check:
9265           raise errors.OpPrereqError("IP address set to auto but name checks"
9266                                      " have been skipped",
9267                                      errors.ECODE_INVAL)
9268         nic_ip = self.hostname1.ip
9269       else:
9270         if not netutils.IPAddress.IsValid(ip):
9271           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9272                                      errors.ECODE_INVAL)
9273         nic_ip = ip
9274
9275       # TODO: check the ip address for uniqueness
9276       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9277         raise errors.OpPrereqError("Routed nic mode requires an ip address",
9278                                    errors.ECODE_INVAL)
9279
9280       # MAC address verification
9281       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9282       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9283         mac = utils.NormalizeAndValidateMac(mac)
9284
9285         try:
9286           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9287         except errors.ReservationError:
9288           raise errors.OpPrereqError("MAC address %s already in use"
9289                                      " in cluster" % mac,
9290                                      errors.ECODE_NOTUNIQUE)
9291
9292       #  Build nic parameters
9293       link = nic.get(constants.INIC_LINK, None)
9294       if link == constants.VALUE_AUTO:
9295         link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9296       nicparams = {}
9297       if nic_mode_req:
9298         nicparams[constants.NIC_MODE] = nic_mode
9299       if link:
9300         nicparams[constants.NIC_LINK] = link
9301
9302       check_params = cluster.SimpleFillNIC(nicparams)
9303       objects.NIC.CheckParameterSyntax(check_params)
9304       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9305
9306     # disk checks/pre-build
9307     default_vg = self.cfg.GetVGName()
9308     self.disks = []
9309     for disk in self.op.disks:
9310       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9311       if mode not in constants.DISK_ACCESS_SET:
9312         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9313                                    mode, errors.ECODE_INVAL)
9314       size = disk.get(constants.IDISK_SIZE, None)
9315       if size is None:
9316         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9317       try:
9318         size = int(size)
9319       except (TypeError, ValueError):
9320         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9321                                    errors.ECODE_INVAL)
9322
9323       data_vg = disk.get(constants.IDISK_VG, default_vg)
9324       new_disk = {
9325         constants.IDISK_SIZE: size,
9326         constants.IDISK_MODE: mode,
9327         constants.IDISK_VG: data_vg,
9328         }
9329       if constants.IDISK_METAVG in disk:
9330         new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9331       if constants.IDISK_ADOPT in disk:
9332         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9333       self.disks.append(new_disk)
9334
9335     if self.op.mode == constants.INSTANCE_IMPORT:
9336       disk_images = []
9337       for idx in range(len(self.disks)):
9338         option = "disk%d_dump" % idx
9339         if export_info.has_option(constants.INISECT_INS, option):
9340           # FIXME: are the old os-es, disk sizes, etc. useful?
9341           export_name = export_info.get(constants.INISECT_INS, option)
9342           image = utils.PathJoin(self.op.src_path, export_name)
9343           disk_images.append(image)
9344         else:
9345           disk_images.append(False)
9346
9347       self.src_images = disk_images
9348
9349       old_name = export_info.get(constants.INISECT_INS, "name")
9350       if self.op.instance_name == old_name:
9351         for idx, nic in enumerate(self.nics):
9352           if nic.mac == constants.VALUE_AUTO:
9353             nic_mac_ini = "nic%d_mac" % idx
9354             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9355
9356     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9357
9358     # ip ping checks (we use the same ip that was resolved in ExpandNames)
9359     if self.op.ip_check:
9360       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9361         raise errors.OpPrereqError("IP %s of instance %s already in use" %
9362                                    (self.check_ip, self.op.instance_name),
9363                                    errors.ECODE_NOTUNIQUE)
9364
9365     #### mac address generation
9366     # By generating here the mac address both the allocator and the hooks get
9367     # the real final mac address rather than the 'auto' or 'generate' value.
9368     # There is a race condition between the generation and the instance object
9369     # creation, which means that we know the mac is valid now, but we're not
9370     # sure it will be when we actually add the instance. If things go bad
9371     # adding the instance will abort because of a duplicate mac, and the
9372     # creation job will fail.
9373     for nic in self.nics:
9374       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9375         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9376
9377     #### allocator run
9378
9379     if self.op.iallocator is not None:
9380       self._RunAllocator()
9381
9382     # Release all unneeded node locks
9383     _ReleaseLocks(self, locking.LEVEL_NODE,
9384                   keep=filter(None, [self.op.pnode, self.op.snode,
9385                                      self.op.src_node]))
9386
9387     #### node related checks
9388
9389     # check primary node
9390     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9391     assert self.pnode is not None, \
9392       "Cannot retrieve locked node %s" % self.op.pnode
9393     if pnode.offline:
9394       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9395                                  pnode.name, errors.ECODE_STATE)
9396     if pnode.drained:
9397       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9398                                  pnode.name, errors.ECODE_STATE)
9399     if not pnode.vm_capable:
9400       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9401                                  " '%s'" % pnode.name, errors.ECODE_STATE)
9402
9403     self.secondaries = []
9404
9405     # mirror node verification
9406     if self.op.disk_template in constants.DTS_INT_MIRROR:
9407       if self.op.snode == pnode.name:
9408         raise errors.OpPrereqError("The secondary node cannot be the"
9409                                    " primary node", errors.ECODE_INVAL)
9410       _CheckNodeOnline(self, self.op.snode)
9411       _CheckNodeNotDrained(self, self.op.snode)
9412       _CheckNodeVmCapable(self, self.op.snode)
9413       self.secondaries.append(self.op.snode)
9414
9415       snode = self.cfg.GetNodeInfo(self.op.snode)
9416       if pnode.group != snode.group:
9417         self.LogWarning("The primary and secondary nodes are in two"
9418                         " different node groups; the disk parameters"
9419                         " from the first disk's node group will be"
9420                         " used")
9421
9422     nodenames = [pnode.name] + self.secondaries
9423
9424     # disk parameters (not customizable at instance or node level)
9425     # just use the primary node parameters, ignoring the secondary.
9426     self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9427
9428     if not self.adopt_disks:
9429       # Check lv size requirements, if not adopting
9430       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9431       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9432
9433     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9434       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9435                                 disk[constants.IDISK_ADOPT])
9436                      for disk in self.disks])
9437       if len(all_lvs) != len(self.disks):
9438         raise errors.OpPrereqError("Duplicate volume names given for adoption",
9439                                    errors.ECODE_INVAL)
9440       for lv_name in all_lvs:
9441         try:
9442           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9443           # to ReserveLV uses the same syntax
9444           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9445         except errors.ReservationError:
9446           raise errors.OpPrereqError("LV named %s used by another instance" %
9447                                      lv_name, errors.ECODE_NOTUNIQUE)
9448
9449       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9450       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9451
9452       node_lvs = self.rpc.call_lv_list([pnode.name],
9453                                        vg_names.payload.keys())[pnode.name]
9454       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9455       node_lvs = node_lvs.payload
9456
9457       delta = all_lvs.difference(node_lvs.keys())
9458       if delta:
9459         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9460                                    utils.CommaJoin(delta),
9461                                    errors.ECODE_INVAL)
9462       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9463       if online_lvs:
9464         raise errors.OpPrereqError("Online logical volumes found, cannot"
9465                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9466                                    errors.ECODE_STATE)
9467       # update the size of disk based on what is found
9468       for dsk in self.disks:
9469         dsk[constants.IDISK_SIZE] = \
9470           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9471                                         dsk[constants.IDISK_ADOPT])][0]))
9472
9473     elif self.op.disk_template == constants.DT_BLOCK:
9474       # Normalize and de-duplicate device paths
9475       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9476                        for disk in self.disks])
9477       if len(all_disks) != len(self.disks):
9478         raise errors.OpPrereqError("Duplicate disk names given for adoption",
9479                                    errors.ECODE_INVAL)
9480       baddisks = [d for d in all_disks
9481                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9482       if baddisks:
9483         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9484                                    " cannot be adopted" %
9485                                    (", ".join(baddisks),
9486                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
9487                                    errors.ECODE_INVAL)
9488
9489       node_disks = self.rpc.call_bdev_sizes([pnode.name],
9490                                             list(all_disks))[pnode.name]
9491       node_disks.Raise("Cannot get block device information from node %s" %
9492                        pnode.name)
9493       node_disks = node_disks.payload
9494       delta = all_disks.difference(node_disks.keys())
9495       if delta:
9496         raise errors.OpPrereqError("Missing block device(s): %s" %
9497                                    utils.CommaJoin(delta),
9498                                    errors.ECODE_INVAL)
9499       for dsk in self.disks:
9500         dsk[constants.IDISK_SIZE] = \
9501           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9502
9503     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9504
9505     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9506     # check OS parameters (remotely)
9507     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9508
9509     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9510
9511     # memory check on primary node
9512     #TODO(dynmem): use MINMEM for checking
9513     if self.op.start:
9514       _CheckNodeFreeMemory(self, self.pnode.name,
9515                            "creating instance %s" % self.op.instance_name,
9516                            self.be_full[constants.BE_MAXMEM],
9517                            self.op.hypervisor)
9518
9519     self.dry_run_result = list(nodenames)
9520
9521   def Exec(self, feedback_fn):
9522     """Create and add the instance to the cluster.
9523
9524     """
9525     instance = self.op.instance_name
9526     pnode_name = self.pnode.name
9527
9528     assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9529                 self.owned_locks(locking.LEVEL_NODE)), \
9530       "Node locks differ from node resource locks"
9531
9532     ht_kind = self.op.hypervisor
9533     if ht_kind in constants.HTS_REQ_PORT:
9534       network_port = self.cfg.AllocatePort()
9535     else:
9536       network_port = None
9537
9538     disks = _GenerateDiskTemplate(self,
9539                                   self.op.disk_template,
9540                                   instance, pnode_name,
9541                                   self.secondaries,
9542                                   self.disks,
9543                                   self.instance_file_storage_dir,
9544                                   self.op.file_driver,
9545                                   0,
9546                                   feedback_fn,
9547                                   self.diskparams)
9548
9549     iobj = objects.Instance(name=instance, os=self.op.os_type,
9550                             primary_node=pnode_name,
9551                             nics=self.nics, disks=disks,
9552                             disk_template=self.op.disk_template,
9553                             admin_state=constants.ADMINST_DOWN,
9554                             network_port=network_port,
9555                             beparams=self.op.beparams,
9556                             hvparams=self.op.hvparams,
9557                             hypervisor=self.op.hypervisor,
9558                             osparams=self.op.osparams,
9559                             )
9560
9561     if self.op.tags:
9562       for tag in self.op.tags:
9563         iobj.AddTag(tag)
9564
9565     if self.adopt_disks:
9566       if self.op.disk_template == constants.DT_PLAIN:
9567         # rename LVs to the newly-generated names; we need to construct
9568         # 'fake' LV disks with the old data, plus the new unique_id
9569         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9570         rename_to = []
9571         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9572           rename_to.append(t_dsk.logical_id)
9573           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9574           self.cfg.SetDiskID(t_dsk, pnode_name)
9575         result = self.rpc.call_blockdev_rename(pnode_name,
9576                                                zip(tmp_disks, rename_to))
9577         result.Raise("Failed to rename adoped LVs")
9578     else:
9579       feedback_fn("* creating instance disks...")
9580       try:
9581         _CreateDisks(self, iobj)
9582       except errors.OpExecError:
9583         self.LogWarning("Device creation failed, reverting...")
9584         try:
9585           _RemoveDisks(self, iobj)
9586         finally:
9587           self.cfg.ReleaseDRBDMinors(instance)
9588           raise
9589
9590     feedback_fn("adding instance %s to cluster config" % instance)
9591
9592     self.cfg.AddInstance(iobj, self.proc.GetECId())
9593
9594     # Declare that we don't want to remove the instance lock anymore, as we've
9595     # added the instance to the config
9596     del self.remove_locks[locking.LEVEL_INSTANCE]
9597
9598     if self.op.mode == constants.INSTANCE_IMPORT:
9599       # Release unused nodes
9600       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9601     else:
9602       # Release all nodes
9603       _ReleaseLocks(self, locking.LEVEL_NODE)
9604
9605     disk_abort = False
9606     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9607       feedback_fn("* wiping instance disks...")
9608       try:
9609         _WipeDisks(self, iobj)
9610       except errors.OpExecError, err:
9611         logging.exception("Wiping disks failed")
9612         self.LogWarning("Wiping instance disks failed (%s)", err)
9613         disk_abort = True
9614
9615     if disk_abort:
9616       # Something is already wrong with the disks, don't do anything else
9617       pass
9618     elif self.op.wait_for_sync:
9619       disk_abort = not _WaitForSync(self, iobj)
9620     elif iobj.disk_template in constants.DTS_INT_MIRROR:
9621       # make sure the disks are not degraded (still sync-ing is ok)
9622       feedback_fn("* checking mirrors status")
9623       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9624     else:
9625       disk_abort = False
9626
9627     if disk_abort:
9628       _RemoveDisks(self, iobj)
9629       self.cfg.RemoveInstance(iobj.name)
9630       # Make sure the instance lock gets removed
9631       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9632       raise errors.OpExecError("There are some degraded disks for"
9633                                " this instance")
9634
9635     # Release all node resource locks
9636     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9637
9638     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9639       if self.op.mode == constants.INSTANCE_CREATE:
9640         if not self.op.no_install:
9641           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9642                         not self.op.wait_for_sync)
9643           if pause_sync:
9644             feedback_fn("* pausing disk sync to install instance OS")
9645             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9646                                                               iobj.disks, True)
9647             for idx, success in enumerate(result.payload):
9648               if not success:
9649                 logging.warn("pause-sync of instance %s for disk %d failed",
9650                              instance, idx)
9651
9652           feedback_fn("* running the instance OS create scripts...")
9653           # FIXME: pass debug option from opcode to backend
9654           os_add_result = \
9655             self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9656                                           self.op.debug_level)
9657           if pause_sync:
9658             feedback_fn("* resuming disk sync")
9659             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9660                                                               iobj.disks, False)
9661             for idx, success in enumerate(result.payload):
9662               if not success:
9663                 logging.warn("resume-sync of instance %s for disk %d failed",
9664                              instance, idx)
9665
9666           os_add_result.Raise("Could not add os for instance %s"
9667                               " on node %s" % (instance, pnode_name))
9668
9669       elif self.op.mode == constants.INSTANCE_IMPORT:
9670         feedback_fn("* running the instance OS import scripts...")
9671
9672         transfers = []
9673
9674         for idx, image in enumerate(self.src_images):
9675           if not image:
9676             continue
9677
9678           # FIXME: pass debug option from opcode to backend
9679           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9680                                              constants.IEIO_FILE, (image, ),
9681                                              constants.IEIO_SCRIPT,
9682                                              (iobj.disks[idx], idx),
9683                                              None)
9684           transfers.append(dt)
9685
9686         import_result = \
9687           masterd.instance.TransferInstanceData(self, feedback_fn,
9688                                                 self.op.src_node, pnode_name,
9689                                                 self.pnode.secondary_ip,
9690                                                 iobj, transfers)
9691         if not compat.all(import_result):
9692           self.LogWarning("Some disks for instance %s on node %s were not"
9693                           " imported successfully" % (instance, pnode_name))
9694
9695       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9696         feedback_fn("* preparing remote import...")
9697         # The source cluster will stop the instance before attempting to make a
9698         # connection. In some cases stopping an instance can take a long time,
9699         # hence the shutdown timeout is added to the connection timeout.
9700         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9701                            self.op.source_shutdown_timeout)
9702         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9703
9704         assert iobj.primary_node == self.pnode.name
9705         disk_results = \
9706           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9707                                         self.source_x509_ca,
9708                                         self._cds, timeouts)
9709         if not compat.all(disk_results):
9710           # TODO: Should the instance still be started, even if some disks
9711           # failed to import (valid for local imports, too)?
9712           self.LogWarning("Some disks for instance %s on node %s were not"
9713                           " imported successfully" % (instance, pnode_name))
9714
9715         # Run rename script on newly imported instance
9716         assert iobj.name == instance
9717         feedback_fn("Running rename script for %s" % instance)
9718         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9719                                                    self.source_instance_name,
9720                                                    self.op.debug_level)
9721         if result.fail_msg:
9722           self.LogWarning("Failed to run rename script for %s on node"
9723                           " %s: %s" % (instance, pnode_name, result.fail_msg))
9724
9725       else:
9726         # also checked in the prereq part
9727         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9728                                      % self.op.mode)
9729
9730     assert not self.owned_locks(locking.LEVEL_NODE_RES)
9731
9732     if self.op.start:
9733       iobj.admin_state = constants.ADMINST_UP
9734       self.cfg.Update(iobj, feedback_fn)
9735       logging.info("Starting instance %s on node %s", instance, pnode_name)
9736       feedback_fn("* starting instance...")
9737       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9738                                             False)
9739       result.Raise("Could not start instance")
9740
9741     return list(iobj.all_nodes)
9742
9743
9744 class LUInstanceConsole(NoHooksLU):
9745   """Connect to an instance's console.
9746
9747   This is somewhat special in that it returns the command line that
9748   you need to run on the master node in order to connect to the
9749   console.
9750
9751   """
9752   REQ_BGL = False
9753
9754   def ExpandNames(self):
9755     self.share_locks = _ShareAll()
9756     self._ExpandAndLockInstance()
9757
9758   def CheckPrereq(self):
9759     """Check prerequisites.
9760
9761     This checks that the instance is in the cluster.
9762
9763     """
9764     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9765     assert self.instance is not None, \
9766       "Cannot retrieve locked instance %s" % self.op.instance_name
9767     _CheckNodeOnline(self, self.instance.primary_node)
9768
9769   def Exec(self, feedback_fn):
9770     """Connect to the console of an instance
9771
9772     """
9773     instance = self.instance
9774     node = instance.primary_node
9775
9776     node_insts = self.rpc.call_instance_list([node],
9777                                              [instance.hypervisor])[node]
9778     node_insts.Raise("Can't get node information from %s" % node)
9779
9780     if instance.name not in node_insts.payload:
9781       if instance.admin_state == constants.ADMINST_UP:
9782         state = constants.INSTST_ERRORDOWN
9783       elif instance.admin_state == constants.ADMINST_DOWN:
9784         state = constants.INSTST_ADMINDOWN
9785       else:
9786         state = constants.INSTST_ADMINOFFLINE
9787       raise errors.OpExecError("Instance %s is not running (state %s)" %
9788                                (instance.name, state))
9789
9790     logging.debug("Connecting to console of %s on %s", instance.name, node)
9791
9792     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9793
9794
9795 def _GetInstanceConsole(cluster, instance):
9796   """Returns console information for an instance.
9797
9798   @type cluster: L{objects.Cluster}
9799   @type instance: L{objects.Instance}
9800   @rtype: dict
9801
9802   """
9803   hyper = hypervisor.GetHypervisor(instance.hypervisor)
9804   # beparams and hvparams are passed separately, to avoid editing the
9805   # instance and then saving the defaults in the instance itself.
9806   hvparams = cluster.FillHV(instance)
9807   beparams = cluster.FillBE(instance)
9808   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9809
9810   assert console.instance == instance.name
9811   assert console.Validate()
9812
9813   return console.ToDict()
9814
9815
9816 class LUInstanceReplaceDisks(LogicalUnit):
9817   """Replace the disks of an instance.
9818
9819   """
9820   HPATH = "mirrors-replace"
9821   HTYPE = constants.HTYPE_INSTANCE
9822   REQ_BGL = False
9823
9824   def CheckArguments(self):
9825     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9826                                   self.op.iallocator)
9827
9828   def ExpandNames(self):
9829     self._ExpandAndLockInstance()
9830
9831     assert locking.LEVEL_NODE not in self.needed_locks
9832     assert locking.LEVEL_NODE_RES not in self.needed_locks
9833     assert locking.LEVEL_NODEGROUP not in self.needed_locks
9834
9835     assert self.op.iallocator is None or self.op.remote_node is None, \
9836       "Conflicting options"
9837
9838     if self.op.remote_node is not None:
9839       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9840
9841       # Warning: do not remove the locking of the new secondary here
9842       # unless DRBD8.AddChildren is changed to work in parallel;
9843       # currently it doesn't since parallel invocations of
9844       # FindUnusedMinor will conflict
9845       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9846       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9847     else:
9848       self.needed_locks[locking.LEVEL_NODE] = []
9849       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9850
9851       if self.op.iallocator is not None:
9852         # iallocator will select a new node in the same group
9853         self.needed_locks[locking.LEVEL_NODEGROUP] = []
9854
9855     self.needed_locks[locking.LEVEL_NODE_RES] = []
9856
9857     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9858                                    self.op.iallocator, self.op.remote_node,
9859                                    self.op.disks, False, self.op.early_release)
9860
9861     self.tasklets = [self.replacer]
9862
9863   def DeclareLocks(self, level):
9864     if level == locking.LEVEL_NODEGROUP:
9865       assert self.op.remote_node is None
9866       assert self.op.iallocator is not None
9867       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9868
9869       self.share_locks[locking.LEVEL_NODEGROUP] = 1
9870       # Lock all groups used by instance optimistically; this requires going
9871       # via the node before it's locked, requiring verification later on
9872       self.needed_locks[locking.LEVEL_NODEGROUP] = \
9873         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9874
9875     elif level == locking.LEVEL_NODE:
9876       if self.op.iallocator is not None:
9877         assert self.op.remote_node is None
9878         assert not self.needed_locks[locking.LEVEL_NODE]
9879
9880         # Lock member nodes of all locked groups
9881         self.needed_locks[locking.LEVEL_NODE] = [node_name
9882           for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9883           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9884       else:
9885         self._LockInstancesNodes()
9886     elif level == locking.LEVEL_NODE_RES:
9887       # Reuse node locks
9888       self.needed_locks[locking.LEVEL_NODE_RES] = \
9889         self.needed_locks[locking.LEVEL_NODE]
9890
9891   def BuildHooksEnv(self):
9892     """Build hooks env.
9893
9894     This runs on the master, the primary and all the secondaries.
9895
9896     """
9897     instance = self.replacer.instance
9898     env = {
9899       "MODE": self.op.mode,
9900       "NEW_SECONDARY": self.op.remote_node,
9901       "OLD_SECONDARY": instance.secondary_nodes[0],
9902       }
9903     env.update(_BuildInstanceHookEnvByObject(self, instance))
9904     return env
9905
9906   def BuildHooksNodes(self):
9907     """Build hooks nodes.
9908
9909     """
9910     instance = self.replacer.instance
9911     nl = [
9912       self.cfg.GetMasterNode(),
9913       instance.primary_node,
9914       ]
9915     if self.op.remote_node is not None:
9916       nl.append(self.op.remote_node)
9917     return nl, nl
9918
9919   def CheckPrereq(self):
9920     """Check prerequisites.
9921
9922     """
9923     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9924             self.op.iallocator is None)
9925
9926     # Verify if node group locks are still correct
9927     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9928     if owned_groups:
9929       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9930
9931     return LogicalUnit.CheckPrereq(self)
9932
9933
9934 class TLReplaceDisks(Tasklet):
9935   """Replaces disks for an instance.
9936
9937   Note: Locking is not within the scope of this class.
9938
9939   """
9940   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9941                disks, delay_iallocator, early_release):
9942     """Initializes this class.
9943
9944     """
9945     Tasklet.__init__(self, lu)
9946
9947     # Parameters
9948     self.instance_name = instance_name
9949     self.mode = mode
9950     self.iallocator_name = iallocator_name
9951     self.remote_node = remote_node
9952     self.disks = disks
9953     self.delay_iallocator = delay_iallocator
9954     self.early_release = early_release
9955
9956     # Runtime data
9957     self.instance = None
9958     self.new_node = None
9959     self.target_node = None
9960     self.other_node = None
9961     self.remote_node_info = None
9962     self.node_secondary_ip = None
9963
9964   @staticmethod
9965   def CheckArguments(mode, remote_node, iallocator):
9966     """Helper function for users of this class.
9967
9968     """
9969     # check for valid parameter combination
9970     if mode == constants.REPLACE_DISK_CHG:
9971       if remote_node is None and iallocator is None:
9972         raise errors.OpPrereqError("When changing the secondary either an"
9973                                    " iallocator script must be used or the"
9974                                    " new node given", errors.ECODE_INVAL)
9975
9976       if remote_node is not None and iallocator is not None:
9977         raise errors.OpPrereqError("Give either the iallocator or the new"
9978                                    " secondary, not both", errors.ECODE_INVAL)
9979
9980     elif remote_node is not None or iallocator is not None:
9981       # Not replacing the secondary
9982       raise errors.OpPrereqError("The iallocator and new node options can"
9983                                  " only be used when changing the"
9984                                  " secondary node", errors.ECODE_INVAL)
9985
9986   @staticmethod
9987   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9988     """Compute a new secondary node using an IAllocator.
9989
9990     """
9991     ial = IAllocator(lu.cfg, lu.rpc,
9992                      mode=constants.IALLOCATOR_MODE_RELOC,
9993                      name=instance_name,
9994                      relocate_from=list(relocate_from))
9995
9996     ial.Run(iallocator_name)
9997
9998     if not ial.success:
9999       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10000                                  " %s" % (iallocator_name, ial.info),
10001                                  errors.ECODE_NORES)
10002
10003     if len(ial.result) != ial.required_nodes:
10004       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10005                                  " of nodes (%s), required %s" %
10006                                  (iallocator_name,
10007                                   len(ial.result), ial.required_nodes),
10008                                  errors.ECODE_FAULT)
10009
10010     remote_node_name = ial.result[0]
10011
10012     lu.LogInfo("Selected new secondary for instance '%s': %s",
10013                instance_name, remote_node_name)
10014
10015     return remote_node_name
10016
10017   def _FindFaultyDisks(self, node_name):
10018     """Wrapper for L{_FindFaultyInstanceDisks}.
10019
10020     """
10021     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10022                                     node_name, True)
10023
10024   def _CheckDisksActivated(self, instance):
10025     """Checks if the instance disks are activated.
10026
10027     @param instance: The instance to check disks
10028     @return: True if they are activated, False otherwise
10029
10030     """
10031     nodes = instance.all_nodes
10032
10033     for idx, dev in enumerate(instance.disks):
10034       for node in nodes:
10035         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10036         self.cfg.SetDiskID(dev, node)
10037
10038         result = self.rpc.call_blockdev_find(node, dev)
10039
10040         if result.offline:
10041           continue
10042         elif result.fail_msg or not result.payload:
10043           return False
10044
10045     return True
10046
10047   def CheckPrereq(self):
10048     """Check prerequisites.
10049
10050     This checks that the instance is in the cluster.
10051
10052     """
10053     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10054     assert instance is not None, \
10055       "Cannot retrieve locked instance %s" % self.instance_name
10056
10057     if instance.disk_template != constants.DT_DRBD8:
10058       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10059                                  " instances", errors.ECODE_INVAL)
10060
10061     if len(instance.secondary_nodes) != 1:
10062       raise errors.OpPrereqError("The instance has a strange layout,"
10063                                  " expected one secondary but found %d" %
10064                                  len(instance.secondary_nodes),
10065                                  errors.ECODE_FAULT)
10066
10067     if not self.delay_iallocator:
10068       self._CheckPrereq2()
10069
10070   def _CheckPrereq2(self):
10071     """Check prerequisites, second part.
10072
10073     This function should always be part of CheckPrereq. It was separated and is
10074     now called from Exec because during node evacuation iallocator was only
10075     called with an unmodified cluster model, not taking planned changes into
10076     account.
10077
10078     """
10079     instance = self.instance
10080     secondary_node = instance.secondary_nodes[0]
10081
10082     if self.iallocator_name is None:
10083       remote_node = self.remote_node
10084     else:
10085       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10086                                        instance.name, instance.secondary_nodes)
10087
10088     if remote_node is None:
10089       self.remote_node_info = None
10090     else:
10091       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10092              "Remote node '%s' is not locked" % remote_node
10093
10094       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10095       assert self.remote_node_info is not None, \
10096         "Cannot retrieve locked node %s" % remote_node
10097
10098     if remote_node == self.instance.primary_node:
10099       raise errors.OpPrereqError("The specified node is the primary node of"
10100                                  " the instance", errors.ECODE_INVAL)
10101
10102     if remote_node == secondary_node:
10103       raise errors.OpPrereqError("The specified node is already the"
10104                                  " secondary node of the instance",
10105                                  errors.ECODE_INVAL)
10106
10107     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10108                                     constants.REPLACE_DISK_CHG):
10109       raise errors.OpPrereqError("Cannot specify disks to be replaced",
10110                                  errors.ECODE_INVAL)
10111
10112     if self.mode == constants.REPLACE_DISK_AUTO:
10113       if not self._CheckDisksActivated(instance):
10114         raise errors.OpPrereqError("Please run activate-disks on instance %s"
10115                                    " first" % self.instance_name,
10116                                    errors.ECODE_STATE)
10117       faulty_primary = self._FindFaultyDisks(instance.primary_node)
10118       faulty_secondary = self._FindFaultyDisks(secondary_node)
10119
10120       if faulty_primary and faulty_secondary:
10121         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10122                                    " one node and can not be repaired"
10123                                    " automatically" % self.instance_name,
10124                                    errors.ECODE_STATE)
10125
10126       if faulty_primary:
10127         self.disks = faulty_primary
10128         self.target_node = instance.primary_node
10129         self.other_node = secondary_node
10130         check_nodes = [self.target_node, self.other_node]
10131       elif faulty_secondary:
10132         self.disks = faulty_secondary
10133         self.target_node = secondary_node
10134         self.other_node = instance.primary_node
10135         check_nodes = [self.target_node, self.other_node]
10136       else:
10137         self.disks = []
10138         check_nodes = []
10139
10140     else:
10141       # Non-automatic modes
10142       if self.mode == constants.REPLACE_DISK_PRI:
10143         self.target_node = instance.primary_node
10144         self.other_node = secondary_node
10145         check_nodes = [self.target_node, self.other_node]
10146
10147       elif self.mode == constants.REPLACE_DISK_SEC:
10148         self.target_node = secondary_node
10149         self.other_node = instance.primary_node
10150         check_nodes = [self.target_node, self.other_node]
10151
10152       elif self.mode == constants.REPLACE_DISK_CHG:
10153         self.new_node = remote_node
10154         self.other_node = instance.primary_node
10155         self.target_node = secondary_node
10156         check_nodes = [self.new_node, self.other_node]
10157
10158         _CheckNodeNotDrained(self.lu, remote_node)
10159         _CheckNodeVmCapable(self.lu, remote_node)
10160
10161         old_node_info = self.cfg.GetNodeInfo(secondary_node)
10162         assert old_node_info is not None
10163         if old_node_info.offline and not self.early_release:
10164           # doesn't make sense to delay the release
10165           self.early_release = True
10166           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10167                           " early-release mode", secondary_node)
10168
10169       else:
10170         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10171                                      self.mode)
10172
10173       # If not specified all disks should be replaced
10174       if not self.disks:
10175         self.disks = range(len(self.instance.disks))
10176
10177     # TODO: compute disk parameters
10178     primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10179     secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10180     if primary_node_info.group != secondary_node_info.group:
10181       self.lu.LogInfo("The instance primary and secondary nodes are in two"
10182                       " different node groups; the disk parameters of the"
10183                       " primary node's group will be applied.")
10184
10185     self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10186
10187     for node in check_nodes:
10188       _CheckNodeOnline(self.lu, node)
10189
10190     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10191                                                           self.other_node,
10192                                                           self.target_node]
10193                               if node_name is not None)
10194
10195     # Release unneeded node and node resource locks
10196     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10197     _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10198
10199     # Release any owned node group
10200     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10201       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10202
10203     # Check whether disks are valid
10204     for disk_idx in self.disks:
10205       instance.FindDisk(disk_idx)
10206
10207     # Get secondary node IP addresses
10208     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10209                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
10210
10211   def Exec(self, feedback_fn):
10212     """Execute disk replacement.
10213
10214     This dispatches the disk replacement to the appropriate handler.
10215
10216     """
10217     if self.delay_iallocator:
10218       self._CheckPrereq2()
10219
10220     if __debug__:
10221       # Verify owned locks before starting operation
10222       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10223       assert set(owned_nodes) == set(self.node_secondary_ip), \
10224           ("Incorrect node locks, owning %s, expected %s" %
10225            (owned_nodes, self.node_secondary_ip.keys()))
10226       assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10227               self.lu.owned_locks(locking.LEVEL_NODE_RES))
10228
10229       owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10230       assert list(owned_instances) == [self.instance_name], \
10231           "Instance '%s' not locked" % self.instance_name
10232
10233       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10234           "Should not own any node group lock at this point"
10235
10236     if not self.disks:
10237       feedback_fn("No disks need replacement")
10238       return
10239
10240     feedback_fn("Replacing disk(s) %s for %s" %
10241                 (utils.CommaJoin(self.disks), self.instance.name))
10242
10243     activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10244
10245     # Activate the instance disks if we're replacing them on a down instance
10246     if activate_disks:
10247       _StartInstanceDisks(self.lu, self.instance, True)
10248
10249     try:
10250       # Should we replace the secondary node?
10251       if self.new_node is not None:
10252         fn = self._ExecDrbd8Secondary
10253       else:
10254         fn = self._ExecDrbd8DiskOnly
10255
10256       result = fn(feedback_fn)
10257     finally:
10258       # Deactivate the instance disks if we're replacing them on a
10259       # down instance
10260       if activate_disks:
10261         _SafeShutdownInstanceDisks(self.lu, self.instance)
10262
10263     assert not self.lu.owned_locks(locking.LEVEL_NODE)
10264
10265     if __debug__:
10266       # Verify owned locks
10267       owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10268       nodes = frozenset(self.node_secondary_ip)
10269       assert ((self.early_release and not owned_nodes) or
10270               (not self.early_release and not (set(owned_nodes) - nodes))), \
10271         ("Not owning the correct locks, early_release=%s, owned=%r,"
10272          " nodes=%r" % (self.early_release, owned_nodes, nodes))
10273
10274     return result
10275
10276   def _CheckVolumeGroup(self, nodes):
10277     self.lu.LogInfo("Checking volume groups")
10278
10279     vgname = self.cfg.GetVGName()
10280
10281     # Make sure volume group exists on all involved nodes
10282     results = self.rpc.call_vg_list(nodes)
10283     if not results:
10284       raise errors.OpExecError("Can't list volume groups on the nodes")
10285
10286     for node in nodes:
10287       res = results[node]
10288       res.Raise("Error checking node %s" % node)
10289       if vgname not in res.payload:
10290         raise errors.OpExecError("Volume group '%s' not found on node %s" %
10291                                  (vgname, node))
10292
10293   def _CheckDisksExistence(self, nodes):
10294     # Check disk existence
10295     for idx, dev in enumerate(self.instance.disks):
10296       if idx not in self.disks:
10297         continue
10298
10299       for node in nodes:
10300         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10301         self.cfg.SetDiskID(dev, node)
10302
10303         result = self.rpc.call_blockdev_find(node, dev)
10304
10305         msg = result.fail_msg
10306         if msg or not result.payload:
10307           if not msg:
10308             msg = "disk not found"
10309           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10310                                    (idx, node, msg))
10311
10312   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10313     for idx, dev in enumerate(self.instance.disks):
10314       if idx not in self.disks:
10315         continue
10316
10317       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10318                       (idx, node_name))
10319
10320       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10321                                    ldisk=ldisk):
10322         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10323                                  " replace disks for instance %s" %
10324                                  (node_name, self.instance.name))
10325
10326   def _CreateNewStorage(self, node_name):
10327     """Create new storage on the primary or secondary node.
10328
10329     This is only used for same-node replaces, not for changing the
10330     secondary node, hence we don't want to modify the existing disk.
10331
10332     """
10333     iv_names = {}
10334
10335     for idx, dev in enumerate(self.instance.disks):
10336       if idx not in self.disks:
10337         continue
10338
10339       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10340
10341       self.cfg.SetDiskID(dev, node_name)
10342
10343       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10344       names = _GenerateUniqueNames(self.lu, lv_names)
10345
10346       _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10347
10348       vg_data = dev.children[0].logical_id[0]
10349       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10350                              logical_id=(vg_data, names[0]), params=data_p)
10351       vg_meta = dev.children[1].logical_id[0]
10352       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10353                              logical_id=(vg_meta, names[1]), params=meta_p)
10354
10355       new_lvs = [lv_data, lv_meta]
10356       old_lvs = [child.Copy() for child in dev.children]
10357       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10358
10359       # we pass force_create=True to force the LVM creation
10360       for new_lv in new_lvs:
10361         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10362                         _GetInstanceInfoText(self.instance), False)
10363
10364     return iv_names
10365
10366   def _CheckDevices(self, node_name, iv_names):
10367     for name, (dev, _, _) in iv_names.iteritems():
10368       self.cfg.SetDiskID(dev, node_name)
10369
10370       result = self.rpc.call_blockdev_find(node_name, dev)
10371
10372       msg = result.fail_msg
10373       if msg or not result.payload:
10374         if not msg:
10375           msg = "disk not found"
10376         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10377                                  (name, msg))
10378
10379       if result.payload.is_degraded:
10380         raise errors.OpExecError("DRBD device %s is degraded!" % name)
10381
10382   def _RemoveOldStorage(self, node_name, iv_names):
10383     for name, (_, old_lvs, _) in iv_names.iteritems():
10384       self.lu.LogInfo("Remove logical volumes for %s" % name)
10385
10386       for lv in old_lvs:
10387         self.cfg.SetDiskID(lv, node_name)
10388
10389         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10390         if msg:
10391           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10392                              hint="remove unused LVs manually")
10393
10394   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10395     """Replace a disk on the primary or secondary for DRBD 8.
10396
10397     The algorithm for replace is quite complicated:
10398
10399       1. for each disk to be replaced:
10400
10401         1. create new LVs on the target node with unique names
10402         1. detach old LVs from the drbd device
10403         1. rename old LVs to name_replaced.<time_t>
10404         1. rename new LVs to old LVs
10405         1. attach the new LVs (with the old names now) to the drbd device
10406
10407       1. wait for sync across all devices
10408
10409       1. for each modified disk:
10410
10411         1. remove old LVs (which have the name name_replaces.<time_t>)
10412
10413     Failures are not very well handled.
10414
10415     """
10416     steps_total = 6
10417
10418     # Step: check device activation
10419     self.lu.LogStep(1, steps_total, "Check device existence")
10420     self._CheckDisksExistence([self.other_node, self.target_node])
10421     self._CheckVolumeGroup([self.target_node, self.other_node])
10422
10423     # Step: check other node consistency
10424     self.lu.LogStep(2, steps_total, "Check peer consistency")
10425     self._CheckDisksConsistency(self.other_node,
10426                                 self.other_node == self.instance.primary_node,
10427                                 False)
10428
10429     # Step: create new storage
10430     self.lu.LogStep(3, steps_total, "Allocate new storage")
10431     iv_names = self._CreateNewStorage(self.target_node)
10432
10433     # Step: for each lv, detach+rename*2+attach
10434     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10435     for dev, old_lvs, new_lvs in iv_names.itervalues():
10436       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10437
10438       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10439                                                      old_lvs)
10440       result.Raise("Can't detach drbd from local storage on node"
10441                    " %s for device %s" % (self.target_node, dev.iv_name))
10442       #dev.children = []
10443       #cfg.Update(instance)
10444
10445       # ok, we created the new LVs, so now we know we have the needed
10446       # storage; as such, we proceed on the target node to rename
10447       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10448       # using the assumption that logical_id == physical_id (which in
10449       # turn is the unique_id on that node)
10450
10451       # FIXME(iustin): use a better name for the replaced LVs
10452       temp_suffix = int(time.time())
10453       ren_fn = lambda d, suff: (d.physical_id[0],
10454                                 d.physical_id[1] + "_replaced-%s" % suff)
10455
10456       # Build the rename list based on what LVs exist on the node
10457       rename_old_to_new = []
10458       for to_ren in old_lvs:
10459         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10460         if not result.fail_msg and result.payload:
10461           # device exists
10462           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10463
10464       self.lu.LogInfo("Renaming the old LVs on the target node")
10465       result = self.rpc.call_blockdev_rename(self.target_node,
10466                                              rename_old_to_new)
10467       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10468
10469       # Now we rename the new LVs to the old LVs
10470       self.lu.LogInfo("Renaming the new LVs on the target node")
10471       rename_new_to_old = [(new, old.physical_id)
10472                            for old, new in zip(old_lvs, new_lvs)]
10473       result = self.rpc.call_blockdev_rename(self.target_node,
10474                                              rename_new_to_old)
10475       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10476
10477       # Intermediate steps of in memory modifications
10478       for old, new in zip(old_lvs, new_lvs):
10479         new.logical_id = old.logical_id
10480         self.cfg.SetDiskID(new, self.target_node)
10481
10482       # We need to modify old_lvs so that removal later removes the
10483       # right LVs, not the newly added ones; note that old_lvs is a
10484       # copy here
10485       for disk in old_lvs:
10486         disk.logical_id = ren_fn(disk, temp_suffix)
10487         self.cfg.SetDiskID(disk, self.target_node)
10488
10489       # Now that the new lvs have the old name, we can add them to the device
10490       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10491       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10492                                                   new_lvs)
10493       msg = result.fail_msg
10494       if msg:
10495         for new_lv in new_lvs:
10496           msg2 = self.rpc.call_blockdev_remove(self.target_node,
10497                                                new_lv).fail_msg
10498           if msg2:
10499             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10500                                hint=("cleanup manually the unused logical"
10501                                      "volumes"))
10502         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10503
10504     cstep = itertools.count(5)
10505
10506     if self.early_release:
10507       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10508       self._RemoveOldStorage(self.target_node, iv_names)
10509       # TODO: Check if releasing locks early still makes sense
10510       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10511     else:
10512       # Release all resource locks except those used by the instance
10513       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10514                     keep=self.node_secondary_ip.keys())
10515
10516     # Release all node locks while waiting for sync
10517     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10518
10519     # TODO: Can the instance lock be downgraded here? Take the optional disk
10520     # shutdown in the caller into consideration.
10521
10522     # Wait for sync
10523     # This can fail as the old devices are degraded and _WaitForSync
10524     # does a combined result over all disks, so we don't check its return value
10525     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10526     _WaitForSync(self.lu, self.instance)
10527
10528     # Check all devices manually
10529     self._CheckDevices(self.instance.primary_node, iv_names)
10530
10531     # Step: remove old storage
10532     if not self.early_release:
10533       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10534       self._RemoveOldStorage(self.target_node, iv_names)
10535
10536   def _ExecDrbd8Secondary(self, feedback_fn):
10537     """Replace the secondary node for DRBD 8.
10538
10539     The algorithm for replace is quite complicated:
10540       - for all disks of the instance:
10541         - create new LVs on the new node with same names
10542         - shutdown the drbd device on the old secondary
10543         - disconnect the drbd network on the primary
10544         - create the drbd device on the new secondary
10545         - network attach the drbd on the primary, using an artifice:
10546           the drbd code for Attach() will connect to the network if it
10547           finds a device which is connected to the good local disks but
10548           not network enabled
10549       - wait for sync across all devices
10550       - remove all disks from the old secondary
10551
10552     Failures are not very well handled.
10553
10554     """
10555     steps_total = 6
10556
10557     pnode = self.instance.primary_node
10558
10559     # Step: check device activation
10560     self.lu.LogStep(1, steps_total, "Check device existence")
10561     self._CheckDisksExistence([self.instance.primary_node])
10562     self._CheckVolumeGroup([self.instance.primary_node])
10563
10564     # Step: check other node consistency
10565     self.lu.LogStep(2, steps_total, "Check peer consistency")
10566     self._CheckDisksConsistency(self.instance.primary_node, True, True)
10567
10568     # Step: create new storage
10569     self.lu.LogStep(3, steps_total, "Allocate new storage")
10570     for idx, dev in enumerate(self.instance.disks):
10571       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10572                       (self.new_node, idx))
10573       # we pass force_create=True to force LVM creation
10574       for new_lv in dev.children:
10575         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10576                         _GetInstanceInfoText(self.instance), False)
10577
10578     # Step 4: dbrd minors and drbd setups changes
10579     # after this, we must manually remove the drbd minors on both the
10580     # error and the success paths
10581     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10582     minors = self.cfg.AllocateDRBDMinor([self.new_node
10583                                          for dev in self.instance.disks],
10584                                         self.instance.name)
10585     logging.debug("Allocated minors %r", minors)
10586
10587     iv_names = {}
10588     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10589       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10590                       (self.new_node, idx))
10591       # create new devices on new_node; note that we create two IDs:
10592       # one without port, so the drbd will be activated without
10593       # networking information on the new node at this stage, and one
10594       # with network, for the latter activation in step 4
10595       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10596       if self.instance.primary_node == o_node1:
10597         p_minor = o_minor1
10598       else:
10599         assert self.instance.primary_node == o_node2, "Three-node instance?"
10600         p_minor = o_minor2
10601
10602       new_alone_id = (self.instance.primary_node, self.new_node, None,
10603                       p_minor, new_minor, o_secret)
10604       new_net_id = (self.instance.primary_node, self.new_node, o_port,
10605                     p_minor, new_minor, o_secret)
10606
10607       iv_names[idx] = (dev, dev.children, new_net_id)
10608       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10609                     new_net_id)
10610       drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10611       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10612                               logical_id=new_alone_id,
10613                               children=dev.children,
10614                               size=dev.size,
10615                               params=drbd_params)
10616       try:
10617         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10618                               _GetInstanceInfoText(self.instance), False)
10619       except errors.GenericError:
10620         self.cfg.ReleaseDRBDMinors(self.instance.name)
10621         raise
10622
10623     # We have new devices, shutdown the drbd on the old secondary
10624     for idx, dev in enumerate(self.instance.disks):
10625       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10626       self.cfg.SetDiskID(dev, self.target_node)
10627       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10628       if msg:
10629         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10630                            "node: %s" % (idx, msg),
10631                            hint=("Please cleanup this device manually as"
10632                                  " soon as possible"))
10633
10634     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10635     result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10636                                                self.instance.disks)[pnode]
10637
10638     msg = result.fail_msg
10639     if msg:
10640       # detaches didn't succeed (unlikely)
10641       self.cfg.ReleaseDRBDMinors(self.instance.name)
10642       raise errors.OpExecError("Can't detach the disks from the network on"
10643                                " old node: %s" % (msg,))
10644
10645     # if we managed to detach at least one, we update all the disks of
10646     # the instance to point to the new secondary
10647     self.lu.LogInfo("Updating instance configuration")
10648     for dev, _, new_logical_id in iv_names.itervalues():
10649       dev.logical_id = new_logical_id
10650       self.cfg.SetDiskID(dev, self.instance.primary_node)
10651
10652     self.cfg.Update(self.instance, feedback_fn)
10653
10654     # Release all node locks (the configuration has been updated)
10655     _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10656
10657     # and now perform the drbd attach
10658     self.lu.LogInfo("Attaching primary drbds to new secondary"
10659                     " (standalone => connected)")
10660     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10661                                             self.new_node],
10662                                            self.node_secondary_ip,
10663                                            self.instance.disks,
10664                                            self.instance.name,
10665                                            False)
10666     for to_node, to_result in result.items():
10667       msg = to_result.fail_msg
10668       if msg:
10669         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10670                            to_node, msg,
10671                            hint=("please do a gnt-instance info to see the"
10672                                  " status of disks"))
10673
10674     cstep = itertools.count(5)
10675
10676     if self.early_release:
10677       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10678       self._RemoveOldStorage(self.target_node, iv_names)
10679       # TODO: Check if releasing locks early still makes sense
10680       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10681     else:
10682       # Release all resource locks except those used by the instance
10683       _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10684                     keep=self.node_secondary_ip.keys())
10685
10686     # TODO: Can the instance lock be downgraded here? Take the optional disk
10687     # shutdown in the caller into consideration.
10688
10689     # Wait for sync
10690     # This can fail as the old devices are degraded and _WaitForSync
10691     # does a combined result over all disks, so we don't check its return value
10692     self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10693     _WaitForSync(self.lu, self.instance)
10694
10695     # Check all devices manually
10696     self._CheckDevices(self.instance.primary_node, iv_names)
10697
10698     # Step: remove old storage
10699     if not self.early_release:
10700       self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10701       self._RemoveOldStorage(self.target_node, iv_names)
10702
10703
10704 class LURepairNodeStorage(NoHooksLU):
10705   """Repairs the volume group on a node.
10706
10707   """
10708   REQ_BGL = False
10709
10710   def CheckArguments(self):
10711     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10712
10713     storage_type = self.op.storage_type
10714
10715     if (constants.SO_FIX_CONSISTENCY not in
10716         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10717       raise errors.OpPrereqError("Storage units of type '%s' can not be"
10718                                  " repaired" % storage_type,
10719                                  errors.ECODE_INVAL)
10720
10721   def ExpandNames(self):
10722     self.needed_locks = {
10723       locking.LEVEL_NODE: [self.op.node_name],
10724       }
10725
10726   def _CheckFaultyDisks(self, instance, node_name):
10727     """Ensure faulty disks abort the opcode or at least warn."""
10728     try:
10729       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10730                                   node_name, True):
10731         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10732                                    " node '%s'" % (instance.name, node_name),
10733                                    errors.ECODE_STATE)
10734     except errors.OpPrereqError, err:
10735       if self.op.ignore_consistency:
10736         self.proc.LogWarning(str(err.args[0]))
10737       else:
10738         raise
10739
10740   def CheckPrereq(self):
10741     """Check prerequisites.
10742
10743     """
10744     # Check whether any instance on this node has faulty disks
10745     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10746       if inst.admin_state != constants.ADMINST_UP:
10747         continue
10748       check_nodes = set(inst.all_nodes)
10749       check_nodes.discard(self.op.node_name)
10750       for inst_node_name in check_nodes:
10751         self._CheckFaultyDisks(inst, inst_node_name)
10752
10753   def Exec(self, feedback_fn):
10754     feedback_fn("Repairing storage unit '%s' on %s ..." %
10755                 (self.op.name, self.op.node_name))
10756
10757     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10758     result = self.rpc.call_storage_execute(self.op.node_name,
10759                                            self.op.storage_type, st_args,
10760                                            self.op.name,
10761                                            constants.SO_FIX_CONSISTENCY)
10762     result.Raise("Failed to repair storage unit '%s' on %s" %
10763                  (self.op.name, self.op.node_name))
10764
10765
10766 class LUNodeEvacuate(NoHooksLU):
10767   """Evacuates instances off a list of nodes.
10768
10769   """
10770   REQ_BGL = False
10771
10772   _MODE2IALLOCATOR = {
10773     constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10774     constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10775     constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10776     }
10777   assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10778   assert (frozenset(_MODE2IALLOCATOR.values()) ==
10779           constants.IALLOCATOR_NEVAC_MODES)
10780
10781   def CheckArguments(self):
10782     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10783
10784   def ExpandNames(self):
10785     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10786
10787     if self.op.remote_node is not None:
10788       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10789       assert self.op.remote_node
10790
10791       if self.op.remote_node == self.op.node_name:
10792         raise errors.OpPrereqError("Can not use evacuated node as a new"
10793                                    " secondary node", errors.ECODE_INVAL)
10794
10795       if self.op.mode != constants.NODE_EVAC_SEC:
10796         raise errors.OpPrereqError("Without the use of an iallocator only"
10797                                    " secondary instances can be evacuated",
10798                                    errors.ECODE_INVAL)
10799
10800     # Declare locks
10801     self.share_locks = _ShareAll()
10802     self.needed_locks = {
10803       locking.LEVEL_INSTANCE: [],
10804       locking.LEVEL_NODEGROUP: [],
10805       locking.LEVEL_NODE: [],
10806       }
10807
10808     # Determine nodes (via group) optimistically, needs verification once locks
10809     # have been acquired
10810     self.lock_nodes = self._DetermineNodes()
10811
10812   def _DetermineNodes(self):
10813     """Gets the list of nodes to operate on.
10814
10815     """
10816     if self.op.remote_node is None:
10817       # Iallocator will choose any node(s) in the same group
10818       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10819     else:
10820       group_nodes = frozenset([self.op.remote_node])
10821
10822     # Determine nodes to be locked
10823     return set([self.op.node_name]) | group_nodes
10824
10825   def _DetermineInstances(self):
10826     """Builds list of instances to operate on.
10827
10828     """
10829     assert self.op.mode in constants.NODE_EVAC_MODES
10830
10831     if self.op.mode == constants.NODE_EVAC_PRI:
10832       # Primary instances only
10833       inst_fn = _GetNodePrimaryInstances
10834       assert self.op.remote_node is None, \
10835         "Evacuating primary instances requires iallocator"
10836     elif self.op.mode == constants.NODE_EVAC_SEC:
10837       # Secondary instances only
10838       inst_fn = _GetNodeSecondaryInstances
10839     else:
10840       # All instances
10841       assert self.op.mode == constants.NODE_EVAC_ALL
10842       inst_fn = _GetNodeInstances
10843       # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10844       # per instance
10845       raise errors.OpPrereqError("Due to an issue with the iallocator"
10846                                  " interface it is not possible to evacuate"
10847                                  " all instances at once; specify explicitly"
10848                                  " whether to evacuate primary or secondary"
10849                                  " instances",
10850                                  errors.ECODE_INVAL)
10851
10852     return inst_fn(self.cfg, self.op.node_name)
10853
10854   def DeclareLocks(self, level):
10855     if level == locking.LEVEL_INSTANCE:
10856       # Lock instances optimistically, needs verification once node and group
10857       # locks have been acquired
10858       self.needed_locks[locking.LEVEL_INSTANCE] = \
10859         set(i.name for i in self._DetermineInstances())
10860
10861     elif level == locking.LEVEL_NODEGROUP:
10862       # Lock node groups for all potential target nodes optimistically, needs
10863       # verification once nodes have been acquired
10864       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10865         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10866
10867     elif level == locking.LEVEL_NODE:
10868       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10869
10870   def CheckPrereq(self):
10871     # Verify locks
10872     owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10873     owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10874     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10875
10876     need_nodes = self._DetermineNodes()
10877
10878     if not owned_nodes.issuperset(need_nodes):
10879       raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10880                                  " locks were acquired, current nodes are"
10881                                  " are '%s', used to be '%s'; retry the"
10882                                  " operation" %
10883                                  (self.op.node_name,
10884                                   utils.CommaJoin(need_nodes),
10885                                   utils.CommaJoin(owned_nodes)),
10886                                  errors.ECODE_STATE)
10887
10888     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10889     if owned_groups != wanted_groups:
10890       raise errors.OpExecError("Node groups changed since locks were acquired,"
10891                                " current groups are '%s', used to be '%s';"
10892                                " retry the operation" %
10893                                (utils.CommaJoin(wanted_groups),
10894                                 utils.CommaJoin(owned_groups)))
10895
10896     # Determine affected instances
10897     self.instances = self._DetermineInstances()
10898     self.instance_names = [i.name for i in self.instances]
10899
10900     if set(self.instance_names) != owned_instances:
10901       raise errors.OpExecError("Instances on node '%s' changed since locks"
10902                                " were acquired, current instances are '%s',"
10903                                " used to be '%s'; retry the operation" %
10904                                (self.op.node_name,
10905                                 utils.CommaJoin(self.instance_names),
10906                                 utils.CommaJoin(owned_instances)))
10907
10908     if self.instance_names:
10909       self.LogInfo("Evacuating instances from node '%s': %s",
10910                    self.op.node_name,
10911                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
10912     else:
10913       self.LogInfo("No instances to evacuate from node '%s'",
10914                    self.op.node_name)
10915
10916     if self.op.remote_node is not None:
10917       for i in self.instances:
10918         if i.primary_node == self.op.remote_node:
10919           raise errors.OpPrereqError("Node %s is the primary node of"
10920                                      " instance %s, cannot use it as"
10921                                      " secondary" %
10922                                      (self.op.remote_node, i.name),
10923                                      errors.ECODE_INVAL)
10924
10925   def Exec(self, feedback_fn):
10926     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10927
10928     if not self.instance_names:
10929       # No instances to evacuate
10930       jobs = []
10931
10932     elif self.op.iallocator is not None:
10933       # TODO: Implement relocation to other group
10934       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10935                        evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10936                        instances=list(self.instance_names))
10937
10938       ial.Run(self.op.iallocator)
10939
10940       if not ial.success:
10941         raise errors.OpPrereqError("Can't compute node evacuation using"
10942                                    " iallocator '%s': %s" %
10943                                    (self.op.iallocator, ial.info),
10944                                    errors.ECODE_NORES)
10945
10946       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10947
10948     elif self.op.remote_node is not None:
10949       assert self.op.mode == constants.NODE_EVAC_SEC
10950       jobs = [
10951         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10952                                         remote_node=self.op.remote_node,
10953                                         disks=[],
10954                                         mode=constants.REPLACE_DISK_CHG,
10955                                         early_release=self.op.early_release)]
10956         for instance_name in self.instance_names
10957         ]
10958
10959     else:
10960       raise errors.ProgrammerError("No iallocator or remote node")
10961
10962     return ResultWithJobs(jobs)
10963
10964
10965 def _SetOpEarlyRelease(early_release, op):
10966   """Sets C{early_release} flag on opcodes if available.
10967
10968   """
10969   try:
10970     op.early_release = early_release
10971   except AttributeError:
10972     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10973
10974   return op
10975
10976
10977 def _NodeEvacDest(use_nodes, group, nodes):
10978   """Returns group or nodes depending on caller's choice.
10979
10980   """
10981   if use_nodes:
10982     return utils.CommaJoin(nodes)
10983   else:
10984     return group
10985
10986
10987 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10988   """Unpacks the result of change-group and node-evacuate iallocator requests.
10989
10990   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10991   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10992
10993   @type lu: L{LogicalUnit}
10994   @param lu: Logical unit instance
10995   @type alloc_result: tuple/list
10996   @param alloc_result: Result from iallocator
10997   @type early_release: bool
10998   @param early_release: Whether to release locks early if possible
10999   @type use_nodes: bool
11000   @param use_nodes: Whether to display node names instead of groups
11001
11002   """
11003   (moved, failed, jobs) = alloc_result
11004
11005   if failed:
11006     failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11007                                  for (name, reason) in failed)
11008     lu.LogWarning("Unable to evacuate instances %s", failreason)
11009     raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11010
11011   if moved:
11012     lu.LogInfo("Instances to be moved: %s",
11013                utils.CommaJoin("%s (to %s)" %
11014                                (name, _NodeEvacDest(use_nodes, group, nodes))
11015                                for (name, group, nodes) in moved))
11016
11017   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11018               map(opcodes.OpCode.LoadOpCode, ops))
11019           for ops in jobs]
11020
11021
11022 class LUInstanceGrowDisk(LogicalUnit):
11023   """Grow a disk of an instance.
11024
11025   """
11026   HPATH = "disk-grow"
11027   HTYPE = constants.HTYPE_INSTANCE
11028   REQ_BGL = False
11029
11030   def ExpandNames(self):
11031     self._ExpandAndLockInstance()
11032     self.needed_locks[locking.LEVEL_NODE] = []
11033     self.needed_locks[locking.LEVEL_NODE_RES] = []
11034     self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11035
11036   def DeclareLocks(self, level):
11037     if level == locking.LEVEL_NODE:
11038       self._LockInstancesNodes()
11039     elif level == locking.LEVEL_NODE_RES:
11040       # Copy node locks
11041       self.needed_locks[locking.LEVEL_NODE_RES] = \
11042         self.needed_locks[locking.LEVEL_NODE][:]
11043
11044   def BuildHooksEnv(self):
11045     """Build hooks env.
11046
11047     This runs on the master, the primary and all the secondaries.
11048
11049     """
11050     env = {
11051       "DISK": self.op.disk,
11052       "AMOUNT": self.op.amount,
11053       }
11054     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11055     return env
11056
11057   def BuildHooksNodes(self):
11058     """Build hooks nodes.
11059
11060     """
11061     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11062     return (nl, nl)
11063
11064   def CheckPrereq(self):
11065     """Check prerequisites.
11066
11067     This checks that the instance is in the cluster.
11068
11069     """
11070     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11071     assert instance is not None, \
11072       "Cannot retrieve locked instance %s" % self.op.instance_name
11073     nodenames = list(instance.all_nodes)
11074     for node in nodenames:
11075       _CheckNodeOnline(self, node)
11076
11077     self.instance = instance
11078
11079     if instance.disk_template not in constants.DTS_GROWABLE:
11080       raise errors.OpPrereqError("Instance's disk layout does not support"
11081                                  " growing", errors.ECODE_INVAL)
11082
11083     self.disk = instance.FindDisk(self.op.disk)
11084
11085     if instance.disk_template not in (constants.DT_FILE,
11086                                       constants.DT_SHARED_FILE):
11087       # TODO: check the free disk space for file, when that feature will be
11088       # supported
11089       _CheckNodesFreeDiskPerVG(self, nodenames,
11090                                self.disk.ComputeGrowth(self.op.amount))
11091
11092   def Exec(self, feedback_fn):
11093     """Execute disk grow.
11094
11095     """
11096     instance = self.instance
11097     disk = self.disk
11098
11099     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11100     assert (self.owned_locks(locking.LEVEL_NODE) ==
11101             self.owned_locks(locking.LEVEL_NODE_RES))
11102
11103     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11104     if not disks_ok:
11105       raise errors.OpExecError("Cannot activate block device to grow")
11106
11107     feedback_fn("Growing disk %s of instance '%s' by %s" %
11108                 (self.op.disk, instance.name,
11109                  utils.FormatUnit(self.op.amount, "h")))
11110
11111     # First run all grow ops in dry-run mode
11112     for node in instance.all_nodes:
11113       self.cfg.SetDiskID(disk, node)
11114       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11115       result.Raise("Grow request failed to node %s" % node)
11116
11117     # We know that (as far as we can test) operations across different
11118     # nodes will succeed, time to run it for real
11119     for node in instance.all_nodes:
11120       self.cfg.SetDiskID(disk, node)
11121       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11122       result.Raise("Grow request failed to node %s" % node)
11123
11124       # TODO: Rewrite code to work properly
11125       # DRBD goes into sync mode for a short amount of time after executing the
11126       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11127       # calling "resize" in sync mode fails. Sleeping for a short amount of
11128       # time is a work-around.
11129       time.sleep(5)
11130
11131     disk.RecordGrow(self.op.amount)
11132     self.cfg.Update(instance, feedback_fn)
11133
11134     # Changes have been recorded, release node lock
11135     _ReleaseLocks(self, locking.LEVEL_NODE)
11136
11137     # Downgrade lock while waiting for sync
11138     self.glm.downgrade(locking.LEVEL_INSTANCE)
11139
11140     if self.op.wait_for_sync:
11141       disk_abort = not _WaitForSync(self, instance, disks=[disk])
11142       if disk_abort:
11143         self.proc.LogWarning("Disk sync-ing has not returned a good"
11144                              " status; please check the instance")
11145       if instance.admin_state != constants.ADMINST_UP:
11146         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11147     elif instance.admin_state != constants.ADMINST_UP:
11148       self.proc.LogWarning("Not shutting down the disk even if the instance is"
11149                            " not supposed to be running because no wait for"
11150                            " sync mode was requested")
11151
11152     assert self.owned_locks(locking.LEVEL_NODE_RES)
11153     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11154
11155
11156 class LUInstanceQueryData(NoHooksLU):
11157   """Query runtime instance data.
11158
11159   """
11160   REQ_BGL = False
11161
11162   def ExpandNames(self):
11163     self.needed_locks = {}
11164
11165     # Use locking if requested or when non-static information is wanted
11166     if not (self.op.static or self.op.use_locking):
11167       self.LogWarning("Non-static data requested, locks need to be acquired")
11168       self.op.use_locking = True
11169
11170     if self.op.instances or not self.op.use_locking:
11171       # Expand instance names right here
11172       self.wanted_names = _GetWantedInstances(self, self.op.instances)
11173     else:
11174       # Will use acquired locks
11175       self.wanted_names = None
11176
11177     if self.op.use_locking:
11178       self.share_locks = _ShareAll()
11179
11180       if self.wanted_names is None:
11181         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11182       else:
11183         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11184
11185       self.needed_locks[locking.LEVEL_NODE] = []
11186       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11187
11188   def DeclareLocks(self, level):
11189     if self.op.use_locking and level == locking.LEVEL_NODE:
11190       self._LockInstancesNodes()
11191
11192   def CheckPrereq(self):
11193     """Check prerequisites.
11194
11195     This only checks the optional instance list against the existing names.
11196
11197     """
11198     if self.wanted_names is None:
11199       assert self.op.use_locking, "Locking was not used"
11200       self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11201
11202     self.wanted_instances = \
11203         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11204
11205   def _ComputeBlockdevStatus(self, node, instance_name, dev):
11206     """Returns the status of a block device
11207
11208     """
11209     if self.op.static or not node:
11210       return None
11211
11212     self.cfg.SetDiskID(dev, node)
11213
11214     result = self.rpc.call_blockdev_find(node, dev)
11215     if result.offline:
11216       return None
11217
11218     result.Raise("Can't compute disk status for %s" % instance_name)
11219
11220     status = result.payload
11221     if status is None:
11222       return None
11223
11224     return (status.dev_path, status.major, status.minor,
11225             status.sync_percent, status.estimated_time,
11226             status.is_degraded, status.ldisk_status)
11227
11228   def _ComputeDiskStatus(self, instance, snode, dev):
11229     """Compute block device status.
11230
11231     """
11232     if dev.dev_type in constants.LDS_DRBD:
11233       # we change the snode then (otherwise we use the one passed in)
11234       if dev.logical_id[0] == instance.primary_node:
11235         snode = dev.logical_id[1]
11236       else:
11237         snode = dev.logical_id[0]
11238
11239     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11240                                               instance.name, dev)
11241     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11242
11243     if dev.children:
11244       dev_children = map(compat.partial(self._ComputeDiskStatus,
11245                                         instance, snode),
11246                          dev.children)
11247     else:
11248       dev_children = []
11249
11250     return {
11251       "iv_name": dev.iv_name,
11252       "dev_type": dev.dev_type,
11253       "logical_id": dev.logical_id,
11254       "physical_id": dev.physical_id,
11255       "pstatus": dev_pstatus,
11256       "sstatus": dev_sstatus,
11257       "children": dev_children,
11258       "mode": dev.mode,
11259       "size": dev.size,
11260       }
11261
11262   def Exec(self, feedback_fn):
11263     """Gather and return data"""
11264     result = {}
11265
11266     cluster = self.cfg.GetClusterInfo()
11267
11268     pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11269                                           for i in self.wanted_instances)
11270     for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11271       if self.op.static or pnode.offline:
11272         remote_state = None
11273         if pnode.offline:
11274           self.LogWarning("Primary node %s is marked offline, returning static"
11275                           " information only for instance %s" %
11276                           (pnode.name, instance.name))
11277       else:
11278         remote_info = self.rpc.call_instance_info(instance.primary_node,
11279                                                   instance.name,
11280                                                   instance.hypervisor)
11281         remote_info.Raise("Error checking node %s" % instance.primary_node)
11282         remote_info = remote_info.payload
11283         if remote_info and "state" in remote_info:
11284           remote_state = "up"
11285         else:
11286           if instance.admin_state == constants.ADMINST_UP:
11287             remote_state = "down"
11288           else:
11289             remote_state = instance.admin_state
11290
11291       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11292                   instance.disks)
11293
11294       result[instance.name] = {
11295         "name": instance.name,
11296         "config_state": instance.admin_state,
11297         "run_state": remote_state,
11298         "pnode": instance.primary_node,
11299         "snodes": instance.secondary_nodes,
11300         "os": instance.os,
11301         # this happens to be the same format used for hooks
11302         "nics": _NICListToTuple(self, instance.nics),
11303         "disk_template": instance.disk_template,
11304         "disks": disks,
11305         "hypervisor": instance.hypervisor,
11306         "network_port": instance.network_port,
11307         "hv_instance": instance.hvparams,
11308         "hv_actual": cluster.FillHV(instance, skip_globals=True),
11309         "be_instance": instance.beparams,
11310         "be_actual": cluster.FillBE(instance),
11311         "os_instance": instance.osparams,
11312         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11313         "serial_no": instance.serial_no,
11314         "mtime": instance.mtime,
11315         "ctime": instance.ctime,
11316         "uuid": instance.uuid,
11317         }
11318
11319     return result
11320
11321
11322 class LUInstanceSetParams(LogicalUnit):
11323   """Modifies an instances's parameters.
11324
11325   """
11326   HPATH = "instance-modify"
11327   HTYPE = constants.HTYPE_INSTANCE
11328   REQ_BGL = False
11329
11330   def CheckArguments(self):
11331     if not (self.op.nics or self.op.disks or self.op.disk_template or
11332             self.op.hvparams or self.op.beparams or self.op.os_name or
11333             self.op.online_inst or self.op.offline_inst):
11334       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11335
11336     if self.op.hvparams:
11337       _CheckGlobalHvParams(self.op.hvparams)
11338
11339     # Disk validation
11340     disk_addremove = 0
11341     for disk_op, disk_dict in self.op.disks:
11342       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11343       if disk_op == constants.DDM_REMOVE:
11344         disk_addremove += 1
11345         continue
11346       elif disk_op == constants.DDM_ADD:
11347         disk_addremove += 1
11348       else:
11349         if not isinstance(disk_op, int):
11350           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11351         if not isinstance(disk_dict, dict):
11352           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11353           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11354
11355       if disk_op == constants.DDM_ADD:
11356         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11357         if mode not in constants.DISK_ACCESS_SET:
11358           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11359                                      errors.ECODE_INVAL)
11360         size = disk_dict.get(constants.IDISK_SIZE, None)
11361         if size is None:
11362           raise errors.OpPrereqError("Required disk parameter size missing",
11363                                      errors.ECODE_INVAL)
11364         try:
11365           size = int(size)
11366         except (TypeError, ValueError), err:
11367           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11368                                      str(err), errors.ECODE_INVAL)
11369         disk_dict[constants.IDISK_SIZE] = size
11370       else:
11371         # modification of disk
11372         if constants.IDISK_SIZE in disk_dict:
11373           raise errors.OpPrereqError("Disk size change not possible, use"
11374                                      " grow-disk", errors.ECODE_INVAL)
11375
11376     if disk_addremove > 1:
11377       raise errors.OpPrereqError("Only one disk add or remove operation"
11378                                  " supported at a time", errors.ECODE_INVAL)
11379
11380     if self.op.disks and self.op.disk_template is not None:
11381       raise errors.OpPrereqError("Disk template conversion and other disk"
11382                                  " changes not supported at the same time",
11383                                  errors.ECODE_INVAL)
11384
11385     if (self.op.disk_template and
11386         self.op.disk_template in constants.DTS_INT_MIRROR and
11387         self.op.remote_node is None):
11388       raise errors.OpPrereqError("Changing the disk template to a mirrored"
11389                                  " one requires specifying a secondary node",
11390                                  errors.ECODE_INVAL)
11391
11392     # NIC validation
11393     nic_addremove = 0
11394     for nic_op, nic_dict in self.op.nics:
11395       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11396       if nic_op == constants.DDM_REMOVE:
11397         nic_addremove += 1
11398         continue
11399       elif nic_op == constants.DDM_ADD:
11400         nic_addremove += 1
11401       else:
11402         if not isinstance(nic_op, int):
11403           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11404         if not isinstance(nic_dict, dict):
11405           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11406           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11407
11408       # nic_dict should be a dict
11409       nic_ip = nic_dict.get(constants.INIC_IP, None)
11410       if nic_ip is not None:
11411         if nic_ip.lower() == constants.VALUE_NONE:
11412           nic_dict[constants.INIC_IP] = None
11413         else:
11414           if not netutils.IPAddress.IsValid(nic_ip):
11415             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11416                                        errors.ECODE_INVAL)
11417
11418       nic_bridge = nic_dict.get("bridge", None)
11419       nic_link = nic_dict.get(constants.INIC_LINK, None)
11420       if nic_bridge and nic_link:
11421         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11422                                    " at the same time", errors.ECODE_INVAL)
11423       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11424         nic_dict["bridge"] = None
11425       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11426         nic_dict[constants.INIC_LINK] = None
11427
11428       if nic_op == constants.DDM_ADD:
11429         nic_mac = nic_dict.get(constants.INIC_MAC, None)
11430         if nic_mac is None:
11431           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11432
11433       if constants.INIC_MAC in nic_dict:
11434         nic_mac = nic_dict[constants.INIC_MAC]
11435         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11436           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11437
11438         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11439           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11440                                      " modifying an existing nic",
11441                                      errors.ECODE_INVAL)
11442
11443     if nic_addremove > 1:
11444       raise errors.OpPrereqError("Only one NIC add or remove operation"
11445                                  " supported at a time", errors.ECODE_INVAL)
11446
11447   def ExpandNames(self):
11448     self._ExpandAndLockInstance()
11449     # Can't even acquire node locks in shared mode as upcoming changes in
11450     # Ganeti 2.6 will start to modify the node object on disk conversion
11451     self.needed_locks[locking.LEVEL_NODE] = []
11452     self.needed_locks[locking.LEVEL_NODE_RES] = []
11453     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11454
11455   def DeclareLocks(self, level):
11456     if level == locking.LEVEL_NODE:
11457       self._LockInstancesNodes()
11458       if self.op.disk_template and self.op.remote_node:
11459         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11460         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11461     elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11462       # Copy node locks
11463       self.needed_locks[locking.LEVEL_NODE_RES] = \
11464         self.needed_locks[locking.LEVEL_NODE][:]
11465
11466   def BuildHooksEnv(self):
11467     """Build hooks env.
11468
11469     This runs on the master, primary and secondaries.
11470
11471     """
11472     args = dict()
11473     if constants.BE_MINMEM in self.be_new:
11474       args["minmem"] = self.be_new[constants.BE_MINMEM]
11475     if constants.BE_MAXMEM in self.be_new:
11476       args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11477     if constants.BE_VCPUS in self.be_new:
11478       args["vcpus"] = self.be_new[constants.BE_VCPUS]
11479     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11480     # information at all.
11481     if self.op.nics:
11482       args["nics"] = []
11483       nic_override = dict(self.op.nics)
11484       for idx, nic in enumerate(self.instance.nics):
11485         if idx in nic_override:
11486           this_nic_override = nic_override[idx]
11487         else:
11488           this_nic_override = {}
11489         if constants.INIC_IP in this_nic_override:
11490           ip = this_nic_override[constants.INIC_IP]
11491         else:
11492           ip = nic.ip
11493         if constants.INIC_MAC in this_nic_override:
11494           mac = this_nic_override[constants.INIC_MAC]
11495         else:
11496           mac = nic.mac
11497         if idx in self.nic_pnew:
11498           nicparams = self.nic_pnew[idx]
11499         else:
11500           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11501         mode = nicparams[constants.NIC_MODE]
11502         link = nicparams[constants.NIC_LINK]
11503         args["nics"].append((ip, mac, mode, link))
11504       if constants.DDM_ADD in nic_override:
11505         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11506         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11507         nicparams = self.nic_pnew[constants.DDM_ADD]
11508         mode = nicparams[constants.NIC_MODE]
11509         link = nicparams[constants.NIC_LINK]
11510         args["nics"].append((ip, mac, mode, link))
11511       elif constants.DDM_REMOVE in nic_override:
11512         del args["nics"][-1]
11513
11514     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11515     if self.op.disk_template:
11516       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11517
11518     return env
11519
11520   def BuildHooksNodes(self):
11521     """Build hooks nodes.
11522
11523     """
11524     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11525     return (nl, nl)
11526
11527   def CheckPrereq(self):
11528     """Check prerequisites.
11529
11530     This only checks the instance list against the existing names.
11531
11532     """
11533     # checking the new params on the primary/secondary nodes
11534
11535     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11536     cluster = self.cluster = self.cfg.GetClusterInfo()
11537     assert self.instance is not None, \
11538       "Cannot retrieve locked instance %s" % self.op.instance_name
11539     pnode = instance.primary_node
11540     nodelist = list(instance.all_nodes)
11541     pnode_info = self.cfg.GetNodeInfo(pnode)
11542     self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11543
11544     # OS change
11545     if self.op.os_name and not self.op.force:
11546       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11547                       self.op.force_variant)
11548       instance_os = self.op.os_name
11549     else:
11550       instance_os = instance.os
11551
11552     if self.op.disk_template:
11553       if instance.disk_template == self.op.disk_template:
11554         raise errors.OpPrereqError("Instance already has disk template %s" %
11555                                    instance.disk_template, errors.ECODE_INVAL)
11556
11557       if (instance.disk_template,
11558           self.op.disk_template) not in self._DISK_CONVERSIONS:
11559         raise errors.OpPrereqError("Unsupported disk template conversion from"
11560                                    " %s to %s" % (instance.disk_template,
11561                                                   self.op.disk_template),
11562                                    errors.ECODE_INVAL)
11563       _CheckInstanceState(self, instance, INSTANCE_DOWN,
11564                           msg="cannot change disk template")
11565       if self.op.disk_template in constants.DTS_INT_MIRROR:
11566         if self.op.remote_node == pnode:
11567           raise errors.OpPrereqError("Given new secondary node %s is the same"
11568                                      " as the primary node of the instance" %
11569                                      self.op.remote_node, errors.ECODE_STATE)
11570         _CheckNodeOnline(self, self.op.remote_node)
11571         _CheckNodeNotDrained(self, self.op.remote_node)
11572         # FIXME: here we assume that the old instance type is DT_PLAIN
11573         assert instance.disk_template == constants.DT_PLAIN
11574         disks = [{constants.IDISK_SIZE: d.size,
11575                   constants.IDISK_VG: d.logical_id[0]}
11576                  for d in instance.disks]
11577         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11578         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11579
11580         snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11581         if pnode_info.group != snode_info.group:
11582           self.LogWarning("The primary and secondary nodes are in two"
11583                           " different node groups; the disk parameters"
11584                           " from the first disk's node group will be"
11585                           " used")
11586
11587     # hvparams processing
11588     if self.op.hvparams:
11589       hv_type = instance.hypervisor
11590       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11591       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11592       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11593
11594       # local check
11595       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11596       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11597       self.hv_proposed = self.hv_new = hv_new # the new actual values
11598       self.hv_inst = i_hvdict # the new dict (without defaults)
11599     else:
11600       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11601                                               instance.hvparams)
11602       self.hv_new = self.hv_inst = {}
11603
11604     # beparams processing
11605     if self.op.beparams:
11606       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11607                                    use_none=True)
11608       objects.UpgradeBeParams(i_bedict)
11609       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11610       be_new = cluster.SimpleFillBE(i_bedict)
11611       self.be_proposed = self.be_new = be_new # the new actual values
11612       self.be_inst = i_bedict # the new dict (without defaults)
11613     else:
11614       self.be_new = self.be_inst = {}
11615       self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11616     be_old = cluster.FillBE(instance)
11617
11618     # CPU param validation -- checking every time a paramtere is
11619     # changed to cover all cases where either CPU mask or vcpus have
11620     # changed
11621     if (constants.BE_VCPUS in self.be_proposed and
11622         constants.HV_CPU_MASK in self.hv_proposed):
11623       cpu_list = \
11624         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11625       # Verify mask is consistent with number of vCPUs. Can skip this
11626       # test if only 1 entry in the CPU mask, which means same mask
11627       # is applied to all vCPUs.
11628       if (len(cpu_list) > 1 and
11629           len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11630         raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11631                                    " CPU mask [%s]" %
11632                                    (self.be_proposed[constants.BE_VCPUS],
11633                                     self.hv_proposed[constants.HV_CPU_MASK]),
11634                                    errors.ECODE_INVAL)
11635
11636       # Only perform this test if a new CPU mask is given
11637       if constants.HV_CPU_MASK in self.hv_new:
11638         # Calculate the largest CPU number requested
11639         max_requested_cpu = max(map(max, cpu_list))
11640         # Check that all of the instance's nodes have enough physical CPUs to
11641         # satisfy the requested CPU mask
11642         _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11643                                 max_requested_cpu + 1, instance.hypervisor)
11644
11645     # osparams processing
11646     if self.op.osparams:
11647       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11648       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11649       self.os_inst = i_osdict # the new dict (without defaults)
11650     else:
11651       self.os_inst = {}
11652
11653     self.warn = []
11654
11655     #TODO(dynmem): do the appropriate check involving MINMEM
11656     if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11657         be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11658       mem_check_list = [pnode]
11659       if be_new[constants.BE_AUTO_BALANCE]:
11660         # either we changed auto_balance to yes or it was from before
11661         mem_check_list.extend(instance.secondary_nodes)
11662       instance_info = self.rpc.call_instance_info(pnode, instance.name,
11663                                                   instance.hypervisor)
11664       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11665                                          [instance.hypervisor])
11666       pninfo = nodeinfo[pnode]
11667       msg = pninfo.fail_msg
11668       if msg:
11669         # Assume the primary node is unreachable and go ahead
11670         self.warn.append("Can't get info from primary node %s: %s" %
11671                          (pnode, msg))
11672       else:
11673         (_, _, (pnhvinfo, )) = pninfo.payload
11674         if not isinstance(pnhvinfo.get("memory_free", None), int):
11675           self.warn.append("Node data from primary node %s doesn't contain"
11676                            " free memory information" % pnode)
11677         elif instance_info.fail_msg:
11678           self.warn.append("Can't get instance runtime information: %s" %
11679                           instance_info.fail_msg)
11680         else:
11681           if instance_info.payload:
11682             current_mem = int(instance_info.payload["memory"])
11683           else:
11684             # Assume instance not running
11685             # (there is a slight race condition here, but it's not very
11686             # probable, and we have no other way to check)
11687             # TODO: Describe race condition
11688             current_mem = 0
11689           #TODO(dynmem): do the appropriate check involving MINMEM
11690           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11691                       pnhvinfo["memory_free"])
11692           if miss_mem > 0:
11693             raise errors.OpPrereqError("This change will prevent the instance"
11694                                        " from starting, due to %d MB of memory"
11695                                        " missing on its primary node" %
11696                                        miss_mem,
11697                                        errors.ECODE_NORES)
11698
11699       if be_new[constants.BE_AUTO_BALANCE]:
11700         for node, nres in nodeinfo.items():
11701           if node not in instance.secondary_nodes:
11702             continue
11703           nres.Raise("Can't get info from secondary node %s" % node,
11704                      prereq=True, ecode=errors.ECODE_STATE)
11705           (_, _, (nhvinfo, )) = nres.payload
11706           if not isinstance(nhvinfo.get("memory_free", None), int):
11707             raise errors.OpPrereqError("Secondary node %s didn't return free"
11708                                        " memory information" % node,
11709                                        errors.ECODE_STATE)
11710           #TODO(dynmem): do the appropriate check involving MINMEM
11711           elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11712             raise errors.OpPrereqError("This change will prevent the instance"
11713                                        " from failover to its secondary node"
11714                                        " %s, due to not enough memory" % node,
11715                                        errors.ECODE_STATE)
11716
11717     # NIC processing
11718     self.nic_pnew = {}
11719     self.nic_pinst = {}
11720     for nic_op, nic_dict in self.op.nics:
11721       if nic_op == constants.DDM_REMOVE:
11722         if not instance.nics:
11723           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11724                                      errors.ECODE_INVAL)
11725         continue
11726       if nic_op != constants.DDM_ADD:
11727         # an existing nic
11728         if not instance.nics:
11729           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11730                                      " no NICs" % nic_op,
11731                                      errors.ECODE_INVAL)
11732         if nic_op < 0 or nic_op >= len(instance.nics):
11733           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11734                                      " are 0 to %d" %
11735                                      (nic_op, len(instance.nics) - 1),
11736                                      errors.ECODE_INVAL)
11737         old_nic_params = instance.nics[nic_op].nicparams
11738         old_nic_ip = instance.nics[nic_op].ip
11739       else:
11740         old_nic_params = {}
11741         old_nic_ip = None
11742
11743       update_params_dict = dict([(key, nic_dict[key])
11744                                  for key in constants.NICS_PARAMETERS
11745                                  if key in nic_dict])
11746
11747       if "bridge" in nic_dict:
11748         update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11749
11750       new_nic_params = _GetUpdatedParams(old_nic_params,
11751                                          update_params_dict)
11752       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11753       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11754       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11755       self.nic_pinst[nic_op] = new_nic_params
11756       self.nic_pnew[nic_op] = new_filled_nic_params
11757       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11758
11759       if new_nic_mode == constants.NIC_MODE_BRIDGED:
11760         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11761         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11762         if msg:
11763           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11764           if self.op.force:
11765             self.warn.append(msg)
11766           else:
11767             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11768       if new_nic_mode == constants.NIC_MODE_ROUTED:
11769         if constants.INIC_IP in nic_dict:
11770           nic_ip = nic_dict[constants.INIC_IP]
11771         else:
11772           nic_ip = old_nic_ip
11773         if nic_ip is None:
11774           raise errors.OpPrereqError("Cannot set the nic ip to None"
11775                                      " on a routed nic", errors.ECODE_INVAL)
11776       if constants.INIC_MAC in nic_dict:
11777         nic_mac = nic_dict[constants.INIC_MAC]
11778         if nic_mac is None:
11779           raise errors.OpPrereqError("Cannot set the nic mac to None",
11780                                      errors.ECODE_INVAL)
11781         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11782           # otherwise generate the mac
11783           nic_dict[constants.INIC_MAC] = \
11784             self.cfg.GenerateMAC(self.proc.GetECId())
11785         else:
11786           # or validate/reserve the current one
11787           try:
11788             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11789           except errors.ReservationError:
11790             raise errors.OpPrereqError("MAC address %s already in use"
11791                                        " in cluster" % nic_mac,
11792                                        errors.ECODE_NOTUNIQUE)
11793
11794     # DISK processing
11795     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11796       raise errors.OpPrereqError("Disk operations not supported for"
11797                                  " diskless instances",
11798                                  errors.ECODE_INVAL)
11799     for disk_op, _ in self.op.disks:
11800       if disk_op == constants.DDM_REMOVE:
11801         if len(instance.disks) == 1:
11802           raise errors.OpPrereqError("Cannot remove the last disk of"
11803                                      " an instance", errors.ECODE_INVAL)
11804         _CheckInstanceState(self, instance, INSTANCE_DOWN,
11805                             msg="cannot remove disks")
11806
11807       if (disk_op == constants.DDM_ADD and
11808           len(instance.disks) >= constants.MAX_DISKS):
11809         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11810                                    " add more" % constants.MAX_DISKS,
11811                                    errors.ECODE_STATE)
11812       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11813         # an existing disk
11814         if disk_op < 0 or disk_op >= len(instance.disks):
11815           raise errors.OpPrereqError("Invalid disk index %s, valid values"
11816                                      " are 0 to %d" %
11817                                      (disk_op, len(instance.disks)),
11818                                      errors.ECODE_INVAL)
11819
11820     # disabling the instance
11821     if self.op.offline_inst:
11822       _CheckInstanceState(self, instance, INSTANCE_DOWN,
11823                           msg="cannot change instance state to offline")
11824
11825     # enabling the instance
11826     if self.op.online_inst:
11827       _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11828                           msg="cannot make instance go online")
11829
11830   def _ConvertPlainToDrbd(self, feedback_fn):
11831     """Converts an instance from plain to drbd.
11832
11833     """
11834     feedback_fn("Converting template to drbd")
11835     instance = self.instance
11836     pnode = instance.primary_node
11837     snode = self.op.remote_node
11838
11839     assert instance.disk_template == constants.DT_PLAIN
11840
11841     # create a fake disk info for _GenerateDiskTemplate
11842     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11843                   constants.IDISK_VG: d.logical_id[0]}
11844                  for d in instance.disks]
11845     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11846                                       instance.name, pnode, [snode],
11847                                       disk_info, None, None, 0, feedback_fn,
11848                                       self.diskparams)
11849     info = _GetInstanceInfoText(instance)
11850     feedback_fn("Creating aditional volumes...")
11851     # first, create the missing data and meta devices
11852     for disk in new_disks:
11853       # unfortunately this is... not too nice
11854       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11855                             info, True)
11856       for child in disk.children:
11857         _CreateSingleBlockDev(self, snode, instance, child, info, True)
11858     # at this stage, all new LVs have been created, we can rename the
11859     # old ones
11860     feedback_fn("Renaming original volumes...")
11861     rename_list = [(o, n.children[0].logical_id)
11862                    for (o, n) in zip(instance.disks, new_disks)]
11863     result = self.rpc.call_blockdev_rename(pnode, rename_list)
11864     result.Raise("Failed to rename original LVs")
11865
11866     feedback_fn("Initializing DRBD devices...")
11867     # all child devices are in place, we can now create the DRBD devices
11868     for disk in new_disks:
11869       for node in [pnode, snode]:
11870         f_create = node == pnode
11871         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11872
11873     # at this point, the instance has been modified
11874     instance.disk_template = constants.DT_DRBD8
11875     instance.disks = new_disks
11876     self.cfg.Update(instance, feedback_fn)
11877
11878     # Release node locks while waiting for sync
11879     _ReleaseLocks(self, locking.LEVEL_NODE)
11880
11881     # disks are created, waiting for sync
11882     disk_abort = not _WaitForSync(self, instance,
11883                                   oneshot=not self.op.wait_for_sync)
11884     if disk_abort:
11885       raise errors.OpExecError("There are some degraded disks for"
11886                                " this instance, please cleanup manually")
11887
11888     # Node resource locks will be released by caller
11889
11890   def _ConvertDrbdToPlain(self, feedback_fn):
11891     """Converts an instance from drbd to plain.
11892
11893     """
11894     instance = self.instance
11895
11896     assert len(instance.secondary_nodes) == 1
11897     assert instance.disk_template == constants.DT_DRBD8
11898
11899     pnode = instance.primary_node
11900     snode = instance.secondary_nodes[0]
11901     feedback_fn("Converting template to plain")
11902
11903     old_disks = instance.disks
11904     new_disks = [d.children[0] for d in old_disks]
11905
11906     # copy over size and mode
11907     for parent, child in zip(old_disks, new_disks):
11908       child.size = parent.size
11909       child.mode = parent.mode
11910
11911     # update instance structure
11912     instance.disks = new_disks
11913     instance.disk_template = constants.DT_PLAIN
11914     self.cfg.Update(instance, feedback_fn)
11915
11916     # Release locks in case removing disks takes a while
11917     _ReleaseLocks(self, locking.LEVEL_NODE)
11918
11919     feedback_fn("Removing volumes on the secondary node...")
11920     for disk in old_disks:
11921       self.cfg.SetDiskID(disk, snode)
11922       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11923       if msg:
11924         self.LogWarning("Could not remove block device %s on node %s,"
11925                         " continuing anyway: %s", disk.iv_name, snode, msg)
11926
11927     feedback_fn("Removing unneeded volumes on the primary node...")
11928     for idx, disk in enumerate(old_disks):
11929       meta = disk.children[1]
11930       self.cfg.SetDiskID(meta, pnode)
11931       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11932       if msg:
11933         self.LogWarning("Could not remove metadata for disk %d on node %s,"
11934                         " continuing anyway: %s", idx, pnode, msg)
11935
11936     # this is a DRBD disk, return its port to the pool
11937     for disk in old_disks:
11938       tcp_port = disk.logical_id[2]
11939       self.cfg.AddTcpUdpPort(tcp_port)
11940
11941     # Node resource locks will be released by caller
11942
11943   def Exec(self, feedback_fn):
11944     """Modifies an instance.
11945
11946     All parameters take effect only at the next restart of the instance.
11947
11948     """
11949     # Process here the warnings from CheckPrereq, as we don't have a
11950     # feedback_fn there.
11951     for warn in self.warn:
11952       feedback_fn("WARNING: %s" % warn)
11953
11954     assert ((self.op.disk_template is None) ^
11955             bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11956       "Not owning any node resource locks"
11957
11958     result = []
11959     instance = self.instance
11960     # disk changes
11961     for disk_op, disk_dict in self.op.disks:
11962       if disk_op == constants.DDM_REMOVE:
11963         # remove the last disk
11964         device = instance.disks.pop()
11965         device_idx = len(instance.disks)
11966         for node, disk in device.ComputeNodeTree(instance.primary_node):
11967           self.cfg.SetDiskID(disk, node)
11968           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11969           if msg:
11970             self.LogWarning("Could not remove disk/%d on node %s: %s,"
11971                             " continuing anyway", device_idx, node, msg)
11972         result.append(("disk/%d" % device_idx, "remove"))
11973
11974         # if this is a DRBD disk, return its port to the pool
11975         if device.dev_type in constants.LDS_DRBD:
11976           tcp_port = device.logical_id[2]
11977           self.cfg.AddTcpUdpPort(tcp_port)
11978       elif disk_op == constants.DDM_ADD:
11979         # add a new disk
11980         if instance.disk_template in (constants.DT_FILE,
11981                                         constants.DT_SHARED_FILE):
11982           file_driver, file_path = instance.disks[0].logical_id
11983           file_path = os.path.dirname(file_path)
11984         else:
11985           file_driver = file_path = None
11986         disk_idx_base = len(instance.disks)
11987         new_disk = _GenerateDiskTemplate(self,
11988                                          instance.disk_template,
11989                                          instance.name, instance.primary_node,
11990                                          instance.secondary_nodes,
11991                                          [disk_dict],
11992                                          file_path,
11993                                          file_driver,
11994                                          disk_idx_base,
11995                                          feedback_fn,
11996                                          self.diskparams)[0]
11997         instance.disks.append(new_disk)
11998         info = _GetInstanceInfoText(instance)
11999
12000         logging.info("Creating volume %s for instance %s",
12001                      new_disk.iv_name, instance.name)
12002         # Note: this needs to be kept in sync with _CreateDisks
12003         #HARDCODE
12004         for node in instance.all_nodes:
12005           f_create = node == instance.primary_node
12006           try:
12007             _CreateBlockDev(self, node, instance, new_disk,
12008                             f_create, info, f_create)
12009           except errors.OpExecError, err:
12010             self.LogWarning("Failed to create volume %s (%s) on"
12011                             " node %s: %s",
12012                             new_disk.iv_name, new_disk, node, err)
12013         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12014                        (new_disk.size, new_disk.mode)))
12015       else:
12016         # change a given disk
12017         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12018         result.append(("disk.mode/%d" % disk_op,
12019                        disk_dict[constants.IDISK_MODE]))
12020
12021     if self.op.disk_template:
12022       if __debug__:
12023         check_nodes = set(instance.all_nodes)
12024         if self.op.remote_node:
12025           check_nodes.add(self.op.remote_node)
12026         for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12027           owned = self.owned_locks(level)
12028           assert not (check_nodes - owned), \
12029             ("Not owning the correct locks, owning %r, expected at least %r" %
12030              (owned, check_nodes))
12031
12032       r_shut = _ShutdownInstanceDisks(self, instance)
12033       if not r_shut:
12034         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12035                                  " proceed with disk template conversion")
12036       mode = (instance.disk_template, self.op.disk_template)
12037       try:
12038         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12039       except:
12040         self.cfg.ReleaseDRBDMinors(instance.name)
12041         raise
12042       result.append(("disk_template", self.op.disk_template))
12043
12044       assert instance.disk_template == self.op.disk_template, \
12045         ("Expected disk template '%s', found '%s'" %
12046          (self.op.disk_template, instance.disk_template))
12047
12048     # Release node and resource locks if there are any (they might already have
12049     # been released during disk conversion)
12050     _ReleaseLocks(self, locking.LEVEL_NODE)
12051     _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12052
12053     # NIC changes
12054     for nic_op, nic_dict in self.op.nics:
12055       if nic_op == constants.DDM_REMOVE:
12056         # remove the last nic
12057         del instance.nics[-1]
12058         result.append(("nic.%d" % len(instance.nics), "remove"))
12059       elif nic_op == constants.DDM_ADD:
12060         # mac and bridge should be set, by now
12061         mac = nic_dict[constants.INIC_MAC]
12062         ip = nic_dict.get(constants.INIC_IP, None)
12063         nicparams = self.nic_pinst[constants.DDM_ADD]
12064         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12065         instance.nics.append(new_nic)
12066         result.append(("nic.%d" % (len(instance.nics) - 1),
12067                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
12068                        (new_nic.mac, new_nic.ip,
12069                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12070                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12071                        )))
12072       else:
12073         for key in (constants.INIC_MAC, constants.INIC_IP):
12074           if key in nic_dict:
12075             setattr(instance.nics[nic_op], key, nic_dict[key])
12076         if nic_op in self.nic_pinst:
12077           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12078         for key, val in nic_dict.iteritems():
12079           result.append(("nic.%s/%d" % (key, nic_op), val))
12080
12081     # hvparams changes
12082     if self.op.hvparams:
12083       instance.hvparams = self.hv_inst
12084       for key, val in self.op.hvparams.iteritems():
12085         result.append(("hv/%s" % key, val))
12086
12087     # beparams changes
12088     if self.op.beparams:
12089       instance.beparams = self.be_inst
12090       for key, val in self.op.beparams.iteritems():
12091         result.append(("be/%s" % key, val))
12092
12093     # OS change
12094     if self.op.os_name:
12095       instance.os = self.op.os_name
12096
12097     # osparams changes
12098     if self.op.osparams:
12099       instance.osparams = self.os_inst
12100       for key, val in self.op.osparams.iteritems():
12101         result.append(("os/%s" % key, val))
12102
12103     # online/offline instance
12104     if self.op.online_inst:
12105       self.cfg.MarkInstanceDown(instance.name)
12106       result.append(("admin_state", constants.ADMINST_DOWN))
12107     if self.op.offline_inst:
12108       self.cfg.MarkInstanceOffline(instance.name)
12109       result.append(("admin_state", constants.ADMINST_OFFLINE))
12110
12111     self.cfg.Update(instance, feedback_fn)
12112
12113     assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12114                 self.owned_locks(locking.LEVEL_NODE)), \
12115       "All node locks should have been released by now"
12116
12117     return result
12118
12119   _DISK_CONVERSIONS = {
12120     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12121     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12122     }
12123
12124
12125 class LUInstanceChangeGroup(LogicalUnit):
12126   HPATH = "instance-change-group"
12127   HTYPE = constants.HTYPE_INSTANCE
12128   REQ_BGL = False
12129
12130   def ExpandNames(self):
12131     self.share_locks = _ShareAll()
12132     self.needed_locks = {
12133       locking.LEVEL_NODEGROUP: [],
12134       locking.LEVEL_NODE: [],
12135       }
12136
12137     self._ExpandAndLockInstance()
12138
12139     if self.op.target_groups:
12140       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12141                                   self.op.target_groups)
12142     else:
12143       self.req_target_uuids = None
12144
12145     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12146
12147   def DeclareLocks(self, level):
12148     if level == locking.LEVEL_NODEGROUP:
12149       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12150
12151       if self.req_target_uuids:
12152         lock_groups = set(self.req_target_uuids)
12153
12154         # Lock all groups used by instance optimistically; this requires going
12155         # via the node before it's locked, requiring verification later on
12156         instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12157         lock_groups.update(instance_groups)
12158       else:
12159         # No target groups, need to lock all of them
12160         lock_groups = locking.ALL_SET
12161
12162       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12163
12164     elif level == locking.LEVEL_NODE:
12165       if self.req_target_uuids:
12166         # Lock all nodes used by instances
12167         self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12168         self._LockInstancesNodes()
12169
12170         # Lock all nodes in all potential target groups
12171         lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12172                        self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12173         member_nodes = [node_name
12174                         for group in lock_groups
12175                         for node_name in self.cfg.GetNodeGroup(group).members]
12176         self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12177       else:
12178         # Lock all nodes as all groups are potential targets
12179         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12180
12181   def CheckPrereq(self):
12182     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12183     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12184     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12185
12186     assert (self.req_target_uuids is None or
12187             owned_groups.issuperset(self.req_target_uuids))
12188     assert owned_instances == set([self.op.instance_name])
12189
12190     # Get instance information
12191     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12192
12193     # Check if node groups for locked instance are still correct
12194     assert owned_nodes.issuperset(self.instance.all_nodes), \
12195       ("Instance %s's nodes changed while we kept the lock" %
12196        self.op.instance_name)
12197
12198     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12199                                            owned_groups)
12200
12201     if self.req_target_uuids:
12202       # User requested specific target groups
12203       self.target_uuids = self.req_target_uuids
12204     else:
12205       # All groups except those used by the instance are potential targets
12206       self.target_uuids = owned_groups - inst_groups
12207
12208     conflicting_groups = self.target_uuids & inst_groups
12209     if conflicting_groups:
12210       raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12211                                  " used by the instance '%s'" %
12212                                  (utils.CommaJoin(conflicting_groups),
12213                                   self.op.instance_name),
12214                                  errors.ECODE_INVAL)
12215
12216     if not self.target_uuids:
12217       raise errors.OpPrereqError("There are no possible target groups",
12218                                  errors.ECODE_INVAL)
12219
12220   def BuildHooksEnv(self):
12221     """Build hooks env.
12222
12223     """
12224     assert self.target_uuids
12225
12226     env = {
12227       "TARGET_GROUPS": " ".join(self.target_uuids),
12228       }
12229
12230     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12231
12232     return env
12233
12234   def BuildHooksNodes(self):
12235     """Build hooks nodes.
12236
12237     """
12238     mn = self.cfg.GetMasterNode()
12239     return ([mn], [mn])
12240
12241   def Exec(self, feedback_fn):
12242     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12243
12244     assert instances == [self.op.instance_name], "Instance not locked"
12245
12246     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12247                      instances=instances, target_groups=list(self.target_uuids))
12248
12249     ial.Run(self.op.iallocator)
12250
12251     if not ial.success:
12252       raise errors.OpPrereqError("Can't compute solution for changing group of"
12253                                  " instance '%s' using iallocator '%s': %s" %
12254                                  (self.op.instance_name, self.op.iallocator,
12255                                   ial.info),
12256                                  errors.ECODE_NORES)
12257
12258     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12259
12260     self.LogInfo("Iallocator returned %s job(s) for changing group of"
12261                  " instance '%s'", len(jobs), self.op.instance_name)
12262
12263     return ResultWithJobs(jobs)
12264
12265
12266 class LUBackupQuery(NoHooksLU):
12267   """Query the exports list
12268
12269   """
12270   REQ_BGL = False
12271
12272   def ExpandNames(self):
12273     self.needed_locks = {}
12274     self.share_locks[locking.LEVEL_NODE] = 1
12275     if not self.op.nodes:
12276       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12277     else:
12278       self.needed_locks[locking.LEVEL_NODE] = \
12279         _GetWantedNodes(self, self.op.nodes)
12280
12281   def Exec(self, feedback_fn):
12282     """Compute the list of all the exported system images.
12283
12284     @rtype: dict
12285     @return: a dictionary with the structure node->(export-list)
12286         where export-list is a list of the instances exported on
12287         that node.
12288
12289     """
12290     self.nodes = self.owned_locks(locking.LEVEL_NODE)
12291     rpcresult = self.rpc.call_export_list(self.nodes)
12292     result = {}
12293     for node in rpcresult:
12294       if rpcresult[node].fail_msg:
12295         result[node] = False
12296       else:
12297         result[node] = rpcresult[node].payload
12298
12299     return result
12300
12301
12302 class LUBackupPrepare(NoHooksLU):
12303   """Prepares an instance for an export and returns useful information.
12304
12305   """
12306   REQ_BGL = False
12307
12308   def ExpandNames(self):
12309     self._ExpandAndLockInstance()
12310
12311   def CheckPrereq(self):
12312     """Check prerequisites.
12313
12314     """
12315     instance_name = self.op.instance_name
12316
12317     self.instance = self.cfg.GetInstanceInfo(instance_name)
12318     assert self.instance is not None, \
12319           "Cannot retrieve locked instance %s" % self.op.instance_name
12320     _CheckNodeOnline(self, self.instance.primary_node)
12321
12322     self._cds = _GetClusterDomainSecret()
12323
12324   def Exec(self, feedback_fn):
12325     """Prepares an instance for an export.
12326
12327     """
12328     instance = self.instance
12329
12330     if self.op.mode == constants.EXPORT_MODE_REMOTE:
12331       salt = utils.GenerateSecret(8)
12332
12333       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12334       result = self.rpc.call_x509_cert_create(instance.primary_node,
12335                                               constants.RIE_CERT_VALIDITY)
12336       result.Raise("Can't create X509 key and certificate on %s" % result.node)
12337
12338       (name, cert_pem) = result.payload
12339
12340       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12341                                              cert_pem)
12342
12343       return {
12344         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12345         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12346                           salt),
12347         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12348         }
12349
12350     return None
12351
12352
12353 class LUBackupExport(LogicalUnit):
12354   """Export an instance to an image in the cluster.
12355
12356   """
12357   HPATH = "instance-export"
12358   HTYPE = constants.HTYPE_INSTANCE
12359   REQ_BGL = False
12360
12361   def CheckArguments(self):
12362     """Check the arguments.
12363
12364     """
12365     self.x509_key_name = self.op.x509_key_name
12366     self.dest_x509_ca_pem = self.op.destination_x509_ca
12367
12368     if self.op.mode == constants.EXPORT_MODE_REMOTE:
12369       if not self.x509_key_name:
12370         raise errors.OpPrereqError("Missing X509 key name for encryption",
12371                                    errors.ECODE_INVAL)
12372
12373       if not self.dest_x509_ca_pem:
12374         raise errors.OpPrereqError("Missing destination X509 CA",
12375                                    errors.ECODE_INVAL)
12376
12377   def ExpandNames(self):
12378     self._ExpandAndLockInstance()
12379
12380     # Lock all nodes for local exports
12381     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12382       # FIXME: lock only instance primary and destination node
12383       #
12384       # Sad but true, for now we have do lock all nodes, as we don't know where
12385       # the previous export might be, and in this LU we search for it and
12386       # remove it from its current node. In the future we could fix this by:
12387       #  - making a tasklet to search (share-lock all), then create the
12388       #    new one, then one to remove, after
12389       #  - removing the removal operation altogether
12390       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12391
12392   def DeclareLocks(self, level):
12393     """Last minute lock declaration."""
12394     # All nodes are locked anyway, so nothing to do here.
12395
12396   def BuildHooksEnv(self):
12397     """Build hooks env.
12398
12399     This will run on the master, primary node and target node.
12400
12401     """
12402     env = {
12403       "EXPORT_MODE": self.op.mode,
12404       "EXPORT_NODE": self.op.target_node,
12405       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12406       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12407       # TODO: Generic function for boolean env variables
12408       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12409       }
12410
12411     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12412
12413     return env
12414
12415   def BuildHooksNodes(self):
12416     """Build hooks nodes.
12417
12418     """
12419     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12420
12421     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12422       nl.append(self.op.target_node)
12423
12424     return (nl, nl)
12425
12426   def CheckPrereq(self):
12427     """Check prerequisites.
12428
12429     This checks that the instance and node names are valid.
12430
12431     """
12432     instance_name = self.op.instance_name
12433
12434     self.instance = self.cfg.GetInstanceInfo(instance_name)
12435     assert self.instance is not None, \
12436           "Cannot retrieve locked instance %s" % self.op.instance_name
12437     _CheckNodeOnline(self, self.instance.primary_node)
12438
12439     if (self.op.remove_instance and
12440         self.instance.admin_state == constants.ADMINST_UP and
12441         not self.op.shutdown):
12442       raise errors.OpPrereqError("Can not remove instance without shutting it"
12443                                  " down before")
12444
12445     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12446       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12447       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12448       assert self.dst_node is not None
12449
12450       _CheckNodeOnline(self, self.dst_node.name)
12451       _CheckNodeNotDrained(self, self.dst_node.name)
12452
12453       self._cds = None
12454       self.dest_disk_info = None
12455       self.dest_x509_ca = None
12456
12457     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12458       self.dst_node = None
12459
12460       if len(self.op.target_node) != len(self.instance.disks):
12461         raise errors.OpPrereqError(("Received destination information for %s"
12462                                     " disks, but instance %s has %s disks") %
12463                                    (len(self.op.target_node), instance_name,
12464                                     len(self.instance.disks)),
12465                                    errors.ECODE_INVAL)
12466
12467       cds = _GetClusterDomainSecret()
12468
12469       # Check X509 key name
12470       try:
12471         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12472       except (TypeError, ValueError), err:
12473         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12474
12475       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12476         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12477                                    errors.ECODE_INVAL)
12478
12479       # Load and verify CA
12480       try:
12481         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12482       except OpenSSL.crypto.Error, err:
12483         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12484                                    (err, ), errors.ECODE_INVAL)
12485
12486       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12487       if errcode is not None:
12488         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12489                                    (msg, ), errors.ECODE_INVAL)
12490
12491       self.dest_x509_ca = cert
12492
12493       # Verify target information
12494       disk_info = []
12495       for idx, disk_data in enumerate(self.op.target_node):
12496         try:
12497           (host, port, magic) = \
12498             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12499         except errors.GenericError, err:
12500           raise errors.OpPrereqError("Target info for disk %s: %s" %
12501                                      (idx, err), errors.ECODE_INVAL)
12502
12503         disk_info.append((host, port, magic))
12504
12505       assert len(disk_info) == len(self.op.target_node)
12506       self.dest_disk_info = disk_info
12507
12508     else:
12509       raise errors.ProgrammerError("Unhandled export mode %r" %
12510                                    self.op.mode)
12511
12512     # instance disk type verification
12513     # TODO: Implement export support for file-based disks
12514     for disk in self.instance.disks:
12515       if disk.dev_type == constants.LD_FILE:
12516         raise errors.OpPrereqError("Export not supported for instances with"
12517                                    " file-based disks", errors.ECODE_INVAL)
12518
12519   def _CleanupExports(self, feedback_fn):
12520     """Removes exports of current instance from all other nodes.
12521
12522     If an instance in a cluster with nodes A..D was exported to node C, its
12523     exports will be removed from the nodes A, B and D.
12524
12525     """
12526     assert self.op.mode != constants.EXPORT_MODE_REMOTE
12527
12528     nodelist = self.cfg.GetNodeList()
12529     nodelist.remove(self.dst_node.name)
12530
12531     # on one-node clusters nodelist will be empty after the removal
12532     # if we proceed the backup would be removed because OpBackupQuery
12533     # substitutes an empty list with the full cluster node list.
12534     iname = self.instance.name
12535     if nodelist:
12536       feedback_fn("Removing old exports for instance %s" % iname)
12537       exportlist = self.rpc.call_export_list(nodelist)
12538       for node in exportlist:
12539         if exportlist[node].fail_msg:
12540           continue
12541         if iname in exportlist[node].payload:
12542           msg = self.rpc.call_export_remove(node, iname).fail_msg
12543           if msg:
12544             self.LogWarning("Could not remove older export for instance %s"
12545                             " on node %s: %s", iname, node, msg)
12546
12547   def Exec(self, feedback_fn):
12548     """Export an instance to an image in the cluster.
12549
12550     """
12551     assert self.op.mode in constants.EXPORT_MODES
12552
12553     instance = self.instance
12554     src_node = instance.primary_node
12555
12556     if self.op.shutdown:
12557       # shutdown the instance, but not the disks
12558       feedback_fn("Shutting down instance %s" % instance.name)
12559       result = self.rpc.call_instance_shutdown(src_node, instance,
12560                                                self.op.shutdown_timeout)
12561       # TODO: Maybe ignore failures if ignore_remove_failures is set
12562       result.Raise("Could not shutdown instance %s on"
12563                    " node %s" % (instance.name, src_node))
12564
12565     # set the disks ID correctly since call_instance_start needs the
12566     # correct drbd minor to create the symlinks
12567     for disk in instance.disks:
12568       self.cfg.SetDiskID(disk, src_node)
12569
12570     activate_disks = (instance.admin_state != constants.ADMINST_UP)
12571
12572     if activate_disks:
12573       # Activate the instance disks if we'exporting a stopped instance
12574       feedback_fn("Activating disks for %s" % instance.name)
12575       _StartInstanceDisks(self, instance, None)
12576
12577     try:
12578       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12579                                                      instance)
12580
12581       helper.CreateSnapshots()
12582       try:
12583         if (self.op.shutdown and
12584             instance.admin_state == constants.ADMINST_UP and
12585             not self.op.remove_instance):
12586           assert not activate_disks
12587           feedback_fn("Starting instance %s" % instance.name)
12588           result = self.rpc.call_instance_start(src_node,
12589                                                 (instance, None, None), False)
12590           msg = result.fail_msg
12591           if msg:
12592             feedback_fn("Failed to start instance: %s" % msg)
12593             _ShutdownInstanceDisks(self, instance)
12594             raise errors.OpExecError("Could not start instance: %s" % msg)
12595
12596         if self.op.mode == constants.EXPORT_MODE_LOCAL:
12597           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12598         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12599           connect_timeout = constants.RIE_CONNECT_TIMEOUT
12600           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12601
12602           (key_name, _, _) = self.x509_key_name
12603
12604           dest_ca_pem = \
12605             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12606                                             self.dest_x509_ca)
12607
12608           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12609                                                      key_name, dest_ca_pem,
12610                                                      timeouts)
12611       finally:
12612         helper.Cleanup()
12613
12614       # Check for backwards compatibility
12615       assert len(dresults) == len(instance.disks)
12616       assert compat.all(isinstance(i, bool) for i in dresults), \
12617              "Not all results are boolean: %r" % dresults
12618
12619     finally:
12620       if activate_disks:
12621         feedback_fn("Deactivating disks for %s" % instance.name)
12622         _ShutdownInstanceDisks(self, instance)
12623
12624     if not (compat.all(dresults) and fin_resu):
12625       failures = []
12626       if not fin_resu:
12627         failures.append("export finalization")
12628       if not compat.all(dresults):
12629         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12630                                if not dsk)
12631         failures.append("disk export: disk(s) %s" % fdsk)
12632
12633       raise errors.OpExecError("Export failed, errors in %s" %
12634                                utils.CommaJoin(failures))
12635
12636     # At this point, the export was successful, we can cleanup/finish
12637
12638     # Remove instance if requested
12639     if self.op.remove_instance:
12640       feedback_fn("Removing instance %s" % instance.name)
12641       _RemoveInstance(self, feedback_fn, instance,
12642                       self.op.ignore_remove_failures)
12643
12644     if self.op.mode == constants.EXPORT_MODE_LOCAL:
12645       self._CleanupExports(feedback_fn)
12646
12647     return fin_resu, dresults
12648
12649
12650 class LUBackupRemove(NoHooksLU):
12651   """Remove exports related to the named instance.
12652
12653   """
12654   REQ_BGL = False
12655
12656   def ExpandNames(self):
12657     self.needed_locks = {}
12658     # We need all nodes to be locked in order for RemoveExport to work, but we
12659     # don't need to lock the instance itself, as nothing will happen to it (and
12660     # we can remove exports also for a removed instance)
12661     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12662
12663   def Exec(self, feedback_fn):
12664     """Remove any export.
12665
12666     """
12667     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12668     # If the instance was not found we'll try with the name that was passed in.
12669     # This will only work if it was an FQDN, though.
12670     fqdn_warn = False
12671     if not instance_name:
12672       fqdn_warn = True
12673       instance_name = self.op.instance_name
12674
12675     locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12676     exportlist = self.rpc.call_export_list(locked_nodes)
12677     found = False
12678     for node in exportlist:
12679       msg = exportlist[node].fail_msg
12680       if msg:
12681         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12682         continue
12683       if instance_name in exportlist[node].payload:
12684         found = True
12685         result = self.rpc.call_export_remove(node, instance_name)
12686         msg = result.fail_msg
12687         if msg:
12688           logging.error("Could not remove export for instance %s"
12689                         " on node %s: %s", instance_name, node, msg)
12690
12691     if fqdn_warn and not found:
12692       feedback_fn("Export not found. If trying to remove an export belonging"
12693                   " to a deleted instance please use its Fully Qualified"
12694                   " Domain Name.")
12695
12696
12697 class LUGroupAdd(LogicalUnit):
12698   """Logical unit for creating node groups.
12699
12700   """
12701   HPATH = "group-add"
12702   HTYPE = constants.HTYPE_GROUP
12703   REQ_BGL = False
12704
12705   def ExpandNames(self):
12706     # We need the new group's UUID here so that we can create and acquire the
12707     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12708     # that it should not check whether the UUID exists in the configuration.
12709     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12710     self.needed_locks = {}
12711     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12712
12713   def CheckPrereq(self):
12714     """Check prerequisites.
12715
12716     This checks that the given group name is not an existing node group
12717     already.
12718
12719     """
12720     try:
12721       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12722     except errors.OpPrereqError:
12723       pass
12724     else:
12725       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12726                                  " node group (UUID: %s)" %
12727                                  (self.op.group_name, existing_uuid),
12728                                  errors.ECODE_EXISTS)
12729
12730     if self.op.ndparams:
12731       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12732
12733     if self.op.diskparams:
12734       for templ in constants.DISK_TEMPLATES:
12735         if templ not in self.op.diskparams:
12736           self.op.diskparams[templ] = {}
12737         utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12738     else:
12739       self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12740
12741   def BuildHooksEnv(self):
12742     """Build hooks env.
12743
12744     """
12745     return {
12746       "GROUP_NAME": self.op.group_name,
12747       }
12748
12749   def BuildHooksNodes(self):
12750     """Build hooks nodes.
12751
12752     """
12753     mn = self.cfg.GetMasterNode()
12754     return ([mn], [mn])
12755
12756   def Exec(self, feedback_fn):
12757     """Add the node group to the cluster.
12758
12759     """
12760     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12761                                   uuid=self.group_uuid,
12762                                   alloc_policy=self.op.alloc_policy,
12763                                   ndparams=self.op.ndparams,
12764                                   diskparams=self.op.diskparams)
12765
12766     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12767     del self.remove_locks[locking.LEVEL_NODEGROUP]
12768
12769
12770 class LUGroupAssignNodes(NoHooksLU):
12771   """Logical unit for assigning nodes to groups.
12772
12773   """
12774   REQ_BGL = False
12775
12776   def ExpandNames(self):
12777     # These raise errors.OpPrereqError on their own:
12778     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12779     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12780
12781     # We want to lock all the affected nodes and groups. We have readily
12782     # available the list of nodes, and the *destination* group. To gather the
12783     # list of "source" groups, we need to fetch node information later on.
12784     self.needed_locks = {
12785       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12786       locking.LEVEL_NODE: self.op.nodes,
12787       }
12788
12789   def DeclareLocks(self, level):
12790     if level == locking.LEVEL_NODEGROUP:
12791       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12792
12793       # Try to get all affected nodes' groups without having the group or node
12794       # lock yet. Needs verification later in the code flow.
12795       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12796
12797       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12798
12799   def CheckPrereq(self):
12800     """Check prerequisites.
12801
12802     """
12803     assert self.needed_locks[locking.LEVEL_NODEGROUP]
12804     assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12805             frozenset(self.op.nodes))
12806
12807     expected_locks = (set([self.group_uuid]) |
12808                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12809     actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12810     if actual_locks != expected_locks:
12811       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12812                                " current groups are '%s', used to be '%s'" %
12813                                (utils.CommaJoin(expected_locks),
12814                                 utils.CommaJoin(actual_locks)))
12815
12816     self.node_data = self.cfg.GetAllNodesInfo()
12817     self.group = self.cfg.GetNodeGroup(self.group_uuid)
12818     instance_data = self.cfg.GetAllInstancesInfo()
12819
12820     if self.group is None:
12821       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12822                                (self.op.group_name, self.group_uuid))
12823
12824     (new_splits, previous_splits) = \
12825       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12826                                              for node in self.op.nodes],
12827                                             self.node_data, instance_data)
12828
12829     if new_splits:
12830       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12831
12832       if not self.op.force:
12833         raise errors.OpExecError("The following instances get split by this"
12834                                  " change and --force was not given: %s" %
12835                                  fmt_new_splits)
12836       else:
12837         self.LogWarning("This operation will split the following instances: %s",
12838                         fmt_new_splits)
12839
12840         if previous_splits:
12841           self.LogWarning("In addition, these already-split instances continue"
12842                           " to be split across groups: %s",
12843                           utils.CommaJoin(utils.NiceSort(previous_splits)))
12844
12845   def Exec(self, feedback_fn):
12846     """Assign nodes to a new group.
12847
12848     """
12849     mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12850
12851     self.cfg.AssignGroupNodes(mods)
12852
12853   @staticmethod
12854   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12855     """Check for split instances after a node assignment.
12856
12857     This method considers a series of node assignments as an atomic operation,
12858     and returns information about split instances after applying the set of
12859     changes.
12860
12861     In particular, it returns information about newly split instances, and
12862     instances that were already split, and remain so after the change.
12863
12864     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12865     considered.
12866
12867     @type changes: list of (node_name, new_group_uuid) pairs.
12868     @param changes: list of node assignments to consider.
12869     @param node_data: a dict with data for all nodes
12870     @param instance_data: a dict with all instances to consider
12871     @rtype: a two-tuple
12872     @return: a list of instances that were previously okay and result split as a
12873       consequence of this change, and a list of instances that were previously
12874       split and this change does not fix.
12875
12876     """
12877     changed_nodes = dict((node, group) for node, group in changes
12878                          if node_data[node].group != group)
12879
12880     all_split_instances = set()
12881     previously_split_instances = set()
12882
12883     def InstanceNodes(instance):
12884       return [instance.primary_node] + list(instance.secondary_nodes)
12885
12886     for inst in instance_data.values():
12887       if inst.disk_template not in constants.DTS_INT_MIRROR:
12888         continue
12889
12890       instance_nodes = InstanceNodes(inst)
12891
12892       if len(set(node_data[node].group for node in instance_nodes)) > 1:
12893         previously_split_instances.add(inst.name)
12894
12895       if len(set(changed_nodes.get(node, node_data[node].group)
12896                  for node in instance_nodes)) > 1:
12897         all_split_instances.add(inst.name)
12898
12899     return (list(all_split_instances - previously_split_instances),
12900             list(previously_split_instances & all_split_instances))
12901
12902
12903 class _GroupQuery(_QueryBase):
12904   FIELDS = query.GROUP_FIELDS
12905
12906   def ExpandNames(self, lu):
12907     lu.needed_locks = {}
12908
12909     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12910     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12911
12912     if not self.names:
12913       self.wanted = [name_to_uuid[name]
12914                      for name in utils.NiceSort(name_to_uuid.keys())]
12915     else:
12916       # Accept names to be either names or UUIDs.
12917       missing = []
12918       self.wanted = []
12919       all_uuid = frozenset(self._all_groups.keys())
12920
12921       for name in self.names:
12922         if name in all_uuid:
12923           self.wanted.append(name)
12924         elif name in name_to_uuid:
12925           self.wanted.append(name_to_uuid[name])
12926         else:
12927           missing.append(name)
12928
12929       if missing:
12930         raise errors.OpPrereqError("Some groups do not exist: %s" %
12931                                    utils.CommaJoin(missing),
12932                                    errors.ECODE_NOENT)
12933
12934   def DeclareLocks(self, lu, level):
12935     pass
12936
12937   def _GetQueryData(self, lu):
12938     """Computes the list of node groups and their attributes.
12939
12940     """
12941     do_nodes = query.GQ_NODE in self.requested_data
12942     do_instances = query.GQ_INST in self.requested_data
12943
12944     group_to_nodes = None
12945     group_to_instances = None
12946
12947     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12948     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12949     # latter GetAllInstancesInfo() is not enough, for we have to go through
12950     # instance->node. Hence, we will need to process nodes even if we only need
12951     # instance information.
12952     if do_nodes or do_instances:
12953       all_nodes = lu.cfg.GetAllNodesInfo()
12954       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12955       node_to_group = {}
12956
12957       for node in all_nodes.values():
12958         if node.group in group_to_nodes:
12959           group_to_nodes[node.group].append(node.name)
12960           node_to_group[node.name] = node.group
12961
12962       if do_instances:
12963         all_instances = lu.cfg.GetAllInstancesInfo()
12964         group_to_instances = dict((uuid, []) for uuid in self.wanted)
12965
12966         for instance in all_instances.values():
12967           node = instance.primary_node
12968           if node in node_to_group:
12969             group_to_instances[node_to_group[node]].append(instance.name)
12970
12971         if not do_nodes:
12972           # Do not pass on node information if it was not requested.
12973           group_to_nodes = None
12974
12975     return query.GroupQueryData([self._all_groups[uuid]
12976                                  for uuid in self.wanted],
12977                                 group_to_nodes, group_to_instances)
12978
12979
12980 class LUGroupQuery(NoHooksLU):
12981   """Logical unit for querying node groups.
12982
12983   """
12984   REQ_BGL = False
12985
12986   def CheckArguments(self):
12987     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12988                           self.op.output_fields, False)
12989
12990   def ExpandNames(self):
12991     self.gq.ExpandNames(self)
12992
12993   def DeclareLocks(self, level):
12994     self.gq.DeclareLocks(self, level)
12995
12996   def Exec(self, feedback_fn):
12997     return self.gq.OldStyleQuery(self)
12998
12999
13000 class LUGroupSetParams(LogicalUnit):
13001   """Modifies the parameters of a node group.
13002
13003   """
13004   HPATH = "group-modify"
13005   HTYPE = constants.HTYPE_GROUP
13006   REQ_BGL = False
13007
13008   def CheckArguments(self):
13009     all_changes = [
13010       self.op.ndparams,
13011       self.op.diskparams,
13012       self.op.alloc_policy,
13013       self.op.hv_state,
13014       self.op.disk_state
13015       ]
13016
13017     if all_changes.count(None) == len(all_changes):
13018       raise errors.OpPrereqError("Please pass at least one modification",
13019                                  errors.ECODE_INVAL)
13020
13021   def ExpandNames(self):
13022     # This raises errors.OpPrereqError on its own:
13023     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13024
13025     self.needed_locks = {
13026       locking.LEVEL_NODEGROUP: [self.group_uuid],
13027       }
13028
13029   def CheckPrereq(self):
13030     """Check prerequisites.
13031
13032     """
13033     self.group = self.cfg.GetNodeGroup(self.group_uuid)
13034
13035     if self.group is None:
13036       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13037                                (self.op.group_name, self.group_uuid))
13038
13039     if self.op.ndparams:
13040       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13041       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13042       self.new_ndparams = new_ndparams
13043
13044     if self.op.diskparams:
13045       self.new_diskparams = dict()
13046       for templ in constants.DISK_TEMPLATES:
13047         if templ not in self.op.diskparams:
13048           self.op.diskparams[templ] = {}
13049         new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13050                                              self.op.diskparams[templ])
13051         utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13052         self.new_diskparams[templ] = new_templ_params
13053
13054     if self.op.hv_state:
13055       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13056                                                  self.group.hv_state_static)
13057
13058     if self.op.disk_state:
13059       self.new_disk_state = \
13060         _MergeAndVerifyDiskState(self.op.disk_state,
13061                                  self.group.disk_state_static)
13062
13063   def BuildHooksEnv(self):
13064     """Build hooks env.
13065
13066     """
13067     return {
13068       "GROUP_NAME": self.op.group_name,
13069       "NEW_ALLOC_POLICY": self.op.alloc_policy,
13070       }
13071
13072   def BuildHooksNodes(self):
13073     """Build hooks nodes.
13074
13075     """
13076     mn = self.cfg.GetMasterNode()
13077     return ([mn], [mn])
13078
13079   def Exec(self, feedback_fn):
13080     """Modifies the node group.
13081
13082     """
13083     result = []
13084
13085     if self.op.ndparams:
13086       self.group.ndparams = self.new_ndparams
13087       result.append(("ndparams", str(self.group.ndparams)))
13088
13089     if self.op.diskparams:
13090       self.group.diskparams = self.new_diskparams
13091       result.append(("diskparams", str(self.group.diskparams)))
13092
13093     if self.op.alloc_policy:
13094       self.group.alloc_policy = self.op.alloc_policy
13095
13096     if self.op.hv_state:
13097       self.group.hv_state_static = self.new_hv_state
13098
13099     if self.op.disk_state:
13100       self.group.disk_state_static = self.new_disk_state
13101
13102     self.cfg.Update(self.group, feedback_fn)
13103     return result
13104
13105
13106 class LUGroupRemove(LogicalUnit):
13107   HPATH = "group-remove"
13108   HTYPE = constants.HTYPE_GROUP
13109   REQ_BGL = False
13110
13111   def ExpandNames(self):
13112     # This will raises errors.OpPrereqError on its own:
13113     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13114     self.needed_locks = {
13115       locking.LEVEL_NODEGROUP: [self.group_uuid],
13116       }
13117
13118   def CheckPrereq(self):
13119     """Check prerequisites.
13120
13121     This checks that the given group name exists as a node group, that is
13122     empty (i.e., contains no nodes), and that is not the last group of the
13123     cluster.
13124
13125     """
13126     # Verify that the group is empty.
13127     group_nodes = [node.name
13128                    for node in self.cfg.GetAllNodesInfo().values()
13129                    if node.group == self.group_uuid]
13130
13131     if group_nodes:
13132       raise errors.OpPrereqError("Group '%s' not empty, has the following"
13133                                  " nodes: %s" %
13134                                  (self.op.group_name,
13135                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
13136                                  errors.ECODE_STATE)
13137
13138     # Verify the cluster would not be left group-less.
13139     if len(self.cfg.GetNodeGroupList()) == 1:
13140       raise errors.OpPrereqError("Group '%s' is the only group,"
13141                                  " cannot be removed" %
13142                                  self.op.group_name,
13143                                  errors.ECODE_STATE)
13144
13145   def BuildHooksEnv(self):
13146     """Build hooks env.
13147
13148     """
13149     return {
13150       "GROUP_NAME": self.op.group_name,
13151       }
13152
13153   def BuildHooksNodes(self):
13154     """Build hooks nodes.
13155
13156     """
13157     mn = self.cfg.GetMasterNode()
13158     return ([mn], [mn])
13159
13160   def Exec(self, feedback_fn):
13161     """Remove the node group.
13162
13163     """
13164     try:
13165       self.cfg.RemoveNodeGroup(self.group_uuid)
13166     except errors.ConfigurationError:
13167       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13168                                (self.op.group_name, self.group_uuid))
13169
13170     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13171
13172
13173 class LUGroupRename(LogicalUnit):
13174   HPATH = "group-rename"
13175   HTYPE = constants.HTYPE_GROUP
13176   REQ_BGL = False
13177
13178   def ExpandNames(self):
13179     # This raises errors.OpPrereqError on its own:
13180     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13181
13182     self.needed_locks = {
13183       locking.LEVEL_NODEGROUP: [self.group_uuid],
13184       }
13185
13186   def CheckPrereq(self):
13187     """Check prerequisites.
13188
13189     Ensures requested new name is not yet used.
13190
13191     """
13192     try:
13193       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13194     except errors.OpPrereqError:
13195       pass
13196     else:
13197       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13198                                  " node group (UUID: %s)" %
13199                                  (self.op.new_name, new_name_uuid),
13200                                  errors.ECODE_EXISTS)
13201
13202   def BuildHooksEnv(self):
13203     """Build hooks env.
13204
13205     """
13206     return {
13207       "OLD_NAME": self.op.group_name,
13208       "NEW_NAME": self.op.new_name,
13209       }
13210
13211   def BuildHooksNodes(self):
13212     """Build hooks nodes.
13213
13214     """
13215     mn = self.cfg.GetMasterNode()
13216
13217     all_nodes = self.cfg.GetAllNodesInfo()
13218     all_nodes.pop(mn, None)
13219
13220     run_nodes = [mn]
13221     run_nodes.extend(node.name for node in all_nodes.values()
13222                      if node.group == self.group_uuid)
13223
13224     return (run_nodes, run_nodes)
13225
13226   def Exec(self, feedback_fn):
13227     """Rename the node group.
13228
13229     """
13230     group = self.cfg.GetNodeGroup(self.group_uuid)
13231
13232     if group is None:
13233       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13234                                (self.op.group_name, self.group_uuid))
13235
13236     group.name = self.op.new_name
13237     self.cfg.Update(group, feedback_fn)
13238
13239     return self.op.new_name
13240
13241
13242 class LUGroupEvacuate(LogicalUnit):
13243   HPATH = "group-evacuate"
13244   HTYPE = constants.HTYPE_GROUP
13245   REQ_BGL = False
13246
13247   def ExpandNames(self):
13248     # This raises errors.OpPrereqError on its own:
13249     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13250
13251     if self.op.target_groups:
13252       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13253                                   self.op.target_groups)
13254     else:
13255       self.req_target_uuids = []
13256
13257     if self.group_uuid in self.req_target_uuids:
13258       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13259                                  " as a target group (targets are %s)" %
13260                                  (self.group_uuid,
13261                                   utils.CommaJoin(self.req_target_uuids)),
13262                                  errors.ECODE_INVAL)
13263
13264     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13265
13266     self.share_locks = _ShareAll()
13267     self.needed_locks = {
13268       locking.LEVEL_INSTANCE: [],
13269       locking.LEVEL_NODEGROUP: [],
13270       locking.LEVEL_NODE: [],
13271       }
13272
13273   def DeclareLocks(self, level):
13274     if level == locking.LEVEL_INSTANCE:
13275       assert not self.needed_locks[locking.LEVEL_INSTANCE]
13276
13277       # Lock instances optimistically, needs verification once node and group
13278       # locks have been acquired
13279       self.needed_locks[locking.LEVEL_INSTANCE] = \
13280         self.cfg.GetNodeGroupInstances(self.group_uuid)
13281
13282     elif level == locking.LEVEL_NODEGROUP:
13283       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13284
13285       if self.req_target_uuids:
13286         lock_groups = set([self.group_uuid] + self.req_target_uuids)
13287
13288         # Lock all groups used by instances optimistically; this requires going
13289         # via the node before it's locked, requiring verification later on
13290         lock_groups.update(group_uuid
13291                            for instance_name in
13292                              self.owned_locks(locking.LEVEL_INSTANCE)
13293                            for group_uuid in
13294                              self.cfg.GetInstanceNodeGroups(instance_name))
13295       else:
13296         # No target groups, need to lock all of them
13297         lock_groups = locking.ALL_SET
13298
13299       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13300
13301     elif level == locking.LEVEL_NODE:
13302       # This will only lock the nodes in the group to be evacuated which
13303       # contain actual instances
13304       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13305       self._LockInstancesNodes()
13306
13307       # Lock all nodes in group to be evacuated and target groups
13308       owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13309       assert self.group_uuid in owned_groups
13310       member_nodes = [node_name
13311                       for group in owned_groups
13312                       for node_name in self.cfg.GetNodeGroup(group).members]
13313       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13314
13315   def CheckPrereq(self):
13316     owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13317     owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13318     owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13319
13320     assert owned_groups.issuperset(self.req_target_uuids)
13321     assert self.group_uuid in owned_groups
13322
13323     # Check if locked instances are still correct
13324     _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13325
13326     # Get instance information
13327     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13328
13329     # Check if node groups for locked instances are still correct
13330     for instance_name in owned_instances:
13331       inst = self.instances[instance_name]
13332       assert owned_nodes.issuperset(inst.all_nodes), \
13333         "Instance %s's nodes changed while we kept the lock" % instance_name
13334
13335       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13336                                              owned_groups)
13337
13338       assert self.group_uuid in inst_groups, \
13339         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13340
13341     if self.req_target_uuids:
13342       # User requested specific target groups
13343       self.target_uuids = self.req_target_uuids
13344     else:
13345       # All groups except the one to be evacuated are potential targets
13346       self.target_uuids = [group_uuid for group_uuid in owned_groups
13347                            if group_uuid != self.group_uuid]
13348
13349       if not self.target_uuids:
13350         raise errors.OpPrereqError("There are no possible target groups",
13351                                    errors.ECODE_INVAL)
13352
13353   def BuildHooksEnv(self):
13354     """Build hooks env.
13355
13356     """
13357     return {
13358       "GROUP_NAME": self.op.group_name,
13359       "TARGET_GROUPS": " ".join(self.target_uuids),
13360       }
13361
13362   def BuildHooksNodes(self):
13363     """Build hooks nodes.
13364
13365     """
13366     mn = self.cfg.GetMasterNode()
13367
13368     assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13369
13370     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13371
13372     return (run_nodes, run_nodes)
13373
13374   def Exec(self, feedback_fn):
13375     instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13376
13377     assert self.group_uuid not in self.target_uuids
13378
13379     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13380                      instances=instances, target_groups=self.target_uuids)
13381
13382     ial.Run(self.op.iallocator)
13383
13384     if not ial.success:
13385       raise errors.OpPrereqError("Can't compute group evacuation using"
13386                                  " iallocator '%s': %s" %
13387                                  (self.op.iallocator, ial.info),
13388                                  errors.ECODE_NORES)
13389
13390     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13391
13392     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13393                  len(jobs), self.op.group_name)
13394
13395     return ResultWithJobs(jobs)
13396
13397
13398 class TagsLU(NoHooksLU): # pylint: disable=W0223
13399   """Generic tags LU.
13400
13401   This is an abstract class which is the parent of all the other tags LUs.
13402
13403   """
13404   def ExpandNames(self):
13405     self.group_uuid = None
13406     self.needed_locks = {}
13407     if self.op.kind == constants.TAG_NODE:
13408       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13409       self.needed_locks[locking.LEVEL_NODE] = self.op.name
13410     elif self.op.kind == constants.TAG_INSTANCE:
13411       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13412       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13413     elif self.op.kind == constants.TAG_NODEGROUP:
13414       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13415
13416     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13417     # not possible to acquire the BGL based on opcode parameters)
13418
13419   def CheckPrereq(self):
13420     """Check prerequisites.
13421
13422     """
13423     if self.op.kind == constants.TAG_CLUSTER:
13424       self.target = self.cfg.GetClusterInfo()
13425     elif self.op.kind == constants.TAG_NODE:
13426       self.target = self.cfg.GetNodeInfo(self.op.name)
13427     elif self.op.kind == constants.TAG_INSTANCE:
13428       self.target = self.cfg.GetInstanceInfo(self.op.name)
13429     elif self.op.kind == constants.TAG_NODEGROUP:
13430       self.target = self.cfg.GetNodeGroup(self.group_uuid)
13431     else:
13432       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13433                                  str(self.op.kind), errors.ECODE_INVAL)
13434
13435
13436 class LUTagsGet(TagsLU):
13437   """Returns the tags of a given object.
13438
13439   """
13440   REQ_BGL = False
13441
13442   def ExpandNames(self):
13443     TagsLU.ExpandNames(self)
13444
13445     # Share locks as this is only a read operation
13446     self.share_locks = _ShareAll()
13447
13448   def Exec(self, feedback_fn):
13449     """Returns the tag list.
13450
13451     """
13452     return list(self.target.GetTags())
13453
13454
13455 class LUTagsSearch(NoHooksLU):
13456   """Searches the tags for a given pattern.
13457
13458   """
13459   REQ_BGL = False
13460
13461   def ExpandNames(self):
13462     self.needed_locks = {}
13463
13464   def CheckPrereq(self):
13465     """Check prerequisites.
13466
13467     This checks the pattern passed for validity by compiling it.
13468
13469     """
13470     try:
13471       self.re = re.compile(self.op.pattern)
13472     except re.error, err:
13473       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13474                                  (self.op.pattern, err), errors.ECODE_INVAL)
13475
13476   def Exec(self, feedback_fn):
13477     """Returns the tag list.
13478
13479     """
13480     cfg = self.cfg
13481     tgts = [("/cluster", cfg.GetClusterInfo())]
13482     ilist = cfg.GetAllInstancesInfo().values()
13483     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13484     nlist = cfg.GetAllNodesInfo().values()
13485     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13486     tgts.extend(("/nodegroup/%s" % n.name, n)
13487                 for n in cfg.GetAllNodeGroupsInfo().values())
13488     results = []
13489     for path, target in tgts:
13490       for tag in target.GetTags():
13491         if self.re.search(tag):
13492           results.append((path, tag))
13493     return results
13494
13495
13496 class LUTagsSet(TagsLU):
13497   """Sets a tag on a given object.
13498
13499   """
13500   REQ_BGL = False
13501
13502   def CheckPrereq(self):
13503     """Check prerequisites.
13504
13505     This checks the type and length of the tag name and value.
13506
13507     """
13508     TagsLU.CheckPrereq(self)
13509     for tag in self.op.tags:
13510       objects.TaggableObject.ValidateTag(tag)
13511
13512   def Exec(self, feedback_fn):
13513     """Sets the tag.
13514
13515     """
13516     try:
13517       for tag in self.op.tags:
13518         self.target.AddTag(tag)
13519     except errors.TagError, err:
13520       raise errors.OpExecError("Error while setting tag: %s" % str(err))
13521     self.cfg.Update(self.target, feedback_fn)
13522
13523
13524 class LUTagsDel(TagsLU):
13525   """Delete a list of tags from a given object.
13526
13527   """
13528   REQ_BGL = False
13529
13530   def CheckPrereq(self):
13531     """Check prerequisites.
13532
13533     This checks that we have the given tag.
13534
13535     """
13536     TagsLU.CheckPrereq(self)
13537     for tag in self.op.tags:
13538       objects.TaggableObject.ValidateTag(tag)
13539     del_tags = frozenset(self.op.tags)
13540     cur_tags = self.target.GetTags()
13541
13542     diff_tags = del_tags - cur_tags
13543     if diff_tags:
13544       diff_names = ("'%s'" % i for i in sorted(diff_tags))
13545       raise errors.OpPrereqError("Tag(s) %s not found" %
13546                                  (utils.CommaJoin(diff_names), ),
13547                                  errors.ECODE_NOENT)
13548
13549   def Exec(self, feedback_fn):
13550     """Remove the tag from the object.
13551
13552     """
13553     for tag in self.op.tags:
13554       self.target.RemoveTag(tag)
13555     self.cfg.Update(self.target, feedback_fn)
13556
13557
13558 class LUTestDelay(NoHooksLU):
13559   """Sleep for a specified amount of time.
13560
13561   This LU sleeps on the master and/or nodes for a specified amount of
13562   time.
13563
13564   """
13565   REQ_BGL = False
13566
13567   def ExpandNames(self):
13568     """Expand names and set required locks.
13569
13570     This expands the node list, if any.
13571
13572     """
13573     self.needed_locks = {}
13574     if self.op.on_nodes:
13575       # _GetWantedNodes can be used here, but is not always appropriate to use
13576       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13577       # more information.
13578       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13579       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13580
13581   def _TestDelay(self):
13582     """Do the actual sleep.
13583
13584     """
13585     if self.op.on_master:
13586       if not utils.TestDelay(self.op.duration):
13587         raise errors.OpExecError("Error during master delay test")
13588     if self.op.on_nodes:
13589       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13590       for node, node_result in result.items():
13591         node_result.Raise("Failure during rpc call to node %s" % node)
13592
13593   def Exec(self, feedback_fn):
13594     """Execute the test delay opcode, with the wanted repetitions.
13595
13596     """
13597     if self.op.repeat == 0:
13598       self._TestDelay()
13599     else:
13600       top_value = self.op.repeat - 1
13601       for i in range(self.op.repeat):
13602         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13603         self._TestDelay()
13604
13605
13606 class LUTestJqueue(NoHooksLU):
13607   """Utility LU to test some aspects of the job queue.
13608
13609   """
13610   REQ_BGL = False
13611
13612   # Must be lower than default timeout for WaitForJobChange to see whether it
13613   # notices changed jobs
13614   _CLIENT_CONNECT_TIMEOUT = 20.0
13615   _CLIENT_CONFIRM_TIMEOUT = 60.0
13616
13617   @classmethod
13618   def _NotifyUsingSocket(cls, cb, errcls):
13619     """Opens a Unix socket and waits for another program to connect.
13620
13621     @type cb: callable
13622     @param cb: Callback to send socket name to client
13623     @type errcls: class
13624     @param errcls: Exception class to use for errors
13625
13626     """
13627     # Using a temporary directory as there's no easy way to create temporary
13628     # sockets without writing a custom loop around tempfile.mktemp and
13629     # socket.bind
13630     tmpdir = tempfile.mkdtemp()
13631     try:
13632       tmpsock = utils.PathJoin(tmpdir, "sock")
13633
13634       logging.debug("Creating temporary socket at %s", tmpsock)
13635       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13636       try:
13637         sock.bind(tmpsock)
13638         sock.listen(1)
13639
13640         # Send details to client
13641         cb(tmpsock)
13642
13643         # Wait for client to connect before continuing
13644         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13645         try:
13646           (conn, _) = sock.accept()
13647         except socket.error, err:
13648           raise errcls("Client didn't connect in time (%s)" % err)
13649       finally:
13650         sock.close()
13651     finally:
13652       # Remove as soon as client is connected
13653       shutil.rmtree(tmpdir)
13654
13655     # Wait for client to close
13656     try:
13657       try:
13658         # pylint: disable=E1101
13659         # Instance of '_socketobject' has no ... member
13660         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13661         conn.recv(1)
13662       except socket.error, err:
13663         raise errcls("Client failed to confirm notification (%s)" % err)
13664     finally:
13665       conn.close()
13666
13667   def _SendNotification(self, test, arg, sockname):
13668     """Sends a notification to the client.
13669
13670     @type test: string
13671     @param test: Test name
13672     @param arg: Test argument (depends on test)
13673     @type sockname: string
13674     @param sockname: Socket path
13675
13676     """
13677     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13678
13679   def _Notify(self, prereq, test, arg):
13680     """Notifies the client of a test.
13681
13682     @type prereq: bool
13683     @param prereq: Whether this is a prereq-phase test
13684     @type test: string
13685     @param test: Test name
13686     @param arg: Test argument (depends on test)
13687
13688     """
13689     if prereq:
13690       errcls = errors.OpPrereqError
13691     else:
13692       errcls = errors.OpExecError
13693
13694     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13695                                                   test, arg),
13696                                    errcls)
13697
13698   def CheckArguments(self):
13699     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13700     self.expandnames_calls = 0
13701
13702   def ExpandNames(self):
13703     checkargs_calls = getattr(self, "checkargs_calls", 0)
13704     if checkargs_calls < 1:
13705       raise errors.ProgrammerError("CheckArguments was not called")
13706
13707     self.expandnames_calls += 1
13708
13709     if self.op.notify_waitlock:
13710       self._Notify(True, constants.JQT_EXPANDNAMES, None)
13711
13712     self.LogInfo("Expanding names")
13713
13714     # Get lock on master node (just to get a lock, not for a particular reason)
13715     self.needed_locks = {
13716       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13717       }
13718
13719   def Exec(self, feedback_fn):
13720     if self.expandnames_calls < 1:
13721       raise errors.ProgrammerError("ExpandNames was not called")
13722
13723     if self.op.notify_exec:
13724       self._Notify(False, constants.JQT_EXEC, None)
13725
13726     self.LogInfo("Executing")
13727
13728     if self.op.log_messages:
13729       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13730       for idx, msg in enumerate(self.op.log_messages):
13731         self.LogInfo("Sending log message %s", idx + 1)
13732         feedback_fn(constants.JQT_MSGPREFIX + msg)
13733         # Report how many test messages have been sent
13734         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13735
13736     if self.op.fail:
13737       raise errors.OpExecError("Opcode failure was requested")
13738
13739     return True
13740
13741
13742 class IAllocator(object):
13743   """IAllocator framework.
13744
13745   An IAllocator instance has three sets of attributes:
13746     - cfg that is needed to query the cluster
13747     - input data (all members of the _KEYS class attribute are required)
13748     - four buffer attributes (in|out_data|text), that represent the
13749       input (to the external script) in text and data structure format,
13750       and the output from it, again in two formats
13751     - the result variables from the script (success, info, nodes) for
13752       easy usage
13753
13754   """
13755   # pylint: disable=R0902
13756   # lots of instance attributes
13757
13758   def __init__(self, cfg, rpc_runner, mode, **kwargs):
13759     self.cfg = cfg
13760     self.rpc = rpc_runner
13761     # init buffer variables
13762     self.in_text = self.out_text = self.in_data = self.out_data = None
13763     # init all input fields so that pylint is happy
13764     self.mode = mode
13765     self.memory = self.disks = self.disk_template = None
13766     self.os = self.tags = self.nics = self.vcpus = None
13767     self.hypervisor = None
13768     self.relocate_from = None
13769     self.name = None
13770     self.instances = None
13771     self.evac_mode = None
13772     self.target_groups = []
13773     # computed fields
13774     self.required_nodes = None
13775     # init result fields
13776     self.success = self.info = self.result = None
13777
13778     try:
13779       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13780     except KeyError:
13781       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13782                                    " IAllocator" % self.mode)
13783
13784     keyset = [n for (n, _) in keydata]
13785
13786     for key in kwargs:
13787       if key not in keyset:
13788         raise errors.ProgrammerError("Invalid input parameter '%s' to"
13789                                      " IAllocator" % key)
13790       setattr(self, key, kwargs[key])
13791
13792     for key in keyset:
13793       if key not in kwargs:
13794         raise errors.ProgrammerError("Missing input parameter '%s' to"
13795                                      " IAllocator" % key)
13796     self._BuildInputData(compat.partial(fn, self), keydata)
13797
13798   def _ComputeClusterData(self):
13799     """Compute the generic allocator input data.
13800
13801     This is the data that is independent of the actual operation.
13802
13803     """
13804     cfg = self.cfg
13805     cluster_info = cfg.GetClusterInfo()
13806     # cluster data
13807     data = {
13808       "version": constants.IALLOCATOR_VERSION,
13809       "cluster_name": cfg.GetClusterName(),
13810       "cluster_tags": list(cluster_info.GetTags()),
13811       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13812       # we don't have job IDs
13813       }
13814     ninfo = cfg.GetAllNodesInfo()
13815     iinfo = cfg.GetAllInstancesInfo().values()
13816     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13817
13818     # node data
13819     node_list = [n.name for n in ninfo.values() if n.vm_capable]
13820
13821     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13822       hypervisor_name = self.hypervisor
13823     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13824       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13825     else:
13826       hypervisor_name = cluster_info.primary_hypervisor
13827
13828     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
13829                                         [hypervisor_name])
13830     node_iinfo = \
13831       self.rpc.call_all_instances_info(node_list,
13832                                        cluster_info.enabled_hypervisors)
13833
13834     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13835
13836     config_ndata = self._ComputeBasicNodeData(ninfo)
13837     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13838                                                  i_list, config_ndata)
13839     assert len(data["nodes"]) == len(ninfo), \
13840         "Incomplete node data computed"
13841
13842     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13843
13844     self.in_data = data
13845
13846   @staticmethod
13847   def _ComputeNodeGroupData(cfg):
13848     """Compute node groups data.
13849
13850     """
13851     ng = dict((guuid, {
13852       "name": gdata.name,
13853       "alloc_policy": gdata.alloc_policy,
13854       })
13855       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13856
13857     return ng
13858
13859   @staticmethod
13860   def _ComputeBasicNodeData(node_cfg):
13861     """Compute global node data.
13862
13863     @rtype: dict
13864     @returns: a dict of name: (node dict, node config)
13865
13866     """
13867     # fill in static (config-based) values
13868     node_results = dict((ninfo.name, {
13869       "tags": list(ninfo.GetTags()),
13870       "primary_ip": ninfo.primary_ip,
13871       "secondary_ip": ninfo.secondary_ip,
13872       "offline": ninfo.offline,
13873       "drained": ninfo.drained,
13874       "master_candidate": ninfo.master_candidate,
13875       "group": ninfo.group,
13876       "master_capable": ninfo.master_capable,
13877       "vm_capable": ninfo.vm_capable,
13878       })
13879       for ninfo in node_cfg.values())
13880
13881     return node_results
13882
13883   @staticmethod
13884   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13885                               node_results):
13886     """Compute global node data.
13887
13888     @param node_results: the basic node structures as filled from the config
13889
13890     """
13891     #TODO(dynmem): compute the right data on MAX and MIN memory
13892     # make a copy of the current dict
13893     node_results = dict(node_results)
13894     for nname, nresult in node_data.items():
13895       assert nname in node_results, "Missing basic data for node %s" % nname
13896       ninfo = node_cfg[nname]
13897
13898       if not (ninfo.offline or ninfo.drained):
13899         nresult.Raise("Can't get data for node %s" % nname)
13900         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13901                                 nname)
13902         remote_info = _MakeLegacyNodeInfo(nresult.payload)
13903
13904         for attr in ["memory_total", "memory_free", "memory_dom0",
13905                      "vg_size", "vg_free", "cpu_total"]:
13906           if attr not in remote_info:
13907             raise errors.OpExecError("Node '%s' didn't return attribute"
13908                                      " '%s'" % (nname, attr))
13909           if not isinstance(remote_info[attr], int):
13910             raise errors.OpExecError("Node '%s' returned invalid value"
13911                                      " for '%s': %s" %
13912                                      (nname, attr, remote_info[attr]))
13913         # compute memory used by primary instances
13914         i_p_mem = i_p_up_mem = 0
13915         for iinfo, beinfo in i_list:
13916           if iinfo.primary_node == nname:
13917             i_p_mem += beinfo[constants.BE_MAXMEM]
13918             if iinfo.name not in node_iinfo[nname].payload:
13919               i_used_mem = 0
13920             else:
13921               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13922             i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
13923             remote_info["memory_free"] -= max(0, i_mem_diff)
13924
13925             if iinfo.admin_state == constants.ADMINST_UP:
13926               i_p_up_mem += beinfo[constants.BE_MAXMEM]
13927
13928         # compute memory used by instances
13929         pnr_dyn = {
13930           "total_memory": remote_info["memory_total"],
13931           "reserved_memory": remote_info["memory_dom0"],
13932           "free_memory": remote_info["memory_free"],
13933           "total_disk": remote_info["vg_size"],
13934           "free_disk": remote_info["vg_free"],
13935           "total_cpus": remote_info["cpu_total"],
13936           "i_pri_memory": i_p_mem,
13937           "i_pri_up_memory": i_p_up_mem,
13938           }
13939         pnr_dyn.update(node_results[nname])
13940         node_results[nname] = pnr_dyn
13941
13942     return node_results
13943
13944   @staticmethod
13945   def _ComputeInstanceData(cluster_info, i_list):
13946     """Compute global instance data.
13947
13948     """
13949     instance_data = {}
13950     for iinfo, beinfo in i_list:
13951       nic_data = []
13952       for nic in iinfo.nics:
13953         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13954         nic_dict = {
13955           "mac": nic.mac,
13956           "ip": nic.ip,
13957           "mode": filled_params[constants.NIC_MODE],
13958           "link": filled_params[constants.NIC_LINK],
13959           }
13960         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13961           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13962         nic_data.append(nic_dict)
13963       pir = {
13964         "tags": list(iinfo.GetTags()),
13965         "admin_state": iinfo.admin_state,
13966         "vcpus": beinfo[constants.BE_VCPUS],
13967         "memory": beinfo[constants.BE_MAXMEM],
13968         "os": iinfo.os,
13969         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13970         "nics": nic_data,
13971         "disks": [{constants.IDISK_SIZE: dsk.size,
13972                    constants.IDISK_MODE: dsk.mode}
13973                   for dsk in iinfo.disks],
13974         "disk_template": iinfo.disk_template,
13975         "hypervisor": iinfo.hypervisor,
13976         }
13977       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13978                                                  pir["disks"])
13979       instance_data[iinfo.name] = pir
13980
13981     return instance_data
13982
13983   def _AddNewInstance(self):
13984     """Add new instance data to allocator structure.
13985
13986     This in combination with _AllocatorGetClusterData will create the
13987     correct structure needed as input for the allocator.
13988
13989     The checks for the completeness of the opcode must have already been
13990     done.
13991
13992     """
13993     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13994
13995     if self.disk_template in constants.DTS_INT_MIRROR:
13996       self.required_nodes = 2
13997     else:
13998       self.required_nodes = 1
13999
14000     request = {
14001       "name": self.name,
14002       "disk_template": self.disk_template,
14003       "tags": self.tags,
14004       "os": self.os,
14005       "vcpus": self.vcpus,
14006       "memory": self.memory,
14007       "disks": self.disks,
14008       "disk_space_total": disk_space,
14009       "nics": self.nics,
14010       "required_nodes": self.required_nodes,
14011       "hypervisor": self.hypervisor,
14012       }
14013
14014     return request
14015
14016   def _AddRelocateInstance(self):
14017     """Add relocate instance data to allocator structure.
14018
14019     This in combination with _IAllocatorGetClusterData will create the
14020     correct structure needed as input for the allocator.
14021
14022     The checks for the completeness of the opcode must have already been
14023     done.
14024
14025     """
14026     instance = self.cfg.GetInstanceInfo(self.name)
14027     if instance is None:
14028       raise errors.ProgrammerError("Unknown instance '%s' passed to"
14029                                    " IAllocator" % self.name)
14030
14031     if instance.disk_template not in constants.DTS_MIRRORED:
14032       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14033                                  errors.ECODE_INVAL)
14034
14035     if instance.disk_template in constants.DTS_INT_MIRROR and \
14036         len(instance.secondary_nodes) != 1:
14037       raise errors.OpPrereqError("Instance has not exactly one secondary node",
14038                                  errors.ECODE_STATE)
14039
14040     self.required_nodes = 1
14041     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14042     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14043
14044     request = {
14045       "name": self.name,
14046       "disk_space_total": disk_space,
14047       "required_nodes": self.required_nodes,
14048       "relocate_from": self.relocate_from,
14049       }
14050     return request
14051
14052   def _AddNodeEvacuate(self):
14053     """Get data for node-evacuate requests.
14054
14055     """
14056     return {
14057       "instances": self.instances,
14058       "evac_mode": self.evac_mode,
14059       }
14060
14061   def _AddChangeGroup(self):
14062     """Get data for node-evacuate requests.
14063
14064     """
14065     return {
14066       "instances": self.instances,
14067       "target_groups": self.target_groups,
14068       }
14069
14070   def _BuildInputData(self, fn, keydata):
14071     """Build input data structures.
14072
14073     """
14074     self._ComputeClusterData()
14075
14076     request = fn()
14077     request["type"] = self.mode
14078     for keyname, keytype in keydata:
14079       if keyname not in request:
14080         raise errors.ProgrammerError("Request parameter %s is missing" %
14081                                      keyname)
14082       val = request[keyname]
14083       if not keytype(val):
14084         raise errors.ProgrammerError("Request parameter %s doesn't pass"
14085                                      " validation, value %s, expected"
14086                                      " type %s" % (keyname, val, keytype))
14087     self.in_data["request"] = request
14088
14089     self.in_text = serializer.Dump(self.in_data)
14090
14091   _STRING_LIST = ht.TListOf(ht.TString)
14092   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14093      # pylint: disable=E1101
14094      # Class '...' has no 'OP_ID' member
14095      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14096                           opcodes.OpInstanceMigrate.OP_ID,
14097                           opcodes.OpInstanceReplaceDisks.OP_ID])
14098      })))
14099
14100   _NEVAC_MOVED = \
14101     ht.TListOf(ht.TAnd(ht.TIsLength(3),
14102                        ht.TItems([ht.TNonEmptyString,
14103                                   ht.TNonEmptyString,
14104                                   ht.TListOf(ht.TNonEmptyString),
14105                                  ])))
14106   _NEVAC_FAILED = \
14107     ht.TListOf(ht.TAnd(ht.TIsLength(2),
14108                        ht.TItems([ht.TNonEmptyString,
14109                                   ht.TMaybeString,
14110                                  ])))
14111   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14112                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14113
14114   _MODE_DATA = {
14115     constants.IALLOCATOR_MODE_ALLOC:
14116       (_AddNewInstance,
14117        [
14118         ("name", ht.TString),
14119         ("memory", ht.TInt),
14120         ("disks", ht.TListOf(ht.TDict)),
14121         ("disk_template", ht.TString),
14122         ("os", ht.TString),
14123         ("tags", _STRING_LIST),
14124         ("nics", ht.TListOf(ht.TDict)),
14125         ("vcpus", ht.TInt),
14126         ("hypervisor", ht.TString),
14127         ], ht.TList),
14128     constants.IALLOCATOR_MODE_RELOC:
14129       (_AddRelocateInstance,
14130        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14131        ht.TList),
14132      constants.IALLOCATOR_MODE_NODE_EVAC:
14133       (_AddNodeEvacuate, [
14134         ("instances", _STRING_LIST),
14135         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14136         ], _NEVAC_RESULT),
14137      constants.IALLOCATOR_MODE_CHG_GROUP:
14138       (_AddChangeGroup, [
14139         ("instances", _STRING_LIST),
14140         ("target_groups", _STRING_LIST),
14141         ], _NEVAC_RESULT),
14142     }
14143
14144   def Run(self, name, validate=True, call_fn=None):
14145     """Run an instance allocator and return the results.
14146
14147     """
14148     if call_fn is None:
14149       call_fn = self.rpc.call_iallocator_runner
14150
14151     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14152     result.Raise("Failure while running the iallocator script")
14153
14154     self.out_text = result.payload
14155     if validate:
14156       self._ValidateResult()
14157
14158   def _ValidateResult(self):
14159     """Process the allocator results.
14160
14161     This will process and if successful save the result in
14162     self.out_data and the other parameters.
14163
14164     """
14165     try:
14166       rdict = serializer.Load(self.out_text)
14167     except Exception, err:
14168       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14169
14170     if not isinstance(rdict, dict):
14171       raise errors.OpExecError("Can't parse iallocator results: not a dict")
14172
14173     # TODO: remove backwards compatiblity in later versions
14174     if "nodes" in rdict and "result" not in rdict:
14175       rdict["result"] = rdict["nodes"]
14176       del rdict["nodes"]
14177
14178     for key in "success", "info", "result":
14179       if key not in rdict:
14180         raise errors.OpExecError("Can't parse iallocator results:"
14181                                  " missing key '%s'" % key)
14182       setattr(self, key, rdict[key])
14183
14184     if not self._result_check(self.result):
14185       raise errors.OpExecError("Iallocator returned invalid result,"
14186                                " expected %s, got %s" %
14187                                (self._result_check, self.result),
14188                                errors.ECODE_INVAL)
14189
14190     if self.mode == constants.IALLOCATOR_MODE_RELOC:
14191       assert self.relocate_from is not None
14192       assert self.required_nodes == 1
14193
14194       node2group = dict((name, ndata["group"])
14195                         for (name, ndata) in self.in_data["nodes"].items())
14196
14197       fn = compat.partial(self._NodesToGroups, node2group,
14198                           self.in_data["nodegroups"])
14199
14200       instance = self.cfg.GetInstanceInfo(self.name)
14201       request_groups = fn(self.relocate_from + [instance.primary_node])
14202       result_groups = fn(rdict["result"] + [instance.primary_node])
14203
14204       if self.success and not set(result_groups).issubset(request_groups):
14205         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14206                                  " differ from original groups (%s)" %
14207                                  (utils.CommaJoin(result_groups),
14208                                   utils.CommaJoin(request_groups)))
14209
14210     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14211       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14212
14213     self.out_data = rdict
14214
14215   @staticmethod
14216   def _NodesToGroups(node2group, groups, nodes):
14217     """Returns a list of unique group names for a list of nodes.
14218
14219     @type node2group: dict
14220     @param node2group: Map from node name to group UUID
14221     @type groups: dict
14222     @param groups: Group information
14223     @type nodes: list
14224     @param nodes: Node names
14225
14226     """
14227     result = set()
14228
14229     for node in nodes:
14230       try:
14231         group_uuid = node2group[node]
14232       except KeyError:
14233         # Ignore unknown node
14234         pass
14235       else:
14236         try:
14237           group = groups[group_uuid]
14238         except KeyError:
14239           # Can't find group, let's use UUID
14240           group_name = group_uuid
14241         else:
14242           group_name = group["name"]
14243
14244         result.add(group_name)
14245
14246     return sorted(result)
14247
14248
14249 class LUTestAllocator(NoHooksLU):
14250   """Run allocator tests.
14251
14252   This LU runs the allocator tests
14253
14254   """
14255   def CheckPrereq(self):
14256     """Check prerequisites.
14257
14258     This checks the opcode parameters depending on the director and mode test.
14259
14260     """
14261     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14262       for attr in ["memory", "disks", "disk_template",
14263                    "os", "tags", "nics", "vcpus"]:
14264         if not hasattr(self.op, attr):
14265           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14266                                      attr, errors.ECODE_INVAL)
14267       iname = self.cfg.ExpandInstanceName(self.op.name)
14268       if iname is not None:
14269         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14270                                    iname, errors.ECODE_EXISTS)
14271       if not isinstance(self.op.nics, list):
14272         raise errors.OpPrereqError("Invalid parameter 'nics'",
14273                                    errors.ECODE_INVAL)
14274       if not isinstance(self.op.disks, list):
14275         raise errors.OpPrereqError("Invalid parameter 'disks'",
14276                                    errors.ECODE_INVAL)
14277       for row in self.op.disks:
14278         if (not isinstance(row, dict) or
14279             constants.IDISK_SIZE not in row or
14280             not isinstance(row[constants.IDISK_SIZE], int) or
14281             constants.IDISK_MODE not in row or
14282             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14283           raise errors.OpPrereqError("Invalid contents of the 'disks'"
14284                                      " parameter", errors.ECODE_INVAL)
14285       if self.op.hypervisor is None:
14286         self.op.hypervisor = self.cfg.GetHypervisorType()
14287     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14288       fname = _ExpandInstanceName(self.cfg, self.op.name)
14289       self.op.name = fname
14290       self.relocate_from = \
14291           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14292     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14293                           constants.IALLOCATOR_MODE_NODE_EVAC):
14294       if not self.op.instances:
14295         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14296       self.op.instances = _GetWantedInstances(self, self.op.instances)
14297     else:
14298       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14299                                  self.op.mode, errors.ECODE_INVAL)
14300
14301     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14302       if self.op.allocator is None:
14303         raise errors.OpPrereqError("Missing allocator name",
14304                                    errors.ECODE_INVAL)
14305     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14306       raise errors.OpPrereqError("Wrong allocator test '%s'" %
14307                                  self.op.direction, errors.ECODE_INVAL)
14308
14309   def Exec(self, feedback_fn):
14310     """Run the allocator test.
14311
14312     """
14313     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14314       ial = IAllocator(self.cfg, self.rpc,
14315                        mode=self.op.mode,
14316                        name=self.op.name,
14317                        memory=self.op.memory,
14318                        disks=self.op.disks,
14319                        disk_template=self.op.disk_template,
14320                        os=self.op.os,
14321                        tags=self.op.tags,
14322                        nics=self.op.nics,
14323                        vcpus=self.op.vcpus,
14324                        hypervisor=self.op.hypervisor,
14325                        )
14326     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14327       ial = IAllocator(self.cfg, self.rpc,
14328                        mode=self.op.mode,
14329                        name=self.op.name,
14330                        relocate_from=list(self.relocate_from),
14331                        )
14332     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14333       ial = IAllocator(self.cfg, self.rpc,
14334                        mode=self.op.mode,
14335                        instances=self.op.instances,
14336                        target_groups=self.op.target_groups)
14337     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14338       ial = IAllocator(self.cfg, self.rpc,
14339                        mode=self.op.mode,
14340                        instances=self.op.instances,
14341                        evac_mode=self.op.evac_mode)
14342     else:
14343       raise errors.ProgrammerError("Uncatched mode %s in"
14344                                    " LUTestAllocator.Exec", self.op.mode)
14345
14346     if self.op.direction == constants.IALLOCATOR_DIR_IN:
14347       result = ial.in_text
14348     else:
14349       ial.Run(self.op.allocator, validate=False)
14350       result = ial.out_text
14351     return result
14352
14353
14354 #: Query type implementations
14355 _QUERY_IMPL = {
14356   constants.QR_INSTANCE: _InstanceQuery,
14357   constants.QR_NODE: _NodeQuery,
14358   constants.QR_GROUP: _GroupQuery,
14359   constants.QR_OS: _OsQuery,
14360   }
14361
14362 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14363
14364
14365 def _GetQueryImplementation(name):
14366   """Returns the implemtnation for a query type.
14367
14368   @param name: Query type, must be one of L{constants.QR_VIA_OP}
14369
14370   """
14371   try:
14372     return _QUERY_IMPL[name]
14373   except KeyError:
14374     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14375                                errors.ECODE_INVAL)